[Spring Boot] Integrating Milvus with Spring Boot

1. What is Milvus?

Milvus, an open source high-performance vector database, exhibits powerful performance and flexibility in various application scenarios.
Processing and analyzing large-scale vector data is becoming increasingly important in many modern applications. For example, vector data is widely used in areas such as image and video search, recommendation systems, natural language processing, and bioinformatics.

2. Project background

In our company's recommendation system, we need to recommend relevant content to users based on their historical behavior and interests. To do this, both users and content are represented as vectors, and Milvus is used for similarity matching. By storing the user vectors and content vectors in Milvus and using its efficient similarity search, we can quickly find the content that best matches a user's interests and make personalized recommendations.

The vectors are generated by a Spark job and written into the database. This article covers only the Spring Boot side: integrating Milvus to serve queries. The service is consumer-facing (C-side), and its performance has been tested.

3. Maven dependency introduction

I started out with version 1.x of the SDK. Later, because 2.x added a filtering feature, I upgraded to 2.2.3. There are some differences between version 1 and version 2; version 2 is recommended.

<dependency>
    <groupId>io.milvus</groupId>
    <artifactId>milvus-sdk-java</artifactId>
    <version>2.2.3</version>
</dependency>

4. Automatic configuration

/**
 * Spring configuration that exposes the Milvus client factory and the Milvus
 * client itself as beans, using the address and port read from the
 * {@code milvus.config.*} properties.
 */
@Configuration
public class MilvusConfiguration {

    /** Milvus server address, injected from {@code milvus.config.ipAddr}. */
    @Value("${milvus.config.ipAddr}")
    private String ipAddr;

    /** Milvus server port, injected from {@code milvus.config.port}. */
    @Value("${milvus.config.port}")
    private Integer  port;

    /**
     * Registers the client factory. The bean definition names {@code init} as
     * its init method and {@code close} as its destroy method, so the
     * connection lifecycle is tied to the bean lifecycle.
     *
     * @return the factory configured with the injected address and port
     */
    @Bean(initMethod = "init", destroyMethod = "close")
    public MilvusRestClientFactory getMilvusFactory() {
        MilvusRestClientFactory factory = MilvusRestClientFactory.build(ipAddr, port);
        return factory;
    }

    /**
     * Registers the Milvus client held by the factory as a singleton bean.
     *
     * @return the client obtained from {@link #getMilvusFactory()}
     */
    @Bean
    @Scope("singleton")
    public MilvusServiceClient getMilvusClient() {
        // Goes through the factory bean method rather than building a second client.
        MilvusRestClientFactory factory = getMilvusFactory();
        return factory.getMilvusClient();
    }
}

5. Milvus client factory wrapper

/**
 * Singleton holder for one {@link MilvusServiceClient}.
 *
 * <p>Usage: call {@link #build(String, Integer)} to set the connection target,
 * {@link #init()} to create the client, {@link #getMilvusClient()} to obtain it,
 * and {@link #close()} to release it.
 *
 * <p>This class performs no synchronization of its own.
 */
public class MilvusRestClientFactory {

    private static final java.util.logging.Logger LOG =
            java.util.logging.Logger.getLogger(MilvusRestClientFactory.class.getName());

    /** The single shared instance returned by {@link #build(String, Integer)}. */
    private static final MilvusRestClientFactory INSTANCE = new MilvusRestClientFactory();

    /** Connection target; kept on the instance rather than in mutable statics. */
    private String host;

    private Integer port;

    /** Created by {@link #init()}, cleared by {@link #close()}. */
    private MilvusServiceClient milvusServiceClient;

    private MilvusRestClientFactory() {
        // Singleton: instances are only obtained through build(...).
    }

    /**
     * Records the connection target on the shared factory instance.
     *
     * @param ipAddr Milvus server address; must not be null
     * @param port   Milvus server port; must not be null
     * @return the shared factory instance
     * @throws NullPointerException if either argument is null
     */
    public static MilvusRestClientFactory build(String ipAddr, Integer  port) {
        // Fail here with a named argument instead of an anonymous unboxing NPE in init().
        INSTANCE.host = java.util.Objects.requireNonNull(ipAddr, "ipAddr");
        INSTANCE.port = java.util.Objects.requireNonNull(port, "port");
        return INSTANCE;
    }

    /**
     * Creates the Milvus client for the configured address and port.
     *
     * @throws IllegalStateException if {@link #build(String, Integer)} was not called first
     */
    public void init() {
        if (host == null || port == null) {
            throw new IllegalStateException("build(ipAddr, port) must be called before init()");
        }
        ConnectParam connectParam = ConnectParam.newBuilder()
                .withHost(host)
                .withPort(port)
                .build();
        milvusServiceClient = new MilvusServiceClient(connectParam);
    }

    /**
     * @return the client created by {@link #init()}, or {@code null} if
     *         {@code init()} has not run or {@link #close()} has already been called
     */
    public MilvusServiceClient getMilvusClient() {
        return milvusServiceClient;
    }

    /**
     * Closes the client if one exists. A failure while closing is logged and
     * not rethrown, so shutdown can continue. Calling this again is a no-op.
     */
    public void close() {
        if (milvusServiceClient == null) {
            return;
        }
        try {
            milvusServiceClient.close();
        } catch (Exception e) {
            LOG.log(java.util.logging.Level.WARNING, "Failed to close Milvus client", e);
        } finally {
            // Drop the reference so a closed client is never handed out again.
            milvusServiceClient = null;
        }
    }
}

6. Query

The results you get back depend on the data you wrote. In my case the final result is a list of Long IDs; the code below is for reference only.

Search milvus synchronously

/**
 * 同步搜索milvus
 * @param collectionName 表名
 * @param vectors 查询向量
 * @param topK 最相似的向量个数
 * @return
 */
public List<Long> search(String collectionName, List<List<Float>> vectors, Integer topK) {
    
    

    Assert.notNull(collectionName, "collectionName  is null");
    Assert.notNull(vectors, "vectors is null");
    Assert.notEmpty(vectors, "vectors is empty");
    Assert.notNull(topK, "topK is null");
    int nprobeVectorSize = vectors.get(0).size();
    String paramsInJson = "{"nprobe": " + nprobeVectorSize + "}";
    SearchParam searchParam =
            SearchParam.newBuilder().withCollectionName(collectionName)
                    .withParams(paramsInJson)
                    .withMetricType(MetricType.IP)
                    .withVectors(vectors)
                    .withVectorFieldName("embedding")
                    .withTopK(topK)
                    .build();

    R<SearchResults> searchResultsR = milvusServiceClient.search(searchParam);
    SearchResults searchResultsRData = searchResultsR.getData();
    List<Long> topksList = searchResultsRData.getResults().getIds().getIntId().getDataList();
    return topksList;
}

Search milvus synchronously, add filter search

/**
 * 同步搜索milvus,增加过滤条件搜索
 *
 * @param collectionName 表名
 * @param vectors 查询向量
 * @param topK 最相似的向量个数
 * @param exp 过滤条件:status=1
 * @return
 */
public List<Long> search(String collectionName, List<List<Float>> vectors, Integer topK, String exp) {
    
    
    Assert.notNull(collectionName, "collectionName  is null");
    Assert.notNull(vectors, "vectors is null");
    Assert.notEmpty(vectors, "vectors is empty");
    Assert.notNull(topK, "topK is null");
    Assert.notNull(exp, "exp is null");
    int nprobeVectorSize = vectors.get(0).size();
    String paramsInJson = "{"nprobe": " + nprobeVectorSize + "}";
    SearchParam searchParam =
            SearchParam.newBuilder().withCollectionName(collectionName)
                    .withParams(paramsInJson)
                    .withMetricType(MetricType.IP)
                    .withVectors(vectors)
                    .withExpr(exp)
                    .withVectorFieldName("embedding")
                    .withTopK(topK)
                    .build();

    R<SearchResults> searchResultsR = milvusServiceClient.search(searchParam);
    SearchResults searchResultsRData = searchResultsR.getData();
    List<Long> topksList = searchResultsRData.getResults().getIds().getIntId().getDataList();
    return topksList;
}

Search Milvus asynchronously: for scenarios where results are not needed in real time

/**
 * 异步搜索milvus
 *
 * @param collectionName 表名
 * @param vectors 查询向量
 * @param partitionList 最相似的向量个数
 * @param topK
 * @return
 */
public List<Long> searchAsync(String collectionName, List<List<Float>> vectors,
                              List<String> partitionList, Integer topK) throws ExecutionException, InterruptedException {
    
    

    Assert.notNull(collectionName, "collectionName  is null");
    Assert.notNull(vectors, "vectors is null");
    Assert.notEmpty(vectors, "vectors is empty");
    Assert.notNull(partitionList, "partitionList is null");
    Assert.notEmpty(partitionList, "partitionList is empty");
    Assert.notNull(topK, "topK is null");
    int nprobeVectorSize = vectors.get(0).size();
    String paramsInJson = "{"nprobe": " + nprobeVectorSize + "}";
    SearchParam searchParam =
            SearchParam.newBuilder().withCollectionName(collectionName)
                    .withParams(paramsInJson)
                    .withVectors(vectors)
                    .withTopK(topK)
                    .withPartitionNames(partitionList)
                    .build();
    ListenableFuture<R<SearchResults>> listenableFuture = milvusServiceClient.searchAsync(searchParam);

    List<Long> resultIdsList = listenableFuture.get().getData().getResults().getTopksList();

    return resultIdsList;
}

Get a collection of partitions

/**
 * Lists the partition names of a collection.
 *
 * @param collectionName name of the collection; must not be null
 * @return a new list holding the partition names reported by Milvus
 */
public List<String> getPartitionsList(String collectionName) {
    Assert.notNull(collectionName, "collectionName  is null");

    ShowPartitionsParam showPartitionsParam = ShowPartitionsParam.newBuilder()
            .withCollectionName(collectionName)
            .build();

    // The response's name list already yields UTF-8 decoded strings, so copy it
    // directly instead of converting through ByteString one element at a time.
    List<String> reportedNames = milvusServiceClient.showPartitions(showPartitionsParam)
            .getData()
            .getPartitionNamesList();
    return Lists.newLinkedList(reportedNames);
}

7. YAML configuration

milvus:
  config:
    ipAddr: xxx.xxx.xxx.xxx
    port: 19531

おすすめ

転載: blog.csdn.net/u011397981/article/details/131588855