Milvus向量数据库

Milvus基本使用

官网
参考中文文档

Docker安装

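下载yaml文件(单机版为 milvus-standalone-docker-compose.yml):https://github.com/milvus-io/milvus/releases ,保存为 docker-compose.yml 后在同一目录执行 docker compose up -d 启动。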

Java pom

1
2
3
4
5
6
7
8
9
10
<!-- Milvus Java SDK (client library used by all snippets below). -->
<!-- NOTE(review): the database calls used later (createDatabase, withDatabaseName) may require a newer SDK release than 2.2.8; verify against the SDK release notes. -->
<dependency>
<groupId>io.milvus</groupId>
<artifactId>milvus-sdk-java</artifactId>
<version>2.2.8</version>
</dependency>
<!-- Google Guava 31.1-jre; presumably declared to satisfy the SDK's Guava requirement (TODO confirm). -->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>31.1-jre</version>
</dependency>

建立连接

1
2
3
4
5
6
7
8
9
10
11
12
// Shared Milvus client, created once when the enclosing class is initialized.
public static MilvusServiceClient milvusClient;

static {
    ConnectParam connectParam = ConnectParam.newBuilder()
            .withHost("192.168.137.123")
            .withPort(19530)
            // Target database; when omitted, the database named "default" is used.
            .withDatabaseName("gpt")
            .build();
    milvusClient = new MilvusServiceClient(connectParam);
}

Database数据库

创建

1
2
3
4
5
milvusClient.createDatabase(
CreateDatabaseParam.newBuilder()
.withDatabaseName("questionDatabase")
.build()
);

删除

1
2
3
4
milvusClient.dropDatabase(
DropDatabaseParam.newBuilder()
.withDatabaseName("questionDatabase")
.build());

查询所有数据库

1
// List all databases on the connected server; the response is discarded here, capture it to read the names.
milvusClient.listDatabases();

Collections数据集

创建

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
// Primary-key field: every collection needs exactly one unique key.
FieldType idField = FieldType.newBuilder()
        .withName("id")
        .withDataType(DataType.Int64)
        .withPrimaryKey(true)
        .withAutoID(true)
        .build();

// Text field holding the question; VarChar fields must declare a maximum length.
FieldType questionField = FieldType.newBuilder()
        .withName("question")
        .withDataType(DataType.VarChar)
        .withMaxLength(1000)
        .build();

// Vector field holding the question embedding; vector fields must declare a dimension.
FieldType questionVectorField = FieldType.newBuilder()
        .withName("question_vector")
        .withDataType(DataType.FloatVector)
        .withDimension(1536)
        .build();

// Assemble the collection definition from the three fields above.
CreateCollectionParam createCollectionParam = CreateCollectionParam.newBuilder()
        .withCollectionName("questionCollection")
        // Number of shards for the new collection.
        .withShardsNum(2)
        // Database that owns the collection (optional).
        .withDatabaseName("questionDatabase")
        .addFieldType(idField)
        .addFieldType(questionField)
        .addFieldType(questionVectorField)
        .build();

// Create the collection on the server.
milvusClient.createCollection(createCollectionParam);

判断是否存在

1
2
3
4
5
// Ask the server whether "questionCollection" exists.
HasCollectionParam hasCollectionParam = HasCollectionParam.newBuilder()
        .withCollectionName("questionCollection")
        .build();
R<Boolean> booleanR = milvusClient.hasCollection(hasCollectionParam);
// getData() may be null when the call failed, so compare instead of auto-unboxing
// (unboxing a null Boolean would throw a NullPointerException).
if (Boolean.TRUE.equals(booleanR.getData())) {
    // The collection exists.
    return true;
}

删除

1
2
3
4
5
6
7
8
// Drop "questionCollection" and report success based on the response status code.
DropCollectionParam dropCollectionParam = DropCollectionParam.newBuilder()
        .withCollectionName("questionCollection")
        .build();
R<RpcStatus> dropResponse = milvusClient.dropCollection(dropCollectionParam);
if (dropResponse.getStatus() == R.Status.Success.getCode()) {
    return true;
}

加载

所有的查询开始前,必须加载集合或者加载分区

1
2
3
4
5
milvusClient.loadCollection(
LoadCollectionParam.newBuilder()
.withCollectionName("questionCollection")
.build()
);

释放

查询完成后释放

1
2
3
4
milvusClient.releaseCollection(
ReleaseCollectionParam.newBuilder()
.withCollectionName("questionCollection")
.build());

Indexes索引

创建

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
milvusClient.createIndex(
CreateIndexParam.newBuilder()
.withCollectionName("questionCollection")
// 索引名字
.withIndexName("question_vector_index")
// 索引字段
.withFieldName("question_vector")
// 索引类型 浮点型向量
.withIndexType(IndexType.IVF_FLAT)
.withMetricType(MetricType.IP)
// 默认配置不会对小于1024的字段进行索引
.withExtraParam("{\"nlist\":1024}")
.withSyncMode(Boolean.FALSE)
.build()
);

删除索引

1
2
3
4
5
6
7
8
9
10
// 必须先释放才能进行删除
milvusClient.releaseCollection(
ReleaseCollectionParam.newBuilder()
.withCollectionName("questionCollection")
.build());
milvusClient.dropIndex(
DropIndexParam.newBuilder()
.withCollectionName("questionCollection")
.withIndexName("question_vector_index")
.build());

Partition分区

创建

1
2
3
4
5
6
// Create partition "partition_one" inside "questionCollection".
// (The original had doubled quotes around the partition name, which does not compile.)
milvusClient.createPartition(
        CreatePartitionParam.newBuilder()
                .withCollectionName("questionCollection")
                .withPartitionName("partition_one")
                .build()
);

判断是否存在

1
// Check whether partition "partition_one" exists in "questionCollection".
// The name is passed as a string literal matching the one used when the partition is created.
milvusClient.hasPartition(
        HasPartitionParam.newBuilder()
                .withCollectionName("questionCollection")
                .withPartitionName("partition_one")
                .build());

删除

1
2
3
4
5
6
7
8
9
10
// 必须先释放才能进行删除
milvusClient.releaseCollection(
ReleaseCollectionParam.newBuilder()
.withCollectionName("questionCollection")
.build());
milvusClient.dropPartition(
DropPartitionParam.newBuilder()
.withCollectionName("questionCollection")
.withPartitionName("partition_one")
.build());

加载

1
2
3
4
5
milvusClient.loadPartitions(
LoadPartitionsParam.newBuilder()
.withCollectionName("questionCollection")
.withPartitionNames(Collections.singletonList("partition_one"))
.build());

释放

1
2
3
4
5
milvusClient.releasePartitions(
ReleasePartitionsParam.newBuilder()
.withCollectionName("questionCollection")
.withPartitionNames(Collections.singletonList("partition_one"))
.build());

Insert,Delete增删

Insert

1
2
3
4
5
6
7
8
9
10
11
// Insert one row (id, question text, question embedding) into partition "partition_one".
List<InsertParam.Field> fields = new ArrayList<>();

// NOTE(review): the collection-creation snippet declares "id" with withAutoID(true);
// supplying an explicit id here conflicts with that setting. Either disable auto-ID
// in the schema or drop this field. Confirm which one is intended.
Long snowflakeNextId = IdUtil.getSnowflakeNextId();
fields.add(new InsertParam.Field("id", Collections.singletonList(snowflakeNextId)));
fields.add(new InsertParam.Field("question", Collections.singletonList("问题1")));

// A float vector is a List<Float>; Java has no [a, b] list literal.
// Placeholder values only: a real vector must have as many elements as the
// dimension declared for "question_vector".
List<Float> questionVector = new ArrayList<>();
questionVector.add(0.222f);
questionVector.add(0.1111f);
fields.add(new InsertParam.Field("question_vector", Collections.singletonList(questionVector)));

InsertParam insertParam = InsertParam.newBuilder()
        .withCollectionName("questionCollection")
        .withFields(fields)
        // Must match the name used when the partition was created.
        .withPartitionName("partition_one")
        .build();
milvusClient.insert(insertParam);

Delete

1
2
3
4
5
6
7
// Delete the rows whose primary key is 1 or 2 from partition "partition_one".
milvusClient.delete(
        DeleteParam.newBuilder()
                .withCollectionName("questionCollection")
                .withExpr("id in [1,2]")
                // Must match the name used when the partition was created.
                .withPartitionName("partition_one")
                .build()
);

Search搜索

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
// Vector similarity search: load the collection, search it, read the hits, then release it.
// One name is used for load, search and release so the three calls cannot drift apart.
String collectionName = "gptSystemMessage";

// A collection (or partition) must be loaded before it can be searched.
milvusClient.loadCollection(
        LoadCollectionParam.newBuilder()
                .withCollectionName(collectionName)
                .build()
);
try {
    // Query vector as a List<Float>; Java has no [a, b] list literal.
    // Placeholder values only: a real vector must have as many elements as the
    // dimension of the searched vector field.
    List<Float> queryVector = new ArrayList<>();
    queryVector.add(0.2323f);
    queryVector.add(0.333f);

    SearchParam searchParam = SearchParam.newBuilder()
            .withCollectionName(collectionName)
            .withMetricType(MetricType.IP)
            // Number of results to return.
            .withTopK(1)
            // Fields to return with each hit.
            .withOutFields(Collections.singletonList("question"))
            // Vectors to search for.
            .withVectors(Collections.singletonList(queryVector))
            // Vector field to match against.
            .withVectorFieldName("question_vector")
            // Search parameters.
            .withParams("{\"nprobe\":512}")
            // Filter expression restricting the candidates.
            .withExpr("id < 1000")
            .build();

    R<SearchResults> respSearch = milvusClient.search(searchParam);
    // A failed call carries no data; fail with the cause instead of a NullPointerException.
    if (respSearch.getData() == null) {
        throw new IllegalStateException("Milvus search failed", respSearch.getException());
    }

    SearchResultsWrapper wrapperSearch = new SearchResultsWrapper(respSearch.getData().getResults());
    if (wrapperSearch.getRowRecords().isEmpty()) {
        return "没有数据";
    }

    // Hits for the first (and only) query vector.
    List<SearchResultsWrapper.IDScore> idScore = wrapperSearch.getIDScore(0);
    for (SearchResultsWrapper.IDScore score : idScore) {
        Map<String, Object> fieldValues = score.getFieldValues();
        // Content of the requested output field.
        Object question = fieldValues.get("question");
        // Similarity score of this hit.
        float score1 = score.getScore();
    }
} finally {
    // Release the collection on every exit path, including the early return above.
    milvusClient.releaseCollection(
            ReleaseCollectionParam.newBuilder()
                    .withCollectionName(collectionName)
                    .build());
}