update: weaviate_store format (#223)

Reformat weaviate_store.py and add a Documents link to the English and Chinese README files.
This commit is contained in:
magic.chen 2023-06-15 10:45:57 +08:00 committed by GitHub
commit 02bea46c9a
3 changed files with 33 additions and 21 deletions

View File

@ -10,7 +10,8 @@
</a> </a>
</p> </p>
[**简体中文**](README.zh.md)|[**Discord**](https://discord.gg/xfNDzZ9t) [**简体中文**](README.zh.md) |[**Discord**](https://discord.gg/xfNDzZ9t) |[**Documents**](https://db-gpt.readthedocs.io/en/latest/)
</div> </div>
## What is DB-GPT? ## What is DB-GPT?

View File

@ -9,7 +9,7 @@
</a> </a>
</p> </p>
[**English**](README.md)|[**Discord**](https://discord.gg/ea6BnZkY) [**English**](README.md)|[**Discord**](https://discord.gg/ea6BnZkY) |[**Documents**](https://db-gpt.readthedocs.io/projects/db-gpt-docs-zh-cn/zh_CN/latest/)
</div> </div>
## DB-GPT 是什么? ## DB-GPT 是什么?

View File

@ -26,9 +26,7 @@ class WeaviateStore(VectorStoreBase):
KNOWLEDGE_UPLOAD_ROOT_PATH, ctx["vector_store_name"] + ".vectordb" KNOWLEDGE_UPLOAD_ROOT_PATH, ctx["vector_store_name"] + ".vectordb"
) )
self.vector_store_client = weaviate.Client( self.vector_store_client = weaviate.Client(self.weaviate_url)
self.weaviate_url
)
def similar_search(self, text: str, topk: int) -> None: def similar_search(self, text: str, topk: int) -> None:
"""Perform similar search in Weaviate""" """Perform similar search in Weaviate"""
@ -37,8 +35,13 @@ class WeaviateStore(VectorStoreBase):
"concepts": [text], "concepts": [text],
"distance": 0.75, # prior to v1.14 use "certainty" instead of "distance" "distance": 0.75, # prior to v1.14 use "certainty" instead of "distance"
} }
response = (self.vector_store_client.query.get("Document", ["metadata", "text"]).with_near_vector( response = (
{"vector": nearText}).with_limit(topk).with_additional(["distance"]).do()) self.vector_store_client.query.get("Document", ["metadata", "text"])
.with_near_vector({"vector": nearText})
.with_limit(topk)
.with_additional(["distance"])
.do()
)
return json.dumps(response, indent=2) return json.dumps(response, indent=2)
@ -61,25 +64,37 @@ class WeaviateStore(VectorStoreBase):
{ {
"class": "Document", "class": "Document",
"description": "A document with metadata and text", "description": "A document with metadata and text",
"moduleConfig": {"text2vec-transformers": {"poolingStrategy": "masked_mean", "vectorizeClassName": False} "moduleConfig": {
}, "text2vec-transformers": {
"poolingStrategy": "masked_mean",
"vectorizeClassName": False,
}
},
"properties": [ "properties": [
{ {
"dataType": ["text"], "dataType": ["text"],
"moduleConfig": { "moduleConfig": {
"text2vec-transformers": {"skip": False, "vectorizePropertyName": False}}, "text2vec-transformers": {
"skip": False,
"vectorizePropertyName": False,
}
},
"description": "Metadata of the document", "description": "Metadata of the document",
"name": "metadata" "name": "metadata",
}, },
{ {
"dataType": ["text"], "dataType": ["text"],
"moduleConfig": { "moduleConfig": {
"text2vec-transformers": {"skip": False, "vectorizePropertyName": False}}, "text2vec-transformers": {
"skip": False,
"vectorizePropertyName": False,
}
},
"description": "Text content of the document", "description": "Text content of the document",
"name": "text" "name": "text",
} },
], ],
"vectorizer": "text2vec-transformers" "vectorizer": "text2vec-transformers",
} }
] ]
} }
@ -99,10 +114,6 @@ class WeaviateStore(VectorStoreBase):
# Batch import all documents # Batch import all documents
for i in range(len(texts)): for i in range(len(texts)):
properties = { properties = {"metadata": metadatas[i], "text": texts[i]}
"metadata": metadatas[i],
"text": texts[i]
}
self.vector_store_client.batch.add_data_object( self.vector_store_client.batch.add_data_object(properties, "Document")
properties, "Document")