mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-09 15:03:21 +00:00
add clustered vearch in langchain (#10771)
--------- Co-authored-by: zhanghexian1 <zhanghexian1@jd.com> Co-authored-by: Bagatur <baskaryan@gmail.com> Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
@@ -73,6 +73,7 @@ from langchain.vectorstores.tigris import Tigris
|
||||
from langchain.vectorstores.typesense import Typesense
|
||||
from langchain.vectorstores.usearch import USearch
|
||||
from langchain.vectorstores.vald import Vald
|
||||
from langchain.vectorstores.vearch import Vearch
|
||||
from langchain.vectorstores.vectara import Vectara
|
||||
from langchain.vectorstores.weaviate import Weaviate
|
||||
from langchain.vectorstores.zep import ZepVectorStore
|
||||
@@ -137,6 +138,7 @@ __all__ = [
|
||||
"Typesense",
|
||||
"USearch",
|
||||
"Vald",
|
||||
"Vearch",
|
||||
"Vectara",
|
||||
"VectorStore",
|
||||
"Weaviate",
|
||||
|
@@ -13,44 +13,68 @@ from langchain.vectorstores.base import VectorStore
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import vearch
|
||||
|
||||
DEFAULT_TOPN = 4
|
||||
|
||||
|
||||
class VearchDb(VectorStore):
|
||||
class Vearch(VectorStore):
|
||||
_DEFAULT_TABLE_NAME = "langchain_vearch"
|
||||
_DEFAULT_CLUSTER_DB_NAME = "cluster_client_db"
|
||||
_DEFAULT_VERSION = 1
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
embedding_function: Embeddings,
|
||||
path_or_url: Optional[str] = None,
|
||||
table_name: str = _DEFAULT_TABLE_NAME,
|
||||
metadata_path: Optional[str] = None,
|
||||
db_name: str = _DEFAULT_CLUSTER_DB_NAME,
|
||||
flag: int = _DEFAULT_VERSION,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Initialize vearch vector store"""
|
||||
"""Initialize vearch vector store
|
||||
flag 1 for cluster,0 for standalone
|
||||
"""
|
||||
try:
|
||||
import vearch
|
||||
if flag:
|
||||
import vearch_cluster
|
||||
else:
|
||||
import vearch
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
"Could not import vearch python package. "
|
||||
"Please install it with `pip install vearch`."
|
||||
"Could not import suitable python package. "
|
||||
"Please install it with `pip install vearch or vearch_cluster`."
|
||||
)
|
||||
|
||||
if metadata_path is None:
|
||||
metadata_path = os.getcwd().replace("\\", "/")
|
||||
if not os.path.isdir(metadata_path):
|
||||
os.makedirs(metadata_path)
|
||||
log_path = os.path.join(metadata_path, "log")
|
||||
if not os.path.isdir(log_path):
|
||||
os.makedirs(log_path)
|
||||
self.vearch_engine = vearch.Engine(metadata_path, log_path)
|
||||
if flag:
|
||||
if path_or_url is None:
|
||||
raise ValueError("Please input url of cluster")
|
||||
if not db_name:
|
||||
db_name = self._DEFAULT_CLUSTER_DB_NAME
|
||||
db_name += "_"
|
||||
db_name += str(uuid.uuid4()).split("-")[-1]
|
||||
self.using_db_name = db_name
|
||||
self.url = path_or_url
|
||||
self.vearch = vearch_cluster.VearchCluster(path_or_url)
|
||||
|
||||
else:
|
||||
if path_or_url is None:
|
||||
metadata_path = os.getcwd().replace("\\", "/")
|
||||
else:
|
||||
metadata_path = path_or_url
|
||||
if not os.path.isdir(metadata_path):
|
||||
os.makedirs(metadata_path)
|
||||
log_path = os.path.join(metadata_path, "log")
|
||||
if not os.path.isdir(log_path):
|
||||
os.makedirs(log_path)
|
||||
self.vearch = vearch.Engine(metadata_path, log_path)
|
||||
self.using_metapath = metadata_path
|
||||
if not table_name:
|
||||
table_name = self._DEFAULT_TABLE_NAME
|
||||
table_name += "_"
|
||||
table_name += str(uuid.uuid4()).split("-")[-1]
|
||||
self.using_table_name = table_name
|
||||
self.using_metapath = metadata_path
|
||||
self.embedding_func = embedding_function
|
||||
self.flag = flag
|
||||
|
||||
@property
|
||||
def embeddings(self) -> Optional[Embeddings]:
|
||||
@@ -58,13 +82,15 @@ class VearchDb(VectorStore):
|
||||
|
||||
@classmethod
|
||||
def from_documents(
|
||||
cls: Type[VearchDb],
|
||||
cls: Type[Vearch],
|
||||
documents: List[Document],
|
||||
embedding: Embeddings,
|
||||
table_name: str = "langchain_vearch",
|
||||
metadata_path: Optional[str] = None,
|
||||
path_or_url: Optional[str] = None,
|
||||
table_name: str = _DEFAULT_TABLE_NAME,
|
||||
db_name: str = _DEFAULT_CLUSTER_DB_NAME,
|
||||
flag: int = _DEFAULT_VERSION,
|
||||
**kwargs: Any,
|
||||
) -> VearchDb:
|
||||
) -> Vearch:
|
||||
"""Return Vearch VectorStore"""
|
||||
|
||||
texts = [d.page_content for d in documents]
|
||||
@@ -74,27 +100,34 @@ class VearchDb(VectorStore):
|
||||
texts=texts,
|
||||
embedding=embedding,
|
||||
metadatas=metadatas,
|
||||
path_or_url=path_or_url,
|
||||
table_name=table_name,
|
||||
metadata_path=metadata_path,
|
||||
db_name=db_name,
|
||||
flag=flag,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_texts(
|
||||
cls: Type[VearchDb],
|
||||
cls: Type[Vearch],
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
path_or_url: Optional[str] = None,
|
||||
table_name: str = _DEFAULT_TABLE_NAME,
|
||||
metadata_path: Optional[str] = None,
|
||||
db_name: str = _DEFAULT_CLUSTER_DB_NAME,
|
||||
flag: int = _DEFAULT_VERSION,
|
||||
**kwargs: Any,
|
||||
) -> VearchDb:
|
||||
) -> Vearch:
|
||||
"""Return Vearch VectorStore"""
|
||||
|
||||
vearch_db = cls(
|
||||
embedding_function=embedding,
|
||||
embedding=embedding,
|
||||
path_or_url=path_or_url,
|
||||
db_name=db_name,
|
||||
table_name=table_name,
|
||||
metadata_path=metadata_path,
|
||||
flag=flag,
|
||||
)
|
||||
vearch_db.add_texts(texts=texts, metadatas=metadatas)
|
||||
return vearch_db
|
||||
@@ -102,19 +135,20 @@ class VearchDb(VectorStore):
|
||||
def _create_table(
|
||||
self,
|
||||
dim: int = 1024,
|
||||
filed_list: List[dict] = [
|
||||
{"filed": "text", "type": "str"},
|
||||
{"filed": "metadata", "type": "str"},
|
||||
field_list: List[dict] = [
|
||||
{"field": "text", "type": "str"},
|
||||
{"field": "metadata", "type": "str"},
|
||||
],
|
||||
) -> int:
|
||||
"""
|
||||
Create VectorStore Table
|
||||
Args:
|
||||
dim:dimension of vector
|
||||
fileds_list: the filed you want to store
|
||||
fields_list: the field you want to store
|
||||
Return:
|
||||
code,0 for success,1 for failed
|
||||
"""
|
||||
|
||||
type_dict = {"int": vearch.dataType.INT, "str": vearch.dataType.STRING}
|
||||
engine_info = {
|
||||
"index_size": 10000,
|
||||
@@ -122,8 +156,8 @@ class VearchDb(VectorStore):
|
||||
"retrieval_param": {"ncentroids": 2048, "nsubvector": 32},
|
||||
}
|
||||
fields = [
|
||||
vearch.GammaFieldInfo(fi["filed"], type_dict[fi["type"]])
|
||||
for fi in filed_list
|
||||
vearch.GammaFieldInfo(fi["field"], type_dict[fi["type"]])
|
||||
for fi in field_list
|
||||
]
|
||||
vector_field = vearch.GammaVectorInfo(
|
||||
name="text_embedding",
|
||||
@@ -135,7 +169,7 @@ class VearchDb(VectorStore):
|
||||
store_param={"cache_size": 10000},
|
||||
has_source=False,
|
||||
)
|
||||
response_code = self.vearch_engine.create_table(
|
||||
response_code = self.vearch.create_table(
|
||||
engine_info,
|
||||
name=self.using_table_name,
|
||||
fields=fields,
|
||||
@@ -143,6 +177,48 @@ class VearchDb(VectorStore):
|
||||
)
|
||||
return response_code
|
||||
|
||||
def _create_space(
|
||||
self,
|
||||
dim: int = 1024,
|
||||
) -> int:
|
||||
"""
|
||||
Create VectorStore space
|
||||
Args:
|
||||
dim:dimension of vector
|
||||
Return:
|
||||
code,0 failed for ,1 for success
|
||||
"""
|
||||
space_config = {
|
||||
"name": self.using_table_name,
|
||||
"partition_num": 1,
|
||||
"replica_num": 1,
|
||||
"engine": {
|
||||
"name": "gamma",
|
||||
"index_size": 1,
|
||||
"retrieval_type": "FLAT",
|
||||
"retrieval_param": {
|
||||
"metric_type": "L2",
|
||||
},
|
||||
},
|
||||
"properties": {
|
||||
"text": {
|
||||
"type": "string",
|
||||
},
|
||||
"metadata": {
|
||||
"type": "string",
|
||||
},
|
||||
"text_embedding": {
|
||||
"type": "vector",
|
||||
"index": True,
|
||||
"dimension": dim,
|
||||
"store_type": "MemoryOnly",
|
||||
},
|
||||
},
|
||||
}
|
||||
response_code = self.vearch.create_space(self.using_db_name, space_config)
|
||||
|
||||
return response_code
|
||||
|
||||
def add_texts(
|
||||
self,
|
||||
texts: Iterable[str],
|
||||
@@ -156,64 +232,104 @@ class VearchDb(VectorStore):
|
||||
embeddings = None
|
||||
if self.embedding_func is not None:
|
||||
embeddings = self.embedding_func.embed_documents(list(texts))
|
||||
table_path = os.path.join(
|
||||
self.using_metapath, self.using_table_name + ".schema"
|
||||
)
|
||||
if not os.path.exists(table_path):
|
||||
if embeddings is None:
|
||||
raise ValueError("embeddings is None")
|
||||
dim = len(embeddings[0])
|
||||
response_code = self._create_table(dim)
|
||||
if response_code:
|
||||
raise ValueError("create table failed!!!")
|
||||
if embeddings is not None and metadatas is not None:
|
||||
doc_items = []
|
||||
for text, metadata, embed in zip(texts, metadatas, embeddings):
|
||||
profiles: dict[str, Any] = {}
|
||||
profiles["text"] = text
|
||||
profiles["metadata"] = metadata["source"]
|
||||
profiles["text_embedding"] = embed
|
||||
doc_items.append(profiles)
|
||||
if embeddings is None:
|
||||
raise ValueError("embeddings is None")
|
||||
if self.flag:
|
||||
dbs_list = self.vearch.list_dbs()
|
||||
if self.using_db_name not in dbs_list:
|
||||
create_db_code = self.vearch.create_db(self.using_db_name)
|
||||
if not create_db_code:
|
||||
raise ValueError("create db failed!!!")
|
||||
space_list = self.vearch.list_spaces(self.using_db_name)
|
||||
if self.using_table_name not in space_list:
|
||||
create_space_code = self._create_space(len(embeddings[0]))
|
||||
if not create_space_code:
|
||||
raise ValueError("create space failed!!!")
|
||||
docid = []
|
||||
if embeddings is not None and metadatas is not None:
|
||||
for text, metadata, embed in zip(texts, metadatas, embeddings):
|
||||
profiles: dict[str, Any] = {}
|
||||
profiles["text"] = text
|
||||
profiles["metadata"] = metadata["source"]
|
||||
embed_np = np.array(embed)
|
||||
profiles["text_embedding"] = {
|
||||
"feature": (embed_np / np.linalg.norm(embed_np)).tolist()
|
||||
}
|
||||
insert_res = self.vearch.insert_one(
|
||||
self.using_db_name, self.using_table_name, profiles
|
||||
)
|
||||
if insert_res["status"] == 200:
|
||||
docid.append(insert_res["_id"])
|
||||
continue
|
||||
else:
|
||||
retry_insert = self.vearch.insert_one(
|
||||
self.using_db_name, self.using_table_name, profiles
|
||||
)
|
||||
docid.append(retry_insert["_id"])
|
||||
continue
|
||||
else:
|
||||
table_path = os.path.join(
|
||||
self.using_metapath, self.using_table_name + ".schema"
|
||||
)
|
||||
if not os.path.exists(table_path):
|
||||
dim = len(embeddings[0])
|
||||
response_code = self._create_table(dim)
|
||||
if response_code:
|
||||
raise ValueError("create table failed!!!")
|
||||
if embeddings is not None and metadatas is not None:
|
||||
doc_items = []
|
||||
for text, metadata, embed in zip(texts, metadatas, embeddings):
|
||||
profiles_v: dict[str, Any] = {}
|
||||
profiles_v["text"] = text
|
||||
profiles_v["metadata"] = metadata["source"]
|
||||
embed_np = np.array(embed)
|
||||
profiles_v["text_embedding"] = embed_np / np.linalg.norm(embed_np)
|
||||
doc_items.append(profiles_v)
|
||||
|
||||
docid = self.vearch_engine.add(doc_items)
|
||||
t_time = 0
|
||||
while len(docid) != len(embeddings):
|
||||
time.sleep(0.5)
|
||||
if t_time > 6:
|
||||
break
|
||||
t_time += 1
|
||||
self.vearch_engine.dump()
|
||||
docid = self.vearch.add(doc_items)
|
||||
t_time = 0
|
||||
while len(docid) != len(embeddings):
|
||||
time.sleep(0.5)
|
||||
if t_time > 6:
|
||||
break
|
||||
t_time += 1
|
||||
self.vearch.dump()
|
||||
return docid
|
||||
|
||||
def _load(self) -> None:
|
||||
"""
|
||||
load vearch engine
|
||||
load vearch engine for standalone vearch
|
||||
"""
|
||||
self.vearch_engine.load()
|
||||
self.vearch.load()
|
||||
|
||||
@classmethod
|
||||
def load_local(
|
||||
cls,
|
||||
embedding: Embeddings,
|
||||
path_or_url: Optional[str] = None,
|
||||
table_name: str = _DEFAULT_TABLE_NAME,
|
||||
metadata_path: Optional[str] = None,
|
||||
db_name: str = _DEFAULT_CLUSTER_DB_NAME,
|
||||
flag: int = _DEFAULT_VERSION,
|
||||
**kwargs: Any,
|
||||
) -> VearchDb:
|
||||
"""Load the local specified table.
|
||||
) -> Vearch:
|
||||
"""Load the local specified table of standalone vearch.
|
||||
Returns:
|
||||
Success or failure of loading the local specified table
|
||||
"""
|
||||
if not metadata_path:
|
||||
if not path_or_url:
|
||||
raise ValueError("No metadata path!!!")
|
||||
if not table_name:
|
||||
raise ValueError("No table name!!!")
|
||||
table_path = os.path.join(metadata_path, table_name + ".schema")
|
||||
table_path = os.path.join(path_or_url, table_name + ".schema")
|
||||
if not os.path.exists(table_path):
|
||||
raise ValueError("vearch vectorbase table not exist!!!")
|
||||
|
||||
vearch_db = cls(
|
||||
embedding_function=embedding,
|
||||
path_or_url=path_or_url,
|
||||
table_name=table_name,
|
||||
metadata_path=metadata_path,
|
||||
db_name=db_name,
|
||||
flag=flag,
|
||||
)
|
||||
vearch_db._load()
|
||||
return vearch_db
|
||||
@@ -228,8 +344,6 @@ class VearchDb(VectorStore):
|
||||
Return docs most similar to query.
|
||||
|
||||
"""
|
||||
if self.vearch_engine is None:
|
||||
raise ValueError("Vearch engine is None!!!")
|
||||
if self.embedding_func is None:
|
||||
raise ValueError("embedding_func is None!!!")
|
||||
embeddings = self.embedding_func.embed_query(query)
|
||||
@@ -243,7 +357,6 @@ class VearchDb(VectorStore):
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
"""The most k similar documents and scores of the specified query.
|
||||
|
||||
Args:
|
||||
embeddings: embedding vector of the query.
|
||||
k: The k most similar documents to the text query.
|
||||
@@ -252,23 +365,45 @@ class VearchDb(VectorStore):
|
||||
The k most similar documents to the specified text query.
|
||||
0 is dissimilar, 1 is the most similar.
|
||||
"""
|
||||
query_data = {
|
||||
"vector": [
|
||||
{
|
||||
"field": "text_embedding",
|
||||
"feature": np.array(embedding),
|
||||
}
|
||||
],
|
||||
"fields": [],
|
||||
"is_brute_search": 1,
|
||||
"retrieval_param": {"metric_type": "InnerProduct", "nprobe": 20},
|
||||
"topn": k,
|
||||
}
|
||||
query_result = self.vearch_engine.search(query_data)
|
||||
embed = np.array(embedding)
|
||||
if self.flag:
|
||||
query_data = {
|
||||
"query": {
|
||||
"sum": [
|
||||
{
|
||||
"field": "text_embedding",
|
||||
"feature": (embed / np.linalg.norm(embed)).tolist(),
|
||||
}
|
||||
],
|
||||
},
|
||||
"size": k,
|
||||
"fields": ["text", "metadata"],
|
||||
}
|
||||
query_result = self.vearch.search(
|
||||
self.using_db_name, self.using_table_name, query_data
|
||||
)
|
||||
res = query_result["hits"]["hits"]
|
||||
else:
|
||||
query_data = {
|
||||
"vector": [
|
||||
{
|
||||
"field": "text_embedding",
|
||||
"feature": embed / np.linalg.norm(embed),
|
||||
}
|
||||
],
|
||||
"fields": [],
|
||||
"is_brute_search": 1,
|
||||
"retrieval_param": {"metric_type": "InnerProduct", "nprobe": 20},
|
||||
"topn": k,
|
||||
}
|
||||
query_result = self.vearch.search(query_data)
|
||||
res = query_result[0]["result_items"]
|
||||
docs = []
|
||||
for item in query_result[0]["result_items"]:
|
||||
for item in res:
|
||||
content = ""
|
||||
meta_data = {}
|
||||
if self.flag:
|
||||
item = item["_source"]
|
||||
for item_key in item:
|
||||
if item_key == "text":
|
||||
content = item[item_key]
|
||||
@@ -286,7 +421,6 @@ class VearchDb(VectorStore):
|
||||
**kwargs: Any,
|
||||
) -> List[Tuple[Document, float]]:
|
||||
"""The most k similar documents and scores of the specified query.
|
||||
|
||||
Args:
|
||||
embeddings: embedding vector of the query.
|
||||
k: The k most similar documents to the text query.
|
||||
@@ -298,23 +432,46 @@ class VearchDb(VectorStore):
|
||||
if self.embedding_func is None:
|
||||
raise ValueError("embedding_func is None!!!")
|
||||
embeddings = self.embedding_func.embed_query(query)
|
||||
query_data = {
|
||||
"vector": [
|
||||
{
|
||||
"field": "text_embedding",
|
||||
"feature": np.array(embeddings),
|
||||
}
|
||||
],
|
||||
"fields": [],
|
||||
"is_brute_search": 1,
|
||||
"retrieval_param": {"metric_type": "InnerProduct", "nprobe": 20},
|
||||
"topn": k,
|
||||
}
|
||||
query_result = self.vearch_engine.search(query_data)
|
||||
embed = np.array(embeddings)
|
||||
if self.flag:
|
||||
query_data = {
|
||||
"query": {
|
||||
"sum": [
|
||||
{
|
||||
"field": "text_embedding",
|
||||
"feature": (embed / np.linalg.norm(embed)).tolist(),
|
||||
}
|
||||
],
|
||||
},
|
||||
"size": k,
|
||||
"fields": ["text_embedding", "text", "metadata"],
|
||||
}
|
||||
query_result = self.vearch.search(
|
||||
self.using_db_name, self.using_table_name, query_data
|
||||
)
|
||||
res = query_result["hits"]["hits"]
|
||||
else:
|
||||
query_data = {
|
||||
"vector": [
|
||||
{
|
||||
"field": "text_embedding",
|
||||
"feature": embed / np.linalg.norm(embed),
|
||||
}
|
||||
],
|
||||
"fields": [],
|
||||
"is_brute_search": 1,
|
||||
"retrieval_param": {"metric_type": "InnerProduct", "nprobe": 20},
|
||||
"topn": k,
|
||||
}
|
||||
query_result = self.vearch.search(query_data)
|
||||
res = query_result[0]["result_items"]
|
||||
results: List[Tuple[Document, float]] = []
|
||||
for item in query_result[0]["result_items"]:
|
||||
for item in res:
|
||||
content = ""
|
||||
meta_data = {}
|
||||
if self.flag:
|
||||
score = item["_score"]
|
||||
item = item["_source"]
|
||||
for item_key in item:
|
||||
if item_key == "text":
|
||||
content = item[item_key]
|
||||
@@ -322,7 +479,7 @@ class VearchDb(VectorStore):
|
||||
if item_key == "metadata":
|
||||
meta_data["source"] = item[item_key]
|
||||
continue
|
||||
if item_key == "score":
|
||||
if self.flag != 1 and item_key == "score":
|
||||
score = item[item_key]
|
||||
continue
|
||||
tmp_res = (Document(page_content=content, metadata=meta_data), score)
|
||||
@@ -351,14 +508,16 @@ class VearchDb(VectorStore):
|
||||
Optional[bool]: True if deletion is successful.
|
||||
False otherwise, None if not implemented.
|
||||
"""
|
||||
if self.vearch_engine is None:
|
||||
raise ValueError("Verach Engine is None!!!")
|
||||
|
||||
ret: Optional[bool] = None
|
||||
tmp_res = []
|
||||
if ids is None or ids.__len__() == 0:
|
||||
return ret
|
||||
for _id in ids:
|
||||
ret = self.vearch_engine.del_doc(_id)
|
||||
if self.flag:
|
||||
ret = self.vearch.delete(self.using_db_name, self.using_table_name, _id)
|
||||
else:
|
||||
ret = self.vearch.del_doc(_id)
|
||||
tmp_res.append(ret)
|
||||
ret = all(i == 0 for i in tmp_res)
|
||||
return ret
|
||||
@@ -376,26 +535,44 @@ class VearchDb(VectorStore):
|
||||
Documents which satisfy the input conditions.
|
||||
"""
|
||||
|
||||
if self.vearch_engine is None:
|
||||
raise ValueError("vearch engine is None!!!")
|
||||
results: Dict[str, Document] = {}
|
||||
if ids is None or ids.__len__() == 0:
|
||||
return results
|
||||
for id in ids:
|
||||
docs_detail = self.vearch_engine.get_doc_by_id(id)
|
||||
if docs_detail == {}:
|
||||
continue
|
||||
|
||||
content = ""
|
||||
meta_info = {}
|
||||
for field in docs_detail:
|
||||
if field == "text":
|
||||
content = docs_detail[field]
|
||||
continue
|
||||
elif field == "metadata":
|
||||
meta_info["source"] = docs_detail[field]
|
||||
continue
|
||||
results[docs_detail["_id"]] = Document(
|
||||
page_content=content, metadata=meta_info
|
||||
if self.flag:
|
||||
query_data = {"query": {"ids": ids}}
|
||||
docs_detail = self.vearch.mget_by_ids(
|
||||
self.using_db_name, self.using_table_name, query_data
|
||||
)
|
||||
for record in docs_detail:
|
||||
if record["found"] is False:
|
||||
continue
|
||||
content = ""
|
||||
meta_info = {}
|
||||
for field in record["_source"]:
|
||||
if field == "text":
|
||||
content = record["_source"][field]
|
||||
continue
|
||||
elif field == "metadata":
|
||||
meta_info["source"] = record["_source"][field]
|
||||
continue
|
||||
results[record["_id"]] = Document(
|
||||
page_content=content, metadata=meta_info
|
||||
)
|
||||
else:
|
||||
for id in ids:
|
||||
docs_detail = self.vearch.get_doc_by_id(id)
|
||||
if docs_detail == {}:
|
||||
continue
|
||||
content = ""
|
||||
meta_info = {}
|
||||
for field in docs_detail:
|
||||
if field == "text":
|
||||
content = docs_detail[field]
|
||||
continue
|
||||
elif field == "metadata":
|
||||
meta_info["source"] = docs_detail[field]
|
||||
continue
|
||||
results[docs_detail["_id"]] = Document(
|
||||
page_content=content, metadata=meta_info
|
||||
)
|
||||
return results
|
||||
|
Reference in New Issue
Block a user