mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-09 14:35:50 +00:00
feature: add metadata information into the embedding file before uplo… (#11553)
- **Description:** In this modified version of the function, if the `metadatas` parameter is not None, the function includes the corresponding metadata in the JSON object for each text. This allows the metadata to be stored alongside the text's embedding in the vector store. - **Issue:** #10924 - **Dependencies:** None - **Tag maintainer:** @hwchase17 @agola11 - **Twitter handle:** @MelliJoaco --------- Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
3c83779661
commit
ef99b06362
@ -116,15 +116,24 @@ class MatchingEngine(VectorStore):
|
||||
Returns:
|
||||
List of ids from adding the texts into the vectorstore.
|
||||
"""
|
||||
texts = list(texts)
|
||||
if metadatas is not None and len(texts) != len(metadatas):
|
||||
raise ValueError(
|
||||
"texts and metadatas do not have the same length. Received "
|
||||
f"{len(texts)} texts and {len(metadatas)} metadatas."
|
||||
)
|
||||
logger.debug("Embedding documents.")
|
||||
embeddings = self.embedding.embed_documents(list(texts))
|
||||
embeddings = self.embedding.embed_documents(texts)
|
||||
jsons = []
|
||||
ids = []
|
||||
# Could be improved with async.
|
||||
for embedding, text in zip(embeddings, texts):
|
||||
for idx, (embedding, text) in enumerate(zip(embeddings, texts)):
|
||||
id = str(uuid.uuid4())
|
||||
ids.append(id)
|
||||
jsons.append({"id": id, "embedding": embedding})
|
||||
json_: dict = {"id": id, "embedding": embedding}
|
||||
if metadatas is not None:
|
||||
json_["metadata"] = metadatas[idx]
|
||||
jsons.append(json)
|
||||
self._upload_to_gcs(text, f"documents/{id}")
|
||||
|
||||
logger.debug(f"Uploaded {len(ids)} documents to GCS.")
|
||||
|
Loading…
Reference in New Issue
Block a user