feature: add metadata information into the embedding file before uplo… (#11553)

Replace this entire comment with:
- **Description:** In this modified version of the function, if the
metadatas parameter is not None, the function includes the corresponding
metadata in the JSON object for each text. This allows the metadata to
be stored alongside the text's embedding in the vector store.
  - 
  - **Issue:** #10924
  - **Dependencies:** None
  - **Tag maintainer:** @hwchase17
@agola11
  - **Twitter handle:** @MelliJoaco

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
Joaquin Menendez 2023-10-11 20:05:13 -03:00 committed by GitHub
parent 3c83779661
commit ef99b06362
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -116,15 +116,24 @@ class MatchingEngine(VectorStore):
Returns:
List of ids from adding the texts into the vectorstore.
"""
texts = list(texts)
if metadatas is not None and len(texts) != len(metadatas):
raise ValueError(
"texts and metadatas do not have the same length. Received "
f"{len(texts)} texts and {len(metadatas)} metadatas."
)
logger.debug("Embedding documents.")
embeddings = self.embedding.embed_documents(list(texts))
embeddings = self.embedding.embed_documents(texts)
jsons = []
ids = []
# Could be improved with async.
for embedding, text in zip(embeddings, texts):
for idx, (embedding, text) in enumerate(zip(embeddings, texts)):
id = str(uuid.uuid4())
ids.append(id)
jsons.append({"id": id, "embedding": embedding})
json_: dict = {"id": id, "embedding": embedding}
if metadatas is not None:
json_["metadata"] = metadatas[idx]
jsons.append(json)
self._upload_to_gcs(text, f"documents/{id}")
logger.debug(f"Uploaded {len(ids)} documents to GCS.")