mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-21 22:29:51 +00:00
Add ids parameter for pinecone from_texts / add_texts (#659)
Allow optionally specifying a list of IDs for Pinecone rather than having them randomly generated. This also permits editing the embedding/metadata of existing Pinecone entries by upserting with their existing IDs.
This commit is contained in:
parent
54d7f1c933
commit
69998b5fad
@ -54,6 +54,7 @@ class Pinecone(VectorStore):
|
||||
self,
|
||||
texts: Iterable[str],
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
ids: Optional[List[str]] = None,
|
||||
namespace: Optional[str] = None,
|
||||
) -> List[str]:
|
||||
"""Run more texts through the embeddings and add to the vectorstore.
|
||||
@ -61,6 +62,7 @@ class Pinecone(VectorStore):
|
||||
Args:
|
||||
texts: Iterable of strings to add to the vectorstore.
|
||||
metadatas: Optional list of metadatas associated with the texts.
|
||||
ids: Optional list of ids to associate with the texts.
|
||||
namespace: Optional pinecone namespace to add the texts to.
|
||||
|
||||
Returns:
|
||||
@ -69,14 +71,12 @@ class Pinecone(VectorStore):
|
||||
"""
|
||||
# Embed and create the documents
|
||||
docs = []
|
||||
ids = []
|
||||
ids = ids or [str(uuid.uuid4()) for _ in texts]
|
||||
for i, text in enumerate(texts):
|
||||
id = str(uuid.uuid4())
|
||||
embedding = self._embedding_function(text)
|
||||
metadata = metadatas[i] if metadatas else {}
|
||||
metadata[self._text_key] = text
|
||||
docs.append((id, embedding, metadata))
|
||||
ids.append(id)
|
||||
docs.append((ids[i], embedding, metadata))
|
||||
# upsert to Pinecone
|
||||
self._index.upsert(vectors=docs, namespace=namespace)
|
||||
return ids
|
||||
@ -153,6 +153,7 @@ class Pinecone(VectorStore):
|
||||
texts: List[str],
|
||||
embedding: Embeddings,
|
||||
metadatas: Optional[List[dict]] = None,
|
||||
ids: Optional[List[str]] = None,
|
||||
batch_size: int = 32,
|
||||
text_key: str = "text",
|
||||
index_name: Optional[str] = None,
|
||||
@ -197,7 +198,11 @@ class Pinecone(VectorStore):
|
||||
i_end = min(i + batch_size, len(texts))
|
||||
# get batch of texts and ids
|
||||
lines_batch = texts[i : i + batch_size]
|
||||
ids_batch = [str(uuid.uuid4()) for n in range(i, i_end)]
|
||||
# create ids if not provided
|
||||
if ids:
|
||||
ids_batch = ids[i : i + batch_size]
|
||||
else:
|
||||
ids_batch = [str(uuid.uuid4()) for n in range(i, i_end)]
|
||||
# create embeddings
|
||||
embeds = embedding.embed_documents(lines_batch)
|
||||
# prep metadata and upsert batch
|
||||
|
Loading…
Reference in New Issue
Block a user