mirror of
https://github.com/hwchase17/langchain.git
synced 2025-05-20 14:31:44 +00:00
community[patch]: Fixed duplicate input id issue in clarifai vectorstore (#14914)
- **Description:** This PR fixes the duplicate input ID issue in the Clarifai vectorstore class, which occurred when ingesting more documents into the vectorstore than the batch size. --------- Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
5642132c0c
commit
c53fab63a3
@ -116,21 +116,23 @@ class Clarifai(VectorStore):
|
|||||||
batch_metadatas = (
|
batch_metadatas = (
|
||||||
metadatas[idx : idx + batch_size] if metadatas else None
|
metadatas[idx : idx + batch_size] if metadatas else None
|
||||||
)
|
)
|
||||||
|
if ids is None:
|
||||||
|
batch_ids = [uuid.uuid4().hex for _ in range(len(batch_texts))]
|
||||||
|
else:
|
||||||
|
batch_ids = ids[idx : idx + batch_size]
|
||||||
if batch_metadatas is not None:
|
if batch_metadatas is not None:
|
||||||
meta_list = []
|
meta_list = []
|
||||||
for meta in batch_metadatas:
|
for meta in batch_metadatas:
|
||||||
meta_struct = Struct()
|
meta_struct = Struct()
|
||||||
meta_struct.update(meta)
|
meta_struct.update(meta)
|
||||||
meta_list.append(meta_struct)
|
meta_list.append(meta_struct)
|
||||||
if ids is None:
|
|
||||||
ids = [uuid.uuid4().hex for _ in range(len(batch_texts))]
|
|
||||||
input_batch = [
|
input_batch = [
|
||||||
input_obj.get_text_input(
|
input_obj.get_text_input(
|
||||||
input_id=ids[id],
|
input_id=batch_ids[i],
|
||||||
raw_text=inp,
|
raw_text=text,
|
||||||
metadata=meta_list[id] if batch_metadatas else None,
|
metadata=meta_list[i] if batch_metadatas else None,
|
||||||
)
|
)
|
||||||
for id, inp in enumerate(batch_texts)
|
for i, text in enumerate(batch_texts)
|
||||||
]
|
]
|
||||||
result_id = input_obj.upload_inputs(inputs=input_batch)
|
result_id = input_obj.upload_inputs(inputs=input_batch)
|
||||||
input_job_ids.extend(result_id)
|
input_job_ids.extend(result_id)
|
||||||
|
Loading…
Reference in New Issue
Block a user