mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-23 15:19:33 +00:00
community: Add configurable text key for indexing and the retriever in Pinecone Hybrid Search (#29697)
**Issue:** In LangChain, the original content is generally stored under the `text` key. However, `PineconeHybridSearchRetriever` stores the original text under a hard-coded `context` field in the metadata (and reads it back from there when retrieving), and there is no way to change this key. To address this, I have modified the code so that the key can be set to something other than `context`, both when indexing and when retrieving. In my opinion, following LangChain's conventions, the `text` key seems more appropriate than `context`. However, since I wasn't sure about the author's intent, I have left the default value as `context`.
This commit is contained in:
parent
894b0cac3c
commit
60740c44c5
@ -31,6 +31,7 @@ def create_index(
|
|||||||
ids: Optional[List[str]] = None,
|
ids: Optional[List[str]] = None,
|
||||||
metadatas: Optional[List[dict]] = None,
|
metadatas: Optional[List[dict]] = None,
|
||||||
namespace: Optional[str] = None,
|
namespace: Optional[str] = None,
|
||||||
|
text_key: str = "context",
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Create an index from a list of contexts.
|
"""Create an index from a list of contexts.
|
||||||
|
|
||||||
@ -69,7 +70,7 @@ def create_index(
|
|||||||
)
|
)
|
||||||
# add context passages as metadata
|
# add context passages as metadata
|
||||||
meta = [
|
meta = [
|
||||||
{"context": context, **metadata}
|
{text_key: context, **metadata}
|
||||||
for context, metadata in zip(context_batch, metadata_batch)
|
for context, metadata in zip(context_batch, metadata_batch)
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -114,7 +115,7 @@ class PineconeHybridSearchRetriever(BaseRetriever):
|
|||||||
"""Alpha value for hybrid search."""
|
"""Alpha value for hybrid search."""
|
||||||
namespace: Optional[str] = None
|
namespace: Optional[str] = None
|
||||||
"""Namespace value for index partition."""
|
"""Namespace value for index partition."""
|
||||||
|
text_key: str = "context"
|
||||||
model_config = ConfigDict(
|
model_config = ConfigDict(
|
||||||
arbitrary_types_allowed=True,
|
arbitrary_types_allowed=True,
|
||||||
extra="forbid",
|
extra="forbid",
|
||||||
@ -135,6 +136,7 @@ class PineconeHybridSearchRetriever(BaseRetriever):
|
|||||||
ids=ids,
|
ids=ids,
|
||||||
metadatas=metadatas,
|
metadatas=metadatas,
|
||||||
namespace=namespace,
|
namespace=namespace,
|
||||||
|
text_key=self.text_key,
|
||||||
)
|
)
|
||||||
|
|
||||||
@pre_init
|
@pre_init
|
||||||
@ -174,7 +176,7 @@ class PineconeHybridSearchRetriever(BaseRetriever):
|
|||||||
)
|
)
|
||||||
final_result = []
|
final_result = []
|
||||||
for res in result["matches"]:
|
for res in result["matches"]:
|
||||||
context = res["metadata"].pop("context")
|
context = res["metadata"].pop(self.text_key)
|
||||||
metadata = res["metadata"]
|
metadata = res["metadata"]
|
||||||
if "score" not in metadata and "score" in res:
|
if "score" not in metadata and "score" in res:
|
||||||
metadata["score"] = res["score"]
|
metadata["score"] = res["score"]
|
||||||
|
Loading…
Reference in New Issue
Block a user