community: Add configurable text key for indexing and the retriever in Pinecone Hybrid Search (#29697)

**issue**

In LangChain, the original content is generally stored under the `text`
key. However, `PineconeHybridSearchRetriever` reads the content from the
`context` field in the metadata, and this key cannot be changed. To
address this, I have added a `text_key` option so that the key can be set
to something other than `context`.

In my opinion, following LangChain's conventions, the `text` key seems
more appropriate than `context`. However, since I wasn't sure about the
original author's intent, I have left the default value as `context`.
This commit is contained in:
Changyong Um 2025-02-10 22:56:37 +09:00 committed by GitHub
parent 894b0cac3c
commit 60740c44c5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -31,6 +31,7 @@ def create_index(
ids: Optional[List[str]] = None,
metadatas: Optional[List[dict]] = None,
namespace: Optional[str] = None,
text_key: str = "context",
) -> None:
"""Create an index from a list of contexts.
@ -69,7 +70,7 @@ def create_index(
)
# add context passages as metadata
meta = [
{"context": context, **metadata}
{text_key: context, **metadata}
for context, metadata in zip(context_batch, metadata_batch)
]
@ -114,7 +115,7 @@ class PineconeHybridSearchRetriever(BaseRetriever):
"""Alpha value for hybrid search."""
namespace: Optional[str] = None
"""Namespace value for index partition."""
text_key: str = "context"
model_config = ConfigDict(
arbitrary_types_allowed=True,
extra="forbid",
@ -135,6 +136,7 @@ class PineconeHybridSearchRetriever(BaseRetriever):
ids=ids,
metadatas=metadatas,
namespace=namespace,
text_key=self.text_key,
)
@pre_init
@ -174,7 +176,7 @@ class PineconeHybridSearchRetriever(BaseRetriever):
)
final_result = []
for res in result["matches"]:
context = res["metadata"].pop("context")
context = res["metadata"].pop(self.text_key)
metadata = res["metadata"]
if "score" not in metadata and "score" in res:
metadata["score"] = res["score"]