mirror of
https://github.com/hwchase17/langchain.git
synced 2025-05-13 02:57:22 +00:00
Upgrade to using a literal for specifying the extra, which is the recommended approach in pydantic 2. This also works correctly in pydantic v1.

```python
from pydantic.v1 import BaseModel

class Foo(BaseModel, extra="forbid"):
    x: int

Foo(x=5, y=1)
```

And

```python
from pydantic.v1 import BaseModel

class Foo(BaseModel):
    x: int

    class Config:
        extra = "forbid"

Foo(x=5, y=1)
```

## Enum -> literal using grit pattern:

```
engine marzano(0.1)
language python
or {
    `extra=Extra.allow` => `extra="allow"`,
    `extra=Extra.forbid` => `extra="forbid"`,
    `extra=Extra.ignore` => `extra="ignore"`
}
```

Re-sorted attributes in config and removed doc-string in case we will need to deal with going back and forth between pydantic v1 and v2 during the 0.3 release. (This will reduce merge conflicts.)

## Sort attributes in Config:

```
engine marzano(0.1)
language python

function sort($values) js {
    return $values.text.split(',').sort().join("\n");
}

class_definition($name, $body) as $C where {
    $name <: `Config`,
    $body <: block($statements),
    $values = [],
    $statements <: some bubble($values) assignment() as $A where {
        $values += $A
    },
    $body => sort($values),
}
```
85 lines
2.8 KiB
Python
85 lines
2.8 KiB
Python
from __future__ import annotations
|
|
|
|
from typing import TYPE_CHECKING, Dict, Optional, Sequence
|
|
|
|
from langchain_core.callbacks.manager import Callbacks
|
|
from langchain_core.documents import BaseDocumentCompressor, Document
|
|
from langchain_core.pydantic_v1 import root_validator
|
|
|
|
if TYPE_CHECKING:
|
|
from flashrank import Ranker, RerankRequest
|
|
else:
|
|
# Avoid pydantic annotation issues when actually instantiating
|
|
# while keeping this import optional
|
|
try:
|
|
from flashrank import Ranker, RerankRequest
|
|
except ImportError:
|
|
pass
|
|
|
|
# Model name handed to ``Ranker`` when the caller supplies neither a ``client``
# nor a ``model``.
DEFAULT_MODEL_NAME = "ms-marco-MultiBERT-L-12"
|
|
|
|
|
|
class FlashrankRerank(BaseDocumentCompressor):
    """Document compressor using Flashrank interface.

    Documents are handed to a Flashrank ``Ranker`` together with the query;
    at most ``top_n`` of the returned results whose score reaches
    ``score_threshold`` are converted back into ``Document`` objects.
    """

    client: Ranker
    """Flashrank client to use for compressing documents"""
    top_n: int = 3
    """Number of documents to return."""
    score_threshold: float = 0.0
    """Minimum relevance threshold to return."""
    model: Optional[str] = None
    """Model to use for reranking."""
    prefix_metadata: str = ""
    """Prefix for flashrank_rerank metadata keys"""

    class Config:
        arbitrary_types_allowed = True
        extra = "forbid"

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Ensure a Flashrank client is available, building one if needed.

        When the caller supplies ``client`` the values are returned untouched.
        Otherwise the ``flashrank`` package is imported, ``model`` falls back
        to ``DEFAULT_MODEL_NAME`` when it is missing or ``None``, and a
        ``Ranker`` for that model is stored under ``client``.

        Args:
            values: Raw constructor keyword arguments (pre-validation).

        Returns:
            The same mapping, with ``model`` and ``client`` filled in when a
            client had to be created.

        Raises:
            ImportError: If the ``flashrank`` package cannot be imported.
        """
        if "client" in values:
            return values

        try:
            from flashrank import Ranker
        except ImportError as exc:
            raise ImportError(
                "Could not import flashrank python package. "
                "Please install it with `pip install flashrank`."
            ) from exc

        # ``model`` is Optional, so an explicit ``model=None`` must fall back
        # to the default as well instead of reaching ``Ranker`` as ``None``.
        if values.get("model") is None:
            values["model"] = DEFAULT_MODEL_NAME
        values["client"] = Ranker(model_name=values["model"])
        return values

    def compress_documents(
        self,
        documents: Sequence[Document],
        query: str,
        callbacks: Optional[Callbacks] = None,
    ) -> Sequence[Document]:
        """Rerank ``documents`` against ``query`` and keep the best matches.

        Args:
            documents: Candidate documents to rerank.
            query: Query text the documents are scored against.
            callbacks: Accepted for interface compatibility; not used here.

        Returns:
            New ``Document`` objects, in the order returned by the client, for
            the first ``top_n`` results whose score is at least
            ``score_threshold``. Each carries ``<prefix>id`` (the index of the
            document in ``documents``) and ``<prefix>relevance_score`` in its
            metadata, merged with the metadata echoed back by the client.
        """
        # Nothing to rank: skip building a request and calling the client.
        if not documents:
            return []

        passages = [
            {"id": i, "text": doc.page_content, "meta": doc.metadata}
            for i, doc in enumerate(documents)
        ]

        rerank_request = RerankRequest(query=query, passages=passages)
        rerank_response = self.client.rerank(rerank_request)[: self.top_n]

        # NOTE: ``**r["meta"]`` is expanded last, so keys already present in a
        # document's own metadata win over the generated id/score keys on a
        # name clash; set ``prefix_metadata`` to keep them distinct.
        return [
            Document(
                page_content=r["text"],
                metadata={
                    self.prefix_metadata + "id": r["id"],
                    self.prefix_metadata + "relevance_score": r["score"],
                    **r["meta"],
                },
            )
            for r in rerank_response
            if r["score"] >= self.score_threshold
        ]
|