mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-09 06:53:59 +00:00
community[minor]: [Pebblo] Enhance PebbloSafeLoader to take anonymize flag (#26812)
- **Description:** The flag is named `anonymize_snippets`. When set to true, the Pebblo server will anonymize snippets by redacting all personally identifiable information (PII) from the snippets going into VectorDB and the generated reports.
- **Issue:** NA
- **Dependencies:** NA
- **Docs:** Updated
This commit is contained in:
@@ -154,6 +154,8 @@ class Doc(BaseModel):
|
||||
"""Owner of the source of the loader."""
|
||||
classifier_location: str
|
||||
"""Location of the classifier."""
|
||||
anonymize_snippets: bool
|
||||
"""Whether to anonymize snippets going into VectorDB and the generated reports"""
|
||||
|
||||
|
||||
def get_full_path(path: str) -> str:
|
||||
@@ -424,6 +426,8 @@ class PebbloLoaderAPIWrapper(BaseModel):
|
||||
"""URL of the Pebblo Classifier"""
|
||||
cloud_url: Optional[str]
|
||||
"""URL of the Pebblo Cloud"""
|
||||
anonymize_snippets: bool = False
|
||||
"""Whether to anonymize snippets going into VectorDB and the generated reports"""
|
||||
|
||||
def __init__(self, **kwargs: Any):
|
||||
"""Validate that api key in environment."""
|
||||
@@ -522,6 +526,8 @@ class PebbloLoaderAPIWrapper(BaseModel):
|
||||
# If local classifier is used add the classified information
|
||||
# and remove doc content
|
||||
self.update_doc_data(payload["docs"], classified_docs)
|
||||
# Remove the anonymize_snippets key from payload
|
||||
payload.pop("anonymize_snippets", None)
|
||||
self.send_docs_to_pebblo_cloud(payload)
|
||||
elif self.classifier_location == "pebblo-cloud":
|
||||
logger.warning("API key is missing for sending docs to Pebblo cloud.")
|
||||
@@ -599,6 +605,7 @@ class PebbloLoaderAPIWrapper(BaseModel):
|
||||
"loading_end": "false",
|
||||
"source_owner": source_owner,
|
||||
"classifier_location": self.classifier_location,
|
||||
"anonymize_snippets": self.anonymize_snippets,
|
||||
}
|
||||
if loading_end is True:
|
||||
payload["loading_end"] = "true"
|
||||
|
Reference in New Issue
Block a user