mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-22 06:39:52 +00:00
Anonymizer small fixes (#11915)
This commit is contained in:
parent
90e9ec6962
commit
42dcc502c7
@ -16,7 +16,7 @@
|
||||
"source": [
|
||||
"# QA with private data protection\n",
|
||||
"\n",
|
||||
"[](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/use_cases/question_answering/qa_privacy_protection.ipynb)\n",
|
||||
"[](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/guides/privacy/presidio_data_anonymization/qa_privacy_protection.ipynb)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"In this notebook, we will look at building a basic system for question answering, based on private data. Before feeding the LLM with this data, we need to protect it so that it doesn't go to an external API (e.g. OpenAI, Anthropic). Then, after receiving the model output, we would like the data to be restored to its original form. Below you can observe an example flow of this QA system:\n",
|
||||
|
@ -173,9 +173,25 @@ class PresidioAnonymizer(PresidioAnonymizerBase):
|
||||
"Change your language configuration file to add more languages."
|
||||
)
|
||||
|
||||
# Check supported entities for given language
|
||||
# e.g. IT_FISCAL_CODE is not supported for English in Presidio by default
|
||||
# If you want to use it, you need to add a recognizer manually
|
||||
supported_entities = []
|
||||
for recognizer in self._analyzer.get_recognizers(language):
|
||||
recognizer_dict = recognizer.to_dict()
|
||||
supported_entities.extend(
|
||||
[recognizer_dict["supported_entity"]]
|
||||
if "supported_entity" in recognizer_dict
|
||||
else recognizer_dict["supported_entities"]
|
||||
)
|
||||
|
||||
entities_to_analyze = list(
|
||||
set(supported_entities).intersection(set(self.analyzed_fields))
|
||||
)
|
||||
|
||||
analyzer_results = self._analyzer.analyze(
|
||||
text,
|
||||
entities=self.analyzed_fields,
|
||||
entities=entities_to_analyze,
|
||||
language=language,
|
||||
allow_list=allow_list,
|
||||
)
|
||||
@ -268,9 +284,25 @@ class PresidioReversibleAnonymizer(PresidioAnonymizerBase, ReversibleAnonymizerB
|
||||
"Change your language configuration file to add more languages."
|
||||
)
|
||||
|
||||
# Check supported entities for given language
|
||||
# e.g. IT_FISCAL_CODE is not supported for English in Presidio by default
|
||||
# If you want to use it, you need to add a recognizer manually
|
||||
supported_entities = []
|
||||
for recognizer in self._analyzer.get_recognizers(language):
|
||||
recognizer_dict = recognizer.to_dict()
|
||||
supported_entities.extend(
|
||||
[recognizer_dict["supported_entity"]]
|
||||
if "supported_entity" in recognizer_dict
|
||||
else recognizer_dict["supported_entities"]
|
||||
)
|
||||
|
||||
entities_to_analyze = list(
|
||||
set(supported_entities).intersection(set(self.analyzed_fields))
|
||||
)
|
||||
|
||||
analyzer_results = self._analyzer.analyze(
|
||||
text,
|
||||
entities=self.analyzed_fields,
|
||||
entities=entities_to_analyze,
|
||||
language=language,
|
||||
allow_list=allow_list,
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user