mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-14 05:56:40 +00:00
Anonymizer small fixes (#11915)
This commit is contained in:
@@ -173,9 +173,25 @@ class PresidioAnonymizer(PresidioAnonymizerBase):
|
||||
"Change your language configuration file to add more languages."
|
||||
)
|
||||
|
||||
# Check supported entities for given language
|
||||
# e.g. IT_FISCAL_CODE is not supported for English in Presidio by default
|
||||
# If you want to use it, you need to add a recognizer manually
|
||||
supported_entities = []
|
||||
for recognizer in self._analyzer.get_recognizers(language):
|
||||
recognizer_dict = recognizer.to_dict()
|
||||
supported_entities.extend(
|
||||
[recognizer_dict["supported_entity"]]
|
||||
if "supported_entity" in recognizer_dict
|
||||
else recognizer_dict["supported_entities"]
|
||||
)
|
||||
|
||||
entities_to_analyze = list(
|
||||
set(supported_entities).intersection(set(self.analyzed_fields))
|
||||
)
|
||||
|
||||
analyzer_results = self._analyzer.analyze(
|
||||
text,
|
||||
entities=self.analyzed_fields,
|
||||
entities=entities_to_analyze,
|
||||
language=language,
|
||||
allow_list=allow_list,
|
||||
)
|
||||
@@ -268,9 +284,25 @@ class PresidioReversibleAnonymizer(PresidioAnonymizerBase, ReversibleAnonymizerB
|
||||
"Change your language configuration file to add more languages."
|
||||
)
|
||||
|
||||
# Check supported entities for given language
|
||||
# e.g. IT_FISCAL_CODE is not supported for English in Presidio by default
|
||||
# If you want to use it, you need to add a recognizer manually
|
||||
supported_entities = []
|
||||
for recognizer in self._analyzer.get_recognizers(language):
|
||||
recognizer_dict = recognizer.to_dict()
|
||||
supported_entities.extend(
|
||||
[recognizer_dict["supported_entity"]]
|
||||
if "supported_entity" in recognizer_dict
|
||||
else recognizer_dict["supported_entities"]
|
||||
)
|
||||
|
||||
entities_to_analyze = list(
|
||||
set(supported_entities).intersection(set(self.analyzed_fields))
|
||||
)
|
||||
|
||||
analyzer_results = self._analyzer.analyze(
|
||||
text,
|
||||
entities=self.analyzed_fields,
|
||||
entities=entities_to_analyze,
|
||||
language=language,
|
||||
allow_list=allow_list,
|
||||
)
|
||||
|
Reference in New Issue
Block a user