Anonymizer small fixes (#11915)

This commit is contained in:
maks-operlejn-ds
2023-10-17 19:27:29 +02:00
committed by GitHub
parent 90e9ec6962
commit 42dcc502c7
2 changed files with 35 additions and 3 deletions

View File

@@ -173,9 +173,25 @@ class PresidioAnonymizer(PresidioAnonymizerBase):
"Change your language configuration file to add more languages."
)
# Determine which entities are supported for the given language and analyze only those,
# e.g. IT_FISCAL_CODE is not supported for English in Presidio by default.
# To use such an entity, you need to add a recognizer for it manually.
supported_entities = []
for recognizer in self._analyzer.get_recognizers(language):
recognizer_dict = recognizer.to_dict()
supported_entities.extend(
[recognizer_dict["supported_entity"]]
if "supported_entity" in recognizer_dict
else recognizer_dict["supported_entities"]
)
entities_to_analyze = list(
set(supported_entities).intersection(set(self.analyzed_fields))
)
analyzer_results = self._analyzer.analyze(
text,
entities=self.analyzed_fields,
entities=entities_to_analyze,
language=language,
allow_list=allow_list,
)
@@ -268,9 +284,25 @@ class PresidioReversibleAnonymizer(PresidioAnonymizerBase, ReversibleAnonymizerB
"Change your language configuration file to add more languages."
)
# Determine which entities are supported for the given language and analyze only those,
# e.g. IT_FISCAL_CODE is not supported for English in Presidio by default.
# To use such an entity, you need to add a recognizer for it manually.
supported_entities = []
for recognizer in self._analyzer.get_recognizers(language):
recognizer_dict = recognizer.to_dict()
supported_entities.extend(
[recognizer_dict["supported_entity"]]
if "supported_entity" in recognizer_dict
else recognizer_dict["supported_entities"]
)
entities_to_analyze = list(
set(supported_entities).intersection(set(self.analyzed_fields))
)
analyzer_results = self._analyzer.analyze(
text,
entities=self.analyzed_fields,
entities=entities_to_analyze,
language=language,
allow_list=allow_list,
)