core[patch], langchain[patch], experimental[patch]: import CI (#14414)

This commit is contained in:
Erick Friis
2023-12-08 11:28:55 -08:00
committed by GitHub
parent ba083887e5
commit b3f226e8f8
12 changed files with 177 additions and 113 deletions

View File

@@ -21,6 +21,11 @@ extended_tests:
# Run pytest on tests/integration_tests through `poetry run`.
integration_tests:
poetry run pytest tests/integration_tests
# Load every Python source file of the package as a standalone module and stop
# with a non-zero exit status at the first file that fails to load.
#
# The prerequisite list is produced by `find` because make's wildcard expansion
# does not treat `**` as a recursive glob: `langchain_experimental/**/*.py`
# matches only files exactly one directory below the package root, so top-level
# and more deeply nested modules would never be checked.
#
# NOTE: SourceFileLoader.load_module() is marked deprecated in the importlib
# documentation; it is kept here so the check itself behaves as before.
check_imports: $(shell find langchain_experimental -name '*.py')
	for f in $^ ; do \
		python -c "from importlib.machinery import SourceFileLoader; SourceFileLoader('x', '$$f').load_module()" || exit 1; \
	done
######################
# LINTING AND FORMATTING

View File

@@ -1,10 +1,11 @@
import re
from collections import defaultdict
from dataclasses import dataclass, field
from typing import Dict, List
from typing import TYPE_CHECKING, Dict, List
from presidio_analyzer import RecognizerResult
from presidio_anonymizer.entities import EngineResult
if TYPE_CHECKING:
from presidio_analyzer import RecognizerResult
from presidio_anonymizer.entities import EngineResult
# Type alias for a two-level mapping: str keys -> (str -> str) dictionaries.
# NOTE(review): presumably outer keys are entity types and inner dicts map
# between original and anonymized values -- confirm against the callers.
MappingDataType = Dict[str, Dict[str, str]]
@@ -62,8 +63,8 @@ class DeanonymizerMapping:
def create_anonymizer_mapping(
original_text: str,
analyzer_results: List[RecognizerResult],
anonymizer_results: EngineResult,
analyzer_results: List["RecognizerResult"],
anonymizer_results: "EngineResult",
is_reversed: bool = False,
) -> MappingDataType:
"""Creates or updates the mapping used to anonymize and/or deanonymize text.

View File

@@ -23,28 +23,62 @@ from langchain_experimental.data_anonymizer.faker_presidio_mapping import (
get_pseudoanonymizer_mapping,
)
try:
from presidio_analyzer import AnalyzerEngine
if TYPE_CHECKING:
from presidio_analyzer import AnalyzerEngine, EntityRecognizer
from presidio_analyzer.nlp_engine import NlpEngineProvider
except ImportError as e:
raise ImportError(
"Could not import presidio_analyzer, please install with "
"`pip install presidio-analyzer`. You will also need to download a "
"spaCy model to use the analyzer, e.g. "
"`python -m spacy download en_core_web_lg`."
) from e
try:
from presidio_anonymizer import AnonymizerEngine
from presidio_anonymizer.entities import OperatorConfig
except ImportError as e:
raise ImportError(
"Could not import presidio_anonymizer, please install with "
"`pip install presidio-anonymizer`."
) from e
if TYPE_CHECKING:
from presidio_analyzer import EntityRecognizer
def _import_analyzer_engine() -> "AnalyzerEngine":
try:
from presidio_analyzer import AnalyzerEngine
except ImportError as e:
raise ImportError(
"Could not import presidio_analyzer, please install with "
"`pip install presidio-analyzer`. You will also need to download a "
"spaCy model to use the analyzer, e.g. "
"`python -m spacy download en_core_web_lg`."
) from e
return AnalyzerEngine
def _import_nlp_engine_provider() -> "NlpEngineProvider":
try:
from presidio_analyzer.nlp_engine import NlpEngineProvider
except ImportError as e:
raise ImportError(
"Could not import presidio_analyzer, please install with "
"`pip install presidio-analyzer`. You will also need to download a "
"spaCy model to use the analyzer, e.g. "
"`python -m spacy download en_core_web_lg`."
) from e
return NlpEngineProvider
def _import_anonymizer_engine() -> "AnonymizerEngine":
try:
from presidio_anonymizer import AnonymizerEngine
except ImportError as e:
raise ImportError(
"Could not import presidio_anonymizer, please install with "
"`pip install presidio-anonymizer`."
) from e
return AnonymizerEngine
def _import_operator_config() -> "OperatorConfig":
try:
from presidio_anonymizer.entities import OperatorConfig
except ImportError as e:
raise ImportError(
"Could not import presidio_anonymizer, please install with "
"`pip install presidio-anonymizer`."
) from e
return OperatorConfig
# Configuring Anonymizer for multiple languages
# Detailed description and examples can be found here:
@@ -89,6 +123,11 @@ class PresidioAnonymizerBase(AnonymizerBase):
Defaults to None, in which case faker will be seeded randomly
and provide random values.
"""
OperatorConfig = _import_operator_config()
AnalyzerEngine = _import_analyzer_engine()
NlpEngineProvider = _import_nlp_engine_provider()
AnonymizerEngine = _import_anonymizer_engine()
self.analyzed_fields = (
analyzed_fields
if analyzed_fields is not None