mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-23 03:19:38 +00:00
experimental: docstrings update (#18048)
Added missing docstrings. Formatted docstrings to a consistent format.
This commit is contained in:
@@ -10,8 +10,8 @@ DEFAULT_DEANONYMIZER_MATCHING_STRATEGY = exact_matching_strategy
|
||||
|
||||
|
||||
class AnonymizerBase(ABC):
|
||||
"""
|
||||
Base abstract class for anonymizers.
|
||||
"""Base abstract class for anonymizers.
|
||||
|
||||
It is public and non-virtual because it allows
|
||||
wrapping the behavior for all methods in a base class.
|
||||
"""
|
||||
@@ -22,7 +22,8 @@ class AnonymizerBase(ABC):
|
||||
language: Optional[str] = None,
|
||||
allow_list: Optional[List[str]] = None,
|
||||
) -> str:
|
||||
"""Anonymize text"""
|
||||
"""Anonymize text."""
|
||||
|
||||
return self._anonymize(text, language, allow_list)
|
||||
|
||||
@abstractmethod
|
||||
|
@@ -11,7 +11,7 @@ MappingDataType = Dict[str, Dict[str, str]]
|
||||
|
||||
|
||||
def format_duplicated_operator(operator_name: str, count: int) -> str:
|
||||
"""Format the operator name with the count"""
|
||||
"""Format the operator name with the count."""
|
||||
|
||||
clean_operator_name = re.sub(r"[<>]", "", operator_name)
|
||||
clean_operator_name = re.sub(r"_\d+$", "", clean_operator_name)
|
||||
@@ -24,17 +24,20 @@ def format_duplicated_operator(operator_name: str, count: int) -> str:
|
||||
|
||||
@dataclass
|
||||
class DeanonymizerMapping:
|
||||
"""Deanonymizer mapping."""
|
||||
|
||||
mapping: MappingDataType = field(
|
||||
default_factory=lambda: defaultdict(lambda: defaultdict(str))
|
||||
)
|
||||
|
||||
@property
|
||||
def data(self) -> MappingDataType:
|
||||
"""Return the deanonymizer mapping"""
|
||||
"""Return the deanonymizer mapping."""
|
||||
return {k: dict(v) for k, v in self.mapping.items()}
|
||||
|
||||
def update(self, new_mapping: MappingDataType) -> None:
|
||||
"""Update the deanonymizer mapping with new values
|
||||
"""Update the deanonymizer mapping with new values.
|
||||
|
||||
Duplicated values will not be added
|
||||
If there are multiple entities of the same type, the mapping will
|
||||
include a count to differentiate them. For example, if there are
|
||||
@@ -67,7 +70,8 @@ def create_anonymizer_mapping(
|
||||
anonymizer_results: "EngineResult",
|
||||
is_reversed: bool = False,
|
||||
) -> MappingDataType:
|
||||
"""Creates or updates the mapping used to anonymize and/or deanonymize text.
|
||||
"""Create or update the mapping used to anonymize and/or
|
||||
deanonymize a text.
|
||||
|
||||
This method exploits the results returned by the
|
||||
analysis and anonymization processes.
|
||||
|
@@ -5,8 +5,8 @@ from langchain_experimental.data_anonymizer.deanonymizer_mapping import MappingD
|
||||
|
||||
|
||||
def exact_matching_strategy(text: str, deanonymizer_mapping: MappingDataType) -> str:
|
||||
"""
|
||||
Exact matching strategy for deanonymization.
|
||||
"""Exact matching strategy for deanonymization.
|
||||
|
||||
It replaces all the anonymized entities with the original ones.
|
||||
|
||||
Args:
|
||||
@@ -23,8 +23,8 @@ def exact_matching_strategy(text: str, deanonymizer_mapping: MappingDataType) ->
|
||||
def case_insensitive_matching_strategy(
|
||||
text: str, deanonymizer_mapping: MappingDataType
|
||||
) -> str:
|
||||
"""
|
||||
Case insensitive matching strategy for deanonymization.
|
||||
"""Case insensitive matching strategy for deanonymization.
|
||||
|
||||
It replaces all the anonymized entities with the original ones
|
||||
irrespective of their letter case.
|
||||
|
||||
@@ -48,8 +48,8 @@ def case_insensitive_matching_strategy(
|
||||
def fuzzy_matching_strategy(
|
||||
text: str, deanonymizer_mapping: MappingDataType, max_l_dist: int = 3
|
||||
) -> str:
|
||||
"""
|
||||
Fuzzy matching strategy for deanonymization.
|
||||
"""Fuzzy matching strategy for deanonymization.
|
||||
|
||||
It uses fuzzy matching to find the position of the anonymized entity in the text.
|
||||
It replaces all the anonymized entities with the original ones.
|
||||
|
||||
@@ -93,9 +93,9 @@ def fuzzy_matching_strategy(
|
||||
def combined_exact_fuzzy_matching_strategy(
|
||||
text: str, deanonymizer_mapping: MappingDataType, max_l_dist: int = 3
|
||||
) -> str:
|
||||
"""
|
||||
RECOMMENDED STRATEGY.
|
||||
Combined exact and fuzzy matching strategy for deanonymization.
|
||||
"""Combined exact and fuzzy matching strategy for deanonymization.
|
||||
|
||||
It is a RECOMMENDED STRATEGY.
|
||||
|
||||
Args:
|
||||
text: text to deanonymize
|
||||
@@ -118,8 +118,8 @@ def ngram_fuzzy_matching_strategy(
|
||||
fuzzy_threshold: int = 85,
|
||||
use_variable_length: bool = True,
|
||||
) -> str:
|
||||
"""
|
||||
N-gram fuzzy matching strategy for deanonymization.
|
||||
"""N-gram fuzzy matching strategy for deanonymization.
|
||||
|
||||
It replaces all the anonymized entities with the original ones.
|
||||
It uses fuzzy matching to find the position of the anonymized entity in the text.
|
||||
It generates n-grams of the same length as the anonymized entity from the text and
|
||||
|
@@ -3,6 +3,8 @@ from typing import Callable, Dict, Optional
|
||||
|
||||
|
||||
def get_pseudoanonymizer_mapping(seed: Optional[int] = None) -> Dict[str, Callable]:
|
||||
"""Get a mapping of entities to pseudo anonymize them."""
|
||||
|
||||
try:
|
||||
from faker import Faker
|
||||
except ImportError as e:
|
||||
|
@@ -98,6 +98,11 @@ DEFAULT_LANGUAGES_CONFIG = {
|
||||
|
||||
|
||||
class PresidioAnonymizerBase(AnonymizerBase):
|
||||
"""Base Anonymizer using Microsoft Presidio.
|
||||
|
||||
See more: https://microsoft.github.io/presidio/
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
analyzed_fields: Optional[List[str]] = None,
|
||||
@@ -180,6 +185,8 @@ class PresidioAnonymizerBase(AnonymizerBase):
|
||||
|
||||
|
||||
class PresidioAnonymizer(PresidioAnonymizerBase):
|
||||
"""Anonymizer using Microsoft Presidio."""
|
||||
|
||||
def _anonymize(
|
||||
self,
|
||||
text: str,
|
||||
@@ -258,6 +265,8 @@ class PresidioAnonymizer(PresidioAnonymizerBase):
|
||||
|
||||
|
||||
class PresidioReversibleAnonymizer(PresidioAnonymizerBase, ReversibleAnonymizerBase):
|
||||
"""Reversible Anonymizer using Microsoft Presidio."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
analyzed_fields: Optional[List[str]] = None,
|
||||
|
Reference in New Issue
Block a user