experimental: docstrings update (#18048)

Added missing docstrings. Formatted docstrings to a consistent format.
This commit is contained in:
Leonid Ganeline
2024-02-23 18:24:16 -08:00
committed by GitHub
parent 56b955fc31
commit 3f6bf852ea
61 changed files with 316 additions and 102 deletions

View File

@@ -10,8 +10,8 @@ DEFAULT_DEANONYMIZER_MATCHING_STRATEGY = exact_matching_strategy
class AnonymizerBase(ABC):
"""
Base abstract class for anonymizers.
"""Base abstract class for anonymizers.
It is public and non-virtual because it allows
wrapping the behavior for all methods in a base class.
"""
@@ -22,7 +22,8 @@ class AnonymizerBase(ABC):
language: Optional[str] = None,
allow_list: Optional[List[str]] = None,
) -> str:
"""Anonymize text"""
"""Anonymize text."""
return self._anonymize(text, language, allow_list)
@abstractmethod

View File

@@ -11,7 +11,7 @@ MappingDataType = Dict[str, Dict[str, str]]
def format_duplicated_operator(operator_name: str, count: int) -> str:
"""Format the operator name with the count"""
"""Format the operator name with the count."""
clean_operator_name = re.sub(r"[<>]", "", operator_name)
clean_operator_name = re.sub(r"_\d+$", "", clean_operator_name)
@@ -24,17 +24,20 @@ def format_duplicated_operator(operator_name: str, count: int) -> str:
@dataclass
class DeanonymizerMapping:
"""Deanonymizer mapping."""
mapping: MappingDataType = field(
default_factory=lambda: defaultdict(lambda: defaultdict(str))
)
@property
def data(self) -> MappingDataType:
"""Return the deanonymizer mapping"""
"""Return the deanonymizer mapping."""
return {k: dict(v) for k, v in self.mapping.items()}
def update(self, new_mapping: MappingDataType) -> None:
"""Update the deanonymizer mapping with new values
"""Update the deanonymizer mapping with new values.
Duplicated values will not be added
If there are multiple entities of the same type, the mapping will
include a count to differentiate them. For example, if there are
@@ -67,7 +70,8 @@ def create_anonymizer_mapping(
anonymizer_results: "EngineResult",
is_reversed: bool = False,
) -> MappingDataType:
"""Creates or updates the mapping used to anonymize and/or deanonymize text.
"""Create or update the mapping used to anonymize and/or
deanonymize a text.
This method exploits the results returned by the
analysis and anonymization processes.

View File

@@ -5,8 +5,8 @@ from langchain_experimental.data_anonymizer.deanonymizer_mapping import MappingD
def exact_matching_strategy(text: str, deanonymizer_mapping: MappingDataType) -> str:
"""
Exact matching strategy for deanonymization.
"""Exact matching strategy for deanonymization.
It replaces all the anonymized entities with the original ones.
Args:
@@ -23,8 +23,8 @@ def exact_matching_strategy(text: str, deanonymizer_mapping: MappingDataType) ->
def case_insensitive_matching_strategy(
text: str, deanonymizer_mapping: MappingDataType
) -> str:
"""
Case insensitive matching strategy for deanonymization.
"""Case insensitive matching strategy for deanonymization.
It replaces all the anonymized entities with the original ones
irrespective of their letter case.
@@ -48,8 +48,8 @@ def case_insensitive_matching_strategy(
def fuzzy_matching_strategy(
text: str, deanonymizer_mapping: MappingDataType, max_l_dist: int = 3
) -> str:
"""
Fuzzy matching strategy for deanonymization.
"""Fuzzy matching strategy for deanonymization.
It uses fuzzy matching to find the position of the anonymized entity in the text.
It replaces all the anonymized entities with the original ones.
@@ -93,9 +93,9 @@ def fuzzy_matching_strategy(
def combined_exact_fuzzy_matching_strategy(
text: str, deanonymizer_mapping: MappingDataType, max_l_dist: int = 3
) -> str:
"""
RECOMMENDED STRATEGY.
Combined exact and fuzzy matching strategy for deanonymization.
"""Combined exact and fuzzy matching strategy for deanonymization.
It is a RECOMMENDED STRATEGY.
Args:
text: text to deanonymize
@@ -118,8 +118,8 @@ def ngram_fuzzy_matching_strategy(
fuzzy_threshold: int = 85,
use_variable_length: bool = True,
) -> str:
"""
N-gram fuzzy matching strategy for deanonymization.
"""N-gram fuzzy matching strategy for deanonymization.
It replaces all the anonymized entities with the original ones.
It uses fuzzy matching to find the position of the anonymized entity in the text.
It generates n-grams of the same length as the anonymized entity from the text and

View File

@@ -3,6 +3,8 @@ from typing import Callable, Dict, Optional
def get_pseudoanonymizer_mapping(seed: Optional[int] = None) -> Dict[str, Callable]:
"""Get a mapping of entities to pseudo anonymize them."""
try:
from faker import Faker
except ImportError as e:

View File

@@ -98,6 +98,11 @@ DEFAULT_LANGUAGES_CONFIG = {
class PresidioAnonymizerBase(AnonymizerBase):
"""Base Anonymizer using Microsoft Presidio.
See more: https://microsoft.github.io/presidio/
"""
def __init__(
self,
analyzed_fields: Optional[List[str]] = None,
@@ -180,6 +185,8 @@ class PresidioAnonymizerBase(AnonymizerBase):
class PresidioAnonymizer(PresidioAnonymizerBase):
"""Anonymizer using Microsoft Presidio."""
def _anonymize(
self,
text: str,
@@ -258,6 +265,8 @@ class PresidioAnonymizer(PresidioAnonymizerBase):
class PresidioReversibleAnonymizer(PresidioAnonymizerBase, ReversibleAnonymizerBase):
"""Reversible Anonymizer using Microsoft Presidio."""
def __init__(
self,
analyzed_fields: Optional[List[str]] = None,