mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-16 06:53:16 +00:00
@@ -126,3 +126,76 @@ def test_non_faker_values() -> None:
|
||||
anonymizer = PresidioAnonymizer(add_default_faker_operators=False)
|
||||
anonymized_text = anonymizer.anonymize(text)
|
||||
assert anonymized_text == expected_result
|
||||
|
||||
|
||||
@pytest.mark.requires("presidio_analyzer", "presidio_anonymizer", "faker")
def test_exact_matching_strategy() -> None:
    """
    Check that exact matching puts every original value back into the text.
    """
    from langchain_experimental.data_anonymizer import (
        deanonymizer_matching_strategies as dms,
    )

    # Entity type -> {anonymized value found in the text: original value}.
    mapping = {
        "PERSON": {"Maria Lynch": "Slim Shady"},
        "PHONE_NUMBER": {"7344131647": "313-666-7440"},
        "EMAIL_ADDRESS": {"wdavis@example.net": "real.slim.shady@gmail.com"},
        "CREDIT_CARD": {"213186379402654": "4916 0387 9536 0861"},
    }

    # Every anonymized value appears verbatim, so exact matching is sufficient.
    anonymized_text = (
        "Are you Maria Lynch? I found your card with number 213186379402654. "
        "Is this your phone number: 7344131647? "
        "Is this your email address: wdavis@example.net"
    )

    restored_text = dms.exact_matching_strategy(anonymized_text, mapping)

    expected_originals = (
        "Slim Shady",
        "313-666-7440",
        "real.slim.shady@gmail.com",
        "4916 0387 9536 0861",
    )
    # One assert per value so a failure pinpoints the value that was not restored.
    for expected in expected_originals:
        assert expected in restored_text
|
||||
|
||||
|
||||
@pytest.mark.requires("presidio_analyzer", "presidio_anonymizer", "faker")
def test_best_matching_strategy() -> None:
    """
    Test combined exact and fuzzy matching strategy for deanonymization.

    Most anonymized values in the text are slightly altered compared to the
    mapping keys, so they cannot be found by exact matching alone; the email
    address is left unchanged to cover the exact-match path as well.
    """
    from langchain_experimental.data_anonymizer import (
        deanonymizer_matching_strategies as dms,
    )

    # Entity type -> {anonymized value: original value}.
    deanonymizer_mapping = {
        "PERSON": {"Maria Lynch": "Slim Shady"},
        "PHONE_NUMBER": {"7344131647": "313-666-7440"},
        "EMAIL_ADDRESS": {"wdavis@example.net": "real.slim.shady@gmail.com"},
        "CREDIT_CARD": {"213186379402654": "4916 0387 9536 0861"},
    }

    # Changed some values:
    # - "Maria Lynch" -> "Maria K. Lynch"
    # - "7344131647" -> "734-413-1647"
    # - "213186379402654" -> "2131 8637 9402 654"
    # - "wdavis@example.net" -> the same to test exact match
    text = (
        "Are you Maria K. Lynch? I found your card with number 2131 8637 9402 654. "
        # Trailing space keeps the two sentences from being glued together
        # ("...1647?Is this...") by implicit string concatenation.
        "Is this your phone number: 734-413-1647? "
        "Is this your email address: wdavis@example.net"
    )

    deanonymized_text = dms.combined_exact_fuzzy_matching_strategy(
        text, deanonymizer_mapping
    )

    # Every original value must be restored despite the altered anonymized forms.
    for original_value in [
        "Slim Shady",
        "313-666-7440",
        "real.slim.shady@gmail.com",
        "4916 0387 9536 0861",
    ]:
        assert original_value in deanonymized_text
|
||||
|
Reference in New Issue
Block a user