langchain[patch]: Migrate document transformers (#21098)

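Migrate the document transformer modules in `langchain` to dynamic attribute lookup: each module now resolves its names through `create_importer` and a `DEPRECATED_LOOKUP` table pointing at `langchain_community.document_transformers`.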
This commit is contained in:
Eugene Yurtsev 2024-04-30 16:20:02 -04:00 committed by GitHub
parent aec13a6123
commit 9b6d04a187
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 301 additions and 61 deletions

View File

@ -14,29 +14,50 @@
Document Document
""" # noqa: E501 """ # noqa: E501
import warnings from typing import TYPE_CHECKING, Any
from typing import Any
from langchain_core._api import LangChainDeprecationWarning from langchain._api import create_importer
from langchain.utils.interactive_env import is_interactive_env if TYPE_CHECKING:
from langchain_community.document_transformers import (
BeautifulSoupTransformer,
DoctranPropertyExtractor,
DoctranQATransformer,
DoctranTextTranslator,
EmbeddingsClusteringFilter,
EmbeddingsRedundantFilter,
GoogleTranslateTransformer,
Html2TextTransformer,
LongContextReorder,
NucliaTextTransformer,
OpenAIMetadataTagger,
get_stateful_documents,
)
# Deprecated-import table: each attribute name maps to the module that
# provides it. Sending every lookup through one table keeps the logic for
# deprecation warnings and optional imports in a single place.
# All entries share the same target module, so the table is built from the
# ordered tuple of names (insertion order is preserved).
DEPRECATED_LOOKUP = dict.fromkeys(
    (
        "BeautifulSoupTransformer",
        "DoctranQATransformer",
        "DoctranTextTranslator",
        "DoctranPropertyExtractor",
        "EmbeddingsClusteringFilter",
        "EmbeddingsRedundantFilter",
        "GoogleTranslateTransformer",
        "get_stateful_documents",
        "LongContextReorder",
        "NucliaTextTransformer",
        "OpenAIMetadataTagger",
        "Html2TextTransformer",
    ),
    "langchain_community.document_transformers",
)
# Resolver built once at import time from DEPRECATED_LOOKUP for this package;
# the module-level ``__getattr__`` calls it with the requested attribute name.
_import_attribute = create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP)
def __getattr__(name: str) -> Any: def __getattr__(name: str) -> Any:
from langchain_community import document_transformers """Look up attributes dynamically."""
return _import_attribute(name)
# If not in interactive env, raise warning.
if not is_interactive_env():
warnings.warn(
"Importing document transformers from langchain is deprecated. Importing "
"from langchain will no longer be supported as of langchain==0.2.0. "
"Please import from langchain-community instead:\n\n"
f"`from langchain_community.document_transformers import {name}`.\n\n"
"To install langchain-community run `pip install -U langchain-community`.",
category=LangChainDeprecationWarning,
)
return getattr(document_transformers, name)
__all__ = [ __all__ = [

View File

@ -1,5 +1,25 @@
from langchain_community.document_transformers.beautiful_soup_transformer import ( from typing import TYPE_CHECKING, Any
BeautifulSoupTransformer,
)
__all__ = ["BeautifulSoupTransformer"] from langchain._api import create_importer
if TYPE_CHECKING:
from langchain_community.document_transformers import BeautifulSoupTransformer
# Deprecated-import table: each attribute name maps to the module that
# provides it. Sending every lookup through one importer keeps the logic for
# deprecation warnings and optional imports in a single place.
DEPRECATED_LOOKUP = {
    "BeautifulSoupTransformer": "langchain_community.document_transformers",
}

# Resolver built once at import time from the table above.
_import_attribute = create_importer(
    __package__,
    deprecated_lookups=DEPRECATED_LOOKUP,
)


def __getattr__(name: str) -> Any:
    """Resolve a module attribute on demand.

    Args:
        name: Attribute name being accessed on this module.

    Returns:
        Whatever ``_import_attribute`` returns for ``name``.
    """
    resolved = _import_attribute(name)
    return resolved


__all__ = ["BeautifulSoupTransformer"]

View File

@ -1,5 +1,25 @@
from langchain_community.document_transformers.doctran_text_extract import ( from typing import TYPE_CHECKING, Any
DoctranPropertyExtractor,
)
__all__ = ["DoctranPropertyExtractor"] from langchain._api import create_importer
if TYPE_CHECKING:
from langchain_community.document_transformers import DoctranPropertyExtractor
# Deprecated-import table: each attribute name maps to the module that
# provides it. Sending every lookup through one importer keeps the logic for
# deprecation warnings and optional imports in a single place.
DEPRECATED_LOOKUP = {
    "DoctranPropertyExtractor": "langchain_community.document_transformers",
}

# Resolver built once at import time from the table above.
_import_attribute = create_importer(
    __package__,
    deprecated_lookups=DEPRECATED_LOOKUP,
)


def __getattr__(name: str) -> Any:
    """Resolve a module attribute on demand.

    Args:
        name: Attribute name being accessed on this module.

    Returns:
        Whatever ``_import_attribute`` returns for ``name``.
    """
    resolved = _import_attribute(name)
    return resolved


__all__ = ["DoctranPropertyExtractor"]

View File

@ -1,5 +1,25 @@
from langchain_community.document_transformers.doctran_text_qa import ( from typing import TYPE_CHECKING, Any
DoctranQATransformer,
)
__all__ = ["DoctranQATransformer"] from langchain._api import create_importer
if TYPE_CHECKING:
from langchain_community.document_transformers import DoctranQATransformer
# Deprecated-import table: each attribute name maps to the module that
# provides it. Sending every lookup through one importer keeps the logic for
# deprecation warnings and optional imports in a single place.
DEPRECATED_LOOKUP = {
    "DoctranQATransformer": "langchain_community.document_transformers",
}

# Resolver built once at import time from the table above.
_import_attribute = create_importer(
    __package__,
    deprecated_lookups=DEPRECATED_LOOKUP,
)


def __getattr__(name: str) -> Any:
    """Resolve a module attribute on demand.

    Args:
        name: Attribute name being accessed on this module.

    Returns:
        Whatever ``_import_attribute`` returns for ``name``.
    """
    resolved = _import_attribute(name)
    return resolved


__all__ = ["DoctranQATransformer"]

View File

@ -1,5 +1,25 @@
from langchain_community.document_transformers.doctran_text_translate import ( from typing import TYPE_CHECKING, Any
DoctranTextTranslator,
)
__all__ = ["DoctranTextTranslator"] from langchain._api import create_importer
if TYPE_CHECKING:
from langchain_community.document_transformers import DoctranTextTranslator
# Deprecated-import table: each attribute name maps to the module that
# provides it. Sending every lookup through one importer keeps the logic for
# deprecation warnings and optional imports in a single place.
DEPRECATED_LOOKUP = {
    "DoctranTextTranslator": "langchain_community.document_transformers",
}

# Resolver built once at import time from the table above.
_import_attribute = create_importer(
    __package__,
    deprecated_lookups=DEPRECATED_LOOKUP,
)


def __getattr__(name: str) -> Any:
    """Resolve a module attribute on demand.

    Args:
        name: Attribute name being accessed on this module.

    Returns:
        Whatever ``_import_attribute`` returns for ``name``.
    """
    resolved = _import_attribute(name)
    return resolved


__all__ = ["DoctranTextTranslator"]

View File

@ -1,12 +1,45 @@
from langchain_community.document_transformers.embeddings_redundant_filter import ( from typing import TYPE_CHECKING, Any
from langchain._api import create_importer
if TYPE_CHECKING:
from langchain_community.document_transformers import (
EmbeddingsClusteringFilter, EmbeddingsClusteringFilter,
EmbeddingsRedundantFilter, EmbeddingsRedundantFilter,
get_stateful_documents,
)
from langchain_community.document_transformers.embeddings_redundant_filter import (
_DocumentWithState, _DocumentWithState,
_filter_similar_embeddings, _filter_similar_embeddings,
_get_embeddings_from_stateful_docs, _get_embeddings_from_stateful_docs,
get_stateful_documents,
) )
# Deprecated-import table: each attribute name maps to the module that
# provides it. Sending every lookup through one importer keeps the logic for
# deprecation warnings and optional imports in a single place.
# The underscore-prefixed helpers point at the ``embeddings_redundant_filter``
# submodule; the other names point at the package itself.
DEPRECATED_LOOKUP = {
    "EmbeddingsRedundantFilter": "langchain_community.document_transformers",
    "EmbeddingsClusteringFilter": "langchain_community.document_transformers",
    "_DocumentWithState": (
        "langchain_community.document_transformers.embeddings_redundant_filter"
    ),
    "get_stateful_documents": "langchain_community.document_transformers",
    "_get_embeddings_from_stateful_docs": (
        "langchain_community.document_transformers.embeddings_redundant_filter"
    ),
    "_filter_similar_embeddings": (
        "langchain_community.document_transformers.embeddings_redundant_filter"
    ),
}

# Resolver built once at import time from the table above.
_import_attribute = create_importer(
    __package__,
    deprecated_lookups=DEPRECATED_LOOKUP,
)


def __getattr__(name: str) -> Any:
    """Resolve a module attribute on demand.

    Args:
        name: Attribute name being accessed on this module.

    Returns:
        Whatever ``_import_attribute`` returns for ``name``.
    """
    resolved = _import_attribute(name)
    return resolved
__all__ = [ __all__ = [
"EmbeddingsRedundantFilter", "EmbeddingsRedundantFilter",
"EmbeddingsClusteringFilter", "EmbeddingsClusteringFilter",

View File

@ -1,5 +1,25 @@
from langchain_community.document_transformers.google_translate import ( from typing import TYPE_CHECKING, Any
GoogleTranslateTransformer,
)
__all__ = ["GoogleTranslateTransformer"] from langchain._api import create_importer
if TYPE_CHECKING:
from langchain_community.document_transformers import GoogleTranslateTransformer
# Deprecated-import table: each attribute name maps to the module that
# provides it. Sending every lookup through one importer keeps the logic for
# deprecation warnings and optional imports in a single place.
DEPRECATED_LOOKUP = {
    "GoogleTranslateTransformer": "langchain_community.document_transformers",
}

# Resolver built once at import time from the table above.
_import_attribute = create_importer(
    __package__,
    deprecated_lookups=DEPRECATED_LOOKUP,
)


def __getattr__(name: str) -> Any:
    """Resolve a module attribute on demand.

    Args:
        name: Attribute name being accessed on this module.

    Returns:
        Whatever ``_import_attribute`` returns for ``name``.
    """
    resolved = _import_attribute(name)
    return resolved


__all__ = ["GoogleTranslateTransformer"]

View File

@ -1,3 +1,25 @@
from langchain_community.document_transformers.html2text import Html2TextTransformer from typing import TYPE_CHECKING, Any
__all__ = ["Html2TextTransformer"] from langchain._api import create_importer
if TYPE_CHECKING:
from langchain_community.document_transformers import Html2TextTransformer
# Deprecated-import table: each attribute name maps to the module that
# provides it. Sending every lookup through one importer keeps the logic for
# deprecation warnings and optional imports in a single place.
DEPRECATED_LOOKUP = {
    "Html2TextTransformer": "langchain_community.document_transformers",
}

# Resolver built once at import time from the table above.
_import_attribute = create_importer(
    __package__,
    deprecated_lookups=DEPRECATED_LOOKUP,
)


def __getattr__(name: str) -> Any:
    """Resolve a module attribute on demand.

    Args:
        name: Attribute name being accessed on this module.

    Returns:
        Whatever ``_import_attribute`` returns for ``name``.
    """
    resolved = _import_attribute(name)
    return resolved


__all__ = ["Html2TextTransformer"]

View File

@ -1,5 +1,23 @@
from langchain_community.document_transformers.long_context_reorder import ( from typing import TYPE_CHECKING, Any
LongContextReorder,
)
__all__ = ["LongContextReorder"] from langchain._api import create_importer
if TYPE_CHECKING:
from langchain_community.document_transformers import LongContextReorder
# Deprecated-import table: each attribute name maps to the module that
# provides it. Sending every lookup through one importer keeps the logic for
# deprecation warnings and optional imports in a single place.
DEPRECATED_LOOKUP = {
    "LongContextReorder": "langchain_community.document_transformers",
}

# Resolver built once at import time from the table above.
_import_attribute = create_importer(
    __package__,
    deprecated_lookups=DEPRECATED_LOOKUP,
)


def __getattr__(name: str) -> Any:
    """Resolve a module attribute on demand.

    Args:
        name: Attribute name being accessed on this module.

    Returns:
        Whatever ``_import_attribute`` returns for ``name``.
    """
    resolved = _import_attribute(name)
    return resolved


__all__ = ["LongContextReorder"]

View File

@ -1,5 +1,25 @@
from langchain_community.document_transformers.nuclia_text_transform import ( from typing import TYPE_CHECKING, Any
NucliaTextTransformer,
)
__all__ = ["NucliaTextTransformer"] from langchain._api import create_importer
if TYPE_CHECKING:
from langchain_community.document_transformers import NucliaTextTransformer
# Deprecated-import table: each attribute name maps to the module that
# provides it. Sending every lookup through one importer keeps the logic for
# deprecation warnings and optional imports in a single place.
DEPRECATED_LOOKUP = {
    "NucliaTextTransformer": "langchain_community.document_transformers",
}

# Resolver built once at import time from the table above.
_import_attribute = create_importer(
    __package__,
    deprecated_lookups=DEPRECATED_LOOKUP,
)


def __getattr__(name: str) -> Any:
    """Resolve a module attribute on demand.

    Args:
        name: Attribute name being accessed on this module.

    Returns:
        Whatever ``_import_attribute`` returns for ``name``.
    """
    resolved = _import_attribute(name)
    return resolved


__all__ = ["NucliaTextTransformer"]

View File

@ -1,6 +1,32 @@
from typing import TYPE_CHECKING, Any
from langchain._api import create_importer
if TYPE_CHECKING:
from langchain_community.document_transformers import OpenAIMetadataTagger
from langchain_community.document_transformers.openai_functions import ( from langchain_community.document_transformers.openai_functions import (
OpenAIMetadataTagger,
create_metadata_tagger, create_metadata_tagger,
) )
__all__ = ["OpenAIMetadataTagger", "create_metadata_tagger"] # Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
# handling optional imports.
DEPRECATED_LOOKUP = {
    "OpenAIMetadataTagger": "langchain_community.document_transformers",
    # The factory function is looked up in the ``openai_functions`` submodule
    # rather than in the package itself.
    "create_metadata_tagger": (
        "langchain_community.document_transformers.openai_functions"
    ),
}

# Resolver built once at import time from the table above.
_import_attribute = create_importer(
    __package__,
    deprecated_lookups=DEPRECATED_LOOKUP,
)


def __getattr__(name: str) -> Any:
    """Resolve a module attribute on demand.

    Args:
        name: Attribute name being accessed on this module.

    Returns:
        Whatever ``_import_attribute`` returns for ``name``.
    """
    resolved = _import_attribute(name)
    return resolved


__all__ = ["OpenAIMetadataTagger", "create_metadata_tagger"]