From 32d087fcb89393a0ae587fbfbd0604888f114006 Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Wed, 22 Nov 2023 10:10:26 -0800 Subject: [PATCH] REFACTOR: combine core documents files (#13733) --- libs/core/langchain_core/documents/__init__.py | 4 ++++ .../langchain_core/{documents.py => documents/base.py} | 0 .../transformers.py} | 0 libs/core/tests/unit_tests/documents/__init__.py | 0 libs/core/tests/unit_tests/documents/test_imports.py | 7 +++++++ .../document_transformers/beautiful_soup_transformer.py | 3 +-- .../document_transformers/doctran_text_extract.py | 3 +-- .../langchain/document_transformers/doctran_text_qa.py | 3 +-- .../document_transformers/doctran_text_translate.py | 3 +-- .../document_transformers/embeddings_redundant_filter.py | 3 +-- .../langchain/document_transformers/google_translate.py | 3 +-- .../langchain/langchain/document_transformers/html2text.py | 3 +-- .../document_transformers/long_context_reorder.py | 3 +-- .../document_transformers/nuclia_text_transform.py | 3 +-- .../langchain/document_transformers/openai_functions.py | 3 +-- .../langchain/retrievers/document_compressors/base.py | 3 +-- libs/langchain/langchain/schema/__init__.py | 3 +-- libs/langchain/langchain/schema/document.py | 3 +-- libs/langchain/langchain/text_splitter.py | 2 +- 19 files changed, 25 insertions(+), 27 deletions(-) create mode 100644 libs/core/langchain_core/documents/__init__.py rename libs/core/langchain_core/{documents.py => documents/base.py} (100%) rename libs/core/langchain_core/{document_transformers.py => documents/transformers.py} (100%) create mode 100644 libs/core/tests/unit_tests/documents/__init__.py create mode 100644 libs/core/tests/unit_tests/documents/test_imports.py diff --git a/libs/core/langchain_core/documents/__init__.py b/libs/core/langchain_core/documents/__init__.py new file mode 100644 index 00000000000..895d4d7d48a --- /dev/null +++ b/libs/core/langchain_core/documents/__init__.py @@ -0,0 +1,4 @@ +from langchain_core.documents.base import Document +from langchain_core.documents.transformers import BaseDocumentTransformer + +__all__ = ["Document", "BaseDocumentTransformer"] diff --git a/libs/core/langchain_core/documents.py b/libs/core/langchain_core/documents/base.py similarity index 100% rename from libs/core/langchain_core/documents.py rename to libs/core/langchain_core/documents/base.py diff --git a/libs/core/langchain_core/document_transformers.py b/libs/core/langchain_core/documents/transformers.py similarity index 100% rename from libs/core/langchain_core/document_transformers.py rename to libs/core/langchain_core/documents/transformers.py diff --git a/libs/core/tests/unit_tests/documents/__init__.py b/libs/core/tests/unit_tests/documents/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/libs/core/tests/unit_tests/documents/test_imports.py b/libs/core/tests/unit_tests/documents/test_imports.py new file mode 100644 index 00000000000..d239af62bb4 --- /dev/null +++ b/libs/core/tests/unit_tests/documents/test_imports.py @@ -0,0 +1,7 @@ +from langchain_core.documents import __all__ + +EXPECTED_ALL = ["Document", "BaseDocumentTransformer"] + + +def test_all_imports() -> None: + assert set(__all__) == set(EXPECTED_ALL) diff --git a/libs/langchain/langchain/document_transformers/beautiful_soup_transformer.py b/libs/langchain/langchain/document_transformers/beautiful_soup_transformer.py index 459ca55c95e..a276b1899e1 100644 --- a/libs/langchain/langchain/document_transformers/beautiful_soup_transformer.py +++ b/libs/langchain/langchain/document_transformers/beautiful_soup_transformer.py @@ -1,7 +1,6 @@ from typing import Any, Iterator, List, Sequence, cast -from langchain_core.document_transformers import BaseDocumentTransformer -from langchain_core.documents import Document +from langchain_core.documents import BaseDocumentTransformer, Document class BeautifulSoupTransformer(BaseDocumentTransformer): diff --git a/libs/langchain/langchain/document_transformers/doctran_text_extract.py b/libs/langchain/langchain/document_transformers/doctran_text_extract.py index 25c85811736..a0178f36731 100644 --- a/libs/langchain/langchain/document_transformers/doctran_text_extract.py +++ b/libs/langchain/langchain/document_transformers/doctran_text_extract.py @@ -1,7 +1,6 @@ from typing import Any, List, Optional, Sequence -from langchain_core.document_transformers import BaseDocumentTransformer -from langchain_core.documents import Document +from langchain_core.documents import BaseDocumentTransformer, Document from langchain.utils import get_from_env diff --git a/libs/langchain/langchain/document_transformers/doctran_text_qa.py b/libs/langchain/langchain/document_transformers/doctran_text_qa.py index bcd6b3ce6b0..10c7cd7a0cb 100644 --- a/libs/langchain/langchain/document_transformers/doctran_text_qa.py +++ b/libs/langchain/langchain/document_transformers/doctran_text_qa.py @@ -1,7 +1,6 @@ from typing import Any, Optional, Sequence -from langchain_core.document_transformers import BaseDocumentTransformer -from langchain_core.documents import Document +from langchain_core.documents import BaseDocumentTransformer, Document from langchain.utils import get_from_env diff --git a/libs/langchain/langchain/document_transformers/doctran_text_translate.py b/libs/langchain/langchain/document_transformers/doctran_text_translate.py index d59c1bea004..3bb390f88d8 100644 --- a/libs/langchain/langchain/document_transformers/doctran_text_translate.py +++ b/libs/langchain/langchain/document_transformers/doctran_text_translate.py @@ -1,7 +1,6 @@ from typing import Any, Optional, Sequence -from langchain_core.document_transformers import BaseDocumentTransformer -from langchain_core.documents import Document +from langchain_core.documents import BaseDocumentTransformer, Document from langchain.utils import get_from_env diff --git a/libs/langchain/langchain/document_transformers/embeddings_redundant_filter.py b/libs/langchain/langchain/document_transformers/embeddings_redundant_filter.py index 0e504881f4d..d7772a3092d 100644 --- a/libs/langchain/langchain/document_transformers/embeddings_redundant_filter.py +++ b/libs/langchain/langchain/document_transformers/embeddings_redundant_filter.py @@ -2,8 +2,7 @@ from typing import Any, Callable, List, Sequence import numpy as np -from langchain_core.document_transformers import BaseDocumentTransformer -from langchain_core.documents import Document +from langchain_core.documents import BaseDocumentTransformer, Document from langchain_core.embeddings import Embeddings from langchain_core.pydantic_v1 import BaseModel, Field diff --git a/libs/langchain/langchain/document_transformers/google_translate.py b/libs/langchain/langchain/document_transformers/google_translate.py index 098cbafbbe1..2837dc43ae4 100644 --- a/libs/langchain/langchain/document_transformers/google_translate.py +++ b/libs/langchain/langchain/document_transformers/google_translate.py @@ -1,7 +1,6 @@ from typing import Any, Optional, Sequence -from langchain_core.document_transformers import BaseDocumentTransformer -from langchain_core.documents import Document +from langchain_core.documents import BaseDocumentTransformer, Document from langchain.utilities.vertexai import get_client_info diff --git a/libs/langchain/langchain/document_transformers/html2text.py b/libs/langchain/langchain/document_transformers/html2text.py index d8455af2882..4d59e73ab2e 100644 --- a/libs/langchain/langchain/document_transformers/html2text.py +++ b/libs/langchain/langchain/document_transformers/html2text.py @@ -1,7 +1,6 @@ from typing import Any, Sequence -from langchain_core.document_transformers import BaseDocumentTransformer -from langchain_core.documents import Document +from langchain_core.documents import BaseDocumentTransformer, Document class Html2TextTransformer(BaseDocumentTransformer): diff --git a/libs/langchain/langchain/document_transformers/long_context_reorder.py b/libs/langchain/langchain/document_transformers/long_context_reorder.py index e76027c78c0..32eda6b4826 100644 --- a/libs/langchain/langchain/document_transformers/long_context_reorder.py +++ b/libs/langchain/langchain/document_transformers/long_context_reorder.py @@ -1,8 +1,7 @@ """Reorder documents""" from typing import Any, List, Sequence -from langchain_core.document_transformers import BaseDocumentTransformer -from langchain_core.documents import Document +from langchain_core.documents import BaseDocumentTransformer, Document from langchain_core.pydantic_v1 import BaseModel diff --git a/libs/langchain/langchain/document_transformers/nuclia_text_transform.py b/libs/langchain/langchain/document_transformers/nuclia_text_transform.py index 03fc8d07e6c..e1d9c2d568a 100644 --- a/libs/langchain/langchain/document_transformers/nuclia_text_transform.py +++ b/libs/langchain/langchain/document_transformers/nuclia_text_transform.py @@ -3,8 +3,7 @@ import json import uuid from typing import Any, Sequence -from langchain_core.document_transformers import BaseDocumentTransformer -from langchain_core.documents import Document +from langchain_core.documents import BaseDocumentTransformer, Document from langchain.tools.nuclia.tool import NucliaUnderstandingAPI diff --git a/libs/langchain/langchain/document_transformers/openai_functions.py b/libs/langchain/langchain/document_transformers/openai_functions.py index 17b10690fe9..f695a881f5a 100644 --- a/libs/langchain/langchain/document_transformers/openai_functions.py +++ b/libs/langchain/langchain/document_transformers/openai_functions.py @@ -1,8 +1,7 @@ """Document transformers that use OpenAI Functions models""" from typing import Any, Dict, Optional, Sequence, Type, Union -from langchain_core.document_transformers import BaseDocumentTransformer -from langchain_core.documents import Document +from langchain_core.documents import BaseDocumentTransformer, Document from langchain_core.language_models import BaseLanguageModel from langchain_core.prompts import ChatPromptTemplate from langchain_core.pydantic_v1 import BaseModel diff --git a/libs/langchain/langchain/retrievers/document_compressors/base.py b/libs/langchain/langchain/retrievers/document_compressors/base.py index 799112766a4..75e05e29006 100644 --- a/libs/langchain/langchain/retrievers/document_compressors/base.py +++ b/libs/langchain/langchain/retrievers/document_compressors/base.py @@ -3,8 +3,7 @@ from abc import ABC, abstractmethod from inspect import signature from typing import List, Optional, Sequence, Union -from langchain_core.document_transformers import BaseDocumentTransformer -from langchain_core.documents import Document +from langchain_core.documents import BaseDocumentTransformer, Document from langchain_core.pydantic_v1 import BaseModel from langchain.callbacks.manager import Callbacks diff --git a/libs/langchain/langchain/schema/__init__.py b/libs/langchain/langchain/schema/__init__.py index 8876b71bcd2..ac920bfedf8 100644 --- a/libs/langchain/langchain/schema/__init__.py +++ b/libs/langchain/langchain/schema/__init__.py @@ -2,8 +2,7 @@ from langchain_core.agents import AgentAction, AgentFinish from langchain_core.caches import BaseCache from langchain_core.chat_history import BaseChatMessageHistory -from langchain_core.document_transformers import BaseDocumentTransformer -from langchain_core.documents import Document +from langchain_core.documents import BaseDocumentTransformer, Document from langchain_core.exceptions import LangChainException, OutputParserException from langchain_core.memory import BaseMemory from langchain_core.messages import ( diff --git a/libs/langchain/langchain/schema/document.py b/libs/langchain/langchain/schema/document.py index b426b47b189..8b5b254be95 100644 --- a/libs/langchain/langchain/schema/document.py +++ b/libs/langchain/langchain/schema/document.py @@ -1,4 +1,3 @@ -from langchain_core.document_transformers import BaseDocumentTransformer -from langchain_core.documents import Document +from langchain_core.documents import BaseDocumentTransformer, Document __all__ = ["Document", "BaseDocumentTransformer"] diff --git a/libs/langchain/langchain/text_splitter.py b/libs/langchain/langchain/text_splitter.py index 856cd9417ca..43f15ecca32 100644 --- a/libs/langchain/langchain/text_splitter.py +++ b/libs/langchain/langchain/text_splitter.py @@ -51,7 +51,7 @@ from typing import ( ) import requests -from langchain_core.document_transformers import BaseDocumentTransformer +from langchain_core.documents import BaseDocumentTransformer from langchain.docstore.document import Document