mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-09 13:00:34 +00:00
airbyte: remove from master (#27837)
This commit is contained in:
parent
ee63d21915
commit
002e1c9055
1
libs/partners/airbyte/.gitignore
vendored
1
libs/partners/airbyte/.gitignore
vendored
@ -1 +0,0 @@
|
|||||||
__pycache__
|
|
@ -1,21 +0,0 @@
|
|||||||
MIT License
|
|
||||||
|
|
||||||
Copyright (c) 2024 LangChain, Inc.
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
|
||||||
in the Software without restriction, including without limitation the rights
|
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
|
||||||
furnished to do so, subject to the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included in all
|
|
||||||
copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
SOFTWARE.
|
|
@ -1,59 +0,0 @@
|
|||||||
.PHONY: all format lint test tests integration_tests docker_tests help extended_tests

# Default target executed when no arguments are given to make.
all: help

# Define a variable for the test file path.
TEST_FILE ?= tests/unit_tests/
# Integration targets override TEST_FILE via a target-specific variable.
integration_test integration_tests: TEST_FILE = tests/integration_tests/

test tests integration_test integration_tests:
	poetry run pytest $(TEST_FILE)

# Re-run tests on file change (pytest-watch), updating snapshots.
test_watch:
	poetry run ptw --snapshot-update --now . -- -vv $(TEST_FILE)


######################
# LINTING AND FORMATTING
######################

# Define a variable for Python and notebook files.
PYTHON_FILES=.
MYPY_CACHE=.mypy_cache
lint format: PYTHON_FILES=.
# *_diff targets restrict linting to files changed relative to master.
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/partners/airbyte --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
lint_package: PYTHON_FILES=langchain_airbyte
lint_tests: PYTHON_FILES=tests
# Separate mypy cache for tests so package and test runs don't clash.
lint_tests: MYPY_CACHE=.mypy_cache_test

lint lint_diff lint_package lint_tests:
	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
	[ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)

format format_diff:
	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I --fix $(PYTHON_FILES)

spell_check:
	poetry run codespell --toml pyproject.toml

spell_fix:
	poetry run codespell --toml pyproject.toml -w

# Smoke-test that every module in the package imports cleanly.
check_imports: $(shell find langchain_airbyte -name '*.py')
	poetry run python ./scripts/check_imports.py $^

######################
# HELP
######################

help:
	@echo '----'
	@echo 'check_imports - check imports'
	@echo 'format - run code formatters'
	@echo 'lint - run linters'
	@echo 'test - run unit tests'
	@echo 'tests - run unit tests'
	@echo 'test TEST_FILE=<test_file> - run all tests in file'
|
|
@ -1,27 +0,0 @@
|
|||||||
# langchain-airbyte
|
|
||||||
|
|
||||||
This package contains the LangChain integration with Airbyte
|
|
||||||
|
|
||||||
## Installation
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install -U langchain-airbyte
|
|
||||||
```
|
|
||||||
|
|
||||||
The integration package doesn't have any global environment variables that need to be
|
|
||||||
set, but some integrations (e.g. `source-github`) may need credentials passed in.
|
|
||||||
|
|
||||||
## Document Loaders
|
|
||||||
|
|
||||||
The `AirbyteLoader` class exposes a single document loader for Airbyte sources.
|
|
||||||
|
|
||||||
```python
|
|
||||||
from langchain_airbyte import AirbyteLoader
|
|
||||||
|
|
||||||
loader = AirbyteLoader(
|
|
||||||
source="source-faker",
|
|
||||||
stream="users",
|
|
||||||
config={"count": 100},
|
|
||||||
)
|
|
||||||
docs = loader.load()
|
|
||||||
```
|
|
@ -1,3 +0,0 @@
|
|||||||
"""Public API for the LangChain Airbyte integration package."""

from langchain_airbyte.document_loaders import AirbyteLoader

# Only the loader is part of the supported public surface.
__all__ = ["AirbyteLoader"]
|
|
@ -1,127 +0,0 @@
|
|||||||
"""Airbyte document loaders."""

from __future__ import annotations

from typing import (
    TYPE_CHECKING,
    Any,
    AsyncIterator,
    Dict,
    Iterator,
    List,
    Mapping,
    Optional,
    TypeVar,
)

import airbyte as ab
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import run_in_executor
from langchain_core.vectorstores import VectorStore

if TYPE_CHECKING:
    # Imported only for type checking to avoid a hard runtime dependency.
    from langchain_text_splitters import TextSplitter

# NOTE(review): this TypeVar is not referenced in this module — presumably
# kept for API/typing compatibility; confirm before removing.
VST = TypeVar("VST", bound=VectorStore)
|
|
||||||
|
|
||||||
|
|
||||||
class AirbyteLoader:
    """Airbyte Document Loader.

    Loads records from a single stream of a PyAirbyte source and converts
    them to LangChain ``Document`` objects.

    Example:
        .. code-block:: python

            from langchain_airbyte import AirbyteLoader

            loader = AirbyteLoader(
                source="github",
                stream="pull_requests",
            )
            documents = loader.lazy_load()
    """

    def __init__(
        self,
        source: str,
        stream: str,
        *,
        config: Optional[Dict] = None,
        include_metadata: bool = True,
        template: Optional[PromptTemplate] = None,
    ):
        # Configure the PyAirbyte source for exactly this one stream.
        self._airbyte_source = ab.get_source(source, config=config, streams=[stream])
        self._stream = stream
        self._template = template
        self._include_metadata = include_metadata

    def load(self) -> List[Document]:
        """Load source data into Document objects."""
        docs = self.lazy_load()
        return list(docs)

    def load_and_split(
        self, text_splitter: Optional[TextSplitter] = None
    ) -> List[Document]:
        """Load Documents and split into chunks. Chunks are returned as Documents.

        Args:
            text_splitter: TextSplitter instance to use for splitting documents.
                Defaults to RecursiveCharacterTextSplitter.

        Returns:
            List of Documents.
        """
        if text_splitter is not None:
            splitter: TextSplitter = text_splitter
        else:
            # langchain-text-splitters is an optional dependency; only import
            # it when the caller did not supply a splitter.
            try:
                from langchain_text_splitters import RecursiveCharacterTextSplitter
            except ImportError as e:
                raise ImportError(
                    "Unable to import from langchain_text_splitters. Please specify "
                    "text_splitter or install langchain_text_splitters with "
                    "`pip install -U langchain-text-splitters`."
                ) from e
            splitter = RecursiveCharacterTextSplitter()
        return splitter.split_documents(self.lazy_load())

    def lazy_load(self) -> Iterator[Document]:
        """A lazy loader for Documents."""
        if self._template:
            # With a template, render each raw record into page_content.
            rows: Iterator[Mapping[str, Any]] = self._airbyte_source.get_records(
                self._stream
            )
            for row in rows:
                meta = dict(row) if self._include_metadata else {}
                yield Document(
                    page_content=self._template.format(**row), metadata=meta
                )
        else:
            # Without a template, pass through Airbyte's document conversion.
            for ab_doc in self._airbyte_source.get_documents(self._stream):
                if self._include_metadata:
                    meta = {
                        **ab_doc.metadata,
                        "_last_modified": ab_doc.last_modified,
                        "_id": ab_doc.id,
                    }
                else:
                    meta = {}
                yield Document(
                    page_content=ab_doc.content,
                    metadata=meta,
                )

    async def alazy_load(self) -> AsyncIterator[Document]:
        """A lazy loader for Documents."""
        # Drive the synchronous iterator from a worker thread so Airbyte I/O
        # never blocks the event loop.
        sync_iter = await run_in_executor(None, self.lazy_load)
        sentinel = object()
        while True:
            item = await run_in_executor(None, next, sync_iter, sentinel)  # type: ignore[call-arg, arg-type]
            if item is sentinel:
                return
            yield item  # type: ignore[misc]
|
|
2940
libs/partners/airbyte/poetry.lock
generated
2940
libs/partners/airbyte/poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -1,88 +0,0 @@
|
|||||||
[tool.poetry]
|
|
||||||
name = "langchain-airbyte"
|
|
||||||
version = "0.1.1"
|
|
||||||
description = "An integration package connecting Airbyte and LangChain"
|
|
||||||
authors = []
|
|
||||||
readme = "README.md"
|
|
||||||
repository = "https://github.com/langchain-ai/langchain"
|
|
||||||
license = "MIT"
|
|
||||||
|
|
||||||
[tool.poetry.urls]
|
|
||||||
"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/airbyte"
|
|
||||||
"Release Notes" = "https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-airbyte%3D%3D0%22&expanded=true"
|
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
|
||||||
python = ">=3.9,<3.12.4"
|
|
||||||
langchain-core = "^0.3.0.dev"
|
|
||||||
airbyte = "^0.7.3"
|
|
||||||
pydantic = ">=1.10.8,<2"
|
|
||||||
|
|
||||||
[tool.poetry.group.test]
|
|
||||||
optional = true
|
|
||||||
|
|
||||||
[tool.poetry.group.test.dependencies]
|
|
||||||
pytest = "^7.4.3"
|
|
||||||
pytest-asyncio = "^0.23.2"
|
|
||||||
langchain-core = { path = "../../core", develop = true }
|
|
||||||
|
|
||||||
[tool.poetry.group.codespell]
|
|
||||||
optional = true
|
|
||||||
|
|
||||||
[tool.poetry.group.codespell.dependencies]
|
|
||||||
codespell = "^2.2.6"
|
|
||||||
|
|
||||||
[tool.poetry.group.test_integration]
|
|
||||||
optional = true
|
|
||||||
|
|
||||||
[tool.poetry.group.test_integration.dependencies]
|
|
||||||
|
|
||||||
[tool.poetry.group.lint]
|
|
||||||
optional = true
|
|
||||||
|
|
||||||
[tool.poetry.group.lint.dependencies]
|
|
||||||
ruff = "^0.1.8"
|
|
||||||
|
|
||||||
[tool.poetry.group.typing.dependencies]
|
|
||||||
mypy = "^1.7.1"
|
|
||||||
langchain-core = { path = "../../core", develop = true }
|
|
||||||
langchain-text-splitters = { path = "../../text-splitters", develop = true }
|
|
||||||
langchain = { path = "../../langchain", develop = true }
|
|
||||||
|
|
||||||
[tool.poetry.group.dev]
|
|
||||||
optional = true
|
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
|
||||||
langchain-core = { path = "../../core", develop = true }
|
|
||||||
|
|
||||||
[tool.ruff.lint]
|
|
||||||
select = [
|
|
||||||
"E", # pycodestyle
|
|
||||||
"F", # pyflakes
|
|
||||||
"I", # isort
|
|
||||||
"T201", # print
|
|
||||||
]
|
|
||||||
|
|
||||||
[tool.mypy]
|
|
||||||
disallow_untyped_defs = "True"
|
|
||||||
|
|
||||||
[tool.coverage.run]
|
|
||||||
omit = ["tests/*"]
|
|
||||||
|
|
||||||
[build-system]
|
|
||||||
requires = ["poetry-core>=1.0.0"]
|
|
||||||
build-backend = "poetry.core.masonry.api"
|
|
||||||
|
|
||||||
[tool.pytest.ini_options]
|
|
||||||
# --strict-markers will raise errors on unknown marks.
|
|
||||||
# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks
|
|
||||||
#
|
|
||||||
# https://docs.pytest.org/en/7.1.x/reference/reference.html
|
|
||||||
# --strict-config any warnings encountered while parsing the `pytest`
|
|
||||||
# section of the configuration file raise errors.
|
|
||||||
addopts = "--strict-markers --strict-config --durations=5"
|
|
||||||
# Registering custom markers.
|
|
||||||
# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
|
|
||||||
markers = [
|
|
||||||
"compile: mark placeholder test used to compile integration tests without running them",
|
|
||||||
]
|
|
||||||
asyncio_mode = "auto"
|
|
@ -1,17 +0,0 @@
|
|||||||
import importlib.util
import sys
import traceback


def try_import(path: str) -> bool:
    """Import the Python file at *path*; return True on success.

    On failure, print the offending file name and the traceback (so CI logs
    show exactly which module broke) and return False.
    """
    try:
        # spec_from_file_location + exec_module replaces the deprecated
        # SourceFileLoader(...).load_module() API (removed in Python 3.12).
        spec = importlib.util.spec_from_file_location("x", path)
        if spec is None or spec.loader is None:
            raise ImportError(f"cannot create import spec for {path!r}")
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
    except Exception:
        print(path)  # noqa: T201
        traceback.print_exc()
        print()  # noqa: T201
        return False
    return True


if __name__ == "__main__":
    # Check every file passed on the command line; report all failures
    # before exiting so a single run surfaces every broken import.
    has_failure = False
    for file in sys.argv[1:]:
        if not try_import(file):
            has_failure = True
    sys.exit(1 if has_failure else 0)
|
|
@ -1,18 +0,0 @@
|
|||||||
#!/bin/bash

set -eu

# Initialize a variable to keep track of errors
errors=0

# make sure not importing from langchain, langchain_experimental, or langchain_community
# NOTE: grep exits non-zero when nothing matches; because each grep is the
# first command of an `&&` list, `set -e` does not abort on that expected
# "no match" outcome — only a real match increments the error count.
git --no-pager grep '^from langchain\.' . && errors=$((errors+1))
git --no-pager grep '^from langchain_experimental\.' . && errors=$((errors+1))
git --no-pager grep '^from langchain_community\.' . && errors=$((errors+1))

# Decide on an exit status based on the errors
if [ "$errors" -gt 0 ]; then
    exit 1
else
    exit 0
fi
|
|
@ -1,7 +0,0 @@
|
|||||||
import pytest
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.compile
def test_placeholder() -> None:
    """Used for compiling integration tests without running any real tests."""
|
|
@ -1,28 +0,0 @@
|
|||||||
"""Test Airbyte embeddings."""
|
|
||||||
|
|
||||||
import os
|
|
||||||
|
|
||||||
from langchain_airbyte import AirbyteLoader
|
|
||||||
|
|
||||||
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
|
|
||||||
|
|
||||||
|
|
||||||
def test_load_github() -> None:
    """Test loading from GitHub."""
    loader = AirbyteLoader(
        source="source-github",
        stream="issues",
        config={
            "repositories": ["airbytehq/quickstarts"],
            "credentials": {"personal_access_token": GITHUB_TOKEN},
        },
    )
    documents = loader.load()
    assert len(documents) > 0
    # At least one issue should carry a non-empty "body" in its metadata.
    found_body = any(doc.metadata.get("body") for doc in documents)
    assert found_body, "No documents with body found"
|
|
@ -1,77 +0,0 @@
|
|||||||
from langchain_core.prompts import PromptTemplate
|
|
||||||
|
|
||||||
from langchain_airbyte import AirbyteLoader
|
|
||||||
|
|
||||||
|
|
||||||
def test_initialization() -> None:
    """Constructing the loader against the faker source must not raise."""
    AirbyteLoader(source="source-faker", stream="users", config={"count": 3})
|
|
||||||
|
|
||||||
|
|
||||||
def test_load() -> None:
    """Eager load() returns exactly the configured number of documents."""
    loader = AirbyteLoader(
        source="source-faker",
        stream="users",
        config={"count": 5},
    )
    assert len(loader.load()) == 5
|
|
||||||
|
|
||||||
|
|
||||||
def test_lazy_load() -> None:
    """lazy_load() yields exactly the configured number of documents."""
    loader = AirbyteLoader(
        source="source-faker",
        stream="users",
        config={"count": 3},
    )
    assert sum(1 for _ in loader.lazy_load()) == 3
|
|
||||||
|
|
||||||
|
|
||||||
async def test_alazy_load() -> None:
    """Async alazy_load() yields exactly the configured number of documents."""
    loader = AirbyteLoader(
        source="source-faker",
        stream="users",
        config={"count": 3},
    )
    seen = 0
    async for _ in loader.alazy_load():
        seen += 1
    assert seen == 3
|
|
||||||
|
|
||||||
|
|
||||||
def test_load_with_template() -> None:
    """A PromptTemplate renders each record into page_content."""
    loader = AirbyteLoader(
        source="source-faker",
        stream="users",
        config={"count": 3},
        template=PromptTemplate.from_template("My name is {name}"),
    )
    documents = loader.load()
    assert len(documents) == 3
    for doc in documents:
        # Template output plus the record's own fields as metadata.
        assert doc.page_content.startswith("My name is ")
        assert doc.metadata["name"]  # should have a name
|
|
||||||
|
|
||||||
|
|
||||||
def test_load_no_metadata() -> None:
    """include_metadata=False yields documents with empty metadata."""
    loader = AirbyteLoader(
        source="source-faker",
        stream="users",
        config={"count": 3},
        include_metadata=False,
    )
    documents = loader.load()
    assert len(documents) == 3
    assert all(doc.metadata == {} for doc in documents)
|
|
@ -1,9 +0,0 @@
|
|||||||
from langchain_airbyte import __all__
|
|
||||||
|
|
||||||
EXPECTED_ALL = [
|
|
||||||
"AirbyteLoader",
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def test_all_imports() -> None:
    """The package's public surface matches the expected export list."""
    assert sorted(__all__) == sorted(EXPECTED_ALL)
|
|
Loading…
Reference in New Issue
Block a user