airbyte: remove from master (#27837)

This commit is contained in:
Bagatur 2024-11-01 13:59:34 -07:00 committed by GitHub
parent ee63d21915
commit 002e1c9055
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 0 additions and 3422 deletions

View File

@ -1 +0,0 @@
__pycache__

View File

@ -1,21 +0,0 @@
MIT License
Copyright (c) 2024 LangChain, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -1,59 +0,0 @@
# Every target in this file names a command, not a file, so all of them are
# declared phony. Without this, a file or directory that happens to share a
# target's name (the package already has a `tests/` directory) would make the
# target look "up to date" and silently skip its recipe.
.PHONY: all help
.PHONY: test tests integration_test integration_tests test_watch docker_tests extended_tests
.PHONY: lint lint_diff lint_package lint_tests format format_diff
.PHONY: spell_check spell_fix check_imports

# Default target executed when no arguments are given to make.
all: help

# Path handed to pytest. `?=` lets the caller override it, e.g.
# `make test TEST_FILE=tests/unit_tests/test_imports.py`.
TEST_FILE ?= tests/unit_tests/

# The integration targets point the same pytest recipe at the integration suite.
integration_test integration_tests: TEST_FILE = tests/integration_tests/

test tests integration_test integration_tests:
	poetry run pytest $(TEST_FILE)

# Re-run the tests whenever files change (pytest-watcher).
test_watch:
	poetry run ptw --snapshot-update --now . -- -vv $(TEST_FILE)

######################
# LINTING AND FORMATTING
######################

# Files handed to the linters/formatters, and the mypy cache directory.
# Both are narrowed per target by the target-specific assignments below.
PYTHON_FILES=.
MYPY_CACHE=.mypy_cache
lint format: PYTHON_FILES=.
# Only the .py/.ipynb files that differ from master, relative to this package.
# Kept as a recursive (`=`) assignment on purpose so `git diff` only runs when
# one of the *_diff targets actually expands the variable.
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/partners/airbyte --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
lint_package: PYTHON_FILES=langchain_airbyte
lint_tests: PYTHON_FILES=tests
lint_tests: MYPY_CACHE=.mypy_cache_test

# Each recipe line is skipped when PYTHON_FILES is empty (e.g. lint_diff with
# no changed files). The mypy step is grouped in braces because the shell
# parses `A || B && C` as `(A || B) && C`; ungrouped, mypy would still be
# launched, with no file arguments, when PYTHON_FILES is empty.
lint lint_diff lint_package lint_tests:
	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
	[ "$(PYTHON_FILES)" = "" ] || { mkdir -p $(MYPY_CACHE) && poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE); }

format format_diff:
	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I --fix $(PYTHON_FILES)

spell_check:
	poetry run codespell --toml pyproject.toml

# Same as spell_check, but `-w` writes the corrections back to the files.
spell_fix:
	poetry run codespell --toml pyproject.toml -w

# The prerequisites are every .py file of the package; `$^` passes them all to
# the import-check script.
check_imports: $(shell find langchain_airbyte -name '*.py')
	poetry run python ./scripts/check_imports.py $^

######################
# HELP
######################

help:
	@echo '----'
	@echo 'check_imports				- check imports'
	@echo 'format                       - run code formatters'
	@echo 'lint                         - run linters'
	@echo 'test                         - run unit tests'
	@echo 'tests                        - run unit tests'
	@echo 'test TEST_FILE=<test_file>   - run all tests in file'

View File

@ -1,27 +0,0 @@
# langchain-airbyte
This package contains the LangChain integration with Airbyte.
## Installation
```bash
pip install -U langchain-airbyte
```
The integration package doesn't have any global environment variables that need to be
set, but some integrations (e.g. `source-github`) may need credentials passed in.
## Document Loaders
The `AirbyteLoader` class exposes a single document loader for Airbyte sources.
```python
from langchain_airbyte import AirbyteLoader
loader = AirbyteLoader(
source="source-faker",
stream="users",
config={"count": 100},
)
docs = loader.load()
```

View File

@ -1,3 +0,0 @@
from langchain_airbyte.document_loaders import AirbyteLoader
__all__ = ["AirbyteLoader"]

View File

@ -1,127 +0,0 @@
"""Airbyte document loaders."""
from __future__ import annotations
from typing import (
TYPE_CHECKING,
Any,
AsyncIterator,
Dict,
Iterator,
List,
Mapping,
Optional,
TypeVar,
)
import airbyte as ab
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import run_in_executor
from langchain_core.vectorstores import VectorStore
if TYPE_CHECKING:
from langchain_text_splitters import TextSplitter
VST = TypeVar("VST", bound=VectorStore)
class AirbyteLoader:
    """Load one stream of an Airbyte source as LangChain ``Document`` objects.

    The source is obtained with ``airbyte.get_source`` at construction time.
    Without a ``template`` the documents produced by the source's
    ``get_documents`` are converted one-to-one; with a ``template`` every raw
    record from ``get_records`` is rendered through the template instead.

    Example:
        .. code-block:: python

            from langchain_airbyte import AirbyteLoader

            loader = AirbyteLoader(
                source="source-github",
                stream="pull_requests",
            )
            documents = loader.lazy_load()
    """

    def __init__(
        self,
        source: str,
        stream: str,
        *,
        config: Optional[Dict] = None,
        include_metadata: bool = True,
        template: Optional[PromptTemplate] = None,
    ):
        """Create the loader.

        Args:
            source: Name of the Airbyte source, passed to ``ab.get_source``.
            stream: Name of the single stream to read from that source.
            config: Optional configuration forwarded to ``ab.get_source``.
            include_metadata: When False, every Document gets empty metadata.
            template: Optional prompt template used to render each record into
                the page content.
        """
        self._stream = stream
        self._template = template
        self._include_metadata = include_metadata
        self._airbyte_source = ab.get_source(source, config=config, streams=[stream])

    def load(self) -> List[Document]:
        """Eagerly read the whole stream and return it as a list."""
        return [doc for doc in self.lazy_load()]

    def load_and_split(
        self, text_splitter: Optional[TextSplitter] = None
    ) -> List[Document]:
        """Load the stream and split the Documents into chunks.

        Args:
            text_splitter: Splitter used to chunk the documents. When omitted,
                a ``RecursiveCharacterTextSplitter`` with default settings is
                created.

        Returns:
            The chunks, as a list of Documents.

        Raises:
            ImportError: No splitter was given and ``langchain_text_splitters``
                cannot be imported.
        """
        if text_splitter is not None:
            splitter: TextSplitter = text_splitter
        else:
            # Imported lazily so the package is only needed for the default.
            try:
                from langchain_text_splitters import RecursiveCharacterTextSplitter
            except ImportError as e:
                raise ImportError(
                    "Unable to import from langchain_text_splitters. Please specify "
                    "text_splitter or install langchain_text_splitters with "
                    "`pip install -U langchain-text-splitters`."
                ) from e
            splitter = RecursiveCharacterTextSplitter()
        return splitter.split_documents(self.lazy_load())

    def lazy_load(self) -> Iterator[Document]:
        """Yield the stream's Documents one at a time."""
        if self._template:
            # A template was supplied: render every raw record through it.
            stream_records: Iterator[Mapping[str, Any]] = (
                self._airbyte_source.get_records(self._stream)
            )
            for record in stream_records:
                metadata = dict(record) if self._include_metadata else {}
                yield Document(
                    page_content=self._template.format(**record), metadata=metadata
                )
            return

        # No template: convert the source's own documents.
        for document in self._airbyte_source.get_documents(self._stream):
            if self._include_metadata:
                metadata = {
                    **document.metadata,
                    "_last_modified": document.last_modified,
                    "_id": document.id,
                }
            else:
                metadata = {}
            yield Document(page_content=document.content, metadata=metadata)

    async def alazy_load(self) -> AsyncIterator[Document]:
        """Async counterpart of ``lazy_load``.

        Every step of the synchronous iterator is delegated to
        ``run_in_executor``.
        """
        sentinel = object()  # returned by next() once the iterator is exhausted
        iterator = await run_in_executor(None, self.lazy_load)
        while True:
            item = await run_in_executor(None, next, iterator, sentinel)  # type: ignore[call-arg, arg-type]
            if item is sentinel:
                break
            yield item  # type: ignore[misc]

File diff suppressed because it is too large Load Diff

View File

@ -1,88 +0,0 @@
[tool.poetry]
name = "langchain-airbyte"
version = "0.1.1"
description = "An integration package connecting Airbyte and LangChain"
authors = []
readme = "README.md"
repository = "https://github.com/langchain-ai/langchain"
license = "MIT"
[tool.poetry.urls]
"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/airbyte"
"Release Notes" = "https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-airbyte%3D%3D0%22&expanded=true"
[tool.poetry.dependencies]
python = ">=3.9,<3.12.4"
langchain-core = "^0.3.0.dev"
airbyte = "^0.7.3"
pydantic = ">=1.10.8,<2"
[tool.poetry.group.test]
optional = true
[tool.poetry.group.test.dependencies]
pytest = "^7.4.3"
pytest-asyncio = "^0.23.2"
langchain-core = { path = "../../core", develop = true }
[tool.poetry.group.codespell]
optional = true
[tool.poetry.group.codespell.dependencies]
codespell = "^2.2.6"
[tool.poetry.group.test_integration]
optional = true
[tool.poetry.group.test_integration.dependencies]
[tool.poetry.group.lint]
optional = true
[tool.poetry.group.lint.dependencies]
ruff = "^0.1.8"
[tool.poetry.group.typing.dependencies]
mypy = "^1.7.1"
langchain-core = { path = "../../core", develop = true }
langchain-text-splitters = { path = "../../text-splitters", develop = true }
langchain = { path = "../../langchain", develop = true }
[tool.poetry.group.dev]
optional = true
[tool.poetry.group.dev.dependencies]
langchain-core = { path = "../../core", develop = true }
[tool.ruff.lint]
select = [
"E", # pycodestyle
"F", # pyflakes
"I", # isort
"T201", # print
]
[tool.mypy]
# Written as a real TOML boolean (lower-case, unquoted) rather than the string
# "True", as mypy's pyproject.toml format specifies.
disallow_untyped_defs = true
[tool.coverage.run]
omit = ["tests/*"]
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
[tool.pytest.ini_options]
# --strict-markers will raise errors on unknown marks.
# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks
#
# https://docs.pytest.org/en/7.1.x/reference/reference.html
# --strict-config any warnings encountered while parsing the `pytest`
# section of the configuration file raise errors.
addopts = "--strict-markers --strict-config --durations=5"
# Registering custom markers.
# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
markers = [
"compile: mark placeholder test used to compile integration tests without running them",
]
asyncio_mode = "auto"

View File

@ -1,17 +0,0 @@
import sys
import traceback
from importlib.machinery import SourceFileLoader
if __name__ == "__main__":
    # Try to load every file named on the command line; report each one that
    # raises and exit non-zero if any did.
    failed = False
    for path in sys.argv[1:]:
        try:
            SourceFileLoader("x", path).load_module()
        except Exception:
            failed = True
            print(path)  # noqa: T201
            traceback.print_exc()
            print()  # noqa: T201

    sys.exit(int(failed))

View File

@ -1,18 +0,0 @@
#!/bin/bash
set -eu

# Number of forbidden packages that are imported somewhere in the tree.
errors=0

# make sure not importing from langchain, langchain_experimental, or langchain_community
for pkg in langchain langchain_experimental langchain_community; do
  # git grep prints the offending lines and succeeds only when it finds a match.
  if git --no-pager grep "^from ${pkg}\." .; then
    errors=$((errors+1))
  fi
done

# Fail when at least one forbidden import was found.
if [ "$errors" -gt 0 ]; then
  exit 1
fi
exit 0

View File

@ -1,7 +0,0 @@
import pytest
@pytest.mark.compile
def test_placeholder() -> None:
    """Placeholder so the integration tests can be compiled without running any."""

View File

@ -1,28 +0,0 @@
"""Test the Airbyte document loader."""
import os
from langchain_airbyte import AirbyteLoader
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
def test_load_github() -> None:
    """Load the ``issues`` stream of ``source-github`` and check the metadata."""
    github_config = {
        "repositories": ["airbytehq/quickstarts"],
        "credentials": {"personal_access_token": GITHUB_TOKEN},
    }
    loader = AirbyteLoader(
        source="source-github",
        stream="issues",
        config=github_config,
    )
    documents = loader.load()
    assert len(documents) > 0

    # At least one document must carry a non-empty "body" entry in its metadata.
    found_body = any(
        "body" in doc.metadata and doc.metadata["body"] for doc in documents
    )
    assert found_body, "No documents with body found"

View File

@ -1,77 +0,0 @@
from langchain_core.prompts import PromptTemplate
from langchain_airbyte import AirbyteLoader
def test_initialization() -> None:
    """Constructing a loader for the faker source must not raise."""
    faker_config = {"count": 3}
    AirbyteLoader(source="source-faker", stream="users", config=faker_config)
def test_load() -> None:
    """``load`` returns one document per record requested in the config."""
    loader = AirbyteLoader(
        source="source-faker", stream="users", config={"count": 5}
    )
    loaded = loader.load()
    assert len(loaded) == 5
def test_lazy_load() -> None:
    """``lazy_load`` yields one document per record requested in the config."""
    loader = AirbyteLoader(
        source="source-faker", stream="users", config={"count": 3}
    )
    collected = list(loader.lazy_load())
    assert len(collected) == 3
async def test_alazy_load() -> None:
    """``alazy_load`` yields one document per record requested in the config."""
    loader = AirbyteLoader(
        source="source-faker", stream="users", config={"count": 3}
    )
    # Drain the async generator and count what came out.
    collected = [doc async for doc in loader.alazy_load()]
    assert len(collected) == 3
def test_load_with_template() -> None:
    """With a template, the page content is the rendered template."""
    name_template = PromptTemplate.from_template("My name is {name}")
    loader = AirbyteLoader(
        source="source-faker",
        stream="users",
        config={"count": 3},
        template=name_template,
    )
    loaded = loader.load()
    assert len(loaded) == 3
    for document in loaded:
        assert document.page_content.startswith("My name is ")
        assert document.metadata["name"]  # should have a name
def test_load_no_metadata() -> None:
    """With ``include_metadata=False`` every document has empty metadata."""
    loader = AirbyteLoader(
        source="source-faker",
        stream="users",
        config={"count": 3},
        include_metadata=False,
    )
    loaded = loader.load()
    assert len(loaded) == 3
    for document in loaded:
        assert document.metadata == {}

View File

@ -1,9 +0,0 @@
from langchain_airbyte import __all__
EXPECTED_ALL = [
"AirbyteLoader",
]
def test_all_imports() -> None:
    """The package's ``__all__`` must list exactly the expected public names."""
    expected = sorted(EXPECTED_ALL)
    exported = sorted(__all__)
    assert expected == exported