Mirror of https://github.com/hwchase17/langchain.git

commit b3f226e8f8 (parent ba083887e5)
core[patch], langchain[patch], experimental[patch]: import CI (#14414)
.github/workflows/_all_ci.yml (4 changes)

@@ -52,8 +52,8 @@ jobs:
       working-directory: ${{ inputs.working-directory }}
     secrets: inherit

-  pydantic-compatibility:
-    uses: ./.github/workflows/_pydantic_compatibility.yml
+  dependencies:
+    uses: ./.github/workflows/_dependencies.yml
     with:
       working-directory: ${{ inputs.working-directory }}
     secrets: inherit
.github/workflows/_pydantic_compatibility.yml → .github/workflows/_dependencies.yml (renamed)

@@ -1,4 +1,4 @@
-name: pydantic v1/v2 compatibility
+name: dependencies

 on:
   workflow_call:

@@ -28,7 +28,7 @@ jobs:
       - "3.9"
       - "3.10"
       - "3.11"
-    name: Pydantic v1/v2 compatibility - Python ${{ matrix.python-version }}
+    name: dependencies - Python ${{ matrix.python-version }}
     steps:
       - uses: actions/checkout@v4

@@ -42,7 +42,15 @@ jobs:

       - name: Install dependencies
         shell: bash
-        run: poetry install --with test
+        run: poetry install
+
+      - name: Check imports with base dependencies
+        shell: bash
+        run: poetry run make check_imports
+
+      - name: Install test dependencies
+        shell: bash
+        run: poetry install --with test

       - name: Install langchain editable
         working-directory: ${{ inputs.working-directory }}
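The ordering of those steps is the point of the change: the workflow installs only base dependencies, runs `make check_imports`, and only then installs the test extras. A hypothetical module like the sketch below imports fine in a full dev environment but would fail that first step, which is exactly the regression this CI is meant to catch (`pytest` here stands in for any test-only package):

    # hypothetical module inside the package being checked
    import pytest  # test-only dependency; under a base install this raises
                   # ImportError as soon as the module is loaded

    def helper() -> None:
        ...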
libs/core/Makefile

@@ -15,6 +15,10 @@ tests:
 test_watch:
 	poetry run ptw --snapshot-update --now . -- -vv -x tests/unit_tests

+check_imports: langchain_core/**/*.py
+	for f in $^ ; do \
+		python -c "from importlib.machinery import SourceFileLoader; SourceFileLoader('x', '$$f').load_module()" || exit 1; \
+	done
+
 extended_tests:
 	poetry run pytest --only-extended $(TEST_FILE)

@@ -32,7 +36,7 @@ lint_tests: PYTHON_FILES=tests

 lint lint_diff lint_package lint_tests:
 	./scripts/check_pydantic.sh .
-	./scripts/check_imports.sh
+	./scripts/lint_imports.sh
 	poetry run ruff .
 	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
 	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I $(PYTHON_FILES)
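For reference, the `check_imports` target loads every matched file with `SourceFileLoader`, so any module-level import of a missing dependency fails fast. A standalone Python sketch of the same check (the glob pattern mirrors the Makefile; the script itself is an illustration, not part of the commit):

    import glob
    from importlib.machinery import SourceFileLoader

    # Load each module in isolation, as the Makefile loop does; a module that
    # imports an uninstalled package at import time raises here.
    for path in glob.glob("langchain_core/**/*.py", recursive=True):
        try:
            SourceFileLoader("x", path).load_module()
        except Exception as err:
            raise SystemExit(f"import check failed for {path}: {err}")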
libs/experimental/Makefile

@@ -21,6 +21,11 @@ extended_tests:

 integration_tests:
 	poetry run pytest tests/integration_tests

+check_imports: langchain_experimental/**/*.py
+	for f in $^ ; do \
+		python -c "from importlib.machinery import SourceFileLoader; SourceFileLoader('x', '$$f').load_module()" || exit 1; \
+	done
+
+
 ######################
 # LINTING AND FORMATTING
libs/experimental/langchain_experimental/data_anonymizer/deanonymizer_mapping.py

@@ -1,10 +1,11 @@
 import re
 from collections import defaultdict
 from dataclasses import dataclass, field
-from typing import Dict, List
+from typing import TYPE_CHECKING, Dict, List

-from presidio_analyzer import RecognizerResult
-from presidio_anonymizer.entities import EngineResult
+if TYPE_CHECKING:
+    from presidio_analyzer import RecognizerResult
+    from presidio_anonymizer.entities import EngineResult

 MappingDataType = Dict[str, Dict[str, str]]

@@ -62,8 +63,8 @@ class DeanonymizerMapping:

 def create_anonymizer_mapping(
     original_text: str,
-    analyzer_results: List[RecognizerResult],
-    anonymizer_results: EngineResult,
+    analyzer_results: List["RecognizerResult"],
+    anonymizer_results: "EngineResult",
     is_reversed: bool = False,
 ) -> MappingDataType:
     """Creates or updates the mapping used to anonymize and/or deanonymize text.
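The `TYPE_CHECKING` guard keeps presidio a purely optional runtime dependency while preserving the annotations for static analysis. A minimal self-contained sketch of the pattern (`heavy_lib` and `Result` are hypothetical stand-ins):

    from typing import TYPE_CHECKING, List

    if TYPE_CHECKING:
        from heavy_lib import Result  # seen by type checkers, never executed

    # Quoted annotations are not evaluated at runtime, so this module imports
    # cleanly even when heavy_lib is not installed.
    def summarize(results: List["Result"]) -> int:
        return len(results)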
libs/experimental/langchain_experimental/data_anonymizer/presidio.py

@@ -23,28 +23,62 @@ from langchain_experimental.data_anonymizer.faker_presidio_mapping import (
     get_pseudoanonymizer_mapping,
 )

-try:
-    from presidio_analyzer import AnalyzerEngine
-    from presidio_analyzer.nlp_engine import NlpEngineProvider
-
-except ImportError as e:
-    raise ImportError(
-        "Could not import presidio_analyzer, please install with "
-        "`pip install presidio-analyzer`. You will also need to download a "
-        "spaCy model to use the analyzer, e.g. "
-        "`python -m spacy download en_core_web_lg`."
-    ) from e
-try:
-    from presidio_anonymizer import AnonymizerEngine
-    from presidio_anonymizer.entities import OperatorConfig
-except ImportError as e:
-    raise ImportError(
-        "Could not import presidio_anonymizer, please install with "
-        "`pip install presidio-anonymizer`."
-    ) from e
-
-if TYPE_CHECKING:
-    from presidio_analyzer import EntityRecognizer
+if TYPE_CHECKING:
+    from presidio_analyzer import AnalyzerEngine, EntityRecognizer
+    from presidio_analyzer.nlp_engine import NlpEngineProvider
+    from presidio_anonymizer import AnonymizerEngine
+    from presidio_anonymizer.entities import OperatorConfig
+
+
+def _import_analyzer_engine() -> "AnalyzerEngine":
+    try:
+        from presidio_analyzer import AnalyzerEngine
+
+    except ImportError as e:
+        raise ImportError(
+            "Could not import presidio_analyzer, please install with "
+            "`pip install presidio-analyzer`. You will also need to download a "
+            "spaCy model to use the analyzer, e.g. "
+            "`python -m spacy download en_core_web_lg`."
+        ) from e
+    return AnalyzerEngine
+
+
+def _import_nlp_engine_provider() -> "NlpEngineProvider":
+    try:
+        from presidio_analyzer.nlp_engine import NlpEngineProvider
+
+    except ImportError as e:
+        raise ImportError(
+            "Could not import presidio_analyzer, please install with "
+            "`pip install presidio-analyzer`. You will also need to download a "
+            "spaCy model to use the analyzer, e.g. "
+            "`python -m spacy download en_core_web_lg`."
+        ) from e
+    return NlpEngineProvider
+
+
+def _import_anonymizer_engine() -> "AnonymizerEngine":
+    try:
+        from presidio_anonymizer import AnonymizerEngine
+    except ImportError as e:
+        raise ImportError(
+            "Could not import presidio_anonymizer, please install with "
+            "`pip install presidio-anonymizer`."
+        ) from e
+    return AnonymizerEngine
+
+
+def _import_operator_config() -> "OperatorConfig":
+    try:
+        from presidio_anonymizer.entities import OperatorConfig
+    except ImportError as e:
+        raise ImportError(
+            "Could not import presidio_anonymizer, please install with "
+            "`pip install presidio-anonymizer`."
+        ) from e
+    return OperatorConfig
+

 # Configuring Anonymizer for multiple languages
 # Detailed description and examples can be found here:

@@ -89,6 +123,11 @@ class PresidioAnonymizerBase(AnonymizerBase):
             Defaults to None, in which case faker will be seeded randomly
             and provide random values.
         """
+        OperatorConfig = _import_operator_config()
+        AnalyzerEngine = _import_analyzer_engine()
+        NlpEngineProvider = _import_nlp_engine_provider()
+        AnonymizerEngine = _import_anonymizer_engine()
+
         self.analyzed_fields = (
             analyzed_fields
             if analyzed_fields is not None
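Each `_import_*` helper moves the hard presidio requirement from module import time to `PresidioAnonymizerBase.__init__`. A sketch of the same lazy-import pattern reduced to one generic dependency (`fancy_sdk` and `FancyClient` are hypothetical stand-ins):

    from typing import Any

    def _import_fancy_client() -> Any:
        try:
            from fancy_sdk import FancyClient
        except ImportError as e:
            raise ImportError(
                "Could not import fancy_sdk, please install with "
                "`pip install fancy-sdk`."
            ) from e
        return FancyClient

    class Wrapper:
        def __init__(self) -> None:
            # The ImportError now surfaces when a Wrapper is built,
            # not when this module is imported.
            FancyClient = _import_fancy_client()
            self.client = FancyClient()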
libs/langchain/Makefile

@@ -40,6 +40,11 @@ docker_tests:
 	docker build -t my-langchain-image:test .
 	docker run --rm my-langchain-image:test

+check_imports: langchain/**/*.py
+	for f in $^ ; do \
+		python -c "from importlib.machinery import SourceFileLoader; SourceFileLoader('x', '$$f').load_module()" || exit 1; \
+	done
+
 ######################
 # LINTING AND FORMATTING
 ######################

@@ -53,7 +58,7 @@ lint_tests: PYTHON_FILES=tests

 lint lint_diff lint_package lint_tests:
 	./scripts/check_pydantic.sh .
-	./scripts/check_imports.sh
+	./scripts/lint_imports.sh
 	poetry run ruff .
 	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
 	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I $(PYTHON_FILES)
libs/langchain/langchain/utilities/github.py

@@ -5,7 +5,6 @@ import json
 from typing import TYPE_CHECKING, Any, Dict, List, Optional

 import requests
-import tiktoken
 from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator

 from langchain.utils import get_from_dict_or_env

@@ -15,6 +14,18 @@ if TYPE_CHECKING:
     from github.PullRequest import PullRequest


+def _import_tiktoken() -> Any:
+    """Import tiktoken."""
+    try:
+        import tiktoken
+    except ImportError:
+        raise ImportError(
+            "tiktoken is not installed. "
+            "Please install it with `pip install tiktoken`"
+        )
+    return tiktoken
+
+
 class GitHubAPIWrapper(BaseModel):
     """Wrapper for GitHub API."""

@@ -385,6 +396,7 @@ class GitHubAPIWrapper(BaseModel):
             dict: A dictionary containing the issue's title,
             body, and comments as a string
         """
+        tiktoken = _import_tiktoken()
         MAX_TOKENS_FOR_FILES = 3_000
         pr_files = []
         pr = self.github_repo_instance.get_pull(number=int(pr_number))

@@ -453,6 +465,7 @@ class GitHubAPIWrapper(BaseModel):
         total_tokens = 0

         def get_tokens(text: str) -> int:
+            tiktoken = _import_tiktoken()
             return len(tiktoken.get_encoding("cl100k_base").encode(text))

         def add_to_dict(data_dict: Dict[str, Any], key: str, value: str) -> None:
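With this change, `get_tokens` only touches tiktoken when a pull request is actually summarized, so importing `langchain.utilities.github` no longer requires it. A usage sketch of the token counting the wrapper performs (the sample text is illustrative):

    import tiktoken

    enc = tiktoken.get_encoding("cl100k_base")
    tokens = enc.encode("diff --git a/Makefile b/Makefile")
    print(len(tokens))  # compared against budgets like MAX_TOKENS_FOR_FILES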
libs/langchain/langchain/vectorstores/_pgvector_data_models.py (deleted)

@@ -1,71 +0,0 @@
-from typing import Optional, Tuple
-
-import sqlalchemy
-from pgvector.sqlalchemy import Vector
-from sqlalchemy.dialects.postgresql import JSON, UUID
-from sqlalchemy.orm import Session, relationship
-
-from langchain.vectorstores.pgvector import BaseModel
-
-
-class CollectionStore(BaseModel):
-    """Collection store."""
-
-    __tablename__ = "langchain_pg_collection"
-
-    name = sqlalchemy.Column(sqlalchemy.String)
-    cmetadata = sqlalchemy.Column(JSON)
-
-    embeddings = relationship(
-        "EmbeddingStore",
-        back_populates="collection",
-        passive_deletes=True,
-    )
-
-    @classmethod
-    def get_by_name(cls, session: Session, name: str) -> Optional["CollectionStore"]:
-        return session.query(cls).filter(cls.name == name).first()  # type: ignore
-
-    @classmethod
-    def get_or_create(
-        cls,
-        session: Session,
-        name: str,
-        cmetadata: Optional[dict] = None,
-    ) -> Tuple["CollectionStore", bool]:
-        """
-        Get or create a collection.
-        Returns [Collection, bool] where the bool is True if the collection was created.
-        """
-        created = False
-        collection = cls.get_by_name(session, name)
-        if collection:
-            return collection, created
-
-        collection = cls(name=name, cmetadata=cmetadata)
-        session.add(collection)
-        session.commit()
-        created = True
-        return collection, created
-
-
-class EmbeddingStore(BaseModel):
-    """Embedding store."""
-
-    __tablename__ = "langchain_pg_embedding"
-
-    collection_id = sqlalchemy.Column(
-        UUID(as_uuid=True),
-        sqlalchemy.ForeignKey(
-            f"{CollectionStore.__tablename__}.uuid",
-            ondelete="CASCADE",
-        ),
-    )
-    collection = relationship(CollectionStore, back_populates="embeddings")
-
-    embedding: Vector = sqlalchemy.Column(Vector(None))
-    document = sqlalchemy.Column(sqlalchemy.String, nullable=True)
-    cmetadata = sqlalchemy.Column(JSON, nullable=True)
-
-    # custom_id : any user defined id
-    custom_id = sqlalchemy.Column(sqlalchemy.String, nullable=True)
libs/langchain/langchain/vectorstores/pgvector.py

@@ -7,7 +7,6 @@ import logging
 import uuid
 from functools import partial
 from typing import (
-    TYPE_CHECKING,
     Any,
     Callable,
     Dict,

@@ -22,8 +21,8 @@ from typing import (
 import numpy as np
 import sqlalchemy
 from sqlalchemy import delete
-from sqlalchemy.dialects.postgresql import UUID
-from sqlalchemy.orm import Session
+from sqlalchemy.dialects.postgresql import JSON, UUID
+from sqlalchemy.orm import Session, relationship

 try:
     from sqlalchemy.orm import declarative_base

@@ -37,9 +36,6 @@ from langchain_core.vectorstores import VectorStore
 from langchain.utils import get_from_dict_or_env
 from langchain.vectorstores.utils import maximal_marginal_relevance

-if TYPE_CHECKING:
-    from langchain.vectorstores._pgvector_data_models import CollectionStore
-

 class DistanceStrategy(str, enum.Enum):
     """Enumerator of the Distance strategies."""

@@ -64,6 +60,74 @@ class BaseModel(Base):
     uuid = sqlalchemy.Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)


+class CollectionStore(BaseModel):
+    """Collection store."""
+
+    __tablename__ = "langchain_pg_collection"
+
+    name = sqlalchemy.Column(sqlalchemy.String)
+    cmetadata = sqlalchemy.Column(JSON)
+
+    embeddings = relationship(
+        "EmbeddingStore",
+        back_populates="collection",
+        passive_deletes=True,
+    )
+
+    @classmethod
+    def get_by_name(cls, session: Session, name: str) -> Optional["CollectionStore"]:
+        return session.query(cls).filter(cls.name == name).first()  # type: ignore
+
+    @classmethod
+    def get_or_create(
+        cls,
+        session: Session,
+        name: str,
+        cmetadata: Optional[dict] = None,
+    ) -> Tuple["CollectionStore", bool]:
+        """
+        Get or create a collection.
+        Returns [Collection, bool] where the bool is True if the collection was created.
+        """
+        created = False
+        collection = cls.get_by_name(session, name)
+        if collection:
+            return collection, created
+
+        collection = cls(name=name, cmetadata=cmetadata)
+        session.add(collection)
+        session.commit()
+        created = True
+        return collection, created
+
+
+def _get_embedding_store() -> Any:
+    from pgvector.sqlalchemy import Vector
+
+    class EmbeddingStore(BaseModel):
+        """Embedding store."""
+
+        __tablename__ = "langchain_pg_embedding"
+
+        collection_id = sqlalchemy.Column(
+            UUID(as_uuid=True),
+            sqlalchemy.ForeignKey(
+                f"{CollectionStore.__tablename__}.uuid",
+                ondelete="CASCADE",
+            ),
+        )
+        collection = relationship(CollectionStore, back_populates="embeddings")
+
+        embedding: Vector = sqlalchemy.Column(Vector(None))
+        document = sqlalchemy.Column(sqlalchemy.String, nullable=True)
+        cmetadata = sqlalchemy.Column(JSON, nullable=True)
+
+        # custom_id : any user defined id
+        custom_id = sqlalchemy.Column(sqlalchemy.String, nullable=True)
+
+    return EmbeddingStore
+
+
 def _results_to_docs(docs_and_scores: Any) -> List[Document]:
     """Return docs from docs and scores."""
     return [doc for doc, _ in docs_and_scores]

@@ -138,13 +202,9 @@ class PGVector(VectorStore):
     ) -> None:
         """Initialize the store."""
         self.create_vector_extension()
-        from langchain.vectorstores._pgvector_data_models import (
-            CollectionStore,
-            EmbeddingStore,
-        )

         self.CollectionStore = CollectionStore
-        self.EmbeddingStore = EmbeddingStore
+        self.EmbeddingStore = _get_embedding_store()
         self.create_tables_if_not_exists()
         self.create_collection()
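`_get_embedding_store` is the same lazy-import idea applied to a SQLAlchemy model: the class that needs `pgvector` is defined inside a factory, so importing `langchain.vectorstores.pgvector` never touches the package. A minimal sketch of this deferred-class pattern (`optional_pkg` and `SpecialField` are hypothetical):

    from typing import Any

    def _get_record_cls() -> Any:
        from optional_pkg import SpecialField  # imported only when called

        class Record:
            # The class body runs inside the factory, so SpecialField is
            # resolved only when _get_record_cls() is invoked.
            field = SpecialField()

        return Record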