mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-22 19:09:57 +00:00
core[patch], langchain[patch], experimental[patch]: import CI (#14414)
This commit is contained in:
@@ -40,6 +40,11 @@ docker_tests:
|
||||
docker build -t my-langchain-image:test .
|
||||
docker run --rm my-langchain-image:test
|
||||
|
||||
# Import every Python module under langchain/ on its own so that a module
# which cannot be imported makes the target fail.
#
# The prerequisite list is produced with `find` because GNU make's wildcard
# expansion does not treat `**` as a recursive glob: `langchain/**/*.py`
# only matches files exactly one directory below langchain/, so modules in
# deeper packages were never checked.
check_imports: $(shell find langchain -name '*.py')
	for f in $^ ; do \
		python -c "from importlib.machinery import SourceFileLoader; SourceFileLoader('x', '$$f').load_module()" || exit 1; \
	done
|
||||
|
||||
######################
|
||||
# LINTING AND FORMATTING
|
||||
######################
|
||||
@@ -53,7 +58,7 @@ lint_tests: PYTHON_FILES=tests
|
||||
|
||||
lint lint_diff lint_package lint_tests:
|
||||
./scripts/check_pydantic.sh .
|
||||
./scripts/check_imports.sh
|
||||
./scripts/lint_imports.sh
|
||||
poetry run ruff .
|
||||
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
|
||||
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I $(PYTHON_FILES)
|
||||
|
@@ -5,7 +5,6 @@ import json
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional
|
||||
|
||||
import requests
|
||||
import tiktoken
|
||||
from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator
|
||||
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
@@ -15,6 +14,18 @@ if TYPE_CHECKING:
|
||||
from github.PullRequest import PullRequest
|
||||
|
||||
|
||||
def _import_tiktoken() -> Any:
|
||||
"""Import tiktoken."""
|
||||
try:
|
||||
import tiktoken
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"tiktoken is not installed. "
|
||||
"Please install it with `pip install tiktoken`"
|
||||
)
|
||||
return tiktoken
|
||||
|
||||
|
||||
class GitHubAPIWrapper(BaseModel):
|
||||
"""Wrapper for GitHub API."""
|
||||
|
||||
@@ -385,6 +396,7 @@ class GitHubAPIWrapper(BaseModel):
|
||||
dict: A dictionary containing the issue's title,
|
||||
body, and comments as a string
|
||||
"""
|
||||
tiktoken = _import_tiktoken()
|
||||
MAX_TOKENS_FOR_FILES = 3_000
|
||||
pr_files = []
|
||||
pr = self.github_repo_instance.get_pull(number=int(pr_number))
|
||||
@@ -453,6 +465,7 @@ class GitHubAPIWrapper(BaseModel):
|
||||
total_tokens = 0
|
||||
|
||||
def get_tokens(text: str) -> int:
|
||||
tiktoken = _import_tiktoken()
|
||||
return len(tiktoken.get_encoding("cl100k_base").encode(text))
|
||||
|
||||
def add_to_dict(data_dict: Dict[str, Any], key: str, value: str) -> None:
|
||||
|
@@ -1,71 +0,0 @@
|
||||
from typing import Optional, Tuple
|
||||
|
||||
import sqlalchemy
|
||||
from pgvector.sqlalchemy import Vector
|
||||
from sqlalchemy.dialects.postgresql import JSON, UUID
|
||||
from sqlalchemy.orm import Session, relationship
|
||||
|
||||
from langchain.vectorstores.pgvector import BaseModel
|
||||
|
||||
|
||||
class CollectionStore(BaseModel):
    """ORM model for a named collection that groups embeddings."""

    __tablename__ = "langchain_pg_collection"

    name = sqlalchemy.Column(sqlalchemy.String)
    cmetadata = sqlalchemy.Column(JSON)

    # Child rows are not deleted by the ORM (passive_deletes); the foreign
    # key declared on EmbeddingStore uses ON DELETE CASCADE instead.
    embeddings = relationship(
        "EmbeddingStore", back_populates="collection", passive_deletes=True
    )

    @classmethod
    def get_by_name(cls, session: Session, name: str) -> Optional["CollectionStore"]:
        """Return the first collection whose name equals ``name``, if any."""
        matching = session.query(cls).filter(cls.name == name)
        return matching.first()  # type: ignore

    @classmethod
    def get_or_create(
        cls,
        session: Session,
        name: str,
        cmetadata: Optional[dict] = None,
    ) -> Tuple["CollectionStore", bool]:
        """
        Fetch the collection called ``name``, creating it when it is missing.

        Returns a ``(collection, created)`` pair; ``created`` is True only
        when a new collection was added to the session and committed.
        """
        existing = cls.get_by_name(session, name)
        if existing:
            return existing, False

        fresh = cls(name=name, cmetadata=cmetadata)
        session.add(fresh)
        session.commit()
        return fresh, True
|
||||
|
||||
|
||||
class EmbeddingStore(BaseModel):
    """ORM model for a single stored embedding and its source document."""

    __tablename__ = "langchain_pg_embedding"

    # Reference to the owning collection; ON DELETE CASCADE is declared on
    # the foreign key itself.
    collection_id = sqlalchemy.Column(
        UUID(as_uuid=True),
        sqlalchemy.ForeignKey(
            CollectionStore.__tablename__ + ".uuid", ondelete="CASCADE"
        ),
    )
    collection = relationship(CollectionStore, back_populates="embeddings")

    embedding: Vector = sqlalchemy.Column(Vector(None))
    document = sqlalchemy.Column(sqlalchemy.String, nullable=True)
    cmetadata = sqlalchemy.Column(JSON, nullable=True)

    # Identifier chosen freely by the user.
    custom_id = sqlalchemy.Column(sqlalchemy.String, nullable=True)
|
@@ -7,7 +7,6 @@ import logging
|
||||
import uuid
|
||||
from functools import partial
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Callable,
|
||||
Dict,
|
||||
@@ -22,8 +21,8 @@ from typing import (
|
||||
import numpy as np
|
||||
import sqlalchemy
|
||||
from sqlalchemy import delete
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy.dialects.postgresql import JSON, UUID
|
||||
from sqlalchemy.orm import Session, relationship
|
||||
|
||||
try:
|
||||
from sqlalchemy.orm import declarative_base
|
||||
@@ -37,9 +36,6 @@ from langchain_core.vectorstores import VectorStore
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain.vectorstores._pgvector_data_models import CollectionStore
|
||||
|
||||
|
||||
class DistanceStrategy(str, enum.Enum):
|
||||
"""Enumerator of the Distance strategies."""
|
||||
@@ -64,6 +60,74 @@ class BaseModel(Base):
|
||||
uuid = sqlalchemy.Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
|
||||
|
||||
class CollectionStore(BaseModel):
    """ORM model for a named collection that groups embeddings."""

    __tablename__ = "langchain_pg_collection"

    name = sqlalchemy.Column(sqlalchemy.String)
    cmetadata = sqlalchemy.Column(JSON)

    # Child rows are not deleted by the ORM (passive_deletes); the related
    # "EmbeddingStore" model is resolved by name when mappers are configured.
    embeddings = relationship(
        "EmbeddingStore", back_populates="collection", passive_deletes=True
    )

    @classmethod
    def get_by_name(cls, session: Session, name: str) -> Optional["CollectionStore"]:
        """Return the first collection whose name equals ``name``, if any."""
        matching = session.query(cls).filter(cls.name == name)
        return matching.first()  # type: ignore

    @classmethod
    def get_or_create(
        cls,
        session: Session,
        name: str,
        cmetadata: Optional[dict] = None,
    ) -> Tuple["CollectionStore", bool]:
        """
        Fetch the collection called ``name``, creating it when it is missing.

        Returns a ``(collection, created)`` pair; ``created`` is True only
        when a new collection was added to the session and committed.
        """
        existing = cls.get_by_name(session, name)
        if existing:
            return existing, False

        fresh = cls(name=name, cmetadata=cmetadata)
        session.add(fresh)
        session.commit()
        return fresh, True
|
||||
|
||||
|
||||
# Cache for the lazily built EmbeddingStore model class; filled on the first
# call to _get_embedding_store().
_embedding_store_cls: Any = None


def _get_embedding_store() -> Any:
    """Return the ``EmbeddingStore`` ORM model, building it on first use.

    ``pgvector`` is imported inside the function so that importing this
    module does not require that optional dependency.

    The class is created once and then reused. Declaring a second class with
    the same ``__tablename__`` on the same declarative base makes SQLAlchemy
    raise "Table 'langchain_pg_embedding' is already defined for this
    MetaData instance", so repeated calls must return the same class object.

    Returns:
        The ``EmbeddingStore`` model class.

    Raises:
        ImportError: If ``pgvector`` is not installed.
    """
    global _embedding_store_cls
    if _embedding_store_cls is not None:
        return _embedding_store_cls

    from pgvector.sqlalchemy import Vector

    class EmbeddingStore(BaseModel):
        """Embedding store."""

        __tablename__ = "langchain_pg_embedding"

        collection_id = sqlalchemy.Column(
            UUID(as_uuid=True),
            sqlalchemy.ForeignKey(
                f"{CollectionStore.__tablename__}.uuid",
                ondelete="CASCADE",
            ),
        )
        collection = relationship(CollectionStore, back_populates="embeddings")

        embedding: Vector = sqlalchemy.Column(Vector(None))
        document = sqlalchemy.Column(sqlalchemy.String, nullable=True)
        cmetadata = sqlalchemy.Column(JSON, nullable=True)

        # custom_id : any user defined id
        custom_id = sqlalchemy.Column(sqlalchemy.String, nullable=True)

    _embedding_store_cls = EmbeddingStore
    return EmbeddingStore
|
||||
|
||||
|
||||
def _results_to_docs(docs_and_scores: Any) -> List[Document]:
    """Drop the scores from ``(document, score)`` pairs, keeping their order."""
    documents = []
    for document, _score in docs_and_scores:
        documents.append(document)
    return documents
|
||||
@@ -138,13 +202,9 @@ class PGVector(VectorStore):
|
||||
) -> None:
|
||||
"""Initialize the store."""
|
||||
self.create_vector_extension()
|
||||
from langchain.vectorstores._pgvector_data_models import (
|
||||
CollectionStore,
|
||||
EmbeddingStore,
|
||||
)
|
||||
|
||||
self.CollectionStore = CollectionStore
|
||||
self.EmbeddingStore = EmbeddingStore
|
||||
self.EmbeddingStore = _get_embedding_store()
|
||||
self.create_tables_if_not_exists()
|
||||
self.create_collection()
|
||||
|
||||
|
Reference in New Issue
Block a user