core[patch], langchain[patch], experimental[patch]: import CI (#14414)

This commit is contained in:
Erick Friis
2023-12-08 11:28:55 -08:00
committed by GitHub
parent ba083887e5
commit b3f226e8f8
12 changed files with 177 additions and 113 deletions

View File

@@ -40,6 +40,11 @@ docker_tests:
docker build -t my-langchain-image:test .
docker run --rm my-langchain-image:test
check_imports: langchain/**/*.py
for f in $^ ; do \
python -c "from importlib.machinery import SourceFileLoader; SourceFileLoader('x', '$$f').load_module()" || exit 1; \
done
######################
# LINTING AND FORMATTING
######################
@@ -53,7 +58,7 @@ lint_tests: PYTHON_FILES=tests
lint lint_diff lint_package lint_tests:
./scripts/check_pydantic.sh .
./scripts/check_imports.sh
./scripts/lint_imports.sh
poetry run ruff .
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I $(PYTHON_FILES)

View File

@@ -5,7 +5,6 @@ import json
from typing import TYPE_CHECKING, Any, Dict, List, Optional
import requests
import tiktoken
from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator
from langchain.utils import get_from_dict_or_env
@@ -15,6 +14,18 @@ if TYPE_CHECKING:
from github.PullRequest import PullRequest
def _import_tiktoken() -> Any:
"""Import tiktoken."""
try:
import tiktoken
except ImportError:
raise ImportError(
"tiktoken is not installed. "
"Please install it with `pip install tiktoken`"
)
return tiktoken
class GitHubAPIWrapper(BaseModel):
"""Wrapper for GitHub API."""
@@ -385,6 +396,7 @@ class GitHubAPIWrapper(BaseModel):
dict: A dictionary containing the issue's title,
body, and comments as a string
"""
tiktoken = _import_tiktoken()
MAX_TOKENS_FOR_FILES = 3_000
pr_files = []
pr = self.github_repo_instance.get_pull(number=int(pr_number))
@@ -453,6 +465,7 @@ class GitHubAPIWrapper(BaseModel):
total_tokens = 0
def get_tokens(text: str) -> int:
tiktoken = _import_tiktoken()
return len(tiktoken.get_encoding("cl100k_base").encode(text))
def add_to_dict(data_dict: Dict[str, Any], key: str, value: str) -> None:

View File

@@ -1,71 +0,0 @@
from typing import Optional, Tuple
import sqlalchemy
from pgvector.sqlalchemy import Vector
from sqlalchemy.dialects.postgresql import JSON, UUID
from sqlalchemy.orm import Session, relationship
from langchain.vectorstores.pgvector import BaseModel
class CollectionStore(BaseModel):
"""Collection store."""
__tablename__ = "langchain_pg_collection"
name = sqlalchemy.Column(sqlalchemy.String)
cmetadata = sqlalchemy.Column(JSON)
embeddings = relationship(
"EmbeddingStore",
back_populates="collection",
passive_deletes=True,
)
@classmethod
def get_by_name(cls, session: Session, name: str) -> Optional["CollectionStore"]:
return session.query(cls).filter(cls.name == name).first() # type: ignore
@classmethod
def get_or_create(
cls,
session: Session,
name: str,
cmetadata: Optional[dict] = None,
) -> Tuple["CollectionStore", bool]:
"""
Get or create a collection.
Returns [Collection, bool] where the bool is True if the collection was created.
"""
created = False
collection = cls.get_by_name(session, name)
if collection:
return collection, created
collection = cls(name=name, cmetadata=cmetadata)
session.add(collection)
session.commit()
created = True
return collection, created
class EmbeddingStore(BaseModel):
"""Embedding store."""
__tablename__ = "langchain_pg_embedding"
collection_id = sqlalchemy.Column(
UUID(as_uuid=True),
sqlalchemy.ForeignKey(
f"{CollectionStore.__tablename__}.uuid",
ondelete="CASCADE",
),
)
collection = relationship(CollectionStore, back_populates="embeddings")
embedding: Vector = sqlalchemy.Column(Vector(None))
document = sqlalchemy.Column(sqlalchemy.String, nullable=True)
cmetadata = sqlalchemy.Column(JSON, nullable=True)
# custom_id : any user defined id
custom_id = sqlalchemy.Column(sqlalchemy.String, nullable=True)

View File

@@ -7,7 +7,6 @@ import logging
import uuid
from functools import partial
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
@@ -22,8 +21,8 @@ from typing import (
import numpy as np
import sqlalchemy
from sqlalchemy import delete
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Session
from sqlalchemy.dialects.postgresql import JSON, UUID
from sqlalchemy.orm import Session, relationship
try:
from sqlalchemy.orm import declarative_base
@@ -37,9 +36,6 @@ from langchain_core.vectorstores import VectorStore
from langchain.utils import get_from_dict_or_env
from langchain.vectorstores.utils import maximal_marginal_relevance
if TYPE_CHECKING:
from langchain.vectorstores._pgvector_data_models import CollectionStore
class DistanceStrategy(str, enum.Enum):
"""Enumerator of the Distance strategies."""
@@ -64,6 +60,74 @@ class BaseModel(Base):
uuid = sqlalchemy.Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
class CollectionStore(BaseModel):
"""Collection store."""
__tablename__ = "langchain_pg_collection"
name = sqlalchemy.Column(sqlalchemy.String)
cmetadata = sqlalchemy.Column(JSON)
embeddings = relationship(
"EmbeddingStore",
back_populates="collection",
passive_deletes=True,
)
@classmethod
def get_by_name(cls, session: Session, name: str) -> Optional["CollectionStore"]:
return session.query(cls).filter(cls.name == name).first() # type: ignore
@classmethod
def get_or_create(
cls,
session: Session,
name: str,
cmetadata: Optional[dict] = None,
) -> Tuple["CollectionStore", bool]:
"""
Get or create a collection.
Returns [Collection, bool] where the bool is True if the collection was created.
"""
created = False
collection = cls.get_by_name(session, name)
if collection:
return collection, created
collection = cls(name=name, cmetadata=cmetadata)
session.add(collection)
session.commit()
created = True
return collection, created
def _get_embedding_store() -> Any:
from pgvector.sqlalchemy import Vector
class EmbeddingStore(BaseModel):
"""Embedding store."""
__tablename__ = "langchain_pg_embedding"
collection_id = sqlalchemy.Column(
UUID(as_uuid=True),
sqlalchemy.ForeignKey(
f"{CollectionStore.__tablename__}.uuid",
ondelete="CASCADE",
),
)
collection = relationship(CollectionStore, back_populates="embeddings")
embedding: Vector = sqlalchemy.Column(Vector(None))
document = sqlalchemy.Column(sqlalchemy.String, nullable=True)
cmetadata = sqlalchemy.Column(JSON, nullable=True)
# custom_id : any user defined id
custom_id = sqlalchemy.Column(sqlalchemy.String, nullable=True)
return EmbeddingStore
def _results_to_docs(docs_and_scores: Any) -> List[Document]:
"""Return docs from docs and scores."""
return [doc for doc, _ in docs_and_scores]
@@ -138,13 +202,9 @@ class PGVector(VectorStore):
) -> None:
"""Initialize the store."""
self.create_vector_extension()
from langchain.vectorstores._pgvector_data_models import (
CollectionStore,
EmbeddingStore,
)
self.CollectionStore = CollectionStore
self.EmbeddingStore = EmbeddingStore
self.EmbeddingStore = _get_embedding_store()
self.create_tables_if_not_exists()
self.create_collection()