mirror of
https://github.com/hwchase17/langchain.git
synced 2026-03-18 11:07:36 +00:00
env variable based fix for non chatgpt embeddings (#3964)
This is a simple fix for #2219. I also added some documentation for this environment variable.
This commit is contained in:
committed by
C.J. Jameson
parent
3c6fa9126a
commit
97abe337ba
@@ -24,6 +24,10 @@ To import this vectorstore:
|
||||
from langchain.vectorstores.pgvector import PGVector
|
||||
```
|
||||
|
||||
The PGVector embedding size is not autodetected. If you are using ChatGPT or any other embedding with 1536 dimensions,
|
||||
the default is fine. If you are going to use, for example, HuggingFaceEmbeddings, you need to set the environment variable named `PGVECTOR_VECTOR_SIZE`
|
||||
to the needed value. In the case of HuggingFaceEmbeddings, it would be: `PGVECTOR_VECTOR_SIZE=768`
|
||||
|
||||
### Usage
|
||||
|
||||
For a more detailed walkthrough of the PGVector Wrapper, see [this notebook](../modules/indexes/vectorstores/examples/pgvector.ipynb)
|
||||
|
||||
@@ -3,6 +3,7 @@ from __future__ import annotations
|
||||
|
||||
import enum
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
|
||||
|
||||
@@ -19,7 +20,7 @@ from langchain.vectorstores.base import VectorStore
|
||||
Base = declarative_base() # type: Any
|
||||
|
||||
|
||||
ADA_TOKEN_COUNT = 1536
|
||||
PGVECTOR_VECTOR_SIZE = int(os.getenv("PGVECTOR_VECTOR_SIZE", default="1536"))
|
||||
_LANGCHAIN_DEFAULT_COLLECTION_NAME = "langchain"
|
||||
|
||||
|
||||
@@ -79,7 +80,7 @@ class EmbeddingStore(BaseModel):
|
||||
)
|
||||
collection = relationship(CollectionStore, back_populates="embeddings")
|
||||
|
||||
embedding: Vector = sqlalchemy.Column(Vector(ADA_TOKEN_COUNT))
|
||||
embedding: Vector = sqlalchemy.Column(Vector(PGVECTOR_VECTOR_SIZE))
|
||||
document = sqlalchemy.Column(sqlalchemy.String, nullable=True)
|
||||
cmetadata = sqlalchemy.Column(JSON, nullable=True)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user