env variable based fix for non chatgpt embeddings (#3964)

This is a simple fix for #2219. I also added some documentation for this environment variable.
2026-07-13 12:14:06 +00:00 · 2023-05-06 01:31:38 +02:00
parent 3c6fa9126a
commit 97abe337ba
2 changed files with 7 additions and 2 deletions
--- a/docs/integrations/pgvector.md
+++ b/docs/integrations/pgvector.md
@@ -24,6 +24,10 @@ To import this vectorstore:
 from langchain.vectorstores.pgvector import PGVector
 ```

+PGVector embedding size is not autodetected. If you are using ChatGPT or any other embedding with 1536 dimensions,
+the default is fine. If you are going to use, for example, HuggingFaceEmbeddings, you need to set the environment variable named `PGVECTOR_VECTOR_SIZE`
+to the needed value. In the case of HuggingFaceEmbeddings, it would be: `PGVECTOR_VECTOR_SIZE=768`
+
 ### Usage

 For a more detailed walkthrough of the PGVector Wrapper, see [this notebook](../modules/indexes/vectorstores/examples/pgvector.ipynb)
--- a/langchain/vectorstores/pgvector.py
+++ b/langchain/vectorstores/pgvector.py
@@ -3,6 +3,7 @@ from __future__ import annotations

 import enum
 import logging
+import os
 import uuid
 from typing import Any, Dict, Iterable, List, Optional, Tuple, Type

@@ -19,7 +20,7 @@ from langchain.vectorstores.base import VectorStore
 Base = declarative_base()  # type: Any


-ADA_TOKEN_COUNT = 1536
+PGVECTOR_VECTOR_SIZE = int(os.getenv("PGVECTOR_VECTOR_SIZE", default="1536"))
 _LANGCHAIN_DEFAULT_COLLECTION_NAME = "langchain"


@@ -79,7 +80,7 @@ class EmbeddingStore(BaseModel):
    )
    collection = relationship(CollectionStore, back_populates="embeddings")

-    embedding: Vector = sqlalchemy.Column(Vector(ADA_TOKEN_COUNT))
+    embedding: Vector = sqlalchemy.Column(Vector(PGVECTOR_VECTOR_SIZE))
    document = sqlalchemy.Column(sqlalchemy.String, nullable=True)
    cmetadata = sqlalchemy.Column(JSON, nullable=True)