From 97abe337bacb27cc3fdbe1bb8dd40e040ca06e5b Mon Sep 17 00:00:00 2001
From: Martin Holzhauer <martin.holzhauer@mayflower.de>
Date: Sat, 6 May 2023 01:31:38 +0200
Subject: [PATCH] env variable based fix for non chatgpt embeddings (#3964)

This is a simple fix for #2219.

I also added some documentation for this environment variable.
---
 docs/integrations/pgvector.md      | 4 ++++
 langchain/vectorstores/pgvector.py | 5 +++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/docs/integrations/pgvector.md b/docs/integrations/pgvector.md
index 3dcf1cb81b7..5da6fdb535d 100644
--- a/docs/integrations/pgvector.md
+++ b/docs/integrations/pgvector.md
@@ -24,6 +24,10 @@ To import this vectorstore:
 from langchain.vectorstores.pgvector import PGVector
 ```
 
+PGVector embedding size is not autodetected. If you are using ChatGPT or any other embedding with 1536 dimensions,
+the default is fine. If you are going to use, for example, HuggingFaceEmbeddings, you need to set the environment variable named `PGVECTOR_VECTOR_SIZE`
+to the needed value. In the case of HuggingFaceEmbeddings, it would be: `PGVECTOR_VECTOR_SIZE=768`
+
 ### Usage
 
 For a more detailed walkthrough of the PGVector Wrapper, see [this notebook](../modules/indexes/vectorstores/examples/pgvector.ipynb)
diff --git a/langchain/vectorstores/pgvector.py b/langchain/vectorstores/pgvector.py
index 161e8e2c52e..5edde1b8c5e 100644
--- a/langchain/vectorstores/pgvector.py
+++ b/langchain/vectorstores/pgvector.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 
 import enum
 import logging
+import os
 import uuid
 from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
 
@@ -19,7 +20,7 @@ from langchain.vectorstores.base import VectorStore
 Base = declarative_base()  # type: Any
 
 
-ADA_TOKEN_COUNT = 1536
+PGVECTOR_VECTOR_SIZE = int(os.getenv("PGVECTOR_VECTOR_SIZE", default="1536"))
 _LANGCHAIN_DEFAULT_COLLECTION_NAME = "langchain"
 
 
@@ -79,7 +80,7 @@ class EmbeddingStore(BaseModel):
     )
     collection = relationship(CollectionStore, back_populates="embeddings")
 
-    embedding: Vector = sqlalchemy.Column(Vector(ADA_TOKEN_COUNT))
+    embedding: Vector = sqlalchemy.Column(Vector(PGVECTOR_VECTOR_SIZE))
     document = sqlalchemy.Column(sqlalchemy.String, nullable=True)
     cmetadata = sqlalchemy.Column(JSON, nullable=True)