diff --git a/docs/ecosystem/pgvector.md b/docs/ecosystem/pgvector.md index 3dcf1cb81b7..5da6fdb535d 100644 --- a/docs/ecosystem/pgvector.md +++ b/docs/ecosystem/pgvector.md @@ -24,6 +24,10 @@ To import this vectorstore: from langchain.vectorstores.pgvector import PGVector ``` +The PGVector embedding size is not autodetected. If you are using OpenAI embeddings, or any other embedding model with 1536 dimensions, the +default is fine. If you are going to use, for example, HuggingFaceEmbeddings, you need to set the environment variable named `PGVECTOR_VECTOR_SIZE` +to the required value. In the case of HuggingFaceEmbeddings it would be: `PGVECTOR_VECTOR_SIZE=768` + ### Usage For a more detailed walkthrough of the PGVector Wrapper, see [this notebook](../modules/indexes/vectorstores/examples/pgvector.ipynb) diff --git a/langchain/vectorstores/pgvector.py b/langchain/vectorstores/pgvector.py index b75839d6607..bf07613aca8 100644 --- a/langchain/vectorstores/pgvector.py +++ b/langchain/vectorstores/pgvector.py @@ -3,6 +3,7 @@ from __future__ import annotations import enum import logging +import os import uuid from typing import Any, Dict, Iterable, List, Optional, Tuple, Type @@ -19,7 +20,7 @@ from langchain.vectorstores.base import VectorStore Base = declarative_base() # type: Any -ADA_TOKEN_COUNT = 1536 +PGVECTOR_VECTOR_SIZE = int(os.getenv("PGVECTOR_VECTOR_SIZE", default="1536")) _LANGCHAIN_DEFAULT_COLLECTION_NAME = "langchain" @@ -79,7 +80,7 @@ class EmbeddingStore(BaseModel): ) collection = relationship(CollectionStore, back_populates="embeddings") - embedding: Vector = sqlalchemy.Column(Vector(ADA_TOKEN_COUNT)) + embedding: Vector = sqlalchemy.Column(Vector(PGVECTOR_VECTOR_SIZE)) document = sqlalchemy.Column(sqlalchemy.String, nullable=True) cmetadata = sqlalchemy.Column(JSON, nullable=True)