diff --git a/constants.py b/constants.py new file mode 100644 index 00000000..d5e4383c --- /dev/null +++ b/constants.py @@ -0,0 +1,11 @@ +from chromadb.config import Settings + +# Define the folder for storing database +PERSIST_DIRECTORY = 'db' + +# Define the Chroma settings +CHROMA_SETTINGS = Settings( + chroma_db_impl='duckdb+parquet', + persist_directory=PERSIST_DIRECTORY, + anonymized_telemetry=False +) \ No newline at end of file diff --git a/ingest.py b/ingest.py index 6cd673c4..2a9a1611 100644 --- a/ingest.py +++ b/ingest.py @@ -3,6 +3,8 @@ from langchain.document_loaders import TextLoader, PDFMinerLoader, CSVLoader from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.vectorstores import Chroma from langchain.embeddings import LlamaCppEmbeddings +from constants import PERSIST_DIRECTORY +from constants import CHROMA_SETTINGS def main(): llama_embeddings_model = os.environ.get('LLAMA_EMBEDDINGS_MODEL') @@ -23,7 +25,7 @@ def main(): # Create embeddings llama = LlamaCppEmbeddings(model_path=llama_embeddings_model, n_ctx=model_n_ctx) # Create and store locally vectorstore - db = Chroma.from_documents(texts, llama, persist_directory=persist_directory) + db = Chroma.from_documents(texts, llama, persist_directory=PERSIST_DIRECTORY, client_settings=CHROMA_SETTINGS) db.persist() db = None diff --git a/privateGPT.py b/privateGPT.py index 674b0150..38fab48e 100644 --- a/privateGPT.py +++ b/privateGPT.py @@ -12,10 +12,11 @@ model_type = os.environ.get('MODEL_TYPE') model_path = os.environ.get('MODEL_PATH') model_n_ctx = os.environ.get('MODEL_N_CTX') -def main(): - # Load stored vectorstore +from constants import CHROMA_SETTINGS + +def main(): llama = LlamaCppEmbeddings(model_path=llama_embeddings_model, n_ctx=model_n_ctx) - db = Chroma(persist_directory=persist_directory, embedding_function=llama) + db = Chroma(persist_directory=persist_directory, embedding_function=llama, client_settings=CHROMA_SETTINGS) retriever = db.as_retriever() # Prepare the LLM callbacks = [StreamingStdOutCallbackHandler()] diff --git a/requirements.txt b/requirements.txt index 4c1ce9e5..7ba0b528 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -langchain==0.0.162 +langchain==0.0.166 pygpt4all==1.1.0 chromadb==0.3.22 -llama-cpp-python==0.1.47 +llama-cpp-python==0.1.48 urllib3==1.26.6