Make more updates

This commit is contained in:
Steven Linn 2024-05-01 08:04:54 -06:00
parent b7f1aaa587
commit 7e2f93c564
5 changed files with 37 additions and 23 deletions

View File

@ -1,5 +1,17 @@
# Notes # Notes
poetry run pip install bs4 openpyxl flask transformers python-pptx Pillow cryptography sudo apt-get install build-essential
sudo apt-get install git gcc make openssl libssl-dev libbz2-dev libreadline-dev
sudo apt-get install lzma
sudo apt-get install liblzma-dev
curl -sSL https://install.python-poetry.org | python3 -
cd privateGPT
poetry install --extras "ui llms-llama-cpp"
CMAKE_ARGS='-DLLAMA_CUBLAS=on' poetry run pip install --force-reinstall --no-cache-dir llama-cpp-python
poetry run python scripts/setup
poetry run pip install bs4 openpyxl flask transformers python-pptx Pillow cryptography llama_index.vector_stores.postgres llama_index.embeddings.huggingface sentencepiece
apt-get install antiword apt-get install antiword
# 🔒 PrivateGPT 📑 # 🔒 PrivateGPT 📑

View File

@ -12,7 +12,7 @@ services:
# PGPT_PROFILES: docker # PGPT_PROFILES: docker
# PGPT_MODE: local # PGPT_MODE: local
postgres: postgres:
image: "postgres" image: "ankane/pgvector:latest"
environment: environment:
POSTGRES_USER: "postgres" POSTGRES_USER: "postgres"
POSTGRES_PASSWORD: "postgres" POSTGRES_PASSWORD: "postgres"
@ -21,14 +21,15 @@ services:
- "5432:5432" - "5432:5432"
volumes: volumes:
- ./postgres_data:/var/lib/postgresql/data - ./postgres_data:/var/lib/postgresql/data
- ./init.sql:/docker-entrypoint-initdb.d/init.sql
restart: always restart: always
phppgadmin: # phppgadmin:
image: "dockage/phppgadmin" # image: "dockage/phppgadmin"
environment: # environment:
PHP_PG_ADMIN_SERVER_HOST: "postgres" # PHP_PG_ADMIN_SERVER_HOST: "postgres"
PHP_PG_ADMIN_SERVER_PORT: "5432" # PHP_PG_ADMIN_SERVER_PORT: "5432"
PHP_PG_ADMIN_SERVER_DEFAULT_DB: "postgres" # PHP_PG_ADMIN_SERVER_DEFAULT_DB: "postgres"
PHP_PG_ADMIN_OWNED_ONLY: "false" # PHP_PG_ADMIN_OWNED_ONLY: "false"
ports: # ports:
- "8080:80" # - "8080:80"
restart: always # restart: always

View File

@ -18,7 +18,8 @@ def _try_loading_included_file_formats() -> dict[str, type[BaseReader]]:
PDFReader, PDFReader,
) )
from llama_index.readers.file.epub import EpubReader # type: ignore from llama_index.readers.file.epub import EpubReader # type: ignore
from llama_index.readers.file.image import ImageReader # type: ignore #from llama_index.readers.file.image import ImageReader # type: ignore
from llama_index.readers.file.image_vision_llm import ImageVisionLLMReader # type: ignore
from llama_index.readers.file.ipynb import IPYNBReader # type: ignore from llama_index.readers.file.ipynb import IPYNBReader # type: ignore
from llama_index.readers.file.markdown import MarkdownReader # type: ignore from llama_index.readers.file.markdown import MarkdownReader # type: ignore
from llama_index.readers.file.mbox import MboxReader # type: ignore from llama_index.readers.file.mbox import MboxReader # type: ignore
@ -41,9 +42,9 @@ def _try_loading_included_file_formats() -> dict[str, type[BaseReader]]:
".pptx": PptxReader, ".pptx": PptxReader,
".ppt": PptxReader, ".ppt": PptxReader,
".pptm": PptxReader, ".pptm": PptxReader,
".jpg": ImageReader, ".jpg": ImageVisionLLMReader,
".png": ImageReader, ".png": ImageVisionLLMReader,
".jpeg": ImageReader, ".jpeg": ImageVisionLLMReader,
".mp3": VideoAudioReader, ".mp3": VideoAudioReader,
".mp4": VideoAudioReader, ".mp4": VideoAudioReader,
".csv": PandasCSVReader, ".csv": PandasCSVReader,
@ -57,9 +58,9 @@ def _try_loading_included_file_formats() -> dict[str, type[BaseReader]]:
".htm": HTMLParser, ".htm": HTMLParser,
".xlsx": XLSXParser, ".xlsx": XLSXParser,
".xml": XMLReader, ".xml": XMLReader,
".eps": ImageReader, ".eps": ImageVisionLLMReader,
".tif": ImageReader, ".tif": ImageVisionLLMReader,
".gif": ImageReader, ".gif": ImageVisionLLMReader,
".doc": DOCParser, ".doc": DOCParser,
} }
return default_file_reader_cls return default_file_reader_cls
@ -119,11 +120,11 @@ class IngestionHelper:
) )
return [] return []
string_reader = StringIterableReader() string_reader = StringIterableReader()
return string_reader.load_data([file_content]) return string_reader.load_data([file_content.replace("\x00", "")])
logger.debug("Specific reader found for extension=%s", extension) logger.debug("Specific reader found for extension=%s", extension)
try: try:
return reader_cls().load_data(file_data) return reader_cls().load_data(str(file_data).replace("\x00", ""))
except Exception as e: except Exception as e:
logger.debug( logger.debug(
"Failed to read file_name=%s e=%s", "Failed to read file_name=%s e=%s",

View File

@ -1 +1 @@
Which files are in the context provided? Which files in the context are responsible for reading Solidworks DWG or DWF formats?

View File

@ -67,7 +67,7 @@ huggingface:
embedding_hf_model_name: BAAI/bge-small-en-v1.5 embedding_hf_model_name: BAAI/bge-small-en-v1.5
vectorstore: vectorstore:
database: qdrant database: postgres
nodestore: nodestore:
database: postgres database: postgres