Make more updates

This commit is contained in:
Steven Linn 2024-05-01 08:04:54 -06:00
parent b7f1aaa587
commit 7e2f93c564
5 changed files with 37 additions and 23 deletions

View File

@ -1,5 +1,17 @@
# Notes
poetry run pip install bs4 openpyxl flask transformers python-pptx Pillow cryptography
sudo apt-get install build-essential
sudo apt-get install git gcc make openssl libssl-dev libbz2-dev libreadline-dev
sudo apt-get install lzma
sudo apt-get install liblzma-dev
curl -sSL https://install.python-poetry.org | python3 -
cd privateGPT
poetry install --extras "ui llms-llama-cpp"
CMAKE_ARGS='-DLLAMA_CUBLAS=on' poetry run pip install --force-reinstall --no-cache-dir llama-cpp-python
poetry run python scripts/setup
poetry run pip install bs4 openpyxl flask transformers python-pptx Pillow cryptography llama_index.vector_stores.postgres llama_index.embeddings.huggingface sentencepiece
apt-get install antiword
# 🔒 PrivateGPT 📑

View File

@ -12,7 +12,7 @@ services:
# PGPT_PROFILES: docker
# PGPT_MODE: local
postgres:
image: "postgres"
image: "ankane/pgvector:latest"
environment:
POSTGRES_USER: "postgres"
POSTGRES_PASSWORD: "postgres"
@ -21,14 +21,15 @@ services:
- "5432:5432"
volumes:
- ./postgres_data:/var/lib/postgresql/data
- ./init.sql:/docker-entrypoint-initdb.d/init.sql
restart: always
phppgadmin:
image: "dockage/phppgadmin"
environment:
PHP_PG_ADMIN_SERVER_HOST: "postgres"
PHP_PG_ADMIN_SERVER_PORT: "5432"
PHP_PG_ADMIN_SERVER_DEFAULT_DB: "postgres"
PHP_PG_ADMIN_OWNED_ONLY: "false"
ports:
- "8080:80"
restart: always
# phppgadmin:
# image: "dockage/phppgadmin"
# environment:
# PHP_PG_ADMIN_SERVER_HOST: "postgres"
# PHP_PG_ADMIN_SERVER_PORT: "5432"
# PHP_PG_ADMIN_SERVER_DEFAULT_DB: "postgres"
# PHP_PG_ADMIN_OWNED_ONLY: "false"
# ports:
# - "8080:80"
# restart: always

View File

@ -18,7 +18,8 @@ def _try_loading_included_file_formats() -> dict[str, type[BaseReader]]:
PDFReader,
)
from llama_index.readers.file.epub import EpubReader # type: ignore
from llama_index.readers.file.image import ImageReader # type: ignore
#from llama_index.readers.file.image import ImageReader # type: ignore
from llama_index.readers.file.image_vision_llm import ImageVisionLLMReader # type: ignore
from llama_index.readers.file.ipynb import IPYNBReader # type: ignore
from llama_index.readers.file.markdown import MarkdownReader # type: ignore
from llama_index.readers.file.mbox import MboxReader # type: ignore
@ -41,9 +42,9 @@ def _try_loading_included_file_formats() -> dict[str, type[BaseReader]]:
".pptx": PptxReader,
".ppt": PptxReader,
".pptm": PptxReader,
".jpg": ImageReader,
".png": ImageReader,
".jpeg": ImageReader,
".jpg": ImageVisionLLMReader,
".png": ImageVisionLLMReader,
".jpeg": ImageVisionLLMReader,
".mp3": VideoAudioReader,
".mp4": VideoAudioReader,
".csv": PandasCSVReader,
@ -57,9 +58,9 @@ def _try_loading_included_file_formats() -> dict[str, type[BaseReader]]:
".htm": HTMLParser,
".xlsx": XLSXParser,
".xml": XMLReader,
".eps": ImageReader,
".tif": ImageReader,
".gif": ImageReader,
".eps": ImageVisionLLMReader,
".tif": ImageVisionLLMReader,
".gif": ImageVisionLLMReader,
".doc": DOCParser,
}
return default_file_reader_cls
@ -119,11 +120,11 @@ class IngestionHelper:
)
return []
string_reader = StringIterableReader()
return string_reader.load_data([file_content])
return string_reader.load_data([file_content.replace("\x00", "")])
logger.debug("Specific reader found for extension=%s", extension)
try:
return reader_cls().load_data(file_data)
return reader_cls().load_data(str(file_data).replace("\x00", ""))
except Exception as e:
logger.debug(
"Failed to read file_name=%s e=%s",

View File

@ -1 +1 @@
Which files are in the context provided?
Which files in the context are responsible for reading Solidworks DWG or DWF formats?

View File

@ -67,7 +67,7 @@ huggingface:
embedding_hf_model_name: BAAI/bge-small-en-v1.5
vectorstore:
database: qdrant
database: postgres
nodestore:
database: postgres