From 49ef729abc818f983770f0f3d16c18dfa661a5ef Mon Sep 17 00:00:00 2001
From: imartinez
Date: Fri, 19 Apr 2024 15:38:25 +0200
Subject: [PATCH 1/6] Allow passing HF access token to download tokenizer.
 Fallback to default tokenizer.

---
 private_gpt/components/llm/llm_component.py | 23 +++++++++++++++------
 private_gpt/settings/settings.py            |  4 ++++
 settings.yaml                               |  1 +
 3 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py
index dae997cc..baffa4e4 100644
--- a/private_gpt/components/llm/llm_component.py
+++ b/private_gpt/components/llm/llm_component.py
@@ -22,13 +22,24 @@ class LLMComponent:
     @inject
     def __init__(self, settings: Settings) -> None:
         llm_mode = settings.llm.mode
-        if settings.llm.tokenizer:
-            set_global_tokenizer(
-                AutoTokenizer.from_pretrained(
-                    pretrained_model_name_or_path=settings.llm.tokenizer,
-                    cache_dir=str(models_cache_path),
+        if settings.llm.tokenizer and settings.llm.mode != "mock":
+            # Try to download the tokenizer. If it fails, the LLM will still work
+            # using the default one, which is less accurate.
+            try:
+                set_global_tokenizer(
+                    AutoTokenizer.from_pretrained(
+                        pretrained_model_name_or_path=settings.llm.tokenizer,
+                        cache_dir=str(models_cache_path),
+                        token=settings.huggingface.access_token,
+                    )
                 )
-            )
+            except Exception as e:
+                logger.warning(
+                    "Failed to download tokenizer %s: %s. Falling back to "
+                    "default tokenizer.",
+                    settings.llm.tokenizer,
+                    e,
+                )

         logger.info("Initializing the LLM in mode=%s", llm_mode)
         match settings.llm.mode:
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 5df68114..051cfcab 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -151,6 +151,10 @@ class HuggingFaceSettings(BaseModel):
     embedding_hf_model_name: str = Field(
         description="Name of the HuggingFace model to use for embeddings"
     )
+    access_token: str = Field(
+        None,
+        description="Huggingface access token, required to download some models",
+    )


 class EmbeddingSettings(BaseModel):
diff --git a/settings.yaml b/settings.yaml
index dfd719bc..e881a555 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -69,6 +69,7 @@ embedding:

 huggingface:
   embedding_hf_model_name: BAAI/bge-small-en-v1.5
+  access_token: ${HUGGINGFACE_TOKEN:}

 vectorstore:
   database: qdrant

From 947e737f300adf621d2261d527192f36f3387f8e Mon Sep 17 00:00:00 2001
From: dividebysandwich
Date: Fri, 19 Apr 2024 15:40:00 +0200
Subject: [PATCH 2/6] fix: "no such group" error in Dockerfile, added docx2txt
 and cryptography deps (#1841)

* Fixed "no such group" error in Dockerfile, added docx2txt to poetry so
  docx parsing works out of the box for docker containers

* added cryptography dependency for pdf parsing
---
 Dockerfile.local |  3 +-
 poetry.lock      | 91 +++++++++++++++++++++++-------------------------
 pyproject.toml   |  2 ++
 3 files changed, 48 insertions(+), 48 deletions(-)

diff --git a/Dockerfile.local b/Dockerfile.local
index a5222116..980a9b08 100644
--- a/Dockerfile.local
+++ b/Dockerfile.local
@@ -33,7 +33,8 @@ ENV PORT=8080
 EXPOSE 8080

 # Prepare a non-root user
-RUN adduser --system worker
+RUN adduser --group worker
+RUN adduser --system --ingroup worker worker
 WORKDIR /home/worker/app

 RUN mkdir local_data; chown worker local_data
diff --git a/poetry.lock b/poetry.lock
index 279707d8..961a1299 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.

 [[package]]
 name = "aiofiles"
@@ -515,7 +515,7 @@ files = [
 name = "cffi"
 version = "1.16.0"
 description = "Foreign Function Interface for Python calling C code."
-optional = true
+optional = false
 python-versions = ">=3.8"
 files = [
     {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"},
@@ -934,57 +934,42 @@ toml = ["tomli"]

 [[package]]
 name = "cryptography"
-version = "42.0.5"
+version = "3.4.8"
 description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
-optional = true
-python-versions = ">=3.7"
+optional = false
+python-versions = ">=3.6"
 files = [
-    {file = "cryptography-42.0.5-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:a30596bae9403a342c978fb47d9b0ee277699fa53bbafad14706af51fe543d16"},
-    {file = "cryptography-42.0.5-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:b7ffe927ee6531c78f81aa17e684e2ff617daeba7f189f911065b2ea2d526dec"},
-    {file = "cryptography-42.0.5-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2424ff4c4ac7f6b8177b53c17ed5d8fa74ae5955656867f5a8affaca36a27abb"},
-    {file = "cryptography-42.0.5-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:329906dcc7b20ff3cad13c069a78124ed8247adcac44b10bea1130e36caae0b4"},
-    {file = "cryptography-42.0.5-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:b03c2ae5d2f0fc05f9a2c0c997e1bc18c8229f392234e8a0194f202169ccd278"},
-    {file = "cryptography-42.0.5-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f8837fe1d6ac4a8052a9a8ddab256bc006242696f03368a4009be7ee3075cdb7"},
-    {file = "cryptography-42.0.5-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:0270572b8bd2c833c3981724b8ee9747b3ec96f699a9665470018594301439ee"},
-    {file = "cryptography-42.0.5-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:b8cac287fafc4ad485b8a9b67d0ee80c66bf3574f655d3b97ef2e1082360faf1"},
-    {file = "cryptography-42.0.5-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:16a48c23a62a2f4a285699dba2e4ff2d1cff3115b9df052cdd976a18856d8e3d"},
-    {file = "cryptography-42.0.5-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:2bce03af1ce5a5567ab89bd90d11e7bbdff56b8af3acbbec1faded8f44cb06da"},
-    {file = "cryptography-42.0.5-cp37-abi3-win32.whl", hash = "sha256:b6cd2203306b63e41acdf39aa93b86fb566049aeb6dc489b70e34bcd07adca74"},
-    {file = "cryptography-42.0.5-cp37-abi3-win_amd64.whl", hash = "sha256:98d8dc6d012b82287f2c3d26ce1d2dd130ec200c8679b6213b3c73c08b2b7940"},
-    {file = "cryptography-42.0.5-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:5e6275c09d2badf57aea3afa80d975444f4be8d3bc58f7f80d2a484c6f9485c8"},
-    {file = "cryptography-42.0.5-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4985a790f921508f36f81831817cbc03b102d643b5fcb81cd33df3fa291a1a1"},
-    {file = "cryptography-42.0.5-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7cde5f38e614f55e28d831754e8a3bacf9ace5d1566235e39d91b35502d6936e"},
-    {file = "cryptography-42.0.5-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:7367d7b2eca6513681127ebad53b2582911d1736dc2ffc19f2c3ae49997496bc"},
-    {file = "cryptography-42.0.5-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:cd2030f6650c089aeb304cf093f3244d34745ce0cfcc39f20c6fbfe030102e2a"},
-    {file = "cryptography-42.0.5-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a2913c5375154b6ef2e91c10b5720ea6e21007412f6437504ffea2109b5a33d7"},
-    {file = "cryptography-42.0.5-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:c41fb5e6a5fe9ebcd58ca3abfeb51dffb5d83d6775405305bfa8715b76521922"},
-    {file = "cryptography-42.0.5-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3eaafe47ec0d0ffcc9349e1708be2aaea4c6dd4978d76bf6eb0cb2c13636c6fc"},
-    {file = "cryptography-42.0.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:1b95b98b0d2af784078fa69f637135e3c317091b615cd0905f8b8a087e86fa30"},
-    {file = "cryptography-42.0.5-cp39-abi3-win32.whl", hash = "sha256:1f71c10d1e88467126f0efd484bd44bca5e14c664ec2ede64c32f20875c0d413"},
-    {file = "cryptography-42.0.5-cp39-abi3-win_amd64.whl", hash = "sha256:a011a644f6d7d03736214d38832e030d8268bcff4a41f728e6030325fea3e400"},
-    {file = "cryptography-42.0.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9481ffe3cf013b71b2428b905c4f7a9a4f76ec03065b05ff499bb5682a8d9ad8"},
-    {file = "cryptography-42.0.5-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:ba334e6e4b1d92442b75ddacc615c5476d4ad55cc29b15d590cc6b86efa487e2"},
-    {file = "cryptography-42.0.5-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:ba3e4a42397c25b7ff88cdec6e2a16c2be18720f317506ee25210f6d31925f9c"},
-    {file = "cryptography-42.0.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:111a0d8553afcf8eb02a4fea6ca4f59d48ddb34497aa8706a6cf536f1a5ec576"},
-    {file = "cryptography-42.0.5-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:cd65d75953847815962c84a4654a84850b2bb4aed3f26fadcc1c13892e1e29f6"},
-    {file = "cryptography-42.0.5-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:e807b3188f9eb0eaa7bbb579b462c5ace579f1cedb28107ce8b48a9f7ad3679e"},
-    {file = "cryptography-42.0.5-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f12764b8fffc7a123f641d7d049d382b73f96a34117e0b637b80643169cec8ac"},
-    {file = "cryptography-42.0.5-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:37dd623507659e08be98eec89323469e8c7b4c1407c85112634ae3dbdb926fdd"},
-    {file = "cryptography-42.0.5.tar.gz", hash = "sha256:6fe07eec95dfd477eb9530aef5bead34fec819b3aaf6c5bd6d20565da607bfe1"},
+    {file = "cryptography-3.4.8-cp36-abi3-macosx_10_10_x86_64.whl", hash = "sha256:a00cf305f07b26c351d8d4e1af84ad7501eca8a342dedf24a7acb0e7b7406e14"},
+    {file = "cryptography-3.4.8-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:f44d141b8c4ea5eb4dbc9b3ad992d45580c1d22bf5e24363f2fbf50c2d7ae8a7"},
+    {file = "cryptography-3.4.8-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:0a7dcbcd3f1913f664aca35d47c1331fce738d44ec34b7be8b9d332151b0b01e"},
+    {file = "cryptography-3.4.8-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:34dae04a0dce5730d8eb7894eab617d8a70d0c97da76b905de9efb7128ad7085"},
+    {file = "cryptography-3.4.8-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1eb7bb0df6f6f583dd8e054689def236255161ebbcf62b226454ab9ec663746b"},
+    {file = "cryptography-3.4.8-cp36-abi3-manylinux_2_24_x86_64.whl", hash = "sha256:9965c46c674ba8cc572bc09a03f4c649292ee73e1b683adb1ce81e82e9a6a0fb"},
+    {file = "cryptography-3.4.8-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:3c4129fc3fdc0fa8e40861b5ac0c673315b3c902bbdc05fc176764815b43dd1d"},
+    {file = "cryptography-3.4.8-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:695104a9223a7239d155d7627ad912953b540929ef97ae0c34c7b8bf30857e89"},
+    {file = "cryptography-3.4.8-cp36-abi3-win32.whl", hash = "sha256:21ca464b3a4b8d8e86ba0ee5045e103a1fcfac3b39319727bc0fc58c09c6aff7"},
+    {file = "cryptography-3.4.8-cp36-abi3-win_amd64.whl", hash = "sha256:3520667fda779eb788ea00080124875be18f2d8f0848ec00733c0ec3bb8219fc"},
+    {file = "cryptography-3.4.8-pp36-pypy36_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:d2a6e5ef66503da51d2110edf6c403dc6b494cc0082f85db12f54e9c5d4c3ec5"},
+    {file = "cryptography-3.4.8-pp36-pypy36_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a305600e7a6b7b855cd798e00278161b681ad6e9b7eca94c721d5f588ab212af"},
+    {file = "cryptography-3.4.8-pp36-pypy36_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:3fa3a7ccf96e826affdf1a0a9432be74dc73423125c8f96a909e3835a5ef194a"},
+    {file = "cryptography-3.4.8-pp37-pypy37_pp73-macosx_10_10_x86_64.whl", hash = "sha256:d9ec0e67a14f9d1d48dd87a2531009a9b251c02ea42851c060b25c782516ff06"},
+    {file = "cryptography-3.4.8-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5b0fbfae7ff7febdb74b574055c7466da334a5371f253732d7e2e7525d570498"},
+    {file = "cryptography-3.4.8-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94fff993ee9bc1b2440d3b7243d488c6a3d9724cc2b09cdb297f6a886d040ef7"},
+    {file = "cryptography-3.4.8-pp37-pypy37_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:8695456444f277af73a4877db9fc979849cd3ee74c198d04fc0776ebc3db52b9"},
+    {file = "cryptography-3.4.8-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:cd65b60cfe004790c795cc35f272e41a3df4631e2fb6b35aa7ac6ef2859d554e"},
+    {file = "cryptography-3.4.8.tar.gz", hash = "sha256:94cc5ed4ceaefcbe5bf38c8fba6a21fc1d365bb8fb826ea1688e3370b2e24a1c"},
 ]

 [package.dependencies]
-cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""}
+cffi = ">=1.12"

 [package.extras]
-docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"]
-docstest = ["pyenchant (>=1.6.11)", "readme-renderer", "sphinxcontrib-spelling (>=4.0.1)"]
-nox = ["nox"]
-pep8test = ["check-sdist", "click", "mypy", "ruff"]
-sdist = ["build"]
+docs = ["sphinx (>=1.6.5,!=1.8.0,!=3.1.0,!=3.1.1)", "sphinx-rtd-theme"]
+docstest = ["doc8", "pyenchant (>=1.6.11)", "sphinxcontrib-spelling (>=4.0.1)", "twine (>=1.12.0)"]
+pep8test = ["black", "flake8", "flake8-import-order", "pep8-naming"]
+sdist = ["setuptools-rust (>=0.11.4)"]
 ssh = ["bcrypt (>=3.1.5)"]
-test = ["certifi", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"]
-test-randomorder = ["pytest-randomly"]
+test = ["hypothesis (>=1.11.4,!=3.79.2)", "iso8601", "pretend", "pytest (>=6.0)", "pytest-cov", "pytest-subtests", "pytest-xdist", "pytz"]

 [[package]]
 name = "cycler"
@@ -1096,6 +1081,16 @@ idna = ["idna (>=2.1,<4.0)"]
 trio = ["trio (>=0.14,<0.23)"]
 wmi = ["wmi (>=1.5.1,<2.0.0)"]

+[[package]]
+name = "docx2txt"
+version = "0.8"
+description = "A pure python-based utility to extract text and images from docx files."
+optional = false
+python-versions = "*"
+files = [
+    {file = "docx2txt-0.8.tar.gz", hash = "sha256:2c06d98d7cfe2d3947e5760a57d924e3ff07745b379c8737723922e7009236e5"},
+]
+
 [[package]]
 name = "email-validator"
 version = "2.1.0.post1"
@@ -3173,6 +3168,7 @@ optional = true
 python-versions = ">=3"
 files = [
     {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-manylinux1_x86_64.whl", hash = "sha256:64335a8088e2b9d196ae8665430bc6a2b7e6ef2eb877a9c735c804bd4ff6467c"},
+    {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-manylinux2014_aarch64.whl", hash = "sha256:211a63e7b30a9d62f1a853e19928fbb1a750e3f17a13a3d1f98ff0ced19478dd"},
     {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-win_amd64.whl", hash = "sha256:1b2e317e437433753530792f13eece58f0aec21a2b05903be7bffe58a606cbd1"},
 ]
@@ -3937,7 +3933,7 @@ pyasn1 = ">=0.4.6,<0.6.0"
 name = "pycparser"
 version = "2.21"
 description = "C parser in Python"
-optional = true
+optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
 files = [
     {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"},
@@ -4437,6 +4433,7 @@ files = [
     {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
     {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
     {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
+    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
     {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
     {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
     {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@@ -6330,4 +6327,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.11,<3.12"
-content-hash = "0b3665bd11a604609249ff0267e4e5cf009881d16a84f9774fc54d45a1373e09"
+content-hash = "992c2486ee05e66eab29026e4275dd5509074b38a31ead9db2271e6f94f6da08"
diff --git a/pyproject.toml b/pyproject.toml
index 21132b3d..3d6d1dde 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,6 +13,8 @@ injector = "^0.21.0"
 pyyaml = "^6.0.1"
 watchdog = "^4.0.0"
 transformers = "^4.38.2"
+docx2txt = "^0.8"
+cryptography = "^3.1"
 # LlamaIndex core libs
 llama-index-core = "^0.10.14"
 llama-index-readers-file = "^0.1.6"

From 2a432bf9c5582a94eb4052b1e80cabdb118d298e Mon Sep 17 00:00:00 2001
From: Marco Repetto <119503933+mrepetto-certx@users.noreply.github.com>
Date: Fri, 19 Apr 2024 06:42:19 -0700
Subject: [PATCH 3/6] fix: make embedding_api_base match api_base when on
 docker (#1859)

---
 settings-docker.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/settings-docker.yaml b/settings-docker.yaml
index cb85afdf..d8037fa6 100644
--- a/settings-docker.yaml
+++ b/settings-docker.yaml
@@ -23,6 +23,7 @@ ollama:
   llm_model: ${PGPT_OLLAMA_LLM_MODEL:mistral}
   embedding_model: ${PGPT_OLLAMA_EMBEDDING_MODEL:nomic-embed-text}
   api_base: ${PGPT_OLLAMA_API_BASE:http://ollama:11434}
+  embedding_api_base: ${PGPT_OLLAMA_EMBEDDING_API_BASE:http://ollama:11434}
   tfs_z: ${PGPT_OLLAMA_TFS_Z:1.0}
   top_k: ${PGPT_OLLAMA_TOP_K:40}
   top_p: ${PGPT_OLLAMA_TOP_P:0.9}

From c1802e7cf0e56a2603213ec3b6a4af8fadb8a17a Mon Sep 17 00:00:00 2001
From: Daniel Gallego Vico
Date: Fri, 19 Apr 2024 17:10:58 +0200
Subject: [PATCH 4/6] fix(docs): Update installation.mdx (#1866)

Update repo url
---
 fern/docs/pages/installation/installation.mdx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fern/docs/pages/installation/installation.mdx b/fern/docs/pages/installation/installation.mdx
index 31a7953c..d1d18634 100644
--- a/fern/docs/pages/installation/installation.mdx
+++ b/fern/docs/pages/installation/installation.mdx
@@ -5,8 +5,8 @@ It is important that you review the Main Concepts before you start the installat
 * Clone PrivateGPT repository, and navigate to it:
   ```bash
-  git clone https://github.com/imartinez/privateGPT
-  cd privateGPT
+  git clone https://github.com/zylon-ai/private-gpt
+  cd private-gpt
   ```
 * Install Python `3.11` (*if you do not have it already*). Ideally through a python version manager like `pyenv`.

From e21bf20c10938b24711d9f2c765997f44d7e02a9 Mon Sep 17 00:00:00 2001
From: icsy7867
Date: Tue, 30 Apr 2024 03:53:10 -0400
Subject: [PATCH 5/6] feat: prompt_style applied to all LLMs + extra LLM
 params. (#1835)

* Updated prompt_style to be moved to the main LLM setting since all LLMs
  from llama_index can utilize this. I also included temperature, context
  window size, max_tokens, max_new_tokens into the openailike to help
  ensure the settings are consistent from the other implementations.

* Removed prompt_style from llamacpp entirely

* Fixed settings-local.yaml to include prompt_style in the LLM settings
  instead of llamacpp.
---
 private_gpt/components/llm/llm_component.py | 11 +++++++---
 private_gpt/settings/settings.py            | 23 ++++++++++-----------
 settings-local.yaml                         |  4 ++--
 settings.yaml                               |  2 +-
 4 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py
index baffa4e4..51d71a3e 100644
--- a/private_gpt/components/llm/llm_component.py
+++ b/private_gpt/components/llm/llm_component.py
@@ -51,7 +51,7 @@ class LLMComponent:
                     "Local dependencies not found, install with `poetry install --extras llms-llama-cpp`"
                 ) from e

-            prompt_style = get_prompt_style(settings.llamacpp.prompt_style)
+            prompt_style = get_prompt_style(settings.llm.prompt_style)
             settings_kwargs = {
                 "tfs_z": settings.llamacpp.tfs_z,  # ollama and llama-cpp
                 "top_k": settings.llamacpp.top_k,  # ollama and llama-cpp
@@ -109,15 +109,20 @@ class LLMComponent:
                 raise ImportError(
                     "OpenAILike dependencies not found, install with `poetry install --extras llms-openai-like`"
                 ) from e
-
+            prompt_style = get_prompt_style(settings.llm.prompt_style)
             openai_settings = settings.openai
             self.llm = OpenAILike(
                 api_base=openai_settings.api_base,
                 api_key=openai_settings.api_key,
                 model=openai_settings.model,
                 is_chat_model=True,
-                max_tokens=None,
+                max_tokens=settings.llm.max_new_tokens,
                 api_version="",
+                temperature=settings.llm.temperature,
+                context_window=settings.llm.context_window,
+                max_new_tokens=settings.llm.max_new_tokens,
+                messages_to_prompt=prompt_style.messages_to_prompt,
+                completion_to_prompt=prompt_style.completion_to_prompt,
             )
         case "ollama":
             try:
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 051cfcab..c4c5e20d 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -104,6 +104,17 @@ class LLMSettings(BaseModel):
         0.1,
         description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
     )
+    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
+        "llama2",
+        description=(
+            "The prompt style to use for the chat engine. "
+            "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
+            "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
+            "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
+            "If `mistral` - use the `mistral` prompt style. It should look like `<s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]` "
+            "`llama2` is the historic behaviour. `default` might work better with your custom models."
+        ),
+    )


 class VectorstoreSettings(BaseModel):
@@ -117,18 +128,6 @@ class NodeStoreSettings(BaseModel):
 class LlamaCPPSettings(BaseModel):
     llm_hf_repo_id: str
     llm_hf_model_file: str
-    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
-        "llama2",
-        description=(
-            "The prompt style to use for the chat engine. "
-            "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
-            "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
-            "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
-            "If `mistral` - use the `mistral` prompt style. It should look like `<s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]` "
-            "`llama2` is the historic behaviour. `default` might work better with your custom models."
-        ),
-    )
-
     tfs_z: float = Field(
         1.0,
         description="Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting.",
diff --git a/settings-local.yaml b/settings-local.yaml
index c9d02742..48eeb0ea 100644
--- a/settings-local.yaml
+++ b/settings-local.yaml
@@ -8,9 +8,9 @@ llm:
   max_new_tokens: 512
   context_window: 3900
   tokenizer: mistralai/Mistral-7B-Instruct-v0.2
+  prompt_style: "mistral"

 llamacpp:
-  prompt_style: "mistral"
   llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
   llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf

@@ -24,4 +24,4 @@ vectorstore:
   database: qdrant

 qdrant:
-  path: local_data/private_gpt/qdrant
\ No newline at end of file
+  path: local_data/private_gpt/qdrant
diff --git a/settings.yaml b/settings.yaml
index e881a555..d8d2500c 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -36,6 +36,7 @@ ui:

 llm:
   mode: llamacpp
+  prompt_style: "mistral"
   # Should be matching the selected model
   max_new_tokens: 512
   context_window: 3900
@@ -53,7 +54,6 @@ rag:
   top_n: 1

 llamacpp:
-  prompt_style: "mistral"
   llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
   llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
   tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting

From 9d0d614706581a8bfa57db45f62f84ab23d26f15 Mon Sep 17 00:00:00 2001
From: Patrick Peng
Date: Tue, 30 Apr 2024 15:58:19 +0800
Subject: [PATCH 6/6] fix: Replacing unsafe `eval()` with `json.loads()` (#1890)

---
 private_gpt/components/llm/custom/sagemaker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/private_gpt/components/llm/custom/sagemaker.py b/private_gpt/components/llm/custom/sagemaker.py
index e20f5394..bd2aec18 100644
--- a/private_gpt/components/llm/custom/sagemaker.py
+++ b/private_gpt/components/llm/custom/sagemaker.py
@@ -218,7 +218,7 @@ class SagemakerLLM(CustomLLM):
         response_body = resp["Body"]
         response_str = response_body.read().decode("utf-8")
-        response_dict = eval(response_str)
+        response_dict = json.loads(response_str)

         return CompletionResponse(
             text=response_dict[0]["generated_text"][len(prompt) :],
             raw=resp
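
Postscript on PATCH 6/6: the sketch below illustrates why `json.loads()` is the safer replacement for `eval()` when parsing the SageMaker response body. It is a minimal, self-contained illustration, not part of the patched code; the `[{"generated_text": ...}]` payload shape is assumed from the `response_dict[0]["generated_text"]` access in `sagemaker.py`, not from any documented endpoint contract.

```python
# Minimal sketch of the risk fixed in PATCH 6/6. Assumes the endpoint returns
# a JSON list like [{"generated_text": ...}], as implied by the patched code.
import json

trusted = '[{"generated_text": "hello world"}]'
malicious = '__import__("os").system("echo pwned")'

# eval() executes arbitrary Python, so a compromised or misbehaving endpoint
# could run code inside the host process:
# eval(malicious)  # would spawn a shell command if uncommented

# json.loads() only ever builds plain data structures (lists, dicts, strings,
# numbers) and raises json.JSONDecodeError on anything that is not valid JSON.
response_dict = json.loads(trusted)
print(response_dict[0]["generated_text"])  # -> hello world

try:
    json.loads(malicious)
except json.JSONDecodeError:
    print("rejected non-JSON payload")
```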