diff --git a/fern/docs/pages/installation/installation.mdx b/fern/docs/pages/installation/installation.mdx index 67431ebd..b47e2b2b 100644 --- a/fern/docs/pages/installation/installation.mdx +++ b/fern/docs/pages/installation/installation.mdx @@ -30,8 +30,8 @@ pyenv local 3.11 PrivateGPT allows to customize the setup -from fully local to cloud based- by deciding the modules to use. Here are the different options available: -- LLM: "llama-cpp", "ollama", "sagemaker", "openai", "openailike" -- Embeddings: "huggingface", "openai", "sagemaker" +- LLM: "llama-cpp", "ollama", "sagemaker", "openai", "openailike", "azopenai" +- Embeddings: "huggingface", "openai", "sagemaker", "azopenai" - Vector stores: "qdrant", "chroma", "postgres" - UI: whether or not to enable UI (Gradio) or just go with the API @@ -49,10 +49,12 @@ Where `` can be any of the following: - llms-sagemaker: adds support for Amazon Sagemaker LLM, requires Sagemaker inference endpoints - llms-openai: adds support for OpenAI LLM, requires OpenAI API key - llms-openai-like: adds support for 3rd party LLM providers that are compatible with OpenAI's API +- llms-azopenai: adds support for Azure OpenAI LLM, requires Azure OpenAI inference endpoints - embeddings-ollama: adds support for Ollama Embeddings, requires Ollama running locally - embeddings-huggingface: adds support for local Embeddings using HuggingFace - embeddings-sagemaker: adds support for Amazon Sagemaker Embeddings, requires Sagemaker inference endpoints - embeddings-openai = adds support for OpenAI Embeddings, requires OpenAI API key +- embeddings-azopenai = adds support for Azure OpenAI Embeddings, requires Azure OpenAI inference endpoints - vector-stores-qdrant: adds support for Qdrant vector store - vector-stores-chroma: adds support for Chroma DB vector store - vector-stores-postgres: adds support for Postgres vector store @@ -160,6 +162,29 @@ PrivateGPT will use the already existing `settings-openai.yaml` settings file, w The UI will be available at http://localhost:8001 +### Non-Private, Azure OpenAI-powered test setup + +If you want to test PrivateGPT with Azure OpenAI's LLM and Embeddings -taking into account your data is going to Azure OpenAI!- you can run the following command: + +You need to have access to Azure OpenAI inference endpoints for the LLM and / or the embeddings, and have Azure OpenAI credentials properly configured. + +Edit the `settings-azopenai.yaml` file to include the correct Azure OpenAI endpoints. + +Then, install PrivateGPT with the following command: +```bash +poetry install --extras "ui llms-azopenai embeddings-azopenai vector-stores-qdrant" +``` + +Once installed, you can run PrivateGPT. + +```bash +PGPT_PROFILES=azopenai make run +``` + +PrivateGPT will use the already existing `settings-azopenai.yaml` settings file, which is already configured to use Azure OpenAI LLM and Embeddings endpoints, and Qdrant. + +The UI will be available at http://localhost:8001 + ### Local, Llama-CPP powered setup If you want to run PrivateGPT fully locally without relying on Ollama, you can run the following command: diff --git a/fern/docs/pages/manual/llms.mdx b/fern/docs/pages/manual/llms.mdx index 3013a0e6..8d9077ea 100644 --- a/fern/docs/pages/manual/llms.mdx +++ b/fern/docs/pages/manual/llms.mdx @@ -98,6 +98,43 @@ to run an OpenAI compatible server. Then, you can run PrivateGPT using the `sett `PGPT_PROFILES=vllm make run` +### Using Azure OpenAI + +If you cannot run a local model (because you don't have a GPU, for example) or for testing purposes, you may +decide to run PrivateGPT using Azure OpenAI as the LLM and Embeddings model. + +In order to do so, create a profile `settings-azopenai.yaml` with the following contents: + +```yaml +llm: + mode: azopenai + +embedding: + mode: azopenai + +azopenai: + api_key: # You could skip this configuration and use the AZ_OPENAI_API_KEY env var instead + azure_endpoint: # You could skip this configuration and use the AZ_OPENAI_ENDPOINT env var instead + api_version: # The API version to use. Default is "2023_05_15" + embedding_deployment_name: # You could skip this configuration and use the AZ_OPENAI_EMBEDDING_DEPLOYMENT_NAME env var instead + embedding_model: # Optional model to use. Default is "text-embedding-ada-002" + llm_deployment_name: # You could skip this configuration and use the AZ_OPENAI_LLM_DEPLOYMENT_NAME env var instead + llm_model: # Optional model to use. Default is "gpt-35-turbo" +``` + +And run PrivateGPT loading that profile you just created: + +`PGPT_PROFILES=azopenai make run` + +or + +`PGPT_PROFILES=azopenai poetry run python -m private_gpt` + +When the server is started it will print a log *Application startup complete*. +Navigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API. +You'll notice the speed and quality of response is higher, given you are using Azure OpenAI's servers for the heavy +computations. + ### Using AWS Sagemaker For a fully private & performant setup, you can choose to have both your LLM and Embeddings model deployed using Sagemaker. diff --git a/poetry.lock b/poetry.lock index 6da365eb..38c2374c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "aiofiles" @@ -274,6 +274,42 @@ docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib- tests = ["attrs[tests-no-zope]", "zope-interface"] tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +[[package]] +name = "azure-core" +version = "1.30.1" +description = "Microsoft Azure Core Library for Python" +optional = true +python-versions = ">=3.7" +files = [ + {file = "azure-core-1.30.1.tar.gz", hash = "sha256:26273a254131f84269e8ea4464f3560c731f29c0c1f69ac99010845f239c1a8f"}, + {file = "azure_core-1.30.1-py3-none-any.whl", hash = "sha256:7c5ee397e48f281ec4dd773d67a0a47a0962ed6fa833036057f9ea067f688e74"}, +] + +[package.dependencies] +requests = ">=2.21.0" +six = ">=1.11.0" +typing-extensions = ">=4.6.0" + +[package.extras] +aio = ["aiohttp (>=3.0)"] + +[[package]] +name = "azure-identity" +version = "1.15.0" +description = "Microsoft Azure Identity Library for Python" +optional = true +python-versions = ">=3.7" +files = [ + {file = "azure-identity-1.15.0.tar.gz", hash = "sha256:4c28fc246b7f9265610eb5261d65931183d019a23d4b0e99357facb2e6c227c8"}, + {file = "azure_identity-1.15.0-py3-none-any.whl", hash = "sha256:a14b1f01c7036f11f148f22cd8c16e05035293d714458d6b44ddf534d93eb912"}, +] + +[package.dependencies] +azure-core = ">=1.23.0,<2.0.0" +cryptography = ">=2.5" +msal = ">=1.24.0,<2.0.0" +msal-extensions = ">=0.3.0,<2.0.0" + [[package]] name = "backoff" version = "2.2.1" @@ -475,6 +511,70 @@ files = [ {file = "certifi-2023.11.17.tar.gz", hash = "sha256:9b469f3a900bf28dc19b8cfbf8019bf47f7fdd1a65a1d4ffb98fc14166beb4d1"}, ] +[[package]] +name = "cffi" +version = "1.16.0" +description = "Foreign Function Interface for Python calling C code." +optional = true +python-versions = ">=3.8" +files = [ + {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, + {file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d"}, + {file = "cffi-1.16.0-cp310-cp310-win32.whl", hash = "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a"}, + {file = "cffi-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"}, + {file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"}, + {file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"}, + {file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"}, + {file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"}, + {file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"}, + {file = "cffi-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324"}, + {file = "cffi-1.16.0-cp38-cp38-win32.whl", hash = "sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a"}, + {file = "cffi-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe"}, + {file = "cffi-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4"}, + {file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"}, + {file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"}, +] + +[package.dependencies] +pycparser = "*" + [[package]] name = "cfgv" version = "3.4.0" @@ -832,6 +932,60 @@ files = [ [package.extras] toml = ["tomli"] +[[package]] +name = "cryptography" +version = "42.0.5" +description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." +optional = true +python-versions = ">=3.7" +files = [ + {file = "cryptography-42.0.5-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:a30596bae9403a342c978fb47d9b0ee277699fa53bbafad14706af51fe543d16"}, + {file = "cryptography-42.0.5-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:b7ffe927ee6531c78f81aa17e684e2ff617daeba7f189f911065b2ea2d526dec"}, + {file = "cryptography-42.0.5-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2424ff4c4ac7f6b8177b53c17ed5d8fa74ae5955656867f5a8affaca36a27abb"}, + {file = "cryptography-42.0.5-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:329906dcc7b20ff3cad13c069a78124ed8247adcac44b10bea1130e36caae0b4"}, + {file = "cryptography-42.0.5-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:b03c2ae5d2f0fc05f9a2c0c997e1bc18c8229f392234e8a0194f202169ccd278"}, + {file = "cryptography-42.0.5-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f8837fe1d6ac4a8052a9a8ddab256bc006242696f03368a4009be7ee3075cdb7"}, + {file = "cryptography-42.0.5-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:0270572b8bd2c833c3981724b8ee9747b3ec96f699a9665470018594301439ee"}, + {file = "cryptography-42.0.5-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:b8cac287fafc4ad485b8a9b67d0ee80c66bf3574f655d3b97ef2e1082360faf1"}, + {file = "cryptography-42.0.5-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:16a48c23a62a2f4a285699dba2e4ff2d1cff3115b9df052cdd976a18856d8e3d"}, + {file = "cryptography-42.0.5-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:2bce03af1ce5a5567ab89bd90d11e7bbdff56b8af3acbbec1faded8f44cb06da"}, + {file = "cryptography-42.0.5-cp37-abi3-win32.whl", hash = "sha256:b6cd2203306b63e41acdf39aa93b86fb566049aeb6dc489b70e34bcd07adca74"}, + {file = "cryptography-42.0.5-cp37-abi3-win_amd64.whl", hash = "sha256:98d8dc6d012b82287f2c3d26ce1d2dd130ec200c8679b6213b3c73c08b2b7940"}, + {file = "cryptography-42.0.5-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:5e6275c09d2badf57aea3afa80d975444f4be8d3bc58f7f80d2a484c6f9485c8"}, + {file = "cryptography-42.0.5-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4985a790f921508f36f81831817cbc03b102d643b5fcb81cd33df3fa291a1a1"}, + {file = "cryptography-42.0.5-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7cde5f38e614f55e28d831754e8a3bacf9ace5d1566235e39d91b35502d6936e"}, + {file = "cryptography-42.0.5-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:7367d7b2eca6513681127ebad53b2582911d1736dc2ffc19f2c3ae49997496bc"}, + {file = "cryptography-42.0.5-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:cd2030f6650c089aeb304cf093f3244d34745ce0cfcc39f20c6fbfe030102e2a"}, + {file = "cryptography-42.0.5-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a2913c5375154b6ef2e91c10b5720ea6e21007412f6437504ffea2109b5a33d7"}, + {file = "cryptography-42.0.5-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:c41fb5e6a5fe9ebcd58ca3abfeb51dffb5d83d6775405305bfa8715b76521922"}, + {file = "cryptography-42.0.5-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3eaafe47ec0d0ffcc9349e1708be2aaea4c6dd4978d76bf6eb0cb2c13636c6fc"}, + {file = "cryptography-42.0.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:1b95b98b0d2af784078fa69f637135e3c317091b615cd0905f8b8a087e86fa30"}, + {file = "cryptography-42.0.5-cp39-abi3-win32.whl", hash = "sha256:1f71c10d1e88467126f0efd484bd44bca5e14c664ec2ede64c32f20875c0d413"}, + {file = "cryptography-42.0.5-cp39-abi3-win_amd64.whl", hash = "sha256:a011a644f6d7d03736214d38832e030d8268bcff4a41f728e6030325fea3e400"}, + {file = "cryptography-42.0.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9481ffe3cf013b71b2428b905c4f7a9a4f76ec03065b05ff499bb5682a8d9ad8"}, + {file = "cryptography-42.0.5-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:ba334e6e4b1d92442b75ddacc615c5476d4ad55cc29b15d590cc6b86efa487e2"}, + {file = "cryptography-42.0.5-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:ba3e4a42397c25b7ff88cdec6e2a16c2be18720f317506ee25210f6d31925f9c"}, + {file = "cryptography-42.0.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:111a0d8553afcf8eb02a4fea6ca4f59d48ddb34497aa8706a6cf536f1a5ec576"}, + {file = "cryptography-42.0.5-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:cd65d75953847815962c84a4654a84850b2bb4aed3f26fadcc1c13892e1e29f6"}, + {file = "cryptography-42.0.5-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:e807b3188f9eb0eaa7bbb579b462c5ace579f1cedb28107ce8b48a9f7ad3679e"}, + {file = "cryptography-42.0.5-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f12764b8fffc7a123f641d7d049d382b73f96a34117e0b637b80643169cec8ac"}, + {file = "cryptography-42.0.5-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:37dd623507659e08be98eec89323469e8c7b4c1407c85112634ae3dbdb926fdd"}, + {file = "cryptography-42.0.5.tar.gz", hash = "sha256:6fe07eec95dfd477eb9530aef5bead34fec819b3aaf6c5bd6d20565da607bfe1"}, +] + +[package.dependencies] +cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""} + +[package.extras] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] +docstest = ["pyenchant (>=1.6.11)", "readme-renderer", "sphinxcontrib-spelling (>=4.0.1)"] +nox = ["nox"] +pep8test = ["check-sdist", "click", "mypy", "ruff"] +sdist = ["build"] +ssh = ["bcrypt (>=3.1.5)"] +test = ["certifi", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test-randomorder = ["pytest-randomly"] + [[package]] name = "cycler" version = "0.12.1" @@ -2080,6 +2234,22 @@ local-models = ["optimum[onnxruntime] (>=1.13.2,<2.0.0)", "sentencepiece (>=0.1. postgres = ["asyncpg (>=0.28.0,<0.29.0)", "pgvector (>=0.1.0,<0.2.0)", "psycopg2-binary (>=2.9.9,<3.0.0)"] query-tools = ["guidance (>=0.0.64,<0.0.65)", "jsonpath-ng (>=1.6.0,<2.0.0)", "lm-format-enforcer (>=0.4.3,<0.5.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "scikit-learn", "spacy (>=3.7.1,<4.0.0)"] +[[package]] +name = "llama-index-embeddings-azure-openai" +version = "0.1.6" +description = "llama-index embeddings azure openai integration" +optional = true +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "llama_index_embeddings_azure_openai-0.1.6-py3-none-any.whl", hash = "sha256:a84a6d7d67296690e5d20070ce5d9920ec56b0d339338d276eae2a7b2f822b9e"}, + {file = "llama_index_embeddings_azure_openai-0.1.6.tar.gz", hash = "sha256:05092b1b31bd0f45257d161f1e5a17261c60e688f4c6a4fe316557349ac2aebc"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.11.post1,<0.11.0" +llama-index-embeddings-openai = ">=0.1.3,<0.2.0" +llama-index-llms-azure-openai = ">=0.1.3,<0.2.0" + [[package]] name = "llama-index-embeddings-huggingface" version = "0.1.4" @@ -2125,6 +2295,23 @@ files = [ [package.dependencies] llama-index-core = ">=0.10.1,<0.11.0" +[[package]] +name = "llama-index-llms-azure-openai" +version = "0.1.5" +description = "llama-index llms azure openai integration" +optional = true +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "llama_index_llms_azure_openai-0.1.5-py3-none-any.whl", hash = "sha256:180805a7114198155aad7cc3abdf599142c59242d366b11ee8a9150de35b7773"}, + {file = "llama_index_llms_azure_openai-0.1.5.tar.gz", hash = "sha256:5a1c3d1a6a4fe4d03acb50b61594e6775dc86a431738afa291f3708029299a92"}, +] + +[package.dependencies] +azure-identity = ">=1.15.0,<2.0.0" +httpx = "*" +llama-index-core = ">=0.10.11.post1,<0.11.0" +llama-index-llms-openai = ">=0.1.1,<0.2.0" + [[package]] name = "llama-index-llms-llama-cpp" version = "0.1.3" @@ -2572,6 +2759,44 @@ docs = ["sphinx"] gmpy = ["gmpy2 (>=2.1.0a4)"] tests = ["pytest (>=4.6)"] +[[package]] +name = "msal" +version = "1.27.0" +description = "The Microsoft Authentication Library (MSAL) for Python library enables your app to access the Microsoft Cloud by supporting authentication of users with Microsoft Azure Active Directory accounts (AAD) and Microsoft Accounts (MSA) using industry standard OAuth2 and OpenID Connect." +optional = true +python-versions = ">=2.7" +files = [ + {file = "msal-1.27.0-py2.py3-none-any.whl", hash = "sha256:572d07149b83e7343a85a3bcef8e581167b4ac76befcbbb6eef0c0e19643cdc0"}, + {file = "msal-1.27.0.tar.gz", hash = "sha256:3109503c038ba6b307152b0e8d34f98113f2e7a78986e28d0baf5b5303afda52"}, +] + +[package.dependencies] +cryptography = ">=0.6,<45" +PyJWT = {version = ">=1.0.0,<3", extras = ["crypto"]} +requests = ">=2.0.0,<3" + +[package.extras] +broker = ["pymsalruntime (>=0.13.2,<0.15)"] + +[[package]] +name = "msal-extensions" +version = "1.1.0" +description = "Microsoft Authentication Library extensions (MSAL EX) provides a persistence API that can save your data on disk, encrypted on Windows, macOS and Linux. Concurrent data access will be coordinated by a file lock mechanism." +optional = true +python-versions = ">=3.7" +files = [ + {file = "msal-extensions-1.1.0.tar.gz", hash = "sha256:6ab357867062db7b253d0bd2df6d411c7891a0ee7308d54d1e4317c1d1c54252"}, + {file = "msal_extensions-1.1.0-py3-none-any.whl", hash = "sha256:01be9711b4c0b1a151450068eeb2c4f0997df3bba085ac299de3a66f585e382f"}, +] + +[package.dependencies] +msal = ">=0.4.1,<2.0.0" +packaging = "*" +portalocker = [ + {version = ">=1.0,<3", markers = "platform_system != \"Windows\""}, + {version = ">=1.6,<3", markers = "platform_system == \"Windows\""}, +] + [[package]] name = "multidict" version = "6.0.4" @@ -2948,6 +3173,7 @@ optional = true python-versions = ">=3" files = [ {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-manylinux1_x86_64.whl", hash = "sha256:64335a8088e2b9d196ae8665430bc6a2b7e6ef2eb877a9c735c804bd4ff6467c"}, + {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-manylinux2014_aarch64.whl", hash = "sha256:211a63e7b30a9d62f1a853e19928fbb1a750e3f17a13a3d1f98ff0ced19478dd"}, {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-win_amd64.whl", hash = "sha256:1b2e317e437433753530792f13eece58f0aec21a2b05903be7bffe58a606cbd1"}, ] @@ -3708,6 +3934,17 @@ files = [ [package.dependencies] pyasn1 = ">=0.4.6,<0.6.0" +[[package]] +name = "pycparser" +version = "2.21" +description = "C parser in Python" +optional = true +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, + {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, +] + [[package]] name = "pydantic" version = "2.5.2" @@ -3902,6 +4139,26 @@ files = [ plugins = ["importlib-metadata"] windows-terminal = ["colorama (>=0.4.6)"] +[[package]] +name = "pyjwt" +version = "2.8.0" +description = "JSON Web Token implementation in Python" +optional = true +python-versions = ">=3.7" +files = [ + {file = "PyJWT-2.8.0-py3-none-any.whl", hash = "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320"}, + {file = "PyJWT-2.8.0.tar.gz", hash = "sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de"}, +] + +[package.dependencies] +cryptography = {version = ">=3.4.0", optional = true, markers = "extra == \"crypto\""} + +[package.extras] +crypto = ["cryptography (>=3.4.0)"] +dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] + [[package]] name = "pymupdf" version = "1.23.25" @@ -5938,10 +6195,12 @@ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.link testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] [extras] +embeddings-azopenai = ["llama-index-embeddings-azure-openai"] embeddings-huggingface = ["llama-index-embeddings-huggingface"] embeddings-ollama = ["llama-index-embeddings-ollama"] embeddings-openai = ["llama-index-embeddings-openai"] embeddings-sagemaker = ["boto3"] +llms-azopenai = ["llama-index-llms-azure-openai"] llms-llama-cpp = ["llama-index-llms-llama-cpp"] llms-ollama = ["llama-index-llms-ollama"] llms-openai = ["llama-index-llms-openai"] @@ -5956,4 +6215,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"] [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.12" -content-hash = "689df29f4f2209e7ae6638563f4bb25700d1454098d0c728a164a708d42fa377" +content-hash = "3d5f21e5e41ea66d655891a6d9b01bcdd8348b275e27a54e90b65ac9d5719981" diff --git a/private_gpt/components/embedding/embedding_component.py b/private_gpt/components/embedding/embedding_component.py index f384262d..2967c38b 100644 --- a/private_gpt/components/embedding/embedding_component.py +++ b/private_gpt/components/embedding/embedding_component.py @@ -72,6 +72,24 @@ class EmbeddingComponent: model_name=ollama_settings.embedding_model, base_url=ollama_settings.api_base, ) + case "azopenai": + try: + from llama_index.embeddings.azure_openai import ( # type: ignore + AzureOpenAIEmbedding, + ) + except ImportError as e: + raise ImportError( + "Azure OpenAI dependencies not found, install with `poetry install --extras embeddings-azopenai`" + ) from e + + azopenai_settings = settings.azopenai + self.embedding_model = AzureOpenAIEmbedding( + model=azopenai_settings.embedding_model, + deployment_name=azopenai_settings.embedding_deployment_name, + api_key=azopenai_settings.api_key, + azure_endpoint=azopenai_settings.azure_endpoint, + api_version=azopenai_settings.api_version, + ) case "mock": # Not a random number, is the dimensionality used by # the default embedding model diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py index d4e13a58..953209a8 100644 --- a/private_gpt/components/llm/llm_component.py +++ b/private_gpt/components/llm/llm_component.py @@ -132,5 +132,23 @@ class LLMComponent: context_window=settings.llm.context_window, additional_kwargs=settings_kwargs, ) + case "azopenai": + try: + from llama_index.llms.azure_openai import ( # type: ignore + AzureOpenAI, + ) + except ImportError as e: + raise ImportError( + "Azure OpenAI dependencies not found, install with `poetry install --extras llms-azopenai`" + ) from e + + azopenai_settings = settings.azopenai + self.llm = AzureOpenAI( + model=azopenai_settings.llm_model, + deployment_name=azopenai_settings.llm_deployment_name, + api_key=azopenai_settings.api_key, + azure_endpoint=azopenai_settings.azure_endpoint, + api_version=azopenai_settings.api_version, + ) case "mock": self.llm = MockLLM() diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py index 868a3cde..4c274384 100644 --- a/private_gpt/settings/settings.py +++ b/private_gpt/settings/settings.py @@ -81,7 +81,9 @@ class DataSettings(BaseModel): class LLMSettings(BaseModel): - mode: Literal["llamacpp", "openai", "openailike", "sagemaker", "mock", "ollama"] + mode: Literal[ + "llamacpp", "openai", "openailike", "azopenai", "sagemaker", "mock", "ollama" + ] max_new_tokens: int = Field( 256, description="The maximum number of token that the LLM is authorized to generate in one completion.", @@ -152,7 +154,7 @@ class HuggingFaceSettings(BaseModel): class EmbeddingSettings(BaseModel): - mode: Literal["huggingface", "openai", "sagemaker", "ollama", "mock"] + mode: Literal["huggingface", "openai", "azopenai", "sagemaker", "ollama", "mock"] ingest_mode: Literal["simple", "batch", "parallel"] = Field( "simple", description=( @@ -239,6 +241,25 @@ class OllamaSettings(BaseModel): ) +class AzureOpenAISettings(BaseModel): + api_key: str + azure_endpoint: str + api_version: str = Field( + "2023_05_15", + description="The API version to use for this operation. This follows the YYYY-MM-DD format.", + ) + embedding_deployment_name: str + embedding_model: str = Field( + "text-embedding-ada-002", + description="OpenAI Model to use. Example: 'text-embedding-ada-002'.", + ) + llm_deployment_name: str + llm_model: str = Field( + "gpt-35-turbo", + description="OpenAI Model to use. Example: 'gpt-4'.", + ) + + class UISettings(BaseModel): enabled: bool path: str @@ -349,6 +370,7 @@ class Settings(BaseModel): sagemaker: SagemakerSettings openai: OpenAISettings ollama: OllamaSettings + azopenai: AzureOpenAISettings vectorstore: VectorstoreSettings nodestore: NodeStoreSettings qdrant: QdrantSettings | None = None diff --git a/pyproject.toml b/pyproject.toml index 1391fb21..d5689998 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,9 +21,11 @@ llama-index-llms-llama-cpp = {version = "^0.1.3", optional = true} llama-index-llms-openai = {version = "^0.1.6", optional = true} llama-index-llms-openai-like = {version ="^0.1.3", optional = true} llama-index-llms-ollama = {version ="^0.1.2", optional = true} +llama-index-llms-azure-openai = {version ="^0.1.5", optional = true} llama-index-embeddings-ollama = {version ="^0.1.2", optional = true} llama-index-embeddings-huggingface = {version ="^0.1.4", optional = true} llama-index-embeddings-openai = {version ="^0.1.6", optional = true} +llama-index-embeddings-azure-openai = {version ="^0.1.6", optional = true} llama-index-vector-stores-qdrant = {version ="^0.1.3", optional = true} llama-index-vector-stores-chroma = {version ="^0.1.4", optional = true} llama-index-vector-stores-postgres = {version ="^0.1.2", optional = true} @@ -45,10 +47,12 @@ llms-openai = ["llama-index-llms-openai"] llms-openai-like = ["llama-index-llms-openai-like"] llms-ollama = ["llama-index-llms-ollama"] llms-sagemaker = ["boto3"] +llms-azopenai = ["llama-index-llms-azure-openai"] embeddings-ollama = ["llama-index-embeddings-ollama"] embeddings-huggingface = ["llama-index-embeddings-huggingface"] embeddings-openai = ["llama-index-embeddings-openai"] embeddings-sagemaker = ["boto3"] +embeddings-azopenai = ["llama-index-embeddings-azure-openai"] vector-stores-qdrant = ["llama-index-vector-stores-qdrant"] vector-stores-chroma = ["llama-index-vector-stores-chroma"] vector-stores-postgres = ["llama-index-vector-stores-postgres"] diff --git a/settings-azopenai.yaml b/settings-azopenai.yaml new file mode 100644 index 00000000..7e4b47c1 --- /dev/null +++ b/settings-azopenai.yaml @@ -0,0 +1,17 @@ +server: + env_name: ${APP_ENV:azopenai} + +llm: + mode: azopenai + +embedding: + mode: azopenai + +azopenai: + api_key: ${AZ_OPENAI_API_KEY:} + azure_endpoint: ${AZ_OPENAI_ENDPOINT:} + embedding_deployment_name: ${AZ_OPENAI_EMBEDDING_DEPLOYMENT_NAME:} + llm_deployment_name: ${AZ_OPENAI_LLM_DEPLOYMENT_NAME:} + api_version: "2023-05-15" + embedding_model: text-embedding-ada-002 + llm_model: gpt-35-turbo \ No newline at end of file diff --git a/settings.yaml b/settings.yaml index 862844a1..0b4cb341 100644 --- a/settings.yaml +++ b/settings.yaml @@ -89,3 +89,12 @@ ollama: llm_model: llama2 embedding_model: nomic-embed-text api_base: http://localhost:11434 + +azopenai: + api_key: ${AZ_OPENAI_API_KEY:} + azure_endpoint: ${AZ_OPENAI_ENDPOINT:} + embedding_deployment_name: ${AZ_OPENAI_EMBEDDING_DEPLOYMENT_NAME:} + llm_deployment_name: ${AZ_OPENAI_LLM_DEPLOYMENT_NAME:} + api_version: "2023-05-15" + embedding_model: text-embedding-ada-002 + llm_model: gpt-35-turbo