Stop using the fake tensorrt-llm package. Update documentation.

This commit is contained in:
imartinez 2024-03-08 00:52:01 +01:00
parent 937c52354b
commit a93db2850c
3 changed files with 5 additions and 15 deletions

View File

@ -137,6 +137,8 @@ Follow these steps to set up a local TensorRT-powered PrivateGPT:
- NVIDIA CUDA 12.2 or higher is currently required to run TensorRT-LLM.
- Install `tensorrt_llm` via pip with `pip install --no-cache-dir --extra-index-url https://pypi.nvidia.com tensorrt-llm`, as explained [here](https://pypi.org/project/tensorrt-llm/).
- For this example we will use Llama2. The Llama2 model files need to be created via scripts following the instructions [here](https://github.com/NVIDIA/trt-llm-rag-windows/blob/release/1.0/README.md#building-trt-engine).
Following the steps in the link will create these files:

14
poetry.lock generated
View File

@ -4906,16 +4906,6 @@ files = [
[package.extras]
doc = ["reno", "sphinx", "tornado (>=4.5)"]
[[package]]
name = "tensorrt-llm"
version = "0.8.0"
description = "A fake package to warn the user they are not installing the correct package."
optional = true
python-versions = ">=3.7, <4"
files = [
{file = "tensorrt-llm-0.8.0.tar.gz", hash = "sha256:8bd59bf59766bb16f81bd330ca38765a532a21a35d323fd33929c80a6ec53eaf"},
]
[[package]]
name = "tiktoken"
version = "0.5.2"
@ -5971,7 +5961,7 @@ embeddings-huggingface = ["llama-index-embeddings-huggingface"]
embeddings-openai = ["llama-index-embeddings-openai"]
embeddings-sagemaker = ["boto3"]
llms-llama-cpp = ["llama-index-llms-llama-cpp"]
llms-nvidia-tensorrt = ["llama-index-llms-nvidia-tensorrt", "tensorrt_llm"]
llms-nvidia-tensorrt = ["llama-index-llms-nvidia-tensorrt"]
llms-ollama = ["llama-index-llms-ollama"]
llms-openai = ["llama-index-llms-openai"]
llms-openai-like = ["llama-index-llms-openai-like"]
@ -5984,4 +5974,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.12"
content-hash = "da01e96bb8eb18aa3b6608cf60384771ad674b6ec7a26a685a62274c0302c8f9"
content-hash = "39f0ac666402807cde29f763c14dfb6b2fc9862c0cd31de398c67a1fedbb4b12"

View File

@ -29,8 +29,6 @@ llama-index-vector-stores-postgres = {version ="^0.1.2", optional = true}
llama-index-llms-nvidia-tensorrt = {version ="^0.1.2", optional = true}
# Optional Sagemaker dependency
boto3 = {version ="^1.34.51", optional = true}
# Optional Nvidia TensorRT dependency
tensorrt_llm = {version ="^0.8.0", optional = true}
# Optional UI
gradio = {version ="^4.19.2", optional = true}
@ -41,7 +39,7 @@ llms-openai = ["llama-index-llms-openai"]
llms-openai-like = ["llama-index-llms-openai-like"]
llms-ollama = ["llama-index-llms-ollama"]
llms-sagemaker = ["boto3"]
llms-nvidia-tensorrt = ["tensorrt_llm", "llama-index-llms-nvidia-tensorrt"]
llms-nvidia-tensorrt = ["llama-index-llms-nvidia-tensorrt"]
embeddings-huggingface = ["llama-index-embeddings-huggingface"]
embeddings-openai = ["llama-index-embeddings-openai"]
embeddings-sagemaker = ["boto3"]