diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py index 8cfee4b6..767767b9 100644 --- a/private_gpt/components/llm/llm_component.py +++ b/private_gpt/components/llm/llm_component.py @@ -113,7 +113,9 @@ class LLMComponent: ) case "tensorrt": try: - from llama_index.llms.nvidia_tensorrt import LocalTensorRTLLM # type: ignore + from llama_index.llms.nvidia_tensorrt import ( # type: ignore + LocalTensorRTLLM, + ) except ImportError as e: raise ImportError( "Nvidia TensorRTLLM dependencies not found, install with `poetry install --extras llms-nvidia-tensorrt`" diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py index db65d227..f3cae19d 100644 --- a/private_gpt/settings/settings.py +++ b/private_gpt/settings/settings.py @@ -81,7 +81,9 @@ class DataSettings(BaseModel): class LLMSettings(BaseModel): - mode: Literal["llamacpp", "openai", "openailike", "sagemaker", "mock", "ollama", "tensorrt"] + mode: Literal[ + "llamacpp", "openai", "openailike", "sagemaker", "mock", "ollama", "tensorrt" + ] max_new_tokens: int = Field( 256, description="The maximum number of token that the LLM is authorized to generate in one completion.",