From f4bed8a04c31fd8d6acb3b6bc12adcc464c30b82 Mon Sep 17 00:00:00 2001 From: "Blake (Yung Cher Ho)" Date: Sun, 3 Sep 2023 22:45:59 +0100 Subject: [PATCH] Takeoff baseurl support (#10091) ## Description This PR introduces a minor change to the TitanTakeoff integration. Instead of specifying a port on localhost, this PR will allow users to specify a baseURL instead. This will allow users to use the integration if they have TitanTakeoff deployed externally (not on localhost). This removes the hardcoded reference to localhost "http://localhost:{port}". ### Info about Titan Takeoff Titan Takeoff is an inference server created by [TitanML](https://www.titanml.co/) that allows you to deploy large language models locally on your hardware in a single command. Most generative model architectures are included, such as Falcon, Llama 2, GPT2, T5 and many more. Read more about Titan Takeoff here: - [Blog](https://medium.com/@TitanML/introducing-titan-takeoff-6c30e55a8e1e) - [Docs](https://docs.titanml.co/docs/titan-takeoff/getting-started) ### Dependencies No new dependencies are introduced. However, users will need to install the titan-iris package in their local environment and start the Titan Takeoff inferencing server in order to use the Titan Takeoff integration. Thanks for your help and please let me know if you have any questions. 
cc: @hwchase17 @baskaryan --------- Co-authored-by: Harrison Chase --- .../integrations/llms/titan_takeoff.ipynb | 34 +++++++++++++++---- .../langchain/langchain/llms/titan_takeoff.py | 12 ++++--- 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/docs/extras/integrations/llms/titan_takeoff.ipynb b/docs/extras/integrations/llms/titan_takeoff.ipynb index 1886a92de1c..5b8fddcb1e2 100644 --- a/docs/extras/integrations/llms/titan_takeoff.ipynb +++ b/docs/extras/integrations/llms/titan_takeoff.ipynb @@ -42,7 +42,7 @@ "metadata": {}, "source": [ "## Choose a Model\n", - "Iris Takeoff supports many of the most powerful generative text models, such as Falcon, MPT, and Llama. See the [supported models](https://docs.titanml.co/docs/titan-takeoff/supported-models) for more information. For information about using your own models, see the [custom models](https://docs.titanml.co/docs/titan-takeoff/Advanced/custom-models).\n", + "Takeoff supports many of the most powerful generative text models, such as Falcon, MPT, and Llama. See the [supported models](https://docs.titanml.co/docs/titan-takeoff/supported-models) for more information. For information about using your own models, see the [custom models](https://docs.titanml.co/docs/titan-takeoff/Advanced/custom-models).\n", "\n", "Going forward in this demo we will be using the falcon 7B instruct model. 
This is a good open source model that is trained to follow instructions, and is small enough to easily inference even on CPUs.\n", "\n", @@ -64,8 +64,7 @@ "source": [ "iris takeoff --model tiiuae/falcon-7b-instruct --device cpu\n", "iris takeoff --model tiiuae/falcon-7b-instruct --device cuda # Nvidia GPU required\n", - "iris takeoff --model tiiuae/falcon-7b-instruct --device cpu --port 5000 # run on port 5000 (default: 8000)\n", - "```" + "iris takeoff --model tiiuae/falcon-7b-instruct --device cpu --port 5000 # run on port 5000 (default: 8000)" ] }, { @@ -73,8 +72,29 @@ "metadata": {}, "source": [ "You will then be directed to a login page, where you will need to create an account to proceed.\n", - "After logging in, run the command onscreen to check whether the server is ready. When it is ready, you can start using the Takeoff integration\n", + "After logging in, run the command onscreen to check whether the server is ready. When it is ready, you can start using the Takeoff integration.\n", "\n", + "To shut down the server, run the following command. 
You will be presented with options on which Takeoff server to shut down, in case you have multiple running servers.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "iris takeoff --shutdown # shut down the server" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ "## Inferencing your model\n", "To access your LLM, use the TitanTakeoff LLM wrapper:" ] @@ -88,7 +108,7 @@ "from langchain.llms import TitanTakeoff\n", "\n", "llm = TitanTakeoff(\n", - " port=8000,\n", + " base_url=\"http://localhost:8000\",\n", " generate_max_length=128,\n", " temperature=1.0\n", ")\n", @@ -102,7 +122,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "No parameters are needed by default, but a port can be specified and [generation parameters](https://docs.titanml.co/docs/titan-takeoff/Advanced/generation-parameters) can be supplied.\n", + "No parameters are needed by default, but a `base_url` that points to the URL where Takeoff is running can be specified and [generation parameters](https://docs.titanml.co/docs/titan-takeoff/Advanced/generation-parameters) can be supplied.\n", "\n", "### Streaming\n", "Streaming is also supported via the streaming flag:" @@ -117,7 +137,7 @@ "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", "from langchain.callbacks.manager import CallbackManager\n", "\n", - "llm = TitanTakeoff(port=8000, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), streaming=True)\n", + "llm = TitanTakeoff(callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), streaming=True)\n", "\n", "prompt = \"What is the capital of France?\"\n", "\n", diff --git a/libs/langchain/langchain/llms/titan_takeoff.py b/libs/langchain/langchain/llms/titan_takeoff.py index 517918fa993..d87784aceaf 100644 --- a/libs/langchain/langchain/llms/titan_takeoff.py +++ 
b/libs/langchain/langchain/llms/titan_takeoff.py @@ -10,8 +10,10 @@ from langchain.schema.output import GenerationChunk class TitanTakeoff(LLM): - port: int = 8000 - """Specifies the port to use for the Titan Takeoff API. Default = 8000.""" + base_url: str = "http://localhost:8000" + """Specifies the baseURL to use for the Titan Takeoff API. + Default = http://localhost:8000. + """ generate_max_length: int = 128 """Maximum generation length. Default = 128.""" @@ -92,7 +94,7 @@ class TitanTakeoff(LLM): text_output += chunk.text return text_output - url = f"http://localhost:{self.port}/generate" + url = f"{self.base_url}/generate" params = {"text": prompt, **self._default_params} response = requests.post(url, json=params) @@ -139,7 +141,7 @@ class TitanTakeoff(LLM): response = model(prompt) """ - url = f"http://localhost:{self.port}/generate_stream" + url = f"{self.base_url}/generate_stream" params = {"text": prompt, **self._default_params} response = requests.post(url, json=params, stream=True) @@ -154,4 +156,4 @@ class TitanTakeoff(LLM): @property def _identifying_params(self) -> Mapping[str, Any]: """Get the identifying parameters.""" - return {"port": self.port, **{}, **self._default_params} + return {"base_url": self.base_url, **{}, **self._default_params}