diff --git a/docs/extras/integrations/llms/titan_takeoff.ipynb b/docs/extras/integrations/llms/titan_takeoff.ipynb
index 1886a92de1c..5b8fddcb1e2 100644
--- a/docs/extras/integrations/llms/titan_takeoff.ipynb
+++ b/docs/extras/integrations/llms/titan_takeoff.ipynb
@@ -42,7 +42,7 @@
    "metadata": {},
    "source": [
     "## Choose a Model\n",
-    "Iris Takeoff supports many of the most powerful generative text models, such as Falcon, MPT, and Llama. See the [supported models](https://docs.titanml.co/docs/titan-takeoff/supported-models) for more information. For information about using your own models, see the [custom models](https://docs.titanml.co/docs/titan-takeoff/Advanced/custom-models).\n",
+    "Takeoff supports many of the most powerful generative text models, such as Falcon, MPT, and Llama. See the [supported models](https://docs.titanml.co/docs/titan-takeoff/supported-models) for more information. For information about using your own models, see the [custom models](https://docs.titanml.co/docs/titan-takeoff/Advanced/custom-models).\n",
     "\n",
     "Going forward in this demo we will be using the Falcon 7B Instruct model. This is a good open-source model that is trained to follow instructions and is small enough to run inference easily even on CPUs.\n",
     "\n",
@@ -64,8 +64,7 @@
    "source": [
     "iris takeoff --model tiiuae/falcon-7b-instruct --device cpu\n",
     "iris takeoff --model tiiuae/falcon-7b-instruct --device cuda # Nvidia GPU required\n",
-    "iris takeoff --model tiiuae/falcon-7b-instruct --device cpu --port 5000 # run on port 5000 (default: 8000)\n",
-    "```"
+    "iris takeoff --model tiiuae/falcon-7b-instruct --device cpu --port 5000 # run on port 5000 (default: 8000)"
    ]
   },
   {
@@ -73,8 +72,29 @@
    "metadata": {},
    "source": [
     "You will then be directed to a login page, where you will need to create an account to proceed.\n",
-    "After logging in, run the command onscreen to check whether the server is ready. When it is ready, you can start using the Takeoff integration\n",
+    "After logging in, run the command onscreen to check whether the server is ready. When it is ready, you can start using the Takeoff integration.\n",
     "\n",
+    "To shut down the server, run the following command. You will be presented with options on which Takeoff server to shut down, in case you have multiple running servers.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "vscode": {
+     "languageId": "shellscript"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "iris takeoff --shutdown # shut down the server"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
     "## Inferencing your model\n",
     "To access your LLM, use the TitanTakeoff LLM wrapper:"
    ]
   },
@@ -88,7 +108,7 @@
    "from langchain.llms import TitanTakeoff\n",
    "\n",
    "llm = TitanTakeoff(\n",
-    "    port=8000,\n",
+    "    base_url=\"http://localhost:8000\",\n",
     "    generate_max_length=128,\n",
     "    temperature=1.0\n",
     ")\n",
@@ -102,7 +122,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "No parameters are needed by default, but a port can be specified and [generation parameters](https://docs.titanml.co/docs/titan-takeoff/Advanced/generation-parameters) can be supplied.\n",
+    "No parameters are needed by default, but a `base_url` pointing to wherever your Takeoff server is running can be specified, and [generation parameters](https://docs.titanml.co/docs/titan-takeoff/Advanced/generation-parameters) can be supplied.\n",
     "\n",
     "### Streaming\n",
     "Streaming is also supported via the streaming flag:"
@@ -117,7 +137,7 @@
    "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
    "from langchain.callbacks.manager import CallbackManager\n",
    "\n",
-    "llm = TitanTakeoff(port=8000, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), streaming=True)\n",
+    "llm = TitanTakeoff(callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), streaming=True)\n",
     "\n",
     "prompt = \"What is the capital of France?\"\n",
     "\n",
diff --git a/libs/langchain/langchain/llms/titan_takeoff.py b/libs/langchain/langchain/llms/titan_takeoff.py
index 517918fa993..d87784aceaf 100644
--- a/libs/langchain/langchain/llms/titan_takeoff.py
+++ b/libs/langchain/langchain/llms/titan_takeoff.py
@@ -10,8 +10,10 @@ from langchain.schema.output import GenerationChunk
 
 
 class TitanTakeoff(LLM):
-    port: int = 8000
-    """Specifies the port to use for the Titan Takeoff API. Default = 8000."""
+    base_url: str = "http://localhost:8000"
+    """Specifies the base URL to use for the Titan Takeoff API.
+    Default = http://localhost:8000.
+    """
 
     generate_max_length: int = 128
     """Maximum generation length. Default = 128."""
@@ -92,7 +94,7 @@ class TitanTakeoff(LLM):
                 text_output += chunk.text
             return text_output
 
-        url = f"http://localhost:{self.port}/generate"
+        url = f"{self.base_url}/generate"
         params = {"text": prompt, **self._default_params}
 
         response = requests.post(url, json=params)
@@ -139,7 +141,7 @@ class TitanTakeoff(LLM):
                 response = model(prompt)
         """
-        url = f"http://localhost:{self.port}/generate_stream"
+        url = f"{self.base_url}/generate_stream"
         params = {"text": prompt, **self._default_params}
 
         response = requests.post(url, json=params, stream=True)
@@ -154,4 +156,4 @@ class TitanTakeoff(LLM):
     @property
     def _identifying_params(self) -> Mapping[str, Any]:
         """Get the identifying parameters."""
-        return {"port": self.port, **{}, **self._default_params}
+        return {"base_url": self.base_url, **self._default_params}
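Taken together, the notebook and wrapper changes mean the integration is now addressed by URL rather than by a port on localhost. A minimal end-to-end sketch of the updated interface, assuming a Takeoff server has already been started with `iris takeoff` and is reachable at the address given (the port, prompts, and generation parameters below are illustrative, not defaults):

```python
# Sketch of the updated TitanTakeoff interface introduced in this diff.
# Assumes a Takeoff server is already running and reachable at base_url;
# "http://localhost:3000" is an illustrative non-default address.
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import TitanTakeoff

# Blocking generation: the wrapper POSTs to {base_url}/generate.
llm = TitanTakeoff(
    base_url="http://localhost:3000",
    generate_max_length=128,
    temperature=1.0,
)
print(llm("What is the largest rainforest in the world?"))

# Streaming generation: the wrapper POSTs to {base_url}/generate_stream,
# and the stdout callback handler echoes tokens as they arrive.
streaming_llm = TitanTakeoff(
    base_url="http://localhost:3000",
    streaming=True,
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
)
streaming_llm("What is the capital of France?")
```

Because the full base URL is configurable, the same wrapper can also target a Takeoff server running in a container or on a remote host, which the old `port`-only field could not express.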