From 021bb9be841c255bacdd9ae9627e7a4efb1f28f4 Mon Sep 17 00:00:00 2001
From: Jonathon Belotti
Date: Thu, 20 Jul 2023 18:53:06 -0400
Subject: [PATCH] Update Modal.com integration docs (#8014)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Hey, I'm a Modal Labs engineer and I'm making this docs update after getting a user question in [our beta Slack space](https://join.slack.com/t/modalbetatesters/shared_invite/zt-1xl9gbob8-1QDgUY7_PRPg6dQ49hqEeQ) about the LangChain integration docs.

🔗 [Modal beta-testers link to docs discussion thread](https://modalbetatesters.slack.com/archives/C031Z7DBQFL/p1689777700594819?thread_ts=1689775859.855849&cid=C031Z7DBQFL)
---
 docs/extras/ecosystem/integrations/modal.mdx | 93 ++++++++++++-------
 .../models/llms/integrations/modal.ipynb     | 75 +++++++++++----
 2 files changed, 119 insertions(+), 49 deletions(-)

diff --git a/docs/extras/ecosystem/integrations/modal.mdx b/docs/extras/ecosystem/integrations/modal.mdx
index 7338e88e01a..6d6854c92a7 100644
--- a/docs/extras/ecosystem/integrations/modal.mdx
+++ b/docs/extras/ecosystem/integrations/modal.mdx
@@ -1,66 +1,95 @@
 # Modal
 
-This page covers how to use the Modal ecosystem within LangChain.
-It is broken into two parts: installation and setup, and then references to specific Modal wrappers.
+This page covers how to use the Modal ecosystem to run LangChain custom LLMs.
+It is broken into two parts:
+
+1. Modal installation and web endpoint deployment
+2. Using the deployed web endpoint with the `LLM` wrapper class
 
 ## Installation and Setup
+
-- Install with `pip install modal-client`
+- Install with `pip install modal`
 - Run `modal token new`
 
 ## Define your Modal Functions and Webhooks
 
-You must include a prompt. There is a rigid response structure.
+Your web endpoint must accept a prompt and return its response in a rigid structure:

```python
class Item(BaseModel):
    prompt: str

-@stub.webhook(method="POST")
-def my_webhook(item: Item):
-    return {"prompt": my_function.call(item.prompt)}
+@stub.function()
+@modal.web_endpoint(method="POST")
+def get_text(item: Item):
+    return {"prompt": run_gpt2.call(item.prompt)}
```

-An example with GPT2:
+The following is a complete example with the GPT-2 model:

```python
from pydantic import BaseModel

import modal

-stub = modal.Stub("example-get-started")
-
-volume = modal.SharedVolume().persist("gpt2_model_vol")
CACHE_PATH = "/root/model_cache"

-@stub.function(
-    gpu="any",
-    image=modal.Image.debian_slim().pip_install(
-        "tokenizers", "transformers", "torch", "accelerate"
-    ),
-    shared_volumes={CACHE_PATH: volume},
-    retries=3,
-)
-def run_gpt2(text: str):
-    from transformers import GPT2Tokenizer, GPT2LMHeadModel
-    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
-    model = GPT2LMHeadModel.from_pretrained('gpt2')
-    encoded_input = tokenizer(text, return_tensors='pt').input_ids
-    output = model.generate(encoded_input, max_length=50, do_sample=True)
-    return tokenizer.decode(output[0], skip_special_tokens=True)
-
class Item(BaseModel):
    prompt: str

-@stub.webhook(method="POST")
+stub = modal.Stub(name="example-get-started-with-langchain")
+
+def download_model():
+    from transformers import GPT2Tokenizer, GPT2LMHeadModel
+    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+    model = GPT2LMHeadModel.from_pretrained('gpt2')
+    tokenizer.save_pretrained(CACHE_PATH)
+    model.save_pretrained(CACHE_PATH)
+
+# Define a container image for the LLM function below, which
+# downloads and stores the GPT-2 model.
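+# `.run_function(download_model)` executes `download_model` once at image
+# build time, baking the GPT-2 weights into the image so containers can
+# start without re-downloading the model.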
+image = modal.Image.debian_slim().pip_install(
+    "tokenizers", "transformers", "torch", "accelerate"
+).run_function(download_model)
+
+@stub.function(
+    gpu="any",
+    image=image,
+    retries=3,
+)
+def run_gpt2(text: str):
+    from transformers import GPT2Tokenizer, GPT2LMHeadModel
+    tokenizer = GPT2Tokenizer.from_pretrained(CACHE_PATH)
+    model = GPT2LMHeadModel.from_pretrained(CACHE_PATH)
+    encoded_input = tokenizer(text, return_tensors='pt').input_ids
+    output = model.generate(encoded_input, max_length=50, do_sample=True)
+    return tokenizer.decode(output[0], skip_special_tokens=True)
+
+@stub.function()
+@modal.web_endpoint(method="POST")
 def get_text(item: Item):
     return {"prompt": run_gpt2.call(item.prompt)}
```

-## Wrappers
+### Deploy the web endpoint

-### LLM
+Deploy the web endpoint to Modal cloud with the [`modal deploy`](https://modal.com/docs/reference/cli/deploy) CLI command.
+Your web endpoint will acquire a persistent URL under the `modal.run` domain.
+
+## LLM wrapper around Modal web endpoint
+
+The `Modal` LLM wrapper class accepts your deployed web endpoint's URL:

-There exists an Modal LLM wrapper, which you can access with
```python
+from langchain import LLMChain, PromptTemplate
 from langchain.llms import Modal
-```
\ No newline at end of file
+
+template = """Question: {question}
+
+Answer: Let's think step by step."""
+
+prompt = PromptTemplate(template=template, input_variables=["question"])
+
+endpoint_url = "https://ecorp--custom-llm-endpoint.modal.run"  # REPLACE ME with your deployed Modal web endpoint's URL
+
+llm = Modal(endpoint_url=endpoint_url)
+llm_chain = LLMChain(prompt=prompt, llm=llm)
+
+question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
+
+llm_chain.run(question)
+```
+
diff --git a/docs/extras/modules/model_io/models/llms/integrations/modal.ipynb b/docs/extras/modules/model_io/models/llms/integrations/modal.ipynb
index 2b569fc7e47..719c7ce54cd 100644
--- a/docs/extras/modules/model_io/models/llms/integrations/modal.ipynb
+++ b/docs/extras/modules/model_io/models/llms/integrations/modal.ipynb
@@ -6,12 +6,12 @@
    "source": [
     "# Modal\n",
     "\n",
-    "The [Modal Python Library](https://modal.com/docs/guide) provides convenient, on-demand access to serverless cloud compute from Python scripts on your local computer. \n",
-    "The `Modal` itself does not provide any LLMs but only the infrastructure.\n",
+    "The [Modal cloud platform](https://modal.com/docs/guide) provides convenient, on-demand access to serverless cloud compute from Python scripts on your local computer. \n",
+    "Use `modal` to run your own custom LLM models instead of depending on LLM APIs.\n",
     "\n",
-    "This example goes over how to use LangChain to interact with `Modal`.\n",
+    "This example goes over how to use LangChain to interact with a `modal` HTTPS [web endpoint](https://modal.com/docs/guide/webhooks).\n",
     "\n",
-    "[Here](https://modal.com/docs/guide/ex/potus_speech_qanda) is another example how to use LangChain to interact with `Modal`."
+    "[_Question-answering with LangChain_](https://modal.com/docs/guide/ex/potus_speech_qanda) is another example of how to use LangChain alongside `Modal`. In that example, Modal runs the LangChain application end-to-end and uses OpenAI as its LLM API."
   ]
  },
  {
@@ -22,7 +22,7 @@
   },
   "outputs": [],
   "source": [
-    "!pip install modal-client"
+    "!pip install modal"
   ]
  },
  {
@@ -36,20 +36,14 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     "\u001b[?25lLaunching login page in your browser window\u001b[33m...\u001b[0m\n",
-     "\u001b[2KIf this is not showing up, please copy this URL into your web browser manually:\n",
-     "\u001b[2Km⠙\u001b[0m Waiting for authentication in the web browser...\n",
-     "\u001b]8;id=417802;https://modal.com/token-flow/tf-ptEuGecm7T1T5YQe42kwM1\u001b\\\u001b[4;94mhttps://modal.com/token-flow/tf-ptEuGecm7T1T5YQe42kwM1\u001b[0m\u001b]8;;\u001b\\\n",
-     "\n",
-     "\u001b[2K\u001b[32m⠙\u001b[0m Waiting for authentication in the web browser...\n",
-     "\u001b[1A\u001b[2K^C\n",
-     "\n",
-     "\u001b[31mAborted.\u001b[0m\n"
+     "Launching login page in your browser window...\n",
+     "If this is not showing up, please copy this URL into your web browser manually:\n",
+     "https://modal.com/token-flow/tf-Dzm3Y01234mqmm1234Vcu3\n"
     ]
    }
   ],
   "source": [
-    "# register and get a new token\n",
+    "# Register an account with Modal and get a new token.\n",
     "\n",
     "!modal token new"
   ]
  },
@@ -58,7 +52,53 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "Follow [these instructions](https://modal.com/docs/guide/secrets) to deal with secrets."
+    "The [`langchain.llms.modal.Modal`](https://github.com/hwchase17/langchain/blame/master/langchain/llms/modal.py) integration class requires that you deploy a Modal application with a web endpoint that complies with the following JSON interface:\n",
+    "\n",
+    "1. The LLM prompt is accepted as a `str` value under the key `\"prompt\"`\n",
+    "2. The LLM response is returned as a `str` value under the key `\"prompt\"`\n",
+    "\n",
+    "**Example request JSON:**\n",
+    "\n",
+    "```json\n",
+    "{\n",
+    "    \"prompt\": \"Identify yourself, bot!\",\n",
+    "    \"extra\": \"args are allowed\"\n",
+    "}\n",
+    "```\n",
+    "\n",
+    "**Example response JSON:**\n",
+    "\n",
+    "```json\n",
+    "{\n",
+    "    \"prompt\": \"This is the LLM speaking\"\n",
+    "}\n",
+    "```\n",
+    "\n",
+    "An example 'dummy' Modal web endpoint function fulfilling this interface would be:\n",
+    "\n",
+    "```python\n",
+    "...\n",
+    "...\n",
+    "\n",
+    "class Request(BaseModel):\n",
+    "    prompt: str\n",
+    "\n",
+    "@stub.function()\n",
+    "@modal.web_endpoint(method=\"POST\")\n",
+    "def web(request: Request):\n",
+    "    _ = request # ignore input\n",
+    "    return {\"prompt\": \"hello world\"}\n",
+    "```\n",
+    "\n",
+    "* See Modal's [web endpoints](https://modal.com/docs/guide/webhooks#passing-arguments-to-web-endpoints) guide for the basics of setting up an endpoint that fulfils this interface.\n",
+    "* See Modal's ['Run Falcon-40B with AutoGPTQ'](https://modal.com/docs/guide/ex/falcon_gptq) open-source LLM example as a starting point for your custom LLM!"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Once you have a deployed Modal web endpoint, you can pass its URL into the `langchain.llms.modal.Modal` LLM class. This class can then function as a building block in your chain."
   ]
  },
 {
@@ -90,7 +130,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "llm = Modal(endpoint_url=\"YOUR_ENDPOINT_URL\")"
+    "endpoint_url = \"https://ecorp--custom-llm-endpoint.modal.run\" # REPLACE ME with your deployed Modal web endpoint's URL\n",
+    "llm = Modal(endpoint_url=endpoint_url)"
   ]
  },
 {