diff --git a/.gitignore b/.gitignore index a864d8f242a..b78b26ef9d9 100644 --- a/.gitignore +++ b/.gitignore @@ -178,3 +178,4 @@ docs/docs/build docs/docs/node_modules docs/docs/yarn.lock _dist +docs/docs/templates \ No newline at end of file diff --git a/docs/docs/integrations/llms/huggingface_pipelines.ipynb b/docs/docs/integrations/llms/huggingface_pipelines.ipynb index a5db50b1dcd..b6d41475ddd 100644 --- a/docs/docs/integrations/llms/huggingface_pipelines.ipynb +++ b/docs/docs/integrations/llms/huggingface_pipelines.ipynb @@ -1,214 +1,218 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "959300d4", - "metadata": {}, - "source": [ - "# Hugging Face Local Pipelines\n", - "\n", - "Hugging Face models can be run locally through the `HuggingFacePipeline` class.\n", - "\n", - "The [Hugging Face Model Hub](https://huggingface.co/models) hosts over 120k models, 20k datasets, and 50k demo apps (Spaces), all open source and publicly available, in an online platform where people can easily collaborate and build ML together.\n", - "\n", - "These can be called from LangChain either through this local pipeline wrapper or by calling their hosted inference endpoints through the HuggingFaceHub class. For more information on the hosted pipelines, see the [HuggingFaceHub](huggingface_hub.html) notebook." - ] + "cells": [ + { + "cell_type": "markdown", + "id": "959300d4", + "metadata": {}, + "source": [ + "# Hugging Face Local Pipelines\n", + "\n", + "Hugging Face models can be run locally through the `HuggingFacePipeline` class.\n", + "\n", + "The [Hugging Face Model Hub](https://huggingface.co/models) hosts over 120k models, 20k datasets, and 50k demo apps (Spaces), all open source and publicly available, in an online platform where people can easily collaborate and build ML together.\n", + "\n", + "These can be called from LangChain either through this local pipeline wrapper or by calling their hosted inference endpoints through the HuggingFaceHub class. For more information on the hosted pipelines, see the [HuggingFaceHub](huggingface_hub.html) notebook." + ] + }, + { + "cell_type": "markdown", + "id": "4c1b8450-5eaf-4d34-8341-2d785448a1ff", + "metadata": { + "tags": [] + }, + "source": [ + "To use, you should have the ``transformers`` python [package installed](https://pypi.org/project/transformers/), as well as [pytorch](https://pytorch.org/get-started/locally/). You can also install `xformer` for a more memory-efficient attention implementation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d772b637-de00-4663-bd77-9bc96d798db2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%pip install transformers --quiet" + ] + }, + { + "cell_type": "markdown", + "id": "91ad075f-71d5-4bc8-ab91-cc0ad5ef16bb", + "metadata": {}, + "source": [ + "### Model Loading\n", + "\n", + "Models can be loaded by specifying the model parameters using the `from_model_id` method." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "165ae236-962a-4763-8052-c4836d78a5d2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.llms.huggingface_pipeline import HuggingFacePipeline\n", + "\n", + "hf = HuggingFacePipeline.from_model_id(\n", + " model_id=\"gpt2\",\n", + " task=\"text-generation\",\n", + " pipeline_kwargs={\"max_new_tokens\": 10},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "00104b27-0c15-4a97-b198-4512337ee211", + "metadata": {}, + "source": [ + "They can also be loaded by passing in an existing `transformers` pipeline directly" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.llms.huggingface_pipeline import HuggingFacePipeline\n", + "from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline\n", + "\n", + "model_id = \"gpt2\"\n", + "tokenizer = AutoTokenizer.from_pretrained(model_id)\n", + "model = AutoModelForCausalLM.from_pretrained(model_id)\n", + "pipe = pipeline(\"text-generation\", model=model, tokenizer=tokenizer, max_new_tokens=10)\n", + "hf = HuggingFacePipeline(pipeline=pipe)" + ], + "id": "7f426a4f" + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create Chain\n", + "\n", + "With the model loaded into memory, you can compose it with a prompt to\n", + "form a chain." + ], + "id": "60e7ba8d" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3acf0069", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.prompts import PromptTemplate\n", + "\n", + "template = \"\"\"Question: {question}\n", + "\n", + "Answer: Let's think step by step.\"\"\"\n", + "prompt = PromptTemplate.from_template(template)\n", + "\n", + "chain = prompt | hf\n", + "\n", + "question = \"What is electroencephalography?\"\n", + "\n", + "print(chain.invoke({\"question\": question}))" + ] + }, + { + "cell_type": "markdown", + "id": "dbbc3a37", + "metadata": {}, + "source": [ + "### GPU Inference\n", + "\n", + "When running on a machine with GPU, you can specify the `device=n` parameter to put the model on the specified device.\n", + "Defaults to `-1` for CPU inference.\n", + "\n", + "If you have multiple-GPUs and/or the model is too large for a single GPU, you can specify `device_map=\"auto\"`, which requires and uses the [Accelerate](https://huggingface.co/docs/accelerate/index) library to automatically determine how to load the model weights. \n", + "\n", + "*Note*: both `device` and `device_map` should not be specified together and can lead to unexpected behavior." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gpu_llm = HuggingFacePipeline.from_model_id(\n", + " model_id=\"gpt2\",\n", + " task=\"text-generation\",\n", + " device=0, # replace with device_map=\"auto\" to use the accelerate library.\n", + " pipeline_kwargs={\"max_new_tokens\": 10},\n", + ")\n", + "\n", + "gpu_chain = prompt | gpu_llm\n", + "\n", + "question = \"What is electroencephalography?\"\n", + "\n", + "print(gpu_chain.invoke({\"question\": question}))" + ], + "id": "703c91c8" + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Batch GPU Inference\n", + "\n", + "If running on a device with GPU, you can also run inference on the GPU in batch mode." 
+ ], + "id": "59276016" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "097ba62f", + "metadata": {}, + "outputs": [], + "source": [ + "gpu_llm = HuggingFacePipeline.from_model_id(\n", + " model_id=\"bigscience/bloom-1b7\",\n", + " task=\"text-generation\",\n", + " device=0, # -1 for CPU\n", + " batch_size=2, # adjust as needed based on GPU map and model size.\n", + " model_kwargs={\"temperature\": 0, \"max_length\": 64},\n", + ")\n", + "\n", + "gpu_chain = prompt | gpu_llm.bind(stop=[\"\\n\\n\"])\n", + "\n", + "questions = []\n", + "for i in range(4):\n", + " questions.append({\"question\": f\"What is the number {i} in french?\"})\n", + "\n", + "answers = gpu_chain.batch(questions)\n", + "for answer in answers:\n", + " print(answer)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" + } }, - { - "cell_type": "markdown", - "id": "4c1b8450-5eaf-4d34-8341-2d785448a1ff", - "metadata": { - "tags": [] - }, - "source": [ - "To use, you should have the ``transformers`` python [package installed](https://pypi.org/project/transformers/), as well as [pytorch](https://pytorch.org/get-started/locally/). You can also install `xformer` for a more memory-efficient attention implementation." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d772b637-de00-4663-bd77-9bc96d798db2", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "%pip install transformers --quiet" - ] - }, - { - "cell_type": "markdown", - "id": "91ad075f-71d5-4bc8-ab91-cc0ad5ef16bb", - "metadata": {}, - "source": [ - "### Model Loading\n", - "\n", - "Models can be loaded by specifying the model parameters using the `from_model_id` method." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "165ae236-962a-4763-8052-c4836d78a5d2", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from langchain.llms.huggingface_pipeline import HuggingFacePipeline\n", - "\n", - "hf = HuggingFacePipeline.from_model_id(\n", - " model_id=\"gpt2\",\n", - " task=\"text-generation\",\n", - " pipeline_kwargs={\"max_new_tokens\": 10},\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "00104b27-0c15-4a97-b198-4512337ee211", - "metadata": {}, - "source": [ - "They can also be loaded by passing in an existing `transformers` pipeline directly" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.llms.huggingface_pipeline import HuggingFacePipeline\n", - "from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline\n", - "\n", - "model_id = \"gpt2\"\n", - "tokenizer = AutoTokenizer.from_pretrained(model_id)\n", - "model = AutoModelForCausalLM.from_pretrained(model_id)\n", - "pipe = pipeline(\"text-generation\", model=model, tokenizer=tokenizer, max_new_tokens=10)\n", - "hf = HuggingFacePipeline(pipeline=pipe)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create Chain\n", - "\n", - "With the model loaded into memory, you can compose it with a prompt to\n", - "form a chain." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3acf0069", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.prompts import PromptTemplate\n", - "\n", - "template = \"\"\"Question: {question}\n", - "\n", - "Answer: Let's think step by step.\"\"\"\n", - "prompt = PromptTemplate.from_template(template)\n", - "\n", - "chain = prompt | hf\n", - "\n", - "question = \"What is electroencephalography?\"\n", - "\n", - "print(chain.invoke({\"question\": question}))" - ] - }, - { - "cell_type": "markdown", - "id": "dbbc3a37", - "metadata": {}, - "source": [ - "### GPU Inference\n", - "\n", - "When running on a machine with GPU, you can specify the `device=n` parameter to put the model on the specified device.\n", - "Defaults to `-1` for CPU inference.\n", - "\n", - "If you have multiple-GPUs and/or the model is too large for a single GPU, you can specify `device_map=\"auto\"`, which requires and uses the [Accelerate](https://huggingface.co/docs/accelerate/index) library to automatically determine how to load the model weights. \n", - "\n", - "*Note*: both `device` and `device_map` should not be specified together and can lead to unexpected behavior." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gpu_llm = HuggingFacePipeline.from_model_id(\n", - " model_id=\"gpt2\",\n", - " task=\"text-generation\",\n", - " device=0, # replace with device_map=\"auto\" to use the accelerate library.\n", - " pipeline_kwargs={\"max_new_tokens\": 10},\n", - ")\n", - "\n", - "gpu_chain = prompt | gpu_llm\n", - "\n", - "question = \"What is electroencephalography?\"\n", - "\n", - "print(gpu_chain.invoke({\"question\": question}))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Batch GPU Inference\n", - "\n", - "If running on a device with GPU, you can also run inference on the GPU in batch mode." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "097ba62f", - "metadata": {}, - "outputs": [], - "source": [ - "gpu_llm = HuggingFacePipeline.from_model_id(\n", - " model_id=\"bigscience/bloom-1b7\",\n", - " task=\"text-generation\",\n", - " device=0, # -1 for CPU\n", - " batch_size=2, # adjust as needed based on GPU map and model size.\n", - " model_kwargs={\"temperature\": 0, \"max_length\": 64},\n", - ")\n", - "\n", - "gpu_chain = prompt | gpu_llm.bind(stop=[\"\\n\\n\"])\n", - "\n", - "questions = []\n", - "for i in range(4):\n", - " questions.append({\"question\": f\"What is the number {i} in french?\"})\n", - "\n", - "answers = gpu_chain.batch(questions)\n", - "for answer in answers:\n", - " print(answer)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/docs/docusaurus.config.js b/docs/docusaurus.config.js index 82200fb823b..707377fb466 100644 --- a/docs/docusaurus.config.js +++ b/docs/docusaurus.config.js @@ -175,6 +175,11 @@ const config = { label: "More", position: "left", items: [ + { + type: "docSidebar", + sidebarId: "templates", + label: "Templates", + }, { to: "/docs/community", label: "Community", diff --git a/docs/scripts/copy_templates.py b/docs/scripts/copy_templates.py new file mode 100644 index 00000000000..11120d4d0fa --- /dev/null +++ b/docs/scripts/copy_templates.py @@ -0,0 +1,36 @@ +import glob +import os +from pathlib import Path +import re +import shutil + + +TEMPLATES_DIR = Path(os.path.abspath(__file__)).parents[2] / "templates" +DOCS_TEMPLATES_DIR = Path(os.path.abspath(__file__)).parents[1] / "docs" / "templates" + + +readmes = list(glob.glob(str(TEMPLATES_DIR) + "/*/README.md")) +destinations = [readme[len(str(TEMPLATES_DIR)) + 1:-10] + ".md" for readme in readmes] +for source, destination in zip(readmes, destinations): + full_destination = DOCS_TEMPLATES_DIR / destination + shutil.copyfile(source, full_destination) + with open(full_destination, "r") as f: + content = f.read() + # remove images + content = re.sub("\!\[.*?\]\((.*?)\)", "", content) + with open(full_destination, "w") as f: + f.write(content) + +sidebar_hidden = """--- +sidebar_class_name: hidden +--- + +""" +TEMPLATES_INDEX_DESTINATION = DOCS_TEMPLATES_DIR / "index.md" +with open(TEMPLATES_INDEX_DESTINATION, "r") as f: + content = f.read() +# replace relative links +content = re.sub("\]\(\.\.\/", "](/docs/templates/", content) +with open(TEMPLATES_INDEX_DESTINATION, "w") as f: + f.write(sidebar_hidden + content) + diff --git a/docs/sidebars.js b/docs/sidebars.js index 7cceaa08a21..cab6c2e451a 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -123,4 +123,14 @@ module.exports = { guides: [ {type: "autogenerated", dirName: "guides" } ], + templates: [ + { + type: "category", + label: "Templates", + items: [ + { type: "autogenerated", dirName: "templates" }, + ], + link: { type: 'doc', id: "templates/index" } + }, + ], }; diff --git a/docs/vercel_build.sh b/docs/vercel_build.sh index a57f11a8045..8b53bd806b5 100755 --- a/docs/vercel_build.sh +++ b/docs/vercel_build.sh @@ -47,8 +47,11 @@ source 
.venv/bin/activate
 python3.11 -m pip install --upgrade pip
 python3.11 -m pip install -r vercel_requirements.txt
 python3.11 scripts/model_feat_table.py
-nbdoc_build --srcdir docs
+mkdir docs/templates
+cp ../templates/docs/INDEX.md docs/templates/index.md
+python3.11 scripts/copy_templates.py
 cp ../cookbook/README.md src/pages/cookbook.mdx
 cp ../.github/CONTRIBUTING.md docs/contributing.md
 wget https://raw.githubusercontent.com/langchain-ai/langserve/main/README.md -O docs/langserve.md
+nbdoc_build --srcdir docs
 python3.11 scripts/generate_api_reference_links.py
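The destination filenames in the new `docs/scripts/copy_templates.py` come from string slicing: each README path has the `TEMPLATES_DIR` prefix plus its trailing slash removed from the front and the final `/README.md` (10 characters) removed from the end, and `.md` is appended. A minimal sketch with made-up paths illustrates the intended mapping; the paths below are hypothetical, not taken from the repository:

```python
# Hypothetical paths illustrating the slicing used in copy_templates.py.
templates_dir = "/repo/templates"
readme = "/repo/templates/rag-chroma/README.md"

# Strip "<templates_dir>/" from the front and "/README.md" (10 characters)
# from the end, leaving only the template directory name, then add ".md".
destination = readme[len(templates_dir) + 1 : -10] + ".md"
assert destination == "rag-chroma.md"
```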
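The two `re.sub` calls in the same script, one stripping markdown image tags from each copied README and one rewriting relative links in the templates index to `/docs/templates/`, can be exercised in isolation. This is a small sketch under the assumption of some made-up markdown content; raw strings are used for the patterns (equivalent to those in the script) to avoid Python's invalid-escape-sequence warnings:

```python
import re

# Made-up markdown standing in for a template README / the templates index.
sample = "# RAG Chroma\n\n![architecture](img/diagram.png)\n\nSee [all templates](../) for more."

# Drop markdown image tags, as done for each copied README.
no_images = re.sub(r"!\[.*?\]\((.*?)\)", "", sample)

# Rewrite relative "../" links to the docs path, as done for the index page.
rewritten = re.sub(r"\]\(\.\./", "](/docs/templates/", no_images)

print(rewritten)
```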