Mirror of https://github.com/hwchase17/langchain.git

# Separate platforms integrations docs (#10609)

Commit 2ae568dcf5 (parent 6d3670c7d8)
```diff
@@ -69,7 +69,10 @@ module.exports = {
     type: "category",
     label: "Additional resources",
     collapsed: true,
-    items: [{ type: "autogenerated", dirName: "additional_resources" }, { type: "link", label: "Gallery", href: "https://github.com/kyrolabs/awesome-langchain" }],
+    items: [
+      { type: "autogenerated", dirName: "additional_resources" },
+      { type: "link", label: "Gallery", href: "https://github.com/kyrolabs/awesome-langchain" }
+    ],
     link: {
       type: 'generated-index',
       slug: "additional_resources",
```
```diff
@@ -80,12 +83,38 @@ module.exports = {
   integrations: [
     {
       type: "category",
-      label: "Integrations",
+      label: "Providers",
       collapsible: false,
-      items: [{ type: "autogenerated", dirName: "integrations" }],
+      items: [
+        { type: "autogenerated", dirName: "integrations/platforms" },
+        { type: "category", label: "More", collapsed: true, items: [{ type: "autogenerated", dirName: "integrations/providers" }] },
+      ],
       link: {
         type: 'generated-index',
-        slug: "integrations",
+        slug: "integrations/providers",
       },
     },
+    {
+      type: "category",
+      label: "Components",
+      collapsible: false,
+      items: [
+        { type: "category", label: "LLMs", collapsed: true, items: [{ type: "autogenerated", dirName: "integrations/llms" }], link: { type: "generated-index", slug: "integrations/llms" } },
+        { type: "category", label: "Chat models", collapsed: true, items: [{ type: "autogenerated", dirName: "integrations/chat" }], link: { type: "generated-index", slug: "integrations/chat" } },
+        { type: "category", label: "Document loaders", collapsed: true, items: [{ type: "autogenerated", dirName: "integrations/document_loaders" }], link: { type: "generated-index", slug: "integrations/document_loaders" } },
+        { type: "category", label: "Document transformers", collapsed: true, items: [{ type: "autogenerated", dirName: "integrations/document_transformers" }], link: { type: "generated-index", slug: "integrations/document_transformers" } },
+        { type: "category", label: "Text embedding models", collapsed: true, items: [{ type: "autogenerated", dirName: "integrations/text_embedding" }], link: { type: "generated-index", slug: "integrations/text_embedding" } },
+        { type: "category", label: "Vector stores", collapsed: true, items: [{ type: "autogenerated", dirName: "integrations/vectorstores" }], link: { type: "generated-index", slug: "integrations/vectorstores" } },
+        { type: "category", label: "Retrievers", collapsed: true, items: [{ type: "autogenerated", dirName: "integrations/retrievers" }], link: { type: "generated-index", slug: "integrations/retrievers" } },
+        { type: "category", label: "Tools", collapsed: true, items: [{ type: "autogenerated", dirName: "integrations/tools" }], link: { type: "generated-index", slug: "integrations/tools" } },
+        { type: "category", label: "Agents and toolkits", collapsed: true, items: [{ type: "autogenerated", dirName: "integrations/toolkits" }], link: { type: "generated-index", slug: "integrations/toolkits" } },
+        { type: "category", label: "Memory", collapsed: true, items: [{ type: "autogenerated", dirName: "integrations/memory" }], link: { type: "generated-index", slug: "integrations/memory" } },
+        { type: "category", label: "Callbacks", collapsed: true, items: [{ type: "autogenerated", dirName: "integrations/callbacks" }], link: { type: "generated-index", slug: "integrations/callbacks" } },
+        { type: "category", label: "Chat loaders", collapsed: true, items: [{ type: "autogenerated", dirName: "integrations/chat_loaders" }], link: { type: "generated-index", slug: "integrations/chat_loaders" } },
+      ],
+      link: {
+        type: 'generated-index',
+        slug: "integrations/components",
+      },
+    },
   ],
```
@@ -1,5 +1,77 @@
```json
{
  "redirects": [
    {
      "source": "/docs/integrations/providers/amazon_api_gateway",
      "destination": "/docs/integrations/platform/aws"
    },
    {
      "source": "/docs/integrations/providers/azure_blob_storage",
      "destination": "/docs/integrations/platform/microsoft"
    },
    {
      "source": "/docs/integrations/providers/google_vertexai_matchingengine",
      "destination": "/docs/integrations/platform/google"
    },
    {
      "source": "/docs/integrations/providers/aws_s3",
      "destination": "/docs/integrations/platform/aws"
    },
    {
      "source": "/docs/integrations/providers/azure_openai",
      "destination": "/docs/integrations/platform/microsoft"
    },
    {
      "source": "/docs/integrations/providers/azure_blob_storage",
      "destination": "/docs/integrations/platform/microsoft"
    },
    {
      "source": "/docs/integrations/providers/azure_cognitive_search_",
      "destination": "/docs/integrations/platform/microsoft"
    },
    {
      "source": "/docs/integrations/providers/bedrock",
      "destination": "/docs/integrations/platform/aws"
    },
    {
      "source": "/docs/integrations/providers/google_bigquery",
      "destination": "/docs/integrations/platform/google"
    },
    {
      "source": "/docs/integrations/providers/google_cloud_storage",
      "destination": "/docs/integrations/platform/google"
    },
    {
      "source": "/docs/integrations/providers/google_drive",
      "destination": "/docs/integrations/platform/google"
    },
    {
      "source": "/docs/integrations/providers/google_search",
      "destination": "/docs/integrations/platform/google"
    },
    {
      "source": "/docs/integrations/providers/microsoft_onedrive",
      "destination": "/docs/integrations/platform/microsoft"
    },
    {
      "source": "/docs/integrations/providers/microsoft_powerpoint",
      "destination": "/docs/integrations/platform/microsoft"
    },
    {
      "source": "/docs/integrations/providers/microsoft_word",
      "destination": "/docs/integrations/platform/microsoft"
    },
    {
      "source": "/docs/integrations/providers/sagemaker_endpoint",
      "destination": "/docs/integrations/platform/aws"
    },
    {
      "source": "/docs/integrations/providers/sagemaker_tracking",
      "destination": "/docs/integrations/callbacks/sagemaker_tracking"
    },
    {
      "source": "/docs/integrations/providers/openai",
      "destination": "/docs/integrations/callbacks/openai"
    },
    {
      "source": "/docs/modules/data_connection/caching_embeddings(/?)",
      "destination": "/docs/modules/data_connection/text_embedding/caching_embeddings"
```
@@ -1,9 +0,0 @@
---
sidebar_position: 0
---

# Callbacks

import DocCardList from "@theme/DocCardList";

<DocCardList />
@@ -1,9 +0,0 @@
---
sidebar_position: 0
---

# Chat models

import DocCardList from "@theme/DocCardList";

<DocCardList />
@@ -1,188 +0,0 @@
---
sidebar_position: 0
---

# Chat loaders

Like document loaders, chat loaders are utilities designed to help load conversations from popular communication platforms such as Facebook, Slack, Discord, etc. These are loaded into memory as LangChain chat message objects. Such utilities facilitate tasks such as fine-tuning a language model to match your personal style or voice.

This brief guide illustrates the process using [OpenAI's fine-tuning API](https://platform.openai.com/docs/guides/fine-tuning); it consists of six steps:

1. Export your Facebook Messenger chat data in a format compatible with your intended chat loader.
2. Load the chat data into memory as LangChain chat message objects. (_This is what is covered in each integration notebook in this section of the documentation._)
   - Assign a person to the "AI" role and optionally filter, group, and merge messages.
3. Export these acquired messages in the format expected by the fine-tuning API.
4. Upload this data to OpenAI.
5. Fine-tune your model.
6. Use the fine-tuned model in LangChain.

This guide is not wholly comprehensive, but it is designed to take you through the fundamentals of going from raw data to fine-tuned model.

We will demonstrate the procedure through an example of fine-tuning a `gpt-3.5-turbo` model on Facebook Messenger data.

### 1. Export your chat data

To export your Facebook Messenger data, you can follow the [instructions here](https://www.zapptales.com/en/download-facebook-messenger-chat-history-how-to/).

:::important JSON format
You must select "JSON format" (instead of HTML) when exporting your data to be compatible with the current loader.
:::

OpenAI requires at least 10 examples to fine-tune your model, but recommends 50-100 for best results.
You can use the example data stored at [this Google Drive link](https://drive.google.com/file/d/1rh1s1o2i7B-Sk1v9o8KNgivLVGwJ-osV/view?usp=sharing) to test the process.

### 2. Load the chat

Once you've obtained your chat data, you can load it into memory as LangChain chat message objects. Here's an example of loading data in Python:

```python
from langchain.chat_loaders.facebook_messenger import FolderFacebookMessengerChatLoader

loader = FolderFacebookMessengerChatLoader(
    path="./facebook_messenger_chats",
)

chat_sessions = loader.load()
```

In this snippet, we point the loader to a directory of Facebook chat dumps, which are then loaded as multiple "sessions" of messages, one session per conversation file.

Once you've loaded the messages, you should decide which person you want the model fine-tuned to sound like (usually yourself). You can also decide to merge consecutive messages from the same sender into a single chat message.
The chat_loaders utilities handle both tasks:

```python
from langchain.chat_loaders.utils import (
    merge_chat_runs,
    map_ai_messages,
)

merged_sessions = merge_chat_runs(chat_sessions)
alternating_sessions = list(map_ai_messages(merged_sessions, "My Name"))
```

### 3. Export messages to OpenAI format

Convert the chat messages to dictionaries using the `convert_messages_for_finetuning` function. Then, group the data into chunks for better context modeling and overlap management.

```python
from langchain.adapters.openai import convert_messages_for_finetuning

openai_messages = convert_messages_for_finetuning(chat_sessions)
```

At this point, the data is ready for upload to OpenAI. You can choose to split conversations into smaller chunks for training if you do not have enough conversations to train on. Feel free to play around with different chunk sizes or with adding system messages to the fine-tuning data.

```python
chunk_size = 8
overlap = 2

message_groups = [
    conversation_messages[i: i + chunk_size]
    for conversation_messages in openai_messages
    for i in range(
        0, len(conversation_messages) - chunk_size + 1,
        chunk_size - overlap)
]

len(message_groups)
# 9
```

### 4. Upload the data to OpenAI

Ensure you have set your OpenAI API key by following these [instructions](https://platform.openai.com/account/api-keys), then upload the training file.
An audit is performed to ensure data compliance, so you may have to wait a few minutes for the dataset to become ready for use.

```python
import time
import json
import io

import openai

my_file = io.BytesIO()
for group in message_groups:
    my_file.write((json.dumps({"messages": group}) + "\n").encode('utf-8'))

my_file.seek(0)
training_file = openai.File.create(
    file=my_file,
    purpose='fine-tune'
)

# Wait while the file is processed
status = openai.File.retrieve(training_file.id).status
start_time = time.time()
while status != "processed":
    print(f"Status=[{status}]... {time.time() - start_time:.2f}s", end="\r", flush=True)
    time.sleep(5)
    status = openai.File.retrieve(training_file.id).status
print(f"File {training_file.id} ready after {time.time() - start_time:.2f} seconds.")
```

Once this is done, you can proceed to the model training!

### 5. Fine-tune the model

Start the fine-tuning job with your chosen base model.

```python
job = openai.FineTuningJob.create(
    training_file=training_file.id,
    model="gpt-3.5-turbo",
)
```

This might take a while. Check the status with `openai.FineTuningJob.retrieve(job.id).status` and wait for it to report `succeeded`.

```python
# It may take 10-20+ minutes to complete training.
status = openai.FineTuningJob.retrieve(job.id).status
start_time = time.time()
while status != "succeeded":
    print(f"Status=[{status}]... {time.time() - start_time:.2f}s", end="\r", flush=True)
    time.sleep(5)
    job = openai.FineTuningJob.retrieve(job.id)
    status = job.status
```

### 6. Use the model in LangChain

You're almost there! Use the fine-tuned model in LangChain.

```python
from langchain import chat_models

model_name = job.fine_tuned_model
# Example: ft:gpt-3.5-turbo-0613:personal::5mty86jb
model = chat_models.ChatOpenAI(model=model_name)
```

```python
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser

prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),
    ]
)

chain = prompt | model | StrOutputParser()

for tok in chain.stream({"input": "What classes are you taking?"}):
    print(tok, end="", flush=True)

# The usual - Potions, Transfiguration, Defense Against the Dark Arts. What about you?
```

And that's it! You've successfully fine-tuned a model and used it in LangChain.

## Supported Chat Loaders

LangChain currently supports the following chat loaders. Feel free to contribute more!

import DocCardList from "@theme/DocCardList";

<DocCardList />
@@ -1,9 +0,0 @@
---
sidebar_position: 0
---

# Document loaders

import DocCardList from "@theme/DocCardList";

<DocCardList />
@@ -1,9 +0,0 @@
---
sidebar_position: 0
---

# Document transformers

import DocCardList from "@theme/DocCardList";

<DocCardList />
@@ -1,9 +0,0 @@
---
sidebar_position: 0
---

# LLMs

import DocCardList from "@theme/DocCardList";

<DocCardList />
@@ -1,9 +0,0 @@
---
sidebar_position: 0
---

# Memory

import DocCardList from "@theme/DocCardList";

<DocCardList />
docs/extras/integrations/platforms/aws.mdx (new file, 82 lines)
@@ -0,0 +1,82 @@

# AWS

## LLMs

### Bedrock

See a [usage example](/docs/integrations/llms/bedrock).

```python
from langchain.llms.bedrock import Bedrock
```
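
A minimal usage sketch, assuming your AWS account has access to a Bedrock model; the model id and profile name below are illustrative placeholders, not values from this page:

```python
# Hypothetical model id and AWS profile; substitute your own.
llm = Bedrock(
    model_id="anthropic.claude-v2",
    credentials_profile_name="bedrock-admin",
    model_kwargs={"temperature": 0.2},
)
print(llm("Tell me a fact about whales."))
```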

### Amazon API Gateway

[Amazon API Gateway](https://aws.amazon.com/api-gateway/) is a fully managed service that makes it easy for developers to create, publish, maintain, monitor, and secure APIs at any scale. APIs act as the "front door" for applications to access data, business logic, or functionality from your backend services. Using API Gateway, you can create RESTful APIs and WebSocket APIs that enable real-time two-way communication applications. API Gateway supports containerized and serverless workloads, as well as web applications.

API Gateway handles all the tasks involved in accepting and processing up to hundreds of thousands of concurrent API calls, including traffic management, CORS support, authorization and access control, throttling, monitoring, and API version management. API Gateway has no minimum fees or startup costs. You pay for the API calls you receive and the amount of data transferred out and, with the API Gateway tiered pricing model, you can reduce your cost as your API usage scales.

See a [usage example](/docs/integrations/llms/amazon_api_gateway_example).

```python
from langchain.llms import AmazonAPIGateway

api_url = "https://<api_gateway_id>.execute-api.<region>.amazonaws.com/LATEST/HF"

# These are sample parameters for Falcon 40B Instruct deployed from Amazon SageMaker JumpStart
model_kwargs = {
    "max_new_tokens": 100,
    "num_return_sequences": 1,
    "top_k": 50,
    "top_p": 0.95,
    "do_sample": False,
    "return_full_text": True,
    "temperature": 0.2,
}
llm = AmazonAPIGateway(api_url=api_url, model_kwargs=model_kwargs)
```

### SageMaker Endpoint

>[Amazon SageMaker](https://aws.amazon.com/sagemaker/) is a system that can build, train, and deploy machine learning (ML) models with fully managed infrastructure, tools, and workflows.

We use `SageMaker` to host our model and expose it as the `SageMaker Endpoint`.

See a [usage example](/docs/integrations/llms/sagemaker).

```python
from langchain.llms import SagemakerEndpoint
from langchain.llms.sagemaker_endpoint import LLMContentHandler
```
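
As a sketch of how these two classes fit together: the endpoint name and region below are placeholders, and the content handler's payload keys must match whatever request/response schema your own deployed model expects:

```python
import json

class ContentHandler(LLMContentHandler):
    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:
        # Serialize the prompt into the JSON payload the endpoint expects.
        return json.dumps({"inputs": prompt, "parameters": model_kwargs}).encode("utf-8")

    def transform_output(self, output) -> str:
        # Parse the generated text back out of the endpoint response body.
        return json.loads(output.read().decode("utf-8"))[0]["generated_text"]

llm = SagemakerEndpoint(
    endpoint_name="my-endpoint",   # placeholder endpoint name
    region_name="us-east-1",       # placeholder region
    model_kwargs={"temperature": 0.2},
    content_handler=ContentHandler(),
)
```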

## Text Embedding Models

### Bedrock

See a [usage example](/docs/integrations/text_embedding/bedrock).

```python
from langchain.embeddings import BedrockEmbeddings
```
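
A quick sketch, with the AWS profile name as a placeholder:

```python
embeddings = BedrockEmbeddings(credentials_profile_name="bedrock-admin")  # example profile
vector = embeddings.embed_query("This is a test document.")
```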

### SageMaker Endpoint

See a [usage example](/docs/integrations/text_embedding/sagemaker-endpoint).

```python
from langchain.embeddings import SagemakerEndpointEmbeddings
from langchain.llms.sagemaker_endpoint import ContentHandlerBase
```
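
One possible shape this can take; this is only a sketch that assumes a JumpStart-style embedding endpoint, and the endpoint name, region, and payload keys are all placeholders that depend entirely on your deployed model:

```python
import json
from typing import List

class EmbeddingsContentHandler(ContentHandlerBase):
    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, inputs: List[str], model_kwargs: dict) -> bytes:
        # Assumed payload key; match your model's request schema.
        return json.dumps({"text_inputs": inputs, **model_kwargs}).encode("utf-8")

    def transform_output(self, output) -> List[List[float]]:
        # Assumed response key; match your model's response schema.
        return json.loads(output.read().decode("utf-8"))["embedding"]

embeddings = SagemakerEndpointEmbeddings(
    endpoint_name="my-embeddings-endpoint",  # placeholder
    region_name="us-east-1",                 # placeholder
    content_handler=EmbeddingsContentHandler(),
)
```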

## Document loaders

### AWS S3 Directory

>[Amazon Simple Storage Service (Amazon S3)](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html) is an object storage service.

>[AWS S3 Directory](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html)

>[AWS S3 Buckets](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingBucket.html)

See a [usage example for S3DirectoryLoader](/docs/integrations/document_loaders/aws_s3_directory.html).

See a [usage example for S3FileLoader](/docs/integrations/document_loaders/aws_s3_file.html).

```python
from langchain.document_loaders import S3DirectoryLoader, S3FileLoader
```
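
For instance, with placeholder bucket and key names:

```python
# "my-bucket" and the prefix/key are placeholders for your own S3 layout.
docs = S3DirectoryLoader("my-bucket", prefix="reports/").load()
doc = S3FileLoader("my-bucket", "reports/2023-q1.txt").load()
```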
docs/extras/integrations/platforms/google.mdx (new file, 99 lines)
@@ -0,0 +1,99 @@

# Google

## Document Loader

### Google BigQuery

>[Google BigQuery](https://cloud.google.com/bigquery) is a serverless and cost-effective enterprise data warehouse that works across clouds and scales with your data.
`BigQuery` is a part of the `Google Cloud Platform`.

First, you need to install the `google-cloud-bigquery` python package.

```bash
pip install google-cloud-bigquery
```

See a [usage example](/docs/integrations/document_loaders/google_bigquery).

```python
from langchain.document_loaders import BigQueryLoader
```
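
A quick sketch; the dataset and table names are hypothetical, and each resulting row becomes one `Document`:

```python
# Placeholder query against a hypothetical dataset.
loader = BigQueryLoader("SELECT id, title, body FROM my_dataset.my_table")
docs = loader.load()
```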

### Google Cloud Storage

>[Google Cloud Storage](https://en.wikipedia.org/wiki/Google_Cloud_Storage) is a managed service for storing unstructured data.

First, you need to install the `google-cloud-storage` python package.

```bash
pip install google-cloud-storage
```

There are two loaders for `Google Cloud Storage`: the `Directory` and the `File` loaders.

See a [usage example](/docs/integrations/document_loaders/google_cloud_storage_directory).

```python
from langchain.document_loaders import GCSDirectoryLoader
```

See a [usage example](/docs/integrations/document_loaders/google_cloud_storage_file).

```python
from langchain.document_loaders import GCSFileLoader
```
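
As a sketch, with the project, bucket, and blob names all placeholders:

```python
docs = GCSDirectoryLoader(project_name="my-project", bucket="my-bucket").load()
doc = GCSFileLoader(project_name="my-project", bucket="my-bucket", blob="report.txt").load()
```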

### Google Drive

>[Google Drive](https://en.wikipedia.org/wiki/Google_Drive) is a file storage and synchronization service developed by Google.

Currently, only `Google Docs` are supported.

First, you need to install several python packages.

```bash
pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib
```

See a [usage example and authorizing instructions](/docs/integrations/document_loaders/google_drive.html).

```python
from langchain.document_loaders import GoogleDriveLoader
```
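
A minimal sketch; the folder id is a placeholder (it is the id segment from a folder's Drive URL), and authorization must already be set up as described in the linked notebook:

```python
loader = GoogleDriveLoader(
    folder_id="your-folder-id",  # placeholder
    recursive=False,
)
docs = loader.load()
```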

## Vector Store

### Google Vertex AI MatchingEngine

> [Google Vertex AI Matching Engine](https://cloud.google.com/vertex-ai/docs/matching-engine/overview) provides
> the industry's leading high-scale low latency vector database. These vector databases are commonly
> referred to as vector similarity-matching or an approximate nearest neighbor (ANN) service.

We need to install several python packages.

```bash
pip install tensorflow google-cloud-aiplatform tensorflow-hub tensorflow-text
```

See a [usage example](/docs/integrations/vectorstores/matchingengine).

```python
from langchain.vectorstores import MatchingEngine
```
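
A rough sketch, assuming the index and endpoint already exist in Vertex AI; every id below is a placeholder, and the embedding model is an assumption:

```python
from langchain.embeddings import VertexAIEmbeddings  # assumed embedding choice

vector_store = MatchingEngine.from_components(
    project_id="my-project",       # placeholder
    region="us-central1",          # placeholder
    gcs_bucket_name="my-bucket",   # placeholder staging bucket
    index_id="my-index-id",        # placeholder
    endpoint_id="my-endpoint-id",  # placeholder
    embedding=VertexAIEmbeddings(),
)
vector_store.similarity_search("lunch", k=2)
```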

## Tools

### Google Search

- Install requirements with `pip install google-api-python-client`
- Set up a Custom Search Engine, following [these instructions](https://stackoverflow.com/questions/37083058/programmatically-searching-google-in-python-using-custom-search)
- Get an API Key and Custom Search Engine ID from the previous step, and set them as environment variables `GOOGLE_API_KEY` and `GOOGLE_CSE_ID` respectively

There exists a GoogleSearchAPIWrapper utility which wraps this API. To import this utility:

```python
from langchain.utilities import GoogleSearchAPIWrapper
```
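
Once those environment variables are set, running a search is a one-liner:

```python
search = GoogleSearchAPIWrapper()  # reads GOOGLE_API_KEY and GOOGLE_CSE_ID from the environment
search.run("LangChain")
```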

For a more detailed walkthrough of this wrapper, see [this notebook](/docs/integrations/tools/google_search.html).

You can easily load this wrapper as a Tool (to use with an Agent):

```python
from langchain.agents import load_tools

tools = load_tools(["google-search"])
```
docs/extras/integrations/platforms/microsoft.mdx (new file, 129 lines)
@@ -0,0 +1,129 @@

# Microsoft

## LLM

### Azure OpenAI

>[Microsoft Azure](https://en.wikipedia.org/wiki/Microsoft_Azure), often referred to as `Azure`, is a cloud computing platform run by `Microsoft`, which offers access, management, and development of applications and services through global data centers. It provides a range of capabilities, including software as a service (SaaS), platform as a service (PaaS), and infrastructure as a service (IaaS). `Microsoft Azure` supports many programming languages, tools, and frameworks, including Microsoft-specific and third-party software and systems.

>[Azure OpenAI](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/) is an `Azure` service with powerful language models from `OpenAI`, including the `GPT-3`, `Codex` and `Embeddings model` series for content generation, summarization, semantic search, and natural language to code translation.

```bash
pip install openai tiktoken
```

Set the environment variables to get access to the `Azure OpenAI` service.

```python
import os

os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_BASE"] = "https://<your-endpoint>.openai.azure.com/"
os.environ["OPENAI_API_KEY"] = "your AzureOpenAI key"
os.environ["OPENAI_API_VERSION"] = "2023-05-15"
```

See a [usage example](/docs/integrations/llms/azure_openai_example).

```python
from langchain.llms import AzureOpenAI
```
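
With the environment variables above set, a minimal sketch (the deployment name is a placeholder for a deployment you created in Azure OpenAI Studio):

```python
llm = AzureOpenAI(deployment_name="my-deployment", model_name="text-davinci-003")
print(llm("Tell me a joke"))
```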

## Text Embedding Models

### Azure OpenAI

See a [usage example](/docs/integrations/text_embedding/azureopenai).

```python
from langchain.embeddings import OpenAIEmbeddings
```
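
A quick sketch; the deployment name is a placeholder for your own embedding-model deployment (e.g. of `text-embedding-ada-002`):

```python
embeddings = OpenAIEmbeddings(deployment="my-embeddings-deployment", chunk_size=1)
vector = embeddings.embed_query("This is a test document.")
```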

## Chat Models

### Azure OpenAI

See a [usage example](/docs/integrations/chat/azure_chat_openai).

```python
from langchain.chat_models import AzureChatOpenAI
```
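
A minimal sketch, with the deployment name again a placeholder:

```python
from langchain.schema import HumanMessage

chat = AzureChatOpenAI(deployment_name="my-chat-deployment", openai_api_version="2023-05-15")
chat([HumanMessage(content="Translate this sentence from English to French: I love programming.")])
```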

## Document loaders

### Azure Blob Storage

>[Azure Blob Storage](https://learn.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction) is Microsoft's object storage solution for the cloud. Blob Storage is optimized for storing massive amounts of unstructured data. Unstructured data is data that doesn't adhere to a particular data model or definition, such as text or binary data.

>[Azure Files](https://learn.microsoft.com/en-us/azure/storage/files/storage-files-introduction) offers fully managed
> file shares in the cloud that are accessible via the industry standard Server Message Block (`SMB`) protocol,
> Network File System (`NFS`) protocol, and `Azure Files REST API`. `Azure Files` is based on `Azure Blob Storage`.

`Azure Blob Storage` is designed for:
- Serving images or documents directly to a browser.
- Storing files for distributed access.
- Streaming video and audio.
- Writing to log files.
- Storing data for backup and restore, disaster recovery, and archiving.
- Storing data for analysis by an on-premises or Azure-hosted service.

```bash
pip install azure-storage-blob
```

See a [usage example for Azure Blob Storage](/docs/integrations/document_loaders/azure_blob_storage_container.html).

```python
from langchain.document_loaders import AzureBlobStorageContainerLoader
```
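
As a sketch, with the connection string and container name as placeholders:

```python
loader = AzureBlobStorageContainerLoader(
    conn_str="<my-connection-string>",  # placeholder
    container="<my-container>",         # placeholder
)
docs = loader.load()
```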

See a [usage example for Azure Files](/docs/integrations/document_loaders/azure_blob_storage_file.html).

```python
from langchain.document_loaders import AzureBlobStorageFileLoader
```

### Microsoft OneDrive

>[Microsoft OneDrive](https://en.wikipedia.org/wiki/OneDrive) (formerly `SkyDrive`) is a file-hosting service operated by Microsoft.

First, you need to install a python package.

```bash
pip install o365
```

See a [usage example](/docs/integrations/document_loaders/microsoft_onedrive).

```python
from langchain.document_loaders import OneDriveLoader
```
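
A rough sketch, assuming O365 authentication is already configured as described in the linked notebook; the drive id and folder path are placeholders:

```python
loader = OneDriveLoader(
    drive_id="my-drive-id",          # placeholder
    folder_path="Documents/clients", # placeholder
    auth_with_token=True,
)
docs = loader.load()
```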

### Microsoft Word

>[Microsoft Word](https://www.microsoft.com/en-us/microsoft-365/word) is a word processor developed by Microsoft.

See a [usage example](/docs/integrations/document_loaders/microsoft_word).

```python
from langchain.document_loaders import UnstructuredWordDocumentLoader
```
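
For example, with a placeholder path:

```python
loader = UnstructuredWordDocumentLoader("example_data/fake.docx")  # path is a placeholder
docs = loader.load()
```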

## Retriever

### Azure Cognitive Search

>[Azure Cognitive Search](https://learn.microsoft.com/en-us/azure/search/search-what-is-azure-search) (formerly known as `Azure Search`) is a cloud search service that gives developers infrastructure, APIs, and tools for building a rich search experience over private, heterogeneous content in web, mobile, and enterprise applications.

>Search is foundational to any app that surfaces text to users, where common scenarios include catalog or document search, online retail apps, or data exploration over proprietary content. When you create a search service, you'll work with the following capabilities:
>- A search engine for full text search over a search index containing user-owned content
>- Rich indexing, with lexical analysis and optional AI enrichment for content extraction and transformation
>- Rich query syntax for text search, fuzzy search, autocomplete, geo-search and more
>- Programmability through REST APIs and client libraries in Azure SDKs
>- Azure integration at the data layer, machine learning layer, and AI (Cognitive Services)

See [set up instructions](https://learn.microsoft.com/en-us/azure/search/search-create-service-portal).

See a [usage example](/docs/integrations/retrievers/azure_cognitive_search).

```python
from langchain.retrievers import AzureCognitiveSearchRetriever
```
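
A minimal sketch; the service, index, and key values are placeholders, which the retriever reads from the environment if not passed explicitly:

```python
import os

os.environ["AZURE_COGNITIVE_SEARCH_SERVICE_NAME"] = "<my-search-service>"  # placeholder
os.environ["AZURE_COGNITIVE_SEARCH_INDEX_NAME"] = "<my-index>"             # placeholder
os.environ["AZURE_COGNITIVE_SEARCH_API_KEY"] = "<my-api-key>"              # placeholder

retriever = AzureCognitiveSearchRetriever(content_key="content", top_k=3)
retriever.get_relevant_documents("what is langchain?")
```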

@@ -1,73 +0,0 @@
# Amazon API Gateway

[Amazon API Gateway](https://aws.amazon.com/api-gateway/) is a fully managed service that makes it easy for developers to create, publish, maintain, monitor, and secure APIs at any scale. APIs act as the "front door" for applications to access data, business logic, or functionality from your backend services. Using API Gateway, you can create RESTful APIs and WebSocket APIs that enable real-time two-way communication applications. API Gateway supports containerized and serverless workloads, as well as web applications.

API Gateway handles all the tasks involved in accepting and processing up to hundreds of thousands of concurrent API calls, including traffic management, CORS support, authorization and access control, throttling, monitoring, and API version management. API Gateway has no minimum fees or startup costs. You pay for the API calls you receive and the amount of data transferred out and, with the API Gateway tiered pricing model, you can reduce your cost as your API usage scales.

## LLM

See a [usage example](/docs/integrations/llms/amazon_api_gateway_example).

```python
from langchain.llms import AmazonAPIGateway

api_url = "https://<api_gateway_id>.execute-api.<region>.amazonaws.com/LATEST/HF"
llm = AmazonAPIGateway(api_url=api_url)

# These are sample parameters for Falcon 40B Instruct Deployed from Amazon SageMaker JumpStart
parameters = {
    "max_new_tokens": 100,
    "num_return_sequences": 1,
    "top_k": 50,
    "top_p": 0.95,
    "do_sample": False,
    "return_full_text": True,
    "temperature": 0.2,
}

prompt = "what day comes after Friday?"
llm.model_kwargs = parameters
llm(prompt)
>>> 'what day comes after Friday?\nSaturday'
```

## Agent

```python
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.llms import AmazonAPIGateway

api_url = "https://<api_gateway_id>.execute-api.<region>.amazonaws.com/LATEST/HF"
llm = AmazonAPIGateway(api_url=api_url)

parameters = {
    "max_new_tokens": 50,
    "num_return_sequences": 1,
    "top_k": 250,
    "top_p": 0.25,
    "do_sample": False,
    "temperature": 0.1,
}

llm.model_kwargs = parameters

# Next, let's load some tools to use. Note that the `llm-math` tool uses an LLM, so we need to pass that in.
tools = load_tools(["python_repl", "llm-math"], llm=llm)

# Finally, let's initialize an agent with the tools, the language model, and the type of agent we want to use.
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

# Now let's test it out!
agent.run("""
Write a Python script that prints "Hello, world!"
""")

>>> 'Hello, world!'
```
@@ -1,25 +0,0 @@
# AWS S3 Directory

>[Amazon Simple Storage Service (Amazon S3)](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html) is an object storage service.

>[AWS S3 Directory](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html)

>[AWS S3 Buckets](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingBucket.html)

## Installation and Setup

```bash
pip install boto3
```

## Document Loader

See a [usage example for S3DirectoryLoader](/docs/integrations/document_loaders/aws_s3_directory.html).

See a [usage example for S3FileLoader](/docs/integrations/document_loaders/aws_s3_file.html).

```python
from langchain.document_loaders import S3DirectoryLoader, S3FileLoader
```
@@ -1,36 +0,0 @@
# Azure Blob Storage

>[Azure Blob Storage](https://learn.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction) is Microsoft's object storage solution for the cloud. Blob Storage is optimized for storing massive amounts of unstructured data. Unstructured data is data that doesn't adhere to a particular data model or definition, such as text or binary data.

>[Azure Files](https://learn.microsoft.com/en-us/azure/storage/files/storage-files-introduction) offers fully managed
> file shares in the cloud that are accessible via the industry standard Server Message Block (`SMB`) protocol,
> Network File System (`NFS`) protocol, and `Azure Files REST API`. `Azure Files` are based on the `Azure Blob Storage`.

`Azure Blob Storage` is designed for:
- Serving images or documents directly to a browser.
- Storing files for distributed access.
- Streaming video and audio.
- Writing to log files.
- Storing data for backup and restore, disaster recovery, and archiving.
- Storing data for analysis by an on-premises or Azure-hosted service.

## Installation and Setup

```bash
pip install azure-storage-blob
```

## Document Loader

See a [usage example for the Azure Blob Storage](/docs/integrations/document_loaders/azure_blob_storage_container.html).

```python
from langchain.document_loaders import AzureBlobStorageContainerLoader
```

See a [usage example for the Azure Files](/docs/integrations/document_loaders/azure_blob_storage_file.html).

```python
from langchain.document_loaders import AzureBlobStorageFileLoader
```
@@ -1,24 +0,0 @@
# Azure Cognitive Search

>[Azure Cognitive Search](https://learn.microsoft.com/en-us/azure/search/search-what-is-azure-search) (formerly known as `Azure Search`) is a cloud search service that gives developers infrastructure, APIs, and tools for building a rich search experience over private, heterogeneous content in web, mobile, and enterprise applications.

>Search is foundational to any app that surfaces text to users, where common scenarios include catalog or document search, online retail apps, or data exploration over proprietary content. When you create a search service, you'll work with the following capabilities:
>- A search engine for full text search over a search index containing user-owned content
>- Rich indexing, with lexical analysis and optional AI enrichment for content extraction and transformation
>- Rich query syntax for text search, fuzzy search, autocomplete, geo-search and more
>- Programmability through REST APIs and client libraries in Azure SDKs
>- Azure integration at the data layer, machine learning layer, and AI (Cognitive Services)

## Installation and Setup

See [set up instructions](https://learn.microsoft.com/en-us/azure/search/search-create-service-portal).

## Retriever

See a [usage example](/docs/integrations/retrievers/azure_cognitive_search).

```python
from langchain.retrievers import AzureCognitiveSearchRetriever
```
@@ -1,50 +0,0 @@
# Azure OpenAI

>[Microsoft Azure](https://en.wikipedia.org/wiki/Microsoft_Azure), often referred to as `Azure`, is a cloud computing platform run by `Microsoft`, which offers access, management, and development of applications and services through global data centers. It provides a range of capabilities, including software as a service (SaaS), platform as a service (PaaS), and infrastructure as a service (IaaS). `Microsoft Azure` supports many programming languages, tools, and frameworks, including Microsoft-specific and third-party software and systems.

>[Azure OpenAI](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/) is an `Azure` service with powerful language models from `OpenAI`, including the `GPT-3`, `Codex` and `Embeddings model` series for content generation, summarization, semantic search, and natural language to code translation.

## Installation and Setup

```bash
pip install openai
pip install tiktoken
```

Set the environment variables to get access to the `Azure OpenAI` service.

```python
import os

os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_BASE"] = "https://<your-endpoint>.openai.azure.com/"
os.environ["OPENAI_API_KEY"] = "your AzureOpenAI key"
os.environ["OPENAI_API_VERSION"] = "2023-05-15"
```

## LLM

See a [usage example](/docs/integrations/llms/azure_openai_example).

```python
from langchain.llms import AzureOpenAI
```

## Text Embedding Models

See a [usage example](/docs/integrations/text_embedding/azureopenai).

```python
from langchain.embeddings import OpenAIEmbeddings
```

## Chat Models

See a [usage example](/docs/integrations/chat/azure_chat_openai).

```python
from langchain.chat_models import AzureChatOpenAI
```
@@ -1,24 +0,0 @@
# Bedrock

>[Amazon Bedrock](https://aws.amazon.com/bedrock/) is a fully managed service that makes foundation models (FMs) from leading AI startups and Amazon available via an API, so you can choose from a wide range of FMs to find the model that is best suited for your use case.

## Installation and Setup

```bash
pip install boto3
```

## LLM

See a [usage example](/docs/integrations/llms/bedrock).

```python
from langchain.llms.bedrock import Bedrock
```

## Text Embedding Models

See a [usage example](/docs/integrations/text_embedding/bedrock).

```python
from langchain.embeddings import BedrockEmbeddings
```
@@ -1,20 +0,0 @@
# Google BigQuery

>[Google BigQuery](https://cloud.google.com/bigquery) is a serverless and cost-effective enterprise data warehouse that works across clouds and scales with your data.
`BigQuery` is a part of the `Google Cloud Platform`.

## Installation and Setup

First, you need to install the `google-cloud-bigquery` python package.

```bash
pip install google-cloud-bigquery
```

## Document Loader

See a [usage example](/docs/integrations/document_loaders/google_bigquery).

```python
from langchain.document_loaders import BigQueryLoader
```
@@ -1,26 +0,0 @@
# Google Cloud Storage

>[Google Cloud Storage](https://en.wikipedia.org/wiki/Google_Cloud_Storage) is a managed service for storing unstructured data.

## Installation and Setup

First, you need to install the `google-cloud-storage` python package.

```bash
pip install google-cloud-storage
```

## Document Loader

There are two loaders for `Google Cloud Storage`: the `Directory` and the `File` loaders.

See a [usage example](/docs/integrations/document_loaders/google_cloud_storage_directory).

```python
from langchain.document_loaders import GCSDirectoryLoader
```

See a [usage example](/docs/integrations/document_loaders/google_cloud_storage_file).

```python
from langchain.document_loaders import GCSFileLoader
```
@@ -1,22 +0,0 @@
# Google Drive

>[Google Drive](https://en.wikipedia.org/wiki/Google_Drive) is a file storage and synchronization service developed by Google.

Currently, only `Google Docs` are supported.

## Installation and Setup

First, you need to install several python packages.

```bash
pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib
```

## Document Loader

See a [usage example and authorizing instructions](/docs/integrations/document_loaders/google_drive.html).

```python
from langchain.document_loaders import GoogleDriveLoader
```
@@ -1,32 +0,0 @@
# Google Search

This page covers how to use the Google Search API within LangChain.
It is broken into two parts: installation and setup, and then references to the specific Google Search wrapper.

## Installation and Setup
- Install requirements with `pip install google-api-python-client`
- Set up a Custom Search Engine, following [these instructions](https://stackoverflow.com/questions/37083058/programmatically-searching-google-in-python-using-custom-search)
- Get an API Key and Custom Search Engine ID from the previous step, and set them as environment variables `GOOGLE_API_KEY` and `GOOGLE_CSE_ID` respectively

## Wrappers

### Utility

There exists a GoogleSearchAPIWrapper utility which wraps this API. To import this utility:

```python
from langchain.utilities import GoogleSearchAPIWrapper
```

For a more detailed walkthrough of this wrapper, see [this notebook](/docs/integrations/tools/google_search.html).

### Tool

You can also easily load this wrapper as a Tool (to use with an Agent).
You can do this with:
```python
from langchain.agents import load_tools
tools = load_tools(["google-search"])
```

For more information on tools, see [this page](/docs/modules/agents/tools/).
@@ -1,25 +0,0 @@
# Google Vertex AI MatchingEngine

> [Google Vertex AI Matching Engine](https://cloud.google.com/vertex-ai/docs/matching-engine/overview) provides
> the industry's leading high-scale low latency vector database. These vector databases are commonly
> referred to as vector similarity-matching or an approximate nearest neighbor (ANN) service.

## Installation and Setup

We need to install several python packages.

```bash
pip install tensorflow \
    google-cloud-aiplatform \
    tensorflow-hub \
    tensorflow-text
```

## Vector Store

See a [usage example](/docs/integrations/vectorstores/matchingengine).

```python
from langchain.vectorstores import MatchingEngine
```
@@ -1,9 +0,0 @@
---
sidebar_position: 1
---

# Grouped by provider

import DocCardList from "@theme/DocCardList";

<DocCardList />
@@ -1,22 +0,0 @@
# Microsoft OneDrive

>[Microsoft OneDrive](https://en.wikipedia.org/wiki/OneDrive) (formerly `SkyDrive`) is a file-hosting service operated by Microsoft.

## Installation and Setup

First, you need to install a python package.

```bash
pip install o365
```

Then follow instructions [here](/docs/integrations/document_loaders/microsoft_onedrive.html).

## Document Loader

See a [usage example](/docs/integrations/document_loaders/microsoft_onedrive).

```python
from langchain.document_loaders import OneDriveLoader
```
@@ -1,16 +0,0 @@
# Microsoft PowerPoint

>[Microsoft PowerPoint](https://en.wikipedia.org/wiki/Microsoft_PowerPoint) is a presentation program by Microsoft.

## Installation and Setup

There isn't any special setup for it.

## Document Loader

See a [usage example](/docs/integrations/document_loaders/microsoft_powerpoint).

```python
from langchain.document_loaders import UnstructuredPowerPointLoader
```
@@ -1,16 +0,0 @@
# Microsoft Word

>[Microsoft Word](https://www.microsoft.com/en-us/microsoft-365/word) is a word processor developed by Microsoft.

## Installation and Setup

There isn't any special setup for it.

## Document Loader

See a [usage example](/docs/integrations/document_loaders/microsoft_word).

```python
from langchain.document_loaders import UnstructuredWordDocumentLoader
```
@@ -1,56 +0,0 @@
# SageMaker Endpoint

>[Amazon SageMaker](https://aws.amazon.com/sagemaker/) is a system that can build, train, and deploy machine learning (ML) models with fully managed infrastructure, tools, and workflows.

We use `SageMaker` to host our model and expose it as the `SageMaker Endpoint`.

## Installation and Setup

```bash
pip install boto3
```

For instructions on how to expose a model as a `SageMaker Endpoint`, please see [here](https://www.philschmid.de/custom-inference-huggingface-sagemaker).

**Note**: In order to handle batched requests, we need to adjust the return line in the `predict_fn()` function within the custom `inference.py` script:

Change from

```python
return {"vectors": sentence_embeddings[0].tolist()}
```

to:

```python
return {"vectors": sentence_embeddings.tolist()}
```

We have to set up the following required parameters of the `SagemakerEndpoint` call:
- `endpoint_name`: The name of the endpoint from the deployed SageMaker model.
  Must be unique within an AWS Region.
- `credentials_profile_name`: The name of the profile in the ~/.aws/credentials or ~/.aws/config files, which
  has either access keys or role information specified.
  If not specified, the default credential profile or, if on an EC2 instance,
  credentials from IMDS will be used.
  See [this guide](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html).

## LLM

See a [usage example](/docs/integrations/llms/sagemaker).

```python
from langchain import SagemakerEndpoint
from langchain.llms.sagemaker_endpoint import LLMContentHandler
```

## Text Embedding Models

See a [usage example](/docs/integrations/text_embedding/sagemaker-endpoint).

```python
from langchain.embeddings import SagemakerEndpointEmbeddings
from langchain.llms.sagemaker_endpoint import ContentHandlerBase
```
@@ -1,9 +0,0 @@
---
sidebar_position: 0
---

# Retrievers

import DocCardList from "@theme/DocCardList";

<DocCardList />
@@ -1,9 +0,0 @@
---
sidebar_position: 0
---

# Text embedding models

import DocCardList from "@theme/DocCardList";

<DocCardList />
@@ -1,12 +0,0 @@
---
sidebar_position: 0
---

# Agents & Toolkits

Agents and Toolkits are placed in the same directory because they are always used together.

import DocCardList from "@theme/DocCardList";

<DocCardList />
@@ -1,9 +0,0 @@
---
sidebar_position: 0
---

# Tools

import DocCardList from "@theme/DocCardList";

<DocCardList />
@@ -1,9 +0,0 @@
---
sidebar_position: 0
---

# Vector stores

import DocCardList from "@theme/DocCardList";

<DocCardList />