Separate platforms integrations docs (#10609)

Bagatur 2023-09-15 12:18:57 -07:00 committed by GitHub
parent 6d3670c7d8
commit 2ae568dcf5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
35 changed files with 415 additions and 770 deletions

View File

@@ -69,7 +69,10 @@ module.exports = {
type: "category",
label: "Additional resources",
collapsed: true,
items: [{ type: "autogenerated", dirName: "additional_resources" }, { type: "link", label: "Gallery", href: "https://github.com/kyrolabs/awesome-langchain" }],
items: [
{ type: "autogenerated", dirName: "additional_resources" },
{ type: "link", label: "Gallery", href: "https://github.com/kyrolabs/awesome-langchain" }
],
link: {
type: 'generated-index',
slug: "additional_resources",
@@ -80,12 +83,38 @@ module.exports = {
integrations: [
{
type: "category",
label: "Integrations",
label: "Providers",
collapsible: false,
items: [{ type: "autogenerated", dirName: "integrations" }],
items: [
{ type: "autogenerated", dirName: "integrations/platforms" },
{ type: "category", label: "More", collapsed: true, items: [{type:"autogenerated", dirName: "integrations/providers" }]},
],
link: {
type: 'generated-index',
slug: "integrations",
slug: "integrations/providers",
},
},
{
type: "category",
label: "Components",
collapsible: false,
items: [
{ type: "category", label: "LLMs", collapsed: true, items: [{type:"autogenerated", dirName: "integrations/llms" }], link: {type: "generated-index", slug: "integrations/llms" }},
{ type: "category", label: "Chat models", collapsed: true, items: [{type:"autogenerated", dirName: "integrations/chat" }], link: {type: "generated-index", slug: "integrations/chat" }},
{ type: "category", label: "Document loaders", collapsed: true, items: [{type:"autogenerated", dirName: "integrations/document_loaders" }], link: {type: "generated-index", slug: "integrations/document_loaders" }},
{ type: "category", label: "Document transformers", collapsed: true, items: [{type: "autogenerated", dirName: "integrations/document_transformers" }], link: {type: "generated-index", slug: "integrations/document_transformers" }},
{ type: "category", label: "Text embedding models", collapsed: true, items: [{type: "autogenerated", dirName: "integrations/text_embedding" }], link: {type: "generated-index", slug: "integrations/text_embedding" }},
{ type: "category", label: "Vector stores", collapsed: true, items: [{type: "autogenerated", dirName: "integrations/vectorstores" }], link: {type: "generated-index", slug: "integrations/vectorstores" }},
{ type: "category", label: "Retrievers", collapsed: true, items: [{type: "autogenerated", dirName: "integrations/retrievers" }], link: {type: "generated-index", slug: "integrations/retrievers" }},
{ type: "category", label: "Tools", collapsed: true, items: [{type: "autogenerated", dirName: "integrations/tools" }], link: {type: "generated-index", slug: "integrations/tools" }},
{ type: "category", label: "Agents and toolkits", collapsed: true, items: [{type: "autogenerated", dirName: "integrations/toolkits" }], link: {type: "generated-index", slug: "integrations/toolkits" }},
{ type: "category", label: "Memory", collapsed: true, items: [{type: "autogenerated", dirName: "integrations/memory" }], link: {type: "generated-index", slug: "integrations/memory" }},
{ type: "category", label: "Callbacks", collapsed: true, items: [{type: "autogenerated", dirName: "integrations/callbacks" }], link: {type: "generated-index", slug: "integrations/callbacks" }},
{ type: "category", label: "Chat loaders", collapsed: true, items: [{type: "autogenerated", dirName: "integrations/chat_loaders" }], link: {type: "generated-index", slug: "integrations/chat_loaders" }},
],
link: {
type: 'generated-index',
slug: "integrations/components",
},
},
],

View File

@@ -1,5 +1,77 @@
{
"redirects": [
{
"source": "/docs/integrations/providers/amazon_api_gateway",
"destination": "/docs/integrations/platforms/aws"
},
{
"source": "/docs/integrations/providers/azure_blob_storage",
"destination": "/docs/integrations/platforms/microsoft"
},
{
"source": "/docs/integrations/providers/google_vertexai_matchingengine",
"destination": "/docs/integrations/platforms/google"
},
{
"source": "/docs/integrations/providers/aws_s3",
"destination": "/docs/integrations/platforms/aws"
},
{
"source": "/docs/integrations/providers/azure_openai",
"destination": "/docs/integrations/platforms/microsoft"
},
{
"source": "/docs/integrations/providers/azure_cognitive_search_",
"destination": "/docs/integrations/platforms/microsoft"
},
{
"source": "/docs/integrations/providers/bedrock",
"destination": "/docs/integrations/platforms/aws"
},
{
"source": "/docs/integrations/providers/google_bigquery",
"destination": "/docs/integrations/platforms/google"
},
{
"source": "/docs/integrations/providers/google_cloud_storage",
"destination": "/docs/integrations/platforms/google"
},
{
"source": "/docs/integrations/providers/google_drive",
"destination": "/docs/integrations/platforms/google"
},
{
"source": "/docs/integrations/providers/google_search",
"destination": "/docs/integrations/platforms/google"
},
{
"source": "/docs/integrations/providers/microsoft_onedrive",
"destination": "/docs/integrations/platforms/microsoft"
},
{
"source": "/docs/integrations/providers/microsoft_powerpoint",
"destination": "/docs/integrations/platforms/microsoft"
},
{
"source": "/docs/integrations/providers/microsoft_word",
"destination": "/docs/integrations/platforms/microsoft"
},
{
"source": "/docs/integrations/providers/sagemaker_endpoint",
"destination": "/docs/integrations/platforms/aws"
},
{
"source": "/docs/integrations/providers/sagemaker_tracking",
"destination": "/docs/integrations/callbacks/sagemaker_tracking"
},
{
"source": "/docs/integrations/providers/openai",
"destination": "/docs/integrations/callbacks/openai"
},
{
"source": "/docs/modules/data_connection/caching_embeddings(/?)",
"destination": "/docs/modules/data_connection/text_embedding/caching_embeddings"

View File

@ -1,9 +0,0 @@
---
sidebar_position: 0
---
# Callbacks
import DocCardList from "@theme/DocCardList";
<DocCardList />

View File

@ -1,9 +0,0 @@
---
sidebar_position: 0
---
# Chat models
import DocCardList from "@theme/DocCardList";
<DocCardList />

View File

@ -1,188 +0,0 @@
---
sidebar_position: 0
---
# Chat loaders
Like document loaders, chat loaders are utilities designed to help load conversations from popular communication platforms such as Facebook Messenger, Slack, and Discord. The conversations are loaded into memory as LangChain chat message objects. Such utilities facilitate tasks such as fine-tuning a language model to match your personal style or voice.
This brief guide illustrates the process using [OpenAI's fine-tuning API](https://platform.openai.com/docs/guides/fine-tuning), which consists of six steps:
1. Export your Facebook Messenger chat data in a compatible format for your intended chat loader.
2. Load the chat data into memory as LangChain chat message objects. (_this is what is covered in each integration notebook in this section of the documentation_).
- Assign a person to the "AI" role and optionally filter, group, and merge messages.
3. Export these acquired messages in a format expected by the fine-tuning API.
4. Upload this data to OpenAI.
5. Fine-tune your model.
6. Implement the fine-tuned model in LangChain.
This guide is not wholly comprehensive, but it is designed to take you through the fundamentals of going from raw data to a fine-tuned model.
We will demonstrate the procedure through an example of fine-tuning a `gpt-3.5-turbo` model on Facebook Messenger data.
### 1. Export your chat data
To export your Facebook Messenger data, you can follow the [instructions here](https://www.zapptales.com/en/download-facebook-messenger-chat-history-how-to/).
:::important JSON format
You must select "JSON format" (instead of HTML) when exporting your data to be compatible with the current loader.
:::
OpenAI requires at least 10 examples to fine-tune your model, but it recommends 50-100 for better results.
You can use the example data stored at [this google drive link](https://drive.google.com/file/d/1rh1s1o2i7B-Sk1v9o8KNgivLVGwJ-osV/view?usp=sharing) to test the process.
### 2. Load the chat
Once you've obtained your chat data, you can load it into memory as LangChain chat message objects. Here's an example of loading the data in Python:
```python
from langchain.chat_loaders.facebook_messenger import FolderFacebookMessengerChatLoader
loader = FolderFacebookMessengerChatLoader(
path="./facebook_messenger_chats",
)
chat_sessions = loader.load()
```
In this snippet, we point the loader to a directory of Facebook chat dumps, which are then loaded as multiple "sessions" of messages, one session per conversation file.
Once you've loaded the messages, you should decide which person you want to fine-tune the model to (usually yourself). You can also decide to merge consecutive messages from the same sender into a single chat message.
For both of these tasks, you can use the `chat_loaders` utilities:
```python
from langchain.chat_loaders.utils import (
merge_chat_runs,
map_ai_messages,
)
merged_sessions = merge_chat_runs(chat_sessions)
alternating_sessions = list(map_ai_messages(merged_sessions, "My Name"))
```
### 3. Export messages to OpenAI format
Convert the chat messages to dictionaries using the `convert_messages_for_finetuning` function. Then, group the data into chunks for better context modeling and overlap management.
```python
from langchain.adapters.openai import convert_messages_for_finetuning
openai_messages = convert_messages_for_finetuning(alternating_sessions)
```
At this point, the data is ready for upload to OpenAI. You can choose to split up conversations into smaller chunks for training if you
do not have enough conversations to train on. Feel free to play around with different chunk sizes or with adding system messages to the fine-tuning data.
```python
chunk_size = 8
overlap = 2
message_groups = [
conversation_messages[i: i + chunk_size]
for conversation_messages in openai_messages
for i in range(
0, len(conversation_messages) - chunk_size + 1,
chunk_size - overlap)
]
len(message_groups)
# 9
```
### 4. Upload the data to OpenAI
Ensure you have set your OpenAI API key by following these [instructions](https://platform.openai.com/account/api-keys), then upload the training file.
An audit is performed to ensure data compliance, so you may have to wait a few minutes for the dataset to become ready for use.
```python
import time
import json
import io
import openai
my_file = io.BytesIO()
for group in message_groups:
my_file.write((json.dumps({"messages": group}) + "\n").encode('utf-8'))
my_file.seek(0)
training_file = openai.File.create(
file=my_file,
purpose='fine-tune'
)
# Wait while the file is processed
status = openai.File.retrieve(training_file.id).status
start_time = time.time()
while status != "processed":
print(f"Status=[{status}]... {time.time() - start_time:.2f}s", end="\r", flush=True)
time.sleep(5)
status = openai.File.retrieve(training_file.id).status
print(f"File {training_file.id} ready after {time.time() - start_time:.2f} seconds.")
```
Once this is done, you can proceed to the model training!
### 5. Fine-tune the model
Start the fine-tuning job with your chosen base model.
```python
job = openai.FineTuningJob.create(
training_file=training_file.id,
model="gpt-3.5-turbo",
)
```
This might take a while. Check the status with `openai.FineTuningJob.retrieve(job.id).status` and wait for it to report `succeeded`.
```python
# It may take 10-20+ minutes to complete training.
status = openai.FineTuningJob.retrieve(job.id).status
start_time = time.time()
while status != "succeeded":
print(f"Status=[{status}]... {time.time() - start_time:.2f}s", end="\r", flush=True)
time.sleep(5)
job = openai.FineTuningJob.retrieve(job.id)
status = job.status
```
### 6. Use the model in LangChain
You're almost there! Use the fine-tuned model in LangChain.
```python
from langchain import chat_models
model_name = job.fine_tuned_model
# Example: ft:gpt-3.5-turbo-0613:personal::5mty86jb
model = chat_models.ChatOpenAI(model=model_name)
```
```python
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
prompt = ChatPromptTemplate.from_messages(
[
("human", "{input}"),
]
)
chain = prompt | model | StrOutputParser()
for tok in chain.stream({"input": "What classes are you taking?"}):
print(tok, end="", flush=True)
# The usual - Potions, Transfiguration, Defense Against the Dark Arts. What about you?
```
And that's it! You've successfully fine-tuned a model and used it in LangChain.
## Supported Chat Loaders
LangChain currently supports the following chat loaders. Feel free to contribute more!
import DocCardList from "@theme/DocCardList";
<DocCardList />

View File

@ -1,9 +0,0 @@
---
sidebar_position: 0
---
# Document loaders
import DocCardList from "@theme/DocCardList";
<DocCardList />

View File

@ -1,9 +0,0 @@
---
sidebar_position: 0
---
# Document transformers
import DocCardList from "@theme/DocCardList";
<DocCardList />

View File

@ -1,9 +0,0 @@
---
sidebar_position: 0
---
# LLMs
import DocCardList from "@theme/DocCardList";
<DocCardList />

View File

@ -1,9 +0,0 @@
---
sidebar_position: 0
---
# Memory
import DocCardList from "@theme/DocCardList";
<DocCardList />

View File

@@ -0,0 +1,82 @@
# AWS
## LLMs
### Bedrock
See a [usage example](/docs/integrations/llms/bedrock).
```python
from langchain.llms.bedrock import Bedrock
```
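A minimal instantiation sketch; the profile name and model id below are illustrative assumptions, not fixed values:
```python
from langchain.llms.bedrock import Bedrock

# Assumed values: use a profile from ~/.aws/credentials with Bedrock access
# and a model id that is enabled for your account.
llm = Bedrock(
    credentials_profile_name="bedrock-admin",
    model_id="anthropic.claude-v2",
)
print(llm("What is the capital of France?"))
```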
### Amazon API Gateway
>[Amazon API Gateway](https://aws.amazon.com/api-gateway/) is a fully managed service that makes it easy for developers to create, publish, maintain, monitor, and secure APIs at any scale. APIs act as the "front door" for applications to access data, business logic, or functionality from your backend services. Using API Gateway, you can create RESTful APIs and WebSocket APIs that enable real-time two-way communication applications. API Gateway supports containerized and serverless workloads, as well as web applications.
>API Gateway handles all the tasks involved in accepting and processing up to hundreds of thousands of concurrent API calls, including traffic management, CORS support, authorization and access control, throttling, monitoring, and API version management. API Gateway has no minimum fees or startup costs. You pay for the API calls you receive and the amount of data transferred out and, with the API Gateway tiered pricing model, you can reduce your cost as your API usage scales.
See a [usage example](/docs/integrations/llms/amazon_api_gateway_example).
```python
from langchain.llms import AmazonAPIGateway
api_url = "https://<api_gateway_id>.execute-api.<region>.amazonaws.com/LATEST/HF"
# These are sample parameters for Falcon 40B Instruct Deployed from Amazon SageMaker JumpStart
model_kwargs = {
"max_new_tokens": 100,
"num_return_sequences": 1,
"top_k": 50,
"top_p": 0.95,
"do_sample": False,
"return_full_text": True,
"temperature": 0.2,
}
llm = AmazonAPIGateway(api_url=api_url, model_kwargs=model_kwargs)
```
### SageMaker Endpoint
>[Amazon SageMaker](https://aws.amazon.com/sagemaker/) is a service for building, training, and deploying machine learning (ML) models with fully managed infrastructure, tools, and workflows.
We use `SageMaker` to host a model and expose it as a `SageMaker Endpoint`.
See a [usage example](/docs/integrations/llms/sagemaker).
```python
from langchain.llms import SagemakerEndpoint
from langchain.llms.sagemaker_endpoint import LLMContentHandler
```
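`SagemakerEndpoint` requires a content handler that serializes prompts into the JSON contract your endpoint expects and parses its responses. A minimal sketch, assuming a hypothetical endpoint named `my-llm-endpoint` that accepts an `{"inputs": ...}` payload:
```python
import json
from langchain.llms import SagemakerEndpoint
from langchain.llms.sagemaker_endpoint import LLMContentHandler

class ContentHandler(LLMContentHandler):
    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:
        # The request schema depends on the deployed model; this is a common one.
        return json.dumps({"inputs": prompt, "parameters": model_kwargs}).encode("utf-8")

    def transform_output(self, output) -> str:
        # `output` is a streaming body; the response schema is also model-specific.
        return json.loads(output.read().decode("utf-8"))[0]["generated_text"]

llm = SagemakerEndpoint(
    endpoint_name="my-llm-endpoint",      # hypothetical endpoint name
    region_name="us-east-1",              # assumed region
    credentials_profile_name="default",   # assumed AWS profile
    model_kwargs={"temperature": 0.2},
    content_handler=ContentHandler(),
)
```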
## Text Embedding Models
### Bedrock
See a [usage example](/docs/integrations/text_embedding/bedrock).
```python
from langchain.embeddings import BedrockEmbeddings
```
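A short usage sketch; the profile and model id are assumptions that depend on your account:
```python
from langchain.embeddings import BedrockEmbeddings

embeddings = BedrockEmbeddings(
    credentials_profile_name="bedrock-admin",   # assumed AWS profile
    model_id="amazon.titan-embed-text-v1",      # assumed embedding model id
)
vector = embeddings.embed_query("hello world")
```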
### SageMaker Endpoint
See a [usage example](/docs/integrations/text_embedding/sagemaker-endpoint).
```python
from langchain.embeddings import SagemakerEndpointEmbeddings
from langchain.llms.sagemaker_endpoint import ContentHandlerBase
```
## Document loaders
### AWS S3 Directory
>[Amazon Simple Storage Service (Amazon S3)](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html) is an object storage service.
>[AWS S3 Directory](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html)
>[AWS S3 Buckets](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingBucket.html)
See a [usage example for S3DirectoryLoader](/docs/integrations/document_loaders/aws_s3_directory.html).
See a [usage example for S3FileLoader](/docs/integrations/document_loaders/aws_s3_file.html).
```python
from langchain.document_loaders import S3DirectoryLoader, S3FileLoader
```
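A minimal sketch; the bucket, prefix, and key are placeholders:
```python
from langchain.document_loaders import S3DirectoryLoader, S3FileLoader

# Load every object under a prefix, or a single object by key.
dir_loader = S3DirectoryLoader("my-bucket", prefix="reports/")
file_loader = S3FileLoader("my-bucket", "reports/2023-q3.pdf")
docs = dir_loader.load()
```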

View File

@@ -0,0 +1,99 @@
# Google
## Document Loader
### Google BigQuery
>[Google BigQuery](https://cloud.google.com/bigquery) is a serverless and cost-effective enterprise data warehouse that works across clouds and scales with your data.
`BigQuery` is a part of the `Google Cloud Platform`.
First, you need to install the `google-cloud-bigquery` Python package.
```bash
pip install google-cloud-bigquery
```
See a [usage example](/docs/integrations/document_loaders/google_bigquery).
```python
from langchain.document_loaders import BigQueryLoader
```
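A minimal sketch; the query and project id are placeholders:
```python
from langchain.document_loaders import BigQueryLoader

loader = BigQueryLoader(
    query="SELECT title, body FROM `my-project.my_dataset.articles`",
    project="my-project",  # assumed GCP project id
)
docs = loader.load()  # one Document per result row
```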
### Google Cloud Storage
>[Google Cloud Storage](https://en.wikipedia.org/wiki/Google_Cloud_Storage) is a managed service for storing unstructured data.
First, you need to install the `google-cloud-storage` Python package.
```bash
pip install google-cloud-storage
```
There are two loaders for `Google Cloud Storage`: the `Directory` loader and the `File` loader.
See a [usage example](/docs/integrations/document_loaders/google_cloud_storage_directory).
```python
from langchain.document_loaders import GCSDirectoryLoader
```
See a [usage example](/docs/integrations/document_loaders/google_cloud_storage_file).
```python
from langchain.document_loaders import GCSFileLoader
```
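A minimal sketch covering both loaders; the project, bucket, and blob names are placeholders:
```python
from langchain.document_loaders import GCSDirectoryLoader, GCSFileLoader

dir_loader = GCSDirectoryLoader(project_name="my-project", bucket="my-bucket")
file_loader = GCSFileLoader(project_name="my-project", bucket="my-bucket", blob="notes.txt")
docs = file_loader.load()
```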
### Google Drive
>[Google Drive](https://en.wikipedia.org/wiki/Google_Drive) is a file storage and synchronization service developed by Google.
Currently, only `Google Docs` are supported.
First, you need to install several Python packages.
```bash
pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib
```
See a [usage example and authorizing instructions](/docs/integrations/document_loaders/google_drive.html).
```python
from langchain.document_loaders import GoogleDriveLoader
```
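A minimal sketch; the folder id is a placeholder, and OAuth credentials must be configured as described in the linked example:
```python
from langchain.document_loaders import GoogleDriveLoader

loader = GoogleDriveLoader(folder_id="<your-folder-id>")
docs = loader.load()  # currently returns the Google Docs in the folder
```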
## Vector Store
### Google Vertex AI MatchingEngine
> [Google Vertex AI Matching Engine](https://cloud.google.com/vertex-ai/docs/matching-engine/overview) provides
> the industry's leading high-scale, low-latency vector database. These vector databases are commonly
> referred to as vector similarity-matching or an approximate nearest neighbor (ANN) service.
We need to install several python packages.
```bash
pip install tensorflow google-cloud-aiplatform tensorflow-hub tensorflow-text
```
See a [usage example](/docs/integrations/vectorstores/matchingengine).
```python
from langchain.vectorstores import MatchingEngine
```
## Tools
### Google Search
- Install requirements with `pip install google-api-python-client`
- Set up a Custom Search Engine, following [these instructions](https://stackoverflow.com/questions/37083058/programmatically-searching-google-in-python-using-custom-search)
- Get an API Key and Custom Search Engine ID from the previous step, and set them as environment variables `GOOGLE_API_KEY` and `GOOGLE_CSE_ID` respectively
There exists a `GoogleSearchAPIWrapper` utility which wraps this API. To import this utility:
```python
from langchain.utilities import GoogleSearchAPIWrapper
```
For a more detailed walkthrough of this wrapper, see [this notebook](/docs/integrations/tools/google_search.html).
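A quick sketch, assuming `GOOGLE_API_KEY` and `GOOGLE_CSE_ID` are set in your environment:
```python
from langchain.utilities import GoogleSearchAPIWrapper

search = GoogleSearchAPIWrapper()
print(search.run("LangChain"))  # returns a snippet summary of the top results
```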
You can also load this wrapper as a Tool (to use with an Agent):
```python
from langchain.agents import load_tools
tools = load_tools(["google-search"])
```

View File

@@ -0,0 +1,129 @@
# Microsoft
## LLM
### Azure OpenAI
>[Microsoft Azure](https://en.wikipedia.org/wiki/Microsoft_Azure), often referred to as `Azure`, is a cloud computing platform run by `Microsoft`, which offers access, management, and development of applications and services through global data centers. It provides a range of capabilities, including software as a service (SaaS), platform as a service (PaaS), and infrastructure as a service (IaaS). `Microsoft Azure` supports many programming languages, tools, and frameworks, including Microsoft-specific and third-party software and systems.
>[Azure OpenAI](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/) is an `Azure` service with powerful language models from `OpenAI` including the `GPT-3`, `Codex` and `Embeddings model` series for content generation, summarization, semantic search, and natural language to code translation.
```bash
pip install openai tiktoken
```
Set the environment variables to get access to the `Azure OpenAI` service.
```python
import os
os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_BASE"] = "https://<your-endpoint.openai.azure.com/"
os.environ["OPENAI_API_KEY"] = "your AzureOpenAI key"
os.environ["OPENAI_API_VERSION"] = "2023-05-15"
```
See a [usage example](/docs/integrations/llms/azure_openai_example).
```python
from langchain.llms import AzureOpenAI
```
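A minimal sketch; the deployment and model names are placeholders, and the environment variables from the snippet above are assumed to be set:
```python
from langchain.llms import AzureOpenAI

llm = AzureOpenAI(
    deployment_name="my-deployment",   # your Azure deployment name
    model_name="text-davinci-003",     # the underlying model
)
print(llm("Tell me a joke"))
```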
## Text Embedding Models
### Azure OpenAI
See a [usage example](/docs/integrations/text_embedding/azureopenai).
```python
from langchain.embeddings import OpenAIEmbeddings
```
## Chat Models
### Azure OpenAI
See a [usage example](/docs/integrations/chat/azure_chat_openai).
```python
from langchain.chat_models import AzureChatOpenAI
```
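A minimal sketch; the deployment name is a placeholder, and the environment variables shown earlier are assumed to be set:
```python
from langchain.chat_models import AzureChatOpenAI
from langchain.schema import HumanMessage

chat = AzureChatOpenAI(deployment_name="my-gpt-35-deployment")
print(chat([HumanMessage(content="Translate 'hello' into French.")]))
```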
## Document loaders
### Azure Blob Storage
>[Azure Blob Storage](https://learn.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction) is Microsoft's object storage solution for the cloud. Blob Storage is optimized for storing massive amounts of unstructured data. Unstructured data is data that doesn't adhere to a particular data model or definition, such as text or binary data.
>[Azure Files](https://learn.microsoft.com/en-us/azure/storage/files/storage-files-introduction) offers fully managed
> file shares in the cloud that are accessible via the industry standard Server Message Block (`SMB`) protocol,
> Network File System (`NFS`) protocol, and `Azure Files REST API`. `Azure Files` are based on the `Azure Blob Storage`.
`Azure Blob Storage` is designed for:
- Serving images or documents directly to a browser.
- Storing files for distributed access.
- Streaming video and audio.
- Writing to log files.
- Storing data for backup and restore, disaster recovery, and archiving.
- Storing data for analysis by an on-premises or Azure-hosted service.
```bash
pip install azure-storage-blob
```
See a [usage example for the Azure Blob Storage](/docs/integrations/document_loaders/azure_blob_storage_container.html).
```python
from langchain.document_loaders import AzureBlobStorageContainerLoader
```
See a [usage example for the Azure Files](/docs/integrations/document_loaders/azure_blob_storage_file.html).
```python
from langchain.document_loaders import AzureBlobStorageFileLoader
```
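A minimal sketch for the container loader; the connection string and container name are placeholders:
```python
from langchain.document_loaders import AzureBlobStorageContainerLoader

loader = AzureBlobStorageContainerLoader(
    conn_str="<my-connection-string>",
    container="<my-container>",
)
docs = loader.load()
```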
### Microsoft OneDrive
>[Microsoft OneDrive](https://en.wikipedia.org/wiki/OneDrive) (formerly `SkyDrive`) is a file-hosting service operated by Microsoft.
First, you need to install the `o365` Python package.
```bash
pip install o365
```
See a [usage example](/docs/integrations/document_loaders/microsoft_onedrive).
```python
from langchain.document_loaders import OneDriveLoader
```
### Microsoft Word
>[Microsoft Word](https://www.microsoft.com/en-us/microsoft-365/word) is a word processor developed by Microsoft.
See a [usage example](/docs/integrations/document_loaders/microsoft_word).
```python
from langchain.document_loaders import UnstructuredWordDocumentLoader
```
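A minimal sketch; the file path is a placeholder:
```python
from langchain.document_loaders import UnstructuredWordDocumentLoader

loader = UnstructuredWordDocumentLoader("example.docx")
docs = loader.load()
```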
## Retriever
### Azure Cognitive Search
>[Azure Cognitive Search](https://learn.microsoft.com/en-us/azure/search/search-what-is-azure-search) (formerly known as `Azure Search`) is a cloud search service that gives developers infrastructure, APIs, and tools for building a rich search experience over private, heterogeneous content in web, mobile, and enterprise applications.
>Search is foundational to any app that surfaces text to users, where common scenarios include catalog or document search, online retail apps, or data exploration over proprietary content. When you create a search service, you'll work with the following capabilities:
>- A search engine for full text search over a search index containing user-owned content
>- Rich indexing, with lexical analysis and optional AI enrichment for content extraction and transformation
>- Rich query syntax for text search, fuzzy search, autocomplete, geo-search and more
>- Programmability through REST APIs and client libraries in Azure SDKs
>- Azure integration at the data layer, machine learning layer, and AI (Cognitive Services)
See [set up instructions](https://learn.microsoft.com/en-us/azure/search/search-create-service-portal).
See a [usage example](/docs/integrations/retrievers/azure_cognitive_search).
```python
from langchain.retrievers import AzureCognitiveSearchRetriever
```
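A minimal sketch; the retriever reads the service name, index name, and API key from the environment, and all values below are placeholders:
```python
import os
from langchain.retrievers import AzureCognitiveSearchRetriever

os.environ["AZURE_COGNITIVE_SEARCH_SERVICE_NAME"] = "<my-search-service>"
os.environ["AZURE_COGNITIVE_SEARCH_INDEX_NAME"] = "<my-index>"
os.environ["AZURE_COGNITIVE_SEARCH_API_KEY"] = "<my-api-key>"

retriever = AzureCognitiveSearchRetriever(content_key="content", top_k=5)
docs = retriever.get_relevant_documents("what is langchain?")
```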

View File

@ -1,73 +0,0 @@
# Amazon API Gateway
[Amazon API Gateway](https://aws.amazon.com/api-gateway/) is a fully managed service that makes it easy for developers to create, publish, maintain, monitor, and secure APIs at any scale. APIs act as the "front door" for applications to access data, business logic, or functionality from your backend services. Using API Gateway, you can create RESTful APIs and WebSocket APIs that enable real-time two-way communication applications. API Gateway supports containerized and serverless workloads, as well as web applications.
API Gateway handles all the tasks involved in accepting and processing up to hundreds of thousands of concurrent API calls, including traffic management, CORS support, authorization and access control, throttling, monitoring, and API version management. API Gateway has no minimum fees or startup costs. You pay for the API calls you receive and the amount of data transferred out and, with the API Gateway tiered pricing model, you can reduce your cost as your API usage scales.
## LLM
See a [usage example](/docs/integrations/llms/amazon_api_gateway_example).
```python
from langchain.llms import AmazonAPIGateway
api_url = "https://<api_gateway_id>.execute-api.<region>.amazonaws.com/LATEST/HF"
llm = AmazonAPIGateway(api_url=api_url)
# These are sample parameters for Falcon 40B Instruct Deployed from Amazon SageMaker JumpStart
parameters = {
"max_new_tokens": 100,
"num_return_sequences": 1,
"top_k": 50,
"top_p": 0.95,
"do_sample": False,
"return_full_text": True,
"temperature": 0.2,
}
prompt = "what day comes after Friday?"
llm.model_kwargs = parameters
llm(prompt)
>>> 'what day comes after Friday?\nSaturday'
```
## Agent
```python
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.llms import AmazonAPIGateway
api_url = "https://<api_gateway_id>.execute-api.<region>.amazonaws.com/LATEST/HF"
llm = AmazonAPIGateway(api_url=api_url)
parameters = {
"max_new_tokens": 50,
"num_return_sequences": 1,
"top_k": 250,
"top_p": 0.25,
"do_sample": False,
"temperature": 0.1,
}
llm.model_kwargs = parameters
# Next, let's load some tools to use. Note that the `llm-math` tool uses an LLM, so we need to pass that in.
tools = load_tools(["python_repl", "llm-math"], llm=llm)
# Finally, let's initialize an agent with the tools, the language model, and the type of agent we want to use.
agent = initialize_agent(
tools,
llm,
agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
verbose=True,
)
# Now let's test it out!
agent.run("""
Write a Python script that prints "Hello, world!"
""")
>>> 'Hello, world!'
```

View File

@ -1,25 +0,0 @@
# AWS S3 Directory
>[Amazon Simple Storage Service (Amazon S3)](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html) is an object storage service.
>[AWS S3 Directory](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html)
>[AWS S3 Buckets](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingBucket.html)
## Installation and Setup
```bash
pip install boto3
```
## Document Loader
See a [usage example for S3DirectoryLoader](/docs/integrations/document_loaders/aws_s3_directory.html).
See a [usage example for S3FileLoader](/docs/integrations/document_loaders/aws_s3_file.html).
```python
from langchain.document_loaders import S3DirectoryLoader, S3FileLoader
```

View File

@ -1,36 +0,0 @@
# Azure Blob Storage
>[Azure Blob Storage](https://learn.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction) is Microsoft's object storage solution for the cloud. Blob Storage is optimized for storing massive amounts of unstructured data. Unstructured data is data that doesn't adhere to a particular data model or definition, such as text or binary data.
>[Azure Files](https://learn.microsoft.com/en-us/azure/storage/files/storage-files-introduction) offers fully managed
> file shares in the cloud that are accessible via the industry standard Server Message Block (`SMB`) protocol,
> Network File System (`NFS`) protocol, and `Azure Files REST API`. `Azure Files` are based on the `Azure Blob Storage`.
`Azure Blob Storage` is designed for:
- Serving images or documents directly to a browser.
- Storing files for distributed access.
- Streaming video and audio.
- Writing to log files.
- Storing data for backup and restore, disaster recovery, and archiving.
- Storing data for analysis by an on-premises or Azure-hosted service.
## Installation and Setup
```bash
pip install azure-storage-blob
```
## Document Loader
See a [usage example for the Azure Blob Storage](/docs/integrations/document_loaders/azure_blob_storage_container.html).
```python
from langchain.document_loaders import AzureBlobStorageContainerLoader
```
See a [usage example for the Azure Files](/docs/integrations/document_loaders/azure_blob_storage_file.html).
```python
from langchain.document_loaders import AzureBlobStorageFileLoader
```

View File

@ -1,24 +0,0 @@
# Azure Cognitive Search
>[Azure Cognitive Search](https://learn.microsoft.com/en-us/azure/search/search-what-is-azure-search) (formerly known as `Azure Search`) is a cloud search service that gives developers infrastructure, APIs, and tools for building a rich search experience over private, heterogeneous content in web, mobile, and enterprise applications.
>Search is foundational to any app that surfaces text to users, where common scenarios include catalog or document search, online retail apps, or data exploration over proprietary content. When you create a search service, you'll work with the following capabilities:
>- A search engine for full text search over a search index containing user-owned content
>- Rich indexing, with lexical analysis and optional AI enrichment for content extraction and transformation
>- Rich query syntax for text search, fuzzy search, autocomplete, geo-search and more
>- Programmability through REST APIs and client libraries in Azure SDKs
>- Azure integration at the data layer, machine learning layer, and AI (Cognitive Services)
## Installation and Setup
See [set up instructions](https://learn.microsoft.com/en-us/azure/search/search-create-service-portal).
## Retriever
See a [usage example](/docs/integrations/retrievers/azure_cognitive_search).
```python
from langchain.retrievers import AzureCognitiveSearchRetriever
```

View File

@ -1,50 +0,0 @@
# Azure OpenAI
>[Microsoft Azure](https://en.wikipedia.org/wiki/Microsoft_Azure), often referred to as `Azure`, is a cloud computing platform run by `Microsoft`, which offers access, management, and development of applications and services through global data centers. It provides a range of capabilities, including software as a service (SaaS), platform as a service (PaaS), and infrastructure as a service (IaaS). `Microsoft Azure` supports many programming languages, tools, and frameworks, including Microsoft-specific and third-party software and systems.
>[Azure OpenAI](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/) is an `Azure` service with powerful language models from `OpenAI` including the `GPT-3`, `Codex` and `Embeddings model` series for content generation, summarization, semantic search, and natural language to code translation.
## Installation and Setup
```bash
pip install openai
pip install tiktoken
```
Set the environment variables to get access to the `Azure OpenAI` service.
```python
import os
os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_BASE"] = "https://<your-endpoint.openai.azure.com/"
os.environ["OPENAI_API_KEY"] = "your AzureOpenAI key"
os.environ["OPENAI_API_VERSION"] = "2023-05-15"
```
## LLM
See a [usage example](/docs/integrations/llms/azure_openai_example).
```python
from langchain.llms import AzureOpenAI
```
## Text Embedding Models
See a [usage example](/docs/integrations/text_embedding/azureopenai)
```python
from langchain.embeddings import OpenAIEmbeddings
```
## Chat Models
See a [usage example](/docs/integrations/chat/azure_chat_openai)
```python
from langchain.chat_models import AzureChatOpenAI
```

View File

@ -1,24 +0,0 @@
# Bedrock
>[Amazon Bedrock](https://aws.amazon.com/bedrock/) is a fully managed service that makes FMs from leading AI startups and Amazon available via an API, so you can choose from a wide range of FMs to find the model that is best suited for your use case.
## Installation and Setup
```bash
pip install boto3
```
## LLM
See a [usage example](/docs/integrations/llms/bedrock).
```python
from langchain.llms.bedrock import Bedrock
```
## Text Embedding Models
See a [usage example](/docs/integrations/text_embedding/bedrock).
```python
from langchain.embeddings import BedrockEmbeddings
```

View File

@ -1,20 +0,0 @@
# Google BigQuery
>[Google BigQuery](https://cloud.google.com/bigquery) is a serverless and cost-effective enterprise data warehouse that works across clouds and scales with your data.
`BigQuery` is a part of the `Google Cloud Platform`.
## Installation and Setup
First, you need to install `google-cloud-bigquery` python package.
```bash
pip install google-cloud-bigquery
```
## Document Loader
See a [usage example](/docs/integrations/document_loaders/google_bigquery).
```python
from langchain.document_loaders import BigQueryLoader
```

View File

@ -1,26 +0,0 @@
# Google Cloud Storage
>[Google Cloud Storage](https://en.wikipedia.org/wiki/Google_Cloud_Storage) is a managed service for storing unstructured data.
## Installation and Setup
First, you need to install the `google-cloud-storage` Python package.
```bash
pip install google-cloud-storage
```
## Document Loader
There are two loaders for the `Google Cloud Storage`: the `Directory` and the `File` loaders.
See a [usage example](/docs/integrations/document_loaders/google_cloud_storage_directory).
```python
from langchain.document_loaders import GCSDirectoryLoader
```
See a [usage example](/docs/integrations/document_loaders/google_cloud_storage_file).
```python
from langchain.document_loaders import GCSFileLoader
```

View File

@ -1,22 +0,0 @@
# Google Drive
>[Google Drive](https://en.wikipedia.org/wiki/Google_Drive) is a file storage and synchronization service developed by Google.
Currently, only `Google Docs` are supported.
## Installation and Setup
First, you need to install several Python packages.
```bash
pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib
```
## Document Loader
See a [usage example and authorizing instructions](/docs/integrations/document_loaders/google_drive.html).
```python
from langchain.document_loaders import GoogleDriveLoader
```

View File

@ -1,32 +0,0 @@
# Google Search
This page covers how to use the Google Search API within LangChain.
It is broken into two parts: installation and setup, and then references to the specific Google Search wrapper.
## Installation and Setup
- Install requirements with `pip install google-api-python-client`
- Set up a Custom Search Engine, following [these instructions](https://stackoverflow.com/questions/37083058/programmatically-searching-google-in-python-using-custom-search)
- Get an API Key and Custom Search Engine ID from the previous step, and set them as environment variables `GOOGLE_API_KEY` and `GOOGLE_CSE_ID` respectively
## Wrappers
### Utility
There exists a GoogleSearchAPIWrapper utility which wraps this API. To import this utility:
```python
from langchain.utilities import GoogleSearchAPIWrapper
```
For a more detailed walkthrough of this wrapper, see [this notebook](/docs/integrations/tools/google_search.html).
### Tool
You can also easily load this wrapper as a Tool (to use with an Agent).
You can do this with:
```python
from langchain.agents import load_tools
tools = load_tools(["google-search"])
```
For more information on tools, see [this page](/docs/modules/agents/tools/).

View File

@ -1,25 +0,0 @@
# Google Vertex AI MatchingEngine
> [Google Vertex AI Matching Engine](https://cloud.google.com/vertex-ai/docs/matching-engine/overview) provides
> the industry's leading high-scale low latency vector database. These vector databases are commonly
> referred to as vector similarity-matching or an approximate nearest neighbor (ANN) service.
## Installation and Setup
We need to install several python packages.
```bash
pip install tensorflow \
google-cloud-aiplatform \
tensorflow-hub \
tensorflow-text
```
## Vector Store
See a [usage example](/docs/integrations/vectorstores/matchingengine).
```python
from langchain.vectorstores import MatchingEngine
```

View File

@ -1,9 +0,0 @@
---
sidebar_position: 1
---
# Grouped by provider
import DocCardList from "@theme/DocCardList";
<DocCardList />

View File

@ -1,22 +0,0 @@
# Microsoft OneDrive
>[Microsoft OneDrive](https://en.wikipedia.org/wiki/OneDrive) (formerly `SkyDrive`) is a file-hosting service operated by Microsoft.
## Installation and Setup
First, you need to install a python package.
```bash
pip install o365
```
Then follow instructions [here](/docs/integrations/document_loaders/microsoft_onedrive.html).
## Document Loader
See a [usage example](/docs/integrations/document_loaders/microsoft_onedrive).
```python
from langchain.document_loaders import OneDriveLoader
```

View File

@ -1,16 +0,0 @@
# Microsoft PowerPoint
>[Microsoft PowerPoint](https://en.wikipedia.org/wiki/Microsoft_PowerPoint) is a presentation program by Microsoft.
## Installation and Setup
There isn't any special setup for it.
## Document Loader
See a [usage example](/docs/integrations/document_loaders/microsoft_powerpoint).
```python
from langchain.document_loaders import UnstructuredPowerPointLoader
```

View File

@ -1,16 +0,0 @@
# Microsoft Word
>[Microsoft Word](https://www.microsoft.com/en-us/microsoft-365/word) is a word processor developed by Microsoft.
## Installation and Setup
There isn't any special setup for it.
## Document Loader
See a [usage example](/docs/integrations/document_loaders/microsoft_word).
```python
from langchain.document_loaders import UnstructuredWordDocumentLoader
```

View File

@ -1,56 +0,0 @@
# SageMaker Endpoint
>[Amazon SageMaker](https://aws.amazon.com/sagemaker/) is a service for building, training, and deploying machine learning (ML) models with fully managed infrastructure, tools, and workflows.
We use `SageMaker` to host our model and expose it as the `SageMaker Endpoint`.
## Installation and Setup
```bash
pip install boto3
```
For instructions on how to expose a model as a `SageMaker Endpoint`, please see [here](https://www.philschmid.de/custom-inference-huggingface-sagemaker).
**Note**: In order to handle batched requests, we need to adjust the return line in the `predict_fn()` function within the custom `inference.py` script:
Change from
```
return {"vectors": sentence_embeddings[0].tolist()}
```
to:
```
return {"vectors": sentence_embeddings.tolist()}
```
We have to set up the following required parameters of the `SagemakerEndpoint` call:
- `endpoint_name`: The name of the endpoint from the deployed Sagemaker model.
Must be unique within an AWS Region.
- `credentials_profile_name`: The name of the profile in the ~/.aws/credentials or ~/.aws/config files, which
has either access keys or role information specified.
If not specified, the default credential profile or, if on an EC2 instance,
credentials from IMDS will be used.
See [this guide](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html).
## LLM
See a [usage example](/docs/integrations/llms/sagemaker).
```python
from langchain import SagemakerEndpoint
from langchain.llms.sagemaker_endpoint import LLMContentHandler
```
## Text Embedding Models
See a [usage example](/docs/integrations/text_embedding/sagemaker-endpoint).
```python
from langchain.embeddings import SagemakerEndpointEmbeddings
from langchain.llms.sagemaker_endpoint import ContentHandlerBase
```

View File

@ -1,9 +0,0 @@
---
sidebar_position: 0
---
# Retrievers
import DocCardList from "@theme/DocCardList";
<DocCardList />

View File

@ -1,9 +0,0 @@
---
sidebar_position: 0
---
# Text embedding models
import DocCardList from "@theme/DocCardList";
<DocCardList />

View File

@ -1,12 +0,0 @@
---
sidebar_position: 0
---
# Agents & Toolkits
Agents and Toolkits are placed in the same directory because they are always used together.
import DocCardList from "@theme/DocCardList";
<DocCardList />

View File

@ -1,9 +0,0 @@
---
sidebar_position: 0
---
# Tools
import DocCardList from "@theme/DocCardList";
<DocCardList />

View File

@ -1,9 +0,0 @@
---
sidebar_position: 0
---
# Vector stores
import DocCardList from "@theme/DocCardList";
<DocCardList />