mirror of
https://github.com/hwchase17/langchain.git
synced 2026-04-19 20:04:11 +00:00
Compare commits
3 Commits
jacob/patc
...
harrison/s
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1732103e19 | ||
|
|
f5879e73cb | ||
|
|
5cba2a1ecc |
183
docs/modules/document_loaders/examples/sagemaker.ipynb
Normal file
183
docs/modules/document_loaders/examples/sagemaker.ipynb
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Defaulting to user installation because normal site-packages is not writeable\n",
|
||||||
|
"Collecting langchain\n",
|
||||||
|
" Downloading langchain-0.0.80-py3-none-any.whl (222 kB)\n",
|
||||||
|
"\u001b[K |████████████████████████████████| 222 kB 2.1 MB/s eta 0:00:01\n",
|
||||||
|
"\u001b[?25hRequirement already satisfied: numpy<2,>=1 in /Users/nmehta/Library/Python/3.9/lib/python/site-packages (from langchain) (1.24.1)\n",
|
||||||
|
"Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /Users/nmehta/Library/Python/3.9/lib/python/site-packages (from langchain) (3.8.3)\n",
|
||||||
|
"Collecting pydantic<2,>=1\n",
|
||||||
|
" Downloading pydantic-1.10.4-cp39-cp39-macosx_11_0_arm64.whl (2.6 MB)\n",
|
||||||
|
"\u001b[K |████████████████████████████████| 2.6 MB 3.3 MB/s eta 0:00:01\n",
|
||||||
|
"\u001b[?25hCollecting SQLAlchemy<2,>=1\n",
|
||||||
|
" Downloading SQLAlchemy-1.4.46.tar.gz (8.5 MB)\n",
|
||||||
|
"\u001b[K |████████████████████████████████| 8.5 MB 23.4 MB/s eta 0:00:01\n",
|
||||||
|
"\u001b[?25hCollecting tenacity<9.0.0,>=8.1.0\n",
|
||||||
|
" Downloading tenacity-8.2.0-py3-none-any.whl (24 kB)\n",
|
||||||
|
"Requirement already satisfied: requests<3,>=2 in /Users/nmehta/Library/Python/3.9/lib/python/site-packages (from langchain) (2.28.2)\n",
|
||||||
|
"Requirement already satisfied: PyYAML<7,>=6 in /Users/nmehta/Library/Python/3.9/lib/python/site-packages (from langchain) (6.0)\n",
|
||||||
|
"Collecting dataclasses-json<0.6.0,>=0.5.7\n",
|
||||||
|
" Downloading dataclasses_json-0.5.7-py3-none-any.whl (25 kB)\n",
|
||||||
|
"Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /Users/nmehta/Library/Python/3.9/lib/python/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (4.0.2)\n",
|
||||||
|
"Requirement already satisfied: multidict<7.0,>=4.5 in /Users/nmehta/Library/Python/3.9/lib/python/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.4)\n",
|
||||||
|
"Requirement already satisfied: attrs>=17.3.0 in /Users/nmehta/Library/Python/3.9/lib/python/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (22.2.0)\n",
|
||||||
|
"Requirement already satisfied: frozenlist>=1.1.1 in /Users/nmehta/Library/Python/3.9/lib/python/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.3)\n",
|
||||||
|
"Requirement already satisfied: yarl<2.0,>=1.0 in /Users/nmehta/Library/Python/3.9/lib/python/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.8.2)\n",
|
||||||
|
"Requirement already satisfied: aiosignal>=1.1.2 in /Users/nmehta/Library/Python/3.9/lib/python/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n",
|
||||||
|
"Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /Users/nmehta/Library/Python/3.9/lib/python/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (2.1.1)\n",
|
||||||
|
"Collecting marshmallow<4.0.0,>=3.3.0\n",
|
||||||
|
" Downloading marshmallow-3.19.0-py3-none-any.whl (49 kB)\n",
|
||||||
|
"\u001b[K |████████████████████████████████| 49 kB 26.9 MB/s eta 0:00:01\n",
|
||||||
|
"\u001b[?25hCollecting marshmallow-enum<2.0.0,>=1.5.1\n",
|
||||||
|
" Downloading marshmallow_enum-1.5.1-py2.py3-none-any.whl (4.2 kB)\n",
|
||||||
|
"Collecting typing-inspect>=0.4.0\n",
|
||||||
|
" Downloading typing_inspect-0.8.0-py3-none-any.whl (8.7 kB)\n",
|
||||||
|
"Requirement already satisfied: packaging>=17.0 in /Users/nmehta/Library/Python/3.9/lib/python/site-packages (from marshmallow<4.0.0,>=3.3.0->dataclasses-json<0.6.0,>=0.5.7->langchain) (23.0)\n",
|
||||||
|
"Requirement already satisfied: typing-extensions>=4.2.0 in /Users/nmehta/Library/Python/3.9/lib/python/site-packages (from pydantic<2,>=1->langchain) (4.4.0)\n",
|
||||||
|
"Requirement already satisfied: idna<4,>=2.5 in /Users/nmehta/Library/Python/3.9/lib/python/site-packages (from requests<3,>=2->langchain) (3.4)\n",
|
||||||
|
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/nmehta/Library/Python/3.9/lib/python/site-packages (from requests<3,>=2->langchain) (1.26.14)\n",
|
||||||
|
"Requirement already satisfied: certifi>=2017.4.17 in /Users/nmehta/Library/Python/3.9/lib/python/site-packages (from requests<3,>=2->langchain) (2022.12.7)\n",
|
||||||
|
"Collecting mypy-extensions>=0.3.0\n",
|
||||||
|
" Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n",
|
||||||
|
"Building wheels for collected packages: SQLAlchemy\n",
|
||||||
|
" Building wheel for SQLAlchemy (setup.py) ... \u001b[?25ldone\n",
|
||||||
|
"\u001b[?25h Created wheel for SQLAlchemy: filename=SQLAlchemy-1.4.46-cp39-cp39-macosx_10_9_universal2.whl size=1578667 sha256=9991d70fde083b993d7fe1fd61fca33a279e921f1b8296b02037e24b8cac1097\n",
|
||||||
|
" Stored in directory: /Users/nmehta/Library/Caches/pip/wheels/3c/99/65/57cf5a0ec6e7f3b803a68d31694501e168997e03e80adc903d\n",
|
||||||
|
"Successfully built SQLAlchemy\n",
|
||||||
|
"Installing collected packages: mypy-extensions, marshmallow, typing-inspect, marshmallow-enum, tenacity, SQLAlchemy, pydantic, dataclasses-json, langchain\n",
|
||||||
|
"\u001b[33m WARNING: The script langchain-server is installed in '/Users/nmehta/Library/Python/3.9/bin' which is not on PATH.\n",
|
||||||
|
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\n",
|
||||||
|
"Successfully installed SQLAlchemy-1.4.46 dataclasses-json-0.5.7 langchain-0.0.80 marshmallow-3.19.0 marshmallow-enum-1.5.1 mypy-extensions-1.0.0 pydantic-1.10.4 tenacity-8.2.0 typing-inspect-0.8.0\n",
|
||||||
|
"\u001b[33mWARNING: You are using pip version 21.2.4; however, version 23.0 is available.\n",
|
||||||
|
"You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n",
|
||||||
|
"Defaulting to user installation because normal site-packages is not writeable\n",
|
||||||
|
"Collecting html2text\n",
|
||||||
|
" Downloading html2text-2020.1.16-py3-none-any.whl (32 kB)\n",
|
||||||
|
"Installing collected packages: html2text\n",
|
||||||
|
"\u001b[33m WARNING: The script html2text is installed in '/Users/nmehta/Library/Python/3.9/bin' which is not on PATH.\n",
|
||||||
|
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\n",
|
||||||
|
"Successfully installed html2text-2020.1.16\n",
|
||||||
|
"\u001b[33mWARNING: You are using pip version 21.2.4; however, version 23.0 is available.\n",
|
||||||
|
"You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"!pip3 install langchain"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.docstore.document import Document"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"example_doc_1 = \"\"\"\n",
|
||||||
|
"Peter and Elizabeth took a taxi to attend the night party in the city. While in the party, Elizabeth collapsed and was rushed to the hospital.\n",
|
||||||
|
"Since she was diagnosed with a brain injury, the doctor told Peter to stay besides her until she gets well.\n",
|
||||||
|
"Therefore, Peter stayed with her at the hospital for 3 days without leaving.\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"docs = [\n",
|
||||||
|
" Document(\n",
|
||||||
|
" page_content=example_doc_1,\n",
|
||||||
|
" )\n",
|
||||||
|
"]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"{'output_text': '3 days'}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from langchain import PromptTemplate, HuggingFaceHub, LLMChain, SagemakerEndpoint\n",
|
||||||
|
"from langchain.chains.question_answering import load_qa_chain\n",
|
||||||
|
"import json\n",
|
||||||
|
"\n",
|
||||||
|
"query = \"\"\"How long was Elizabeth hospitalized?\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"prompt_template = \"\"\"Use the following pieces of context to answer the question at the end.\n",
|
||||||
|
"\n",
|
||||||
|
"{context}\n",
|
||||||
|
"\n",
|
||||||
|
"Question: {question}\n",
|
||||||
|
"Answer:\"\"\"\n",
|
||||||
|
"PROMPT = PromptTemplate(\n",
|
||||||
|
" template=prompt_template, input_variables=[\"context\", \"question\"]\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"def model_input_transform_fn(prompt, model_kwargs):\n",
|
||||||
|
" parameter_payload = {\"inputs\": prompt, \"parameters\": model_kwargs}\n",
|
||||||
|
" return json.dumps(parameter_payload).encode(\"utf-8\") \n",
|
||||||
|
"\n",
|
||||||
|
"chain = load_qa_chain(llm=SagemakerEndpoint(\n",
|
||||||
|
" endpoint_name=\"my-sagemaker-model-endpoint\", \n",
|
||||||
|
" credentials_profile_name=\"credentials-profile-name\", \n",
|
||||||
|
" region_name=\"us-west-2\", \n",
|
||||||
|
" model_kwargs={\"temperature\":1e-10},\n",
|
||||||
|
" content_type=\"application/json\", \n",
|
||||||
|
" model_input_transform_fn=model_input_transform_fn), \n",
|
||||||
|
" prompt=PROMPT) \n",
|
||||||
|
"\n",
|
||||||
|
"chain({\"input_documents\": docs, \"question\": query}, return_only_outputs=True)\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.9.6"
|
||||||
|
},
|
||||||
|
"vscode": {
|
||||||
|
"interpreter": {
|
||||||
|
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -22,7 +22,7 @@ from langchain.chains import (
|
|||||||
VectorDBQAWithSourcesChain,
|
VectorDBQAWithSourcesChain,
|
||||||
)
|
)
|
||||||
from langchain.docstore import InMemoryDocstore, Wikipedia
|
from langchain.docstore import InMemoryDocstore, Wikipedia
|
||||||
from langchain.llms import Anthropic, Cohere, HuggingFaceHub, OpenAI
|
from langchain.llms import Anthropic, Cohere, HuggingFaceHub, OpenAI, SagemakerEndpoint
|
||||||
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
|
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
|
||||||
from langchain.prompts import (
|
from langchain.prompts import (
|
||||||
BasePromptTemplate,
|
BasePromptTemplate,
|
||||||
@@ -60,6 +60,7 @@ __all__ = [
|
|||||||
"ReActChain",
|
"ReActChain",
|
||||||
"Wikipedia",
|
"Wikipedia",
|
||||||
"HuggingFaceHub",
|
"HuggingFaceHub",
|
||||||
|
"SagemakerEndpoint",
|
||||||
"HuggingFacePipeline",
|
"HuggingFacePipeline",
|
||||||
"SQLDatabase",
|
"SQLDatabase",
|
||||||
"SQLDatabaseChain",
|
"SQLDatabaseChain",
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ from langchain.llms.huggingface_hub import HuggingFaceHub
|
|||||||
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
|
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
|
||||||
from langchain.llms.nlpcloud import NLPCloud
|
from langchain.llms.nlpcloud import NLPCloud
|
||||||
from langchain.llms.openai import AzureOpenAI, OpenAI
|
from langchain.llms.openai import AzureOpenAI, OpenAI
|
||||||
|
from langchain.llms.sagemaker_endpoint import SagemakerEndpoint
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"Anthropic",
|
"Anthropic",
|
||||||
@@ -16,6 +17,7 @@ __all__ = [
|
|||||||
"NLPCloud",
|
"NLPCloud",
|
||||||
"OpenAI",
|
"OpenAI",
|
||||||
"HuggingFaceHub",
|
"HuggingFaceHub",
|
||||||
|
"SagemakerEndpoint",
|
||||||
"HuggingFacePipeline",
|
"HuggingFacePipeline",
|
||||||
"AI21",
|
"AI21",
|
||||||
"AzureOpenAI",
|
"AzureOpenAI",
|
||||||
@@ -26,6 +28,7 @@ type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
|
|||||||
"anthropic": Anthropic,
|
"anthropic": Anthropic,
|
||||||
"cohere": Cohere,
|
"cohere": Cohere,
|
||||||
"huggingface_hub": HuggingFaceHub,
|
"huggingface_hub": HuggingFaceHub,
|
||||||
|
"sagemaker_endpoint": SagemakerEndpoint,
|
||||||
"nlpcloud": NLPCloud,
|
"nlpcloud": NLPCloud,
|
||||||
"openai": OpenAI,
|
"openai": OpenAI,
|
||||||
"huggingface_pipeline": HuggingFacePipeline,
|
"huggingface_pipeline": HuggingFacePipeline,
|
||||||
|
|||||||
179
langchain/llms/sagemaker_endpoint.py
Normal file
179
langchain/llms/sagemaker_endpoint.py
Normal file
@@ -0,0 +1,179 @@
|
|||||||
|
"""Wrapper around Sagemaker InvokeEndpoint API."""
|
||||||
|
import json
|
||||||
|
from typing import Any, Callable, Dict, List, Mapping, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Extra, root_validator
|
||||||
|
|
||||||
|
from langchain.llms.base import LLM
|
||||||
|
from langchain.llms.utils import enforce_stop_tokens
|
||||||
|
|
||||||
|
|
||||||
|
class SagemakerEndpoint(LLM, BaseModel):
|
||||||
|
"""Wrapper around custom Sagemaker Inference Endpoints.
|
||||||
|
|
||||||
|
To use, you must supply the endpoint name from your deployed
|
||||||
|
Sagemaker model & the region where it is deployed.
|
||||||
|
|
||||||
|
To authenticate, the AWS client uses the following methods to
|
||||||
|
automatically load credentials:
|
||||||
|
https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html
|
||||||
|
|
||||||
|
If a specific credential profile should be used, you must pass
|
||||||
|
the name of the profile from the ~/.aws/credentials file that is to be used.
|
||||||
|
|
||||||
|
Make sure the credentials / roles used have the required policies to
|
||||||
|
access the Sagemaker endpoint.
|
||||||
|
See: https://docs.aws.amazon.com/IAM/latest/UserGuide/access_policies.html
|
||||||
|
"""
|
||||||
|
|
||||||
|
"""
|
||||||
|
Example:
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from langchain import SagemakerEndpoint
|
||||||
|
endpoint_name = (
|
||||||
|
"https://runtime.sagemaker.us-west-2.amazonaws.com/endpoints/abcdefghijklmnop/invocations"
|
||||||
|
)
|
||||||
|
region_name = (
|
||||||
|
"us-west-2"
|
||||||
|
)
|
||||||
|
credentials_profile_name = (
|
||||||
|
"default"
|
||||||
|
)
|
||||||
|
se = SagemakerEndpoint(
|
||||||
|
endpoint_name=endpoint_name,
|
||||||
|
region_name=region_name,
|
||||||
|
credentials_profile_name=credentials_profile_name
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
client: Any #: :meta private:
|
||||||
|
|
||||||
|
endpoint_name: str = ""
|
||||||
|
"""The name of the endpoint from the deployed Sagemaker model.
|
||||||
|
Must be unique within an AWS Region."""
|
||||||
|
|
||||||
|
region_name: str = ""
|
||||||
|
"""The aws region where the Sagemaker model is deployed, eg. `us-west-2`."""
|
||||||
|
|
||||||
|
credentials_profile_name: Optional[str] = None
|
||||||
|
"""The name of the profile in the ~/.aws/credentials or ~/.aws/config files, which
|
||||||
|
has either access keys or role information specified.
|
||||||
|
If not specified, the default credential profile or, if on an EC2 instance,
|
||||||
|
credentials from IMDS will be used.
|
||||||
|
See: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html
|
||||||
|
"""
|
||||||
|
|
||||||
|
content_type: Optional[str] = "application/json"
|
||||||
|
"""The MIME type of the input data in the request body to be used in the header
|
||||||
|
for the request to the Sagemaker invoke_endpoint API.
|
||||||
|
Defaults to "application/json"."""
|
||||||
|
|
||||||
|
model_input_transform_fn: Callable[[str, Dict], bytes]
|
||||||
|
"""
|
||||||
|
Function which takes the prompt (str) and model_kwargs (dict) and transforms
|
||||||
|
the input to the format which the model can accept as the request Body.
|
||||||
|
Should return bytes or seekable file-like object in the format specified in the
|
||||||
|
content_type request header.
|
||||||
|
"""
|
||||||
|
|
||||||
|
"""
|
||||||
|
Example:
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
def model_input_transform_fn(prompt, model_kwargs):
|
||||||
|
parameter_payload = {"inputs": prompt, "parameters": model_kwargs}
|
||||||
|
return json.dumps(parameter_payload).encode("utf-8")
|
||||||
|
"""
|
||||||
|
|
||||||
|
model_kwargs: Optional[Dict] = None
|
||||||
|
"""Key word arguments to pass to the model."""
|
||||||
|
|
||||||
|
class Config:
|
||||||
|
"""Configuration for this pydantic object."""
|
||||||
|
|
||||||
|
extra = Extra.forbid
|
||||||
|
|
||||||
|
@root_validator()
|
||||||
|
def validate_environment(cls, values: Dict) -> Dict:
|
||||||
|
"""Validate that AWS credentials to and python package exists in environment."""
|
||||||
|
try:
|
||||||
|
import boto3
|
||||||
|
|
||||||
|
try:
|
||||||
|
if values["credentials_profile_name"] is not None:
|
||||||
|
session = boto3.Session(
|
||||||
|
profile_name=values["credentials_profile_name"]
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# use default credentials
|
||||||
|
session = boto3.Session()
|
||||||
|
|
||||||
|
values["client"] = session.client(
|
||||||
|
"sagemaker-runtime", region_name=values["region_name"]
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
raise ValueError(
|
||||||
|
"Could not load credentials to authenticate with AWS client. "
|
||||||
|
"Please check that credentials in the specified "
|
||||||
|
"profile name are valid."
|
||||||
|
) from e
|
||||||
|
|
||||||
|
except ImportError:
|
||||||
|
raise ValueError(
|
||||||
|
"Could not import boto3 python package. "
|
||||||
|
"Please it install it with `pip install boto3`."
|
||||||
|
)
|
||||||
|
return values
|
||||||
|
|
||||||
|
@property
|
||||||
|
def _identifying_params(self) -> Mapping[str, Any]:
|
||||||
|
"""Get the identifying parameters."""
|
||||||
|
_model_kwargs = self.model_kwargs or {}
|
||||||
|
return {
|
||||||
|
**{"endpoint_name": self.endpoint_name},
|
||||||
|
**{"model_kwargs": _model_kwargs},
|
||||||
|
}
|
||||||
|
|
||||||
|
@property
|
||||||
|
def _llm_type(self) -> str:
|
||||||
|
"""Return type of llm."""
|
||||||
|
return "sagemaker_endpoint"
|
||||||
|
|
||||||
|
def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
|
||||||
|
"""Call out to Sagemaker inference endpoint.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prompt: The prompt to pass into the model.
|
||||||
|
stop: Optional list of stop words to use when generating.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The string generated by the model.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
response = se("Tell me a joke.")
|
||||||
|
"""
|
||||||
|
_model_kwargs = self.model_kwargs or {}
|
||||||
|
if self.model_input_transform_fn is None:
|
||||||
|
raise NotImplementedError("model_input_transform_fn not implemented")
|
||||||
|
|
||||||
|
# send request
|
||||||
|
try:
|
||||||
|
response = self.client.invoke_endpoint(
|
||||||
|
EndpointName=self.endpoint_name,
|
||||||
|
Body=self.model_input_transform_fn(prompt, _model_kwargs),
|
||||||
|
ContentType=self.content_type,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
raise ValueError(f"Error raised by inference endpoint: {e}")
|
||||||
|
|
||||||
|
response_json = json.loads(response["Body"].read().decode("utf-8"))
|
||||||
|
text = response_json[0]["generated_text"]
|
||||||
|
if stop is not None:
|
||||||
|
# This is a bit hacky, but I can't figure out a better way to enforce
|
||||||
|
# stop tokens when making calls to the sagemaker endpoint.
|
||||||
|
text = enforce_stop_tokens(text, stop)
|
||||||
|
|
||||||
|
return text
|
||||||
Reference in New Issue
Block a user