Compare commits

..

1 Commits

Author SHA1 Message Date
vowelparrot
109ee78cab Add Script to Proofread Documentation 2023-06-22 18:13:41 -07:00
24 changed files with 1079 additions and 1847 deletions

View File

@@ -1,231 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "e734b314",
"metadata": {},
"source": [
"# OpenAPI calls with OpenAI functions\n",
"\n",
"In this notebook we'll show how to create a chain that automatically makes calls to an API based only on an OpenAPI spec. Under the hood, we're parsing the OpenAPI spec into a JSON schema that the OpenAI functions API can handle. This allows ChatGPT to automatically select and populate the relevant API call to make for any user input. Using the output of ChatGPT we then make the actual API call, and return the result."
]
},
{
"cell_type": "markdown",
"id": "a95f510a",
"metadata": {},
"source": [
"## Query Klarna"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "08e19b64",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n"
]
}
],
"source": [
"from langchain.chains.openai_functions.openapi import get_openapi_chain\n",
"\n",
"chain = get_openapi_chain(\"https://www.klarna.com/us/shopping/public/openai/v0/api-docs/\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "3959f866",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'products': [{'name': \"Tommy Hilfiger Men's Short Sleeve Button-Down Shirt\",\n",
" 'url': 'https://www.klarna.com/us/shopping/pl/cl10001/3204878580/Clothing/Tommy-Hilfiger-Men-s-Short-Sleeve-Button-Down-Shirt/?utm_source=openai&ref-site=openai_plugin',\n",
" 'price': '$26.78',\n",
" 'attributes': ['Material:Linen,Cotton',\n",
" 'Target Group:Man',\n",
" 'Color:Gray,Pink,White,Blue,Beige,Black,Turquoise',\n",
" 'Size:S,XL,M,XXL']},\n",
" {'name': \"Van Heusen Men's Long Sleeve Button-Down Shirt\",\n",
" 'url': 'https://www.klarna.com/us/shopping/pl/cl10001/3201809514/Clothing/Van-Heusen-Men-s-Long-Sleeve-Button-Down-Shirt/?utm_source=openai&ref-site=openai_plugin',\n",
" 'price': '$18.89',\n",
" 'attributes': ['Material:Cotton',\n",
" 'Target Group:Man',\n",
" 'Color:Red,Gray,White,Blue',\n",
" 'Size:XL,XXL']},\n",
" {'name': 'Brixton Bowery Flannel Shirt',\n",
" 'url': 'https://www.klarna.com/us/shopping/pl/cl10001/3202331096/Clothing/Brixton-Bowery-Flannel-Shirt/?utm_source=openai&ref-site=openai_plugin',\n",
" 'price': '$34.48',\n",
" 'attributes': ['Material:Cotton',\n",
" 'Target Group:Man',\n",
" 'Color:Gray,Blue,Black,Orange',\n",
" 'Size:XL,3XL,4XL,5XL,L,M,XXL']},\n",
" {'name': 'Cubavera Four Pocket Guayabera Shirt',\n",
" 'url': 'https://www.klarna.com/us/shopping/pl/cl10001/3202055522/Clothing/Cubavera-Four-Pocket-Guayabera-Shirt/?utm_source=openai&ref-site=openai_plugin',\n",
" 'price': '$23.22',\n",
" 'attributes': ['Material:Polyester,Cotton',\n",
" 'Target Group:Man',\n",
" 'Color:Red,White,Blue,Black',\n",
" 'Size:S,XL,L,M,XXL']},\n",
" {'name': 'Theory Sylvain Shirt - Eclipse',\n",
" 'url': 'https://www.klarna.com/us/shopping/pl/cl10001/3202028254/Clothing/Theory-Sylvain-Shirt-Eclipse/?utm_source=openai&ref-site=openai_plugin',\n",
" 'price': '$86.01',\n",
" 'attributes': ['Material:Polyester,Cotton',\n",
" 'Target Group:Man',\n",
" 'Color:Blue',\n",
" 'Size:S,XL,XS,L,M,XXL']}]}"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.run(\"What are some options for a men's large blue button down shirt\")"
]
},
{
"cell_type": "markdown",
"id": "6f648c77",
"metadata": {},
"source": [
"## Query a translation service"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "bf6cd695",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n"
]
}
],
"source": [
"chain = get_openapi_chain(\"https://api.speak.com/openapi.yaml\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "1ba51609",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'explanation': '<translation language=\"None\" context=\"None\">\\nNone\\n</translation>\\n\\n<alternatives context=\"None\">\\n1. \"N/A\" *(Formal - used in professional settings to indicate that the answer is not applicable)*\\n2. \"I don\\'t have an answer for that\" *(Neutral - commonly used when one does not know the answer to a question)*\\n3. \"I\\'m not sure\" *(Neutral - similar to the above alternative, used when one is unsure of the answer)*\\n</alternatives>\\n\\n<example-convo language=\"None\">\\n<context>None</context>\\n* Tom: \"Do you know what time the concert starts?\"\\n* Sarah: \"I\\'m sorry, I don\\'t have an answer for that.\"\\n</example-convo>\\n\\n*[Report an issue or leave feedback](https://speak.com/chatgpt?rid=p8i6p14duafpctg4ve7tm48z})*',\n",
" 'extra_response_instructions': 'Use all information in the API response and fully render all Markdown.\\nAlways end your response with a link to report an issue or leave feedback on the plugin.'}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.run(\"How would you say no thanks in Russian\")"
]
},
{
"cell_type": "markdown",
"id": "4923a291",
"metadata": {},
"source": [
"## Query XKCD"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "a9198f62",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Attempting to load an OpenAPI 3.0.0 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n"
]
}
],
"source": [
"chain = get_openapi_chain(\"https://gist.githubusercontent.com/roaldnefs/053e505b2b7a807290908fe9aa3e1f00/raw/0a212622ebfef501163f91e23803552411ed00e4/openapi.yaml\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "3110c398",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised ServiceUnavailableError: The server is overloaded or not ready yet..\n"
]
},
{
"data": {
"text/plain": [
"{'month': '6',\n",
" 'num': 2793,\n",
" 'link': '',\n",
" 'year': '2023',\n",
" 'news': '',\n",
" 'safe_title': 'Garden Path Sentence',\n",
" 'transcript': '',\n",
" 'alt': 'Arboretum Owner Denied Standing in Garden Path Suit on Grounds Grounds Appealing Appealing',\n",
" 'img': 'https://imgs.xkcd.com/comics/garden_path_sentence.png',\n",
" 'title': 'Garden Path Sentence',\n",
" 'day': '23'}"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.run(\"What's today's comic?\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "venv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -1,141 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "9b721926",
"metadata": {},
"source": [
"# Open City Data"
]
},
{
"cell_type": "markdown",
"id": "35c00849",
"metadata": {},
"source": [
"[Socrata](https://dev.socrata.com/foundry/data.sfgov.org/vw6y-z8j6) provides an API for city open data. \n",
"\n",
"For a dataset such as [SF crime](https://data.sfgov.org/Public-Safety/Police-Department-Incident-Reports-Historical-2003/tmnf-yvry), to to the `API` tab on top right. \n",
"\n",
"That provides you with the `dataset identifier`.\n",
"\n",
"Use the dataset identifier to grab specific tables for a given city_id (`data.sfgov.org`) - \n",
"\n",
"E.g., `vw6y-z8j6` for [SF 311 data](https://dev.socrata.com/foundry/data.sfgov.org/vw6y-z8j6).\n",
"\n",
"E.g., `tmnf-yvry` for [SF Police data](https://dev.socrata.com/foundry/data.sfgov.org/tmnf-yvry)."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c93cc247",
"metadata": {},
"outputs": [],
"source": [
"! pip install sodapy"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "b3464a02",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import OpenCityDataLoader"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "478c5255",
"metadata": {},
"outputs": [],
"source": [
"dataset = \"vw6y-z8j6\" # 311 data\n",
"dataset = \"tmnf-yvry\" # crime data\n",
"loader = OpenCityDataLoader(city_id=\"data.sfgov.org\",\n",
" dataset_id=dataset,\n",
" limit=2000)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "fa914fc1",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Requests made without an app_token will be subject to strict throttling limits.\n"
]
}
],
"source": [
"docs = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "73a6def2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'pdid': '4133422003074',\n",
" 'incidntnum': '041334220',\n",
" 'incident_code': '03074',\n",
" 'category': 'ROBBERY',\n",
" 'descript': 'ROBBERY, BODILY FORCE',\n",
" 'dayofweek': 'Monday',\n",
" 'date': '2004-11-22T00:00:00.000',\n",
" 'time': '17:50',\n",
" 'pddistrict': 'INGLESIDE',\n",
" 'resolution': 'NONE',\n",
" 'address': 'GENEVA AV / SANTOS ST',\n",
" 'x': '-122.420084075249',\n",
" 'y': '37.7083109744362',\n",
" 'location': {'type': 'Point',\n",
" 'coordinates': [-122.420084075249, 37.7083109744362]},\n",
" ':@computed_region_26cr_cadq': '9',\n",
" ':@computed_region_rxqg_mtj9': '8',\n",
" ':@computed_region_bh8s_q3mv': '309'}"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"eval(docs[0].page_content)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -42,7 +42,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 6,
"id": "ac273ca1",
"metadata": {},
"outputs": [
@@ -116,7 +116,7 @@
"4 Braves 83.31 94"
]
},
"execution_count": 3,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -127,7 +127,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"id": "66e47a13",
"metadata": {},
"outputs": [],
@@ -137,7 +137,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 7,
"id": "2334caca",
"metadata": {},
"outputs": [],
@@ -147,7 +147,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 8,
"id": "d616c2b0",
"metadata": {},
"outputs": [
@@ -186,7 +186,7 @@
" Document(page_content='Astros', metadata={' \"Payroll (millions)\"': 60.65, ' \"Wins\"': 55})]"
]
},
"execution_count": 6,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -197,52 +197,11 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"id": "beb55c2f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"page_content='Nationals' metadata={' \"Payroll (millions)\"': 81.34, ' \"Wins\"': 98}\n",
"page_content='Reds' metadata={' \"Payroll (millions)\"': 82.2, ' \"Wins\"': 97}\n",
"page_content='Yankees' metadata={' \"Payroll (millions)\"': 197.96, ' \"Wins\"': 95}\n",
"page_content='Giants' metadata={' \"Payroll (millions)\"': 117.62, ' \"Wins\"': 94}\n",
"page_content='Braves' metadata={' \"Payroll (millions)\"': 83.31, ' \"Wins\"': 94}\n",
"page_content='Athletics' metadata={' \"Payroll (millions)\"': 55.37, ' \"Wins\"': 94}\n",
"page_content='Rangers' metadata={' \"Payroll (millions)\"': 120.51, ' \"Wins\"': 93}\n",
"page_content='Orioles' metadata={' \"Payroll (millions)\"': 81.43, ' \"Wins\"': 93}\n",
"page_content='Rays' metadata={' \"Payroll (millions)\"': 64.17, ' \"Wins\"': 90}\n",
"page_content='Angels' metadata={' \"Payroll (millions)\"': 154.49, ' \"Wins\"': 89}\n",
"page_content='Tigers' metadata={' \"Payroll (millions)\"': 132.3, ' \"Wins\"': 88}\n",
"page_content='Cardinals' metadata={' \"Payroll (millions)\"': 110.3, ' \"Wins\"': 88}\n",
"page_content='Dodgers' metadata={' \"Payroll (millions)\"': 95.14, ' \"Wins\"': 86}\n",
"page_content='White Sox' metadata={' \"Payroll (millions)\"': 96.92, ' \"Wins\"': 85}\n",
"page_content='Brewers' metadata={' \"Payroll (millions)\"': 97.65, ' \"Wins\"': 83}\n",
"page_content='Phillies' metadata={' \"Payroll (millions)\"': 174.54, ' \"Wins\"': 81}\n",
"page_content='Diamondbacks' metadata={' \"Payroll (millions)\"': 74.28, ' \"Wins\"': 81}\n",
"page_content='Pirates' metadata={' \"Payroll (millions)\"': 63.43, ' \"Wins\"': 79}\n",
"page_content='Padres' metadata={' \"Payroll (millions)\"': 55.24, ' \"Wins\"': 76}\n",
"page_content='Mariners' metadata={' \"Payroll (millions)\"': 81.97, ' \"Wins\"': 75}\n",
"page_content='Mets' metadata={' \"Payroll (millions)\"': 93.35, ' \"Wins\"': 74}\n",
"page_content='Blue Jays' metadata={' \"Payroll (millions)\"': 75.48, ' \"Wins\"': 73}\n",
"page_content='Royals' metadata={' \"Payroll (millions)\"': 60.91, ' \"Wins\"': 72}\n",
"page_content='Marlins' metadata={' \"Payroll (millions)\"': 118.07, ' \"Wins\"': 69}\n",
"page_content='Red Sox' metadata={' \"Payroll (millions)\"': 173.18, ' \"Wins\"': 69}\n",
"page_content='Indians' metadata={' \"Payroll (millions)\"': 78.43, ' \"Wins\"': 68}\n",
"page_content='Twins' metadata={' \"Payroll (millions)\"': 94.08, ' \"Wins\"': 66}\n",
"page_content='Rockies' metadata={' \"Payroll (millions)\"': 78.06, ' \"Wins\"': 64}\n",
"page_content='Cubs' metadata={' \"Payroll (millions)\"': 88.19, ' \"Wins\"': 61}\n",
"page_content='Astros' metadata={' \"Payroll (millions)\"': 60.65, ' \"Wins\"': 55}\n"
]
}
],
"source": [
"# Use lazy load for larger table, which won't read the full table into memory \n",
"for i in loader.lazy_load():\n",
" print(i)"
]
"outputs": [],
"source": []
}
],
"metadata": {
@@ -261,7 +220,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
"version": "3.10.6"
}
},
"nbformat": 4,

View File

@@ -621,44 +621,14 @@ class AgentExecutor(Chain):
"""Consists of an agent using tools."""
agent: Union[BaseSingleActionAgent, BaseMultiActionAgent]
"""The agent to run for creating a plan and determining actions
to take at each step of the execution loop."""
tools: Sequence[BaseTool]
"""The valid tools the agent can call."""
return_intermediate_steps: bool = False
"""Whether to return the agent's trajectory of intermediate steps
at the end in addition to the final output."""
max_iterations: Optional[int] = 15
"""The maximum number of steps to take before ending the execution
loop.
Setting to 'None' could lead to an infinite loop."""
max_execution_time: Optional[float] = None
"""The maximum amount of wall clock time to spend in the execution
loop.
"""
early_stopping_method: str = "force"
"""The method to use for early stopping if the agent never
returns `AgentFinish`. Either 'force' or 'generate'.
`"force"` returns a string saying that it stopped because it met a
time or iteration limit.
`"generate"` calls the agent's LLM Chain one final time to generate
a final answer based on the previous steps.
"""
handle_parsing_errors: Union[
bool, str, Callable[[OutputParserException], str]
] = False
"""How to handle errors raised by the agent's output parser.
Defaults to `False`, which raises the error.
s
If `true`, the error will be sent back to the LLM as an observation.
If a string, the string itself will be sent to the LLM as an observation.
If a callable function, the function will be called with the exception
as an argument, and the result of that function will be passed to the agent
as an observation.
"""
@classmethod
def from_agent_and_tools(

View File

@@ -16,8 +16,6 @@ def initialize_agent(
callback_manager: Optional[BaseCallbackManager] = None,
agent_path: Optional[str] = None,
agent_kwargs: Optional[dict] = None,
*,
tags: Optional[Sequence[str]] = None,
**kwargs: Any,
) -> AgentExecutor:
"""Load an agent executor given tools and LLM.
@@ -31,13 +29,11 @@ def initialize_agent(
not provided. Defaults to None.
agent_path: Path to serialized agent to use.
agent_kwargs: Additional key word arguments to pass to the underlying agent
tags: Tags to apply to the traced runs.
**kwargs: Additional key word arguments passed to the agent executor
Returns:
An agent executor
"""
tags_ = list(tags) if tags else []
if agent is None and agent_path is None:
agent = AgentType.ZERO_SHOT_REACT_DESCRIPTION
if agent is not None and agent_path is not None:
@@ -51,7 +47,6 @@ def initialize_agent(
f"Got unknown agent type: {agent}. "
f"Valid types are: {AGENT_TO_CLASS.keys()}."
)
tags_.append(agent.value)
agent_cls = AGENT_TO_CLASS[agent]
agent_kwargs = agent_kwargs or {}
agent_obj = agent_cls.from_llm_and_tools(
@@ -61,11 +56,6 @@ def initialize_agent(
agent_obj = load_agent(
agent_path, llm=llm, tools=tools, callback_manager=callback_manager
)
try:
# TODO: Add tags from the serialized object directly.
tags_.append(agent_obj._agent_type)
except NotImplementedError:
pass
else:
raise ValueError(
"Somehow both `agent` and `agent_path` are None, "
@@ -75,6 +65,5 @@ def initialize_agent(
agent=agent_obj,
tools=tools,
callback_manager=callback_manager,
tags=tags_,
**kwargs,
)

View File

@@ -106,7 +106,7 @@ def wandb_tracing_enabled(
@contextmanager
def tracing_v2_enabled(
project_name: Optional[str] = None,
session_name: Optional[str] = None,
*,
example_id: Optional[Union[str, UUID]] = None,
) -> Generator[None, None, None]:
@@ -120,7 +120,7 @@ def tracing_v2_enabled(
example_id = UUID(example_id)
cb = LangChainTracer(
example_id=example_id,
project_name=project_name,
session_name=session_name,
)
tracing_v2_callback_var.set(cb)
yield
@@ -131,12 +131,12 @@ def tracing_v2_enabled(
def trace_as_chain_group(
group_name: str,
*,
project_name: Optional[str] = None,
session_name: Optional[str] = None,
example_id: Optional[Union[str, UUID]] = None,
) -> Generator[CallbackManager, None, None]:
"""Get a callback manager for a chain group in a context manager."""
cb = LangChainTracer(
project_name=project_name,
session_name=session_name,
example_id=example_id,
)
cm = CallbackManager.configure(
@@ -152,12 +152,12 @@ def trace_as_chain_group(
async def atrace_as_chain_group(
group_name: str,
*,
project_name: Optional[str] = None,
session_name: Optional[str] = None,
example_id: Optional[Union[str, UUID]] = None,
) -> AsyncGenerator[AsyncCallbackManager, None]:
"""Get a callback manager for a chain group in a context manager."""
cb = LangChainTracer(
project_name=project_name,
session_name=session_name,
example_id=example_id,
)
cm = AsyncCallbackManager.configure(
@@ -1039,10 +1039,10 @@ def _configure(
tracing_v2_enabled_ = (
env_var_is_set("LANGCHAIN_TRACING_V2") or tracer_v2 is not None
)
tracer_project = os.environ.get(
"LANGCHAIN_PROJECT", os.environ.get("LANGCHAIN_SESSION", "default")
)
tracer_session = os.environ.get("LANGCHAIN_SESSION")
debug = _get_debug()
if tracer_session is None:
tracer_session = "default"
if (
verbose
or debug
@@ -1072,7 +1072,7 @@ def _configure(
callback_manager.add_handler(tracer, True)
else:
handler = LangChainTracerV1()
handler.load_session(tracer_project)
handler.load_session(tracer_session)
callback_manager.add_handler(handler, True)
if wandb_tracing_enabled_ and not any(
isinstance(handler, WandbTracer) for handler in callback_manager.handlers
@@ -1090,7 +1090,7 @@ def _configure(
callback_manager.add_handler(tracer_v2, True)
else:
try:
handler = LangChainTracer(project_name=tracer_project)
handler = LangChainTracer(session_name=tracer_session)
callback_manager.add_handler(handler, True)
except Exception as e:
logger.warning(

View File

@@ -45,7 +45,7 @@ class LangChainTracer(BaseTracer):
def __init__(
self,
example_id: Optional[Union[UUID, str]] = None,
project_name: Optional[str] = None,
session_name: Optional[str] = None,
client: Optional[LangChainPlusClient] = None,
**kwargs: Any,
) -> None:
@@ -55,9 +55,7 @@ class LangChainTracer(BaseTracer):
self.example_id = (
UUID(example_id) if isinstance(example_id, str) else example_id
)
self.project_name = project_name or os.getenv(
"LANGCHAIN_PROJECT", os.getenv("LANGCHAIN_SESSION", "default")
)
self.session_name = session_name or os.getenv("LANGCHAIN_SESSION", "default")
# set max_workers to 1 to process tasks in order
self.executor = ThreadPoolExecutor(max_workers=1)
self.client = client or LangChainPlusClient()
@@ -105,7 +103,7 @@ class LangChainTracer(BaseTracer):
extra["runtime"] = get_runtime_environment()
run_dict["extra"] = extra
try:
self.client.create_run(**run_dict, project_name=self.project_name)
self.client.create_run(**run_dict, session_name=self.session_name)
except Exception as e:
# Errors are swallowed by the thread executor so we need to log them here
log_error_once("post", e)

View File

@@ -1,242 +0,0 @@
import json
import re
from collections import defaultdict
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import requests
from openapi_schema_pydantic import Parameter
from requests import Response
from langchain import BasePromptTemplate, LLMChain
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.manager import CallbackManagerForChainRun
from langchain.chains.base import Chain
from langchain.chains.sequential import SequentialChain
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain.prompts import ChatPromptTemplate
from langchain.tools import APIOperation
from langchain.utilities.openapi import OpenAPISpec
def _get_description(o: Any, prefer_short: bool) -> Optional[str]:
summary = getattr(o, "summary", None)
description = getattr(o, "description", None)
if prefer_short:
return summary or description
return description or summary
def _format_url(url: str, path_params: dict) -> str:
expected_path_param = re.findall(r"{(.*?)}", url)
new_params = {}
for param in expected_path_param:
clean_param = param.lstrip(".;").rstrip("*")
val = path_params[clean_param]
if isinstance(val, list):
if param[0] == ".":
sep = "." if param[-1] == "*" else ","
new_val = "." + sep.join(val)
elif param[0] == ";":
sep = f"{clean_param}=" if param[-1] == "*" else ","
new_val = f"{clean_param}=" + sep.join(val)
else:
new_val = ",".join(val)
elif isinstance(val, dict):
kv_sep = "=" if param[-1] == "*" else ","
kv_strs = [kv_sep.join((k, v)) for k, v in val.items()]
if param[0] == ".":
sep = "."
new_val = "."
elif param[0] == ";":
sep = ";"
new_val = ";"
else:
sep = ","
new_val = ""
new_val += sep.join(kv_strs)
else:
if param[0] == ".":
new_val = f".{val}"
elif param[0] == ";":
new_val = f";{clean_param}={val}"
else:
new_val = val
new_params[param] = new_val
return url.format(**new_params)
def _openapi_params_to_json_schema(params: List[Parameter], spec: OpenAPISpec) -> dict:
properties = {}
required = []
for p in params:
if p.param_schema:
schema = spec.get_schema(p.param_schema)
else:
media_type_schema = list(p.content.values())[0].media_type_schema # type: ignore # noqa: E501
schema = spec.get_schema(media_type_schema)
if p.description and not schema.description:
schema.description = p.description
properties[p.name] = schema.dict(exclude_none=True)
if p.required:
required.append(p.name)
return {"type": "object", "properties": properties, "required": required}
def openapi_spec_to_openai_fn(
spec: OpenAPISpec,
) -> Tuple[List[Dict[str, Any]], Callable]:
"""Convert a valid OpenAPI spec to the JSON Schema format expected for OpenAI
functions.
Args:
spec: OpenAPI spec to convert.
Returns:
Tuple of the OpenAI functions JSON schema and a default function for executing
a request based on the OpenAI function schema.
"""
if not spec.paths:
return [], lambda: None
functions = []
_name_to_call_map = {}
for path in spec.paths:
path_params = {
(p.name, p.param_in): p for p in spec.get_parameters_for_path(path)
}
for method in spec.get_methods_for_path(path):
request_args = {}
op = spec.get_operation(path, method)
op_params = path_params.copy()
for param in spec.get_parameters_for_operation(op):
op_params[(param.name, param.param_in)] = param
params_by_type = defaultdict(list)
for name_loc, p in op_params.items():
params_by_type[name_loc[1]].append(p)
param_loc_to_arg_name = {
"query": "params",
"header": "headers",
"cookie": "cookies",
"path": "path_params",
}
for param_loc, arg_name in param_loc_to_arg_name.items():
if params_by_type[param_loc]:
request_args[arg_name] = _openapi_params_to_json_schema(
params_by_type[param_loc], spec
)
request_body = spec.get_request_body_for_operation(op)
# TODO: Support more MIME types.
if request_body and request_body.content:
media_types = []
for media_type in request_body.content.values():
if media_type.media_type_schema:
schema = spec.get_schema(media_type.media_type_schema)
media_types.append(schema.dict(exclude_none=True))
if len(media_types) == 1:
request_args["data"] = media_types[0]
elif len(media_types) > 1:
request_args["data"] = {"anyOf": media_types}
api_op = APIOperation.from_openapi_spec(spec, path, method)
fn = {
"name": api_op.operation_id,
"description": api_op.description,
"parameters": {
"type": "object",
"properties": request_args,
},
}
functions.append(fn)
_name_to_call_map[fn["name"]] = {
"method": method,
"url": api_op.base_url + api_op.path,
}
def default_call_api(name: str, fn_args: dict, **kwargs: Any) -> Any:
method = _name_to_call_map[name]["method"]
url = _name_to_call_map[name]["url"]
path_params = fn_args.pop("path_params", {})
_format_url(url, path_params)
if "data" in fn_args and isinstance(fn_args["data"], dict):
fn_args["data"] = json.dumps(fn_args["data"])
_kwargs = {**fn_args, **kwargs}
return requests.request(method, url, **_kwargs)
return functions, default_call_api
class SimpleRequestChain(Chain):
request_method: Callable
output_key: str = "response"
input_key: str = "function"
@property
def input_keys(self) -> List[str]:
return [self.input_key]
@property
def output_keys(self) -> List[str]:
return [self.output_key]
def _call(
self,
inputs: Dict[str, Any],
run_manager: Optional[CallbackManagerForChainRun] = None,
) -> Dict[str, Any]:
"""Run the logic of this chain and return the output."""
name = inputs["function"].pop("name")
args = inputs["function"].pop("arguments")
api_response: Response = self.request_method(name, args)
if api_response.status_code != 200:
response = (
f"{api_response.status_code}: {api_response.reason}"
+ f"\nFor {name} "
+ f"Called with args: {args['params']}"
)
else:
try:
response = api_response.json()
except Exception: # noqa: E722
response = api_response.text
return {self.output_key: response}
def get_openapi_chain(
spec: Union[OpenAPISpec, str],
llm: Optional[BaseLanguageModel] = None,
prompt: Optional[BasePromptTemplate] = None,
request_chain: Optional[Chain] = None,
) -> SequentialChain:
if isinstance(spec, str):
for conversion in (
OpenAPISpec.from_url,
OpenAPISpec.from_file,
OpenAPISpec.from_text,
):
try:
spec = conversion(spec) # type: ignore[arg-type]
break
except Exception: # noqa: E722
pass
if isinstance(spec, str):
raise ValueError(f"Unable to parse spec from source {spec}")
openai_fns, call_api_fn = openapi_spec_to_openai_fn(spec)
llm = llm or ChatOpenAI(
model="gpt-3.5-turbo-0613",
)
prompt = prompt or ChatPromptTemplate.from_template(
"Use the provided API's to respond to this user query:\n\n{query}"
)
llm_chain = LLMChain(
llm=llm,
prompt=prompt,
llm_kwargs={"functions": openai_fns},
output_parser=JsonOutputFunctionsParser(args_only=False),
output_key="function",
)
request_chain = request_chain or SimpleRequestChain(request_method=call_api_fn)
return SequentialChain(
chains=[llm_chain, request_chain],
input_variables=llm_chain.input_keys,
output_variables=["response"],
)

View File

@@ -237,18 +237,18 @@ async def _gather_with_concurrency(
return results
async def _tracer_initializer(project_name: Optional[str]) -> Optional[LangChainTracer]:
async def _tracer_initializer(session_name: Optional[str]) -> Optional[LangChainTracer]:
"""
Initialize a tracer to share across tasks.
Args:
project_name: The project name for the tracer.
session_name: The session name for the tracer.
Returns:
A LangChainTracer instance with an active project.
A LangChainTracer instance with an active session.
"""
if project_name:
tracer = LangChainTracer(project_name=project_name)
if session_name:
tracer = LangChainTracer(session_name=session_name)
return tracer
else:
return None
@@ -260,12 +260,12 @@ async def arun_on_examples(
*,
concurrency_level: int = 5,
num_repetitions: int = 1,
project_name: Optional[str] = None,
session_name: Optional[str] = None,
verbose: bool = False,
tags: Optional[List[str]] = None,
) -> Dict[str, Any]:
"""
Run the chain on examples and store traces to the specified project name.
Run the chain on examples and store traces to the specified session name.
Args:
examples: Examples to run the model or chain over
@@ -276,7 +276,7 @@ async def arun_on_examples(
num_repetitions: Number of times to run the model on each example.
This is useful when testing success rates or generating confidence
intervals.
project_name: Project name to use when tracing runs.
session_name: Session name to use when tracing runs.
verbose: Whether to print progress.
tags: Tags to add to the traces.
@@ -307,7 +307,7 @@ async def arun_on_examples(
await _gather_with_concurrency(
concurrency_level,
functools.partial(_tracer_initializer, project_name),
functools.partial(_tracer_initializer, session_name),
*(functools.partial(process_example, e) for e in examples),
)
return results
@@ -386,11 +386,11 @@ def run_on_examples(
llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY,
*,
num_repetitions: int = 1,
project_name: Optional[str] = None,
session_name: Optional[str] = None,
verbose: bool = False,
tags: Optional[List[str]] = None,
) -> Dict[str, Any]:
"""Run the chain on examples and store traces to the specified project name.
"""Run the chain on examples and store traces to the specified session name.
Args:
examples: Examples to run model or chain over.
@@ -401,14 +401,14 @@ def run_on_examples(
num_repetitions: Number of times to run the model on each example.
This is useful when testing success rates or generating confidence
intervals.
project_name: Project name to use when tracing runs.
session_name: Session name to use when tracing runs.
verbose: Whether to print progress.
tags: Tags to add to the run traces.
Returns:
A dictionary mapping example ids to the model outputs.
"""
results: Dict[str, Any] = {}
tracer = LangChainTracer(project_name=project_name) if project_name else None
tracer = LangChainTracer(session_name=session_name) if session_name else None
for i, example in enumerate(examples):
result = run_llm_or_chain(
example,
@@ -425,13 +425,13 @@ def run_on_examples(
return results
def _get_project_name(
project_name: Optional[str],
def _get_session_name(
session_name: Optional[str],
llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY,
dataset_name: str,
) -> str:
if project_name is not None:
return project_name
if session_name is not None:
return session_name
current_time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
if isinstance(llm_or_chain_factory, BaseLanguageModel):
model_name = llm_or_chain_factory.__class__.__name__
@@ -446,13 +446,13 @@ async def arun_on_dataset(
*,
concurrency_level: int = 5,
num_repetitions: int = 1,
project_name: Optional[str] = None,
session_name: Optional[str] = None,
verbose: bool = False,
client: Optional[LangChainPlusClient] = None,
tags: Optional[List[str]] = None,
) -> Dict[str, Any]:
"""
Run the chain on a dataset and store traces to the specified project name.
Run the chain on a dataset and store traces to the specified session name.
Args:
client: Client to use to read the dataset.
@@ -464,7 +464,7 @@ async def arun_on_dataset(
num_repetitions: Number of times to run the model on each example.
This is useful when testing success rates or generating confidence
intervals.
project_name: Name of the project to store the traces in.
session_name: Name of the session to store the traces in.
Defaults to {dataset_name}-{chain class name}-{datetime}.
verbose: Whether to print progress.
client: Client to use to read the dataset. If not provided, a new
@@ -472,10 +472,11 @@ async def arun_on_dataset(
tags: Tags to add to each run in the sesssion.
Returns:
A dictionary containing the run's project name and the resulting model outputs.
A dictionary containing the run's session name and the resulting model outputs.
"""
client_ = client or LangChainPlusClient()
project_name = _get_project_name(project_name, llm_or_chain_factory, dataset_name)
session_name = _get_session_name(session_name, llm_or_chain_factory, dataset_name)
client_.create_session(session_name, mode="eval")
dataset = client_.read_dataset(dataset_name=dataset_name)
examples = client_.list_examples(dataset_id=str(dataset.id))
@@ -484,12 +485,12 @@ async def arun_on_dataset(
llm_or_chain_factory,
concurrency_level=concurrency_level,
num_repetitions=num_repetitions,
project_name=project_name,
session_name=session_name,
verbose=verbose,
tags=tags,
)
return {
"project_name": project_name,
"session_name": session_name,
"results": results,
}
@@ -499,12 +500,12 @@ def run_on_dataset(
llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY,
*,
num_repetitions: int = 1,
project_name: Optional[str] = None,
session_name: Optional[str] = None,
verbose: bool = False,
client: Optional[LangChainPlusClient] = None,
tags: Optional[List[str]] = None,
) -> Dict[str, Any]:
"""Run the chain on a dataset and store traces to the specified project name.
"""Run the chain on a dataset and store traces to the specified session name.
Args:
dataset_name: Name of the dataset to run the chain on.
@@ -515,7 +516,7 @@ def run_on_dataset(
num_repetitions: Number of times to run the model on each example.
This is useful when testing success rates or generating confidence
intervals.
project_name: Name of the project to store the traces in.
session_name: Name of the session to store the traces in.
Defaults to {dataset_name}-{chain class name}-{datetime}.
verbose: Whether to print progress.
client: Client to use to access the dataset. If None, a new client
@@ -523,21 +524,22 @@ def run_on_dataset(
tags: Tags to add to each run in the sesssion.
Returns:
A dictionary containing the run's project name and the resulting model outputs.
A dictionary containing the run's session name and the resulting model outputs.
"""
client_ = client or LangChainPlusClient()
project_name = _get_project_name(project_name, llm_or_chain_factory, dataset_name)
session_name = _get_session_name(session_name, llm_or_chain_factory, dataset_name)
client_.create_session(session_name, mode="eval")
dataset = client_.read_dataset(dataset_name=dataset_name)
examples = client_.list_examples(dataset_id=str(dataset.id))
results = run_on_examples(
examples,
llm_or_chain_factory,
num_repetitions=num_repetitions,
project_name=project_name,
session_name=session_name,
verbose=verbose,
tags=tags,
)
return {
"project_name": project_name,
"session_name": session_name,
"results": results,
}

View File

@@ -75,7 +75,6 @@ from langchain.document_loaders.obsidian import ObsidianLoader
from langchain.document_loaders.odt import UnstructuredODTLoader
from langchain.document_loaders.onedrive import OneDriveLoader
from langchain.document_loaders.onedrive_file import OneDriveFileLoader
from langchain.document_loaders.open_city_data import OpenCityDataLoader
from langchain.document_loaders.pdf import (
MathpixPDFLoader,
OnlinePDFLoader,
@@ -210,7 +209,6 @@ __all__ = [
"OneDriveLoader",
"OnlinePDFLoader",
"OutlookMessageLoader",
"OpenCityDataLoader",
"PDFMinerLoader",
"PDFMinerPDFasHTMLLoader",
"PDFPlumberLoader",

View File

@@ -5,7 +5,7 @@ from langchain.document_loaders.base import BaseLoader
class AirtableLoader(BaseLoader):
"""Loader for Airtable tables."""
"""Loader that loads local airbyte json files."""
def __init__(self, api_token: str, table_id: str, base_id: str):
"""Initialize with API token and the IDs for table and base"""
@@ -14,7 +14,7 @@ class AirtableLoader(BaseLoader):
self.base_id = base_id
def lazy_load(self) -> Iterator[Document]:
"""Lazy load records from table."""
"""Load Table."""
from pyairtable import Table

View File

@@ -1,5 +1,5 @@
"""Load from Dataframe object"""
from typing import Any, Iterator, List
from typing import Any, List
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
@@ -19,15 +19,16 @@ class DataFrameLoader(BaseLoader):
self.data_frame = data_frame
self.page_content_column = page_content_column
def lazy_load(self) -> Iterator[Document]:
"""Lazy load records from dataframe."""
def load(self) -> List[Document]:
"""Load from the dataframe."""
result = []
# For very large dataframes, this needs to yield instead of building a list
# but that would require chaging return type to a generator for BaseLoader
# and all its subclasses, which is a bigger refactor. Marking as future TODO.
# This change will allow us to extend this to Spark and Dask dataframes.
for _, row in self.data_frame.iterrows():
text = row[self.page_content_column]
metadata = row.to_dict()
metadata.pop(self.page_content_column)
yield Document(page_content=text, metadata=metadata)
def load(self) -> List[Document]:
"""Load full dataframe."""
return list(self.lazy_load())
result.append(Document(page_content=text, metadata=metadata))
return result

View File

@@ -1,37 +0,0 @@
from typing import Iterator, List
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
class OpenCityDataLoader(BaseLoader):
"""Loader that loads Open city data."""
def __init__(self, city_id: str, dataset_id: str, limit: int):
"""Initialize with dataset_id"""
""" Example: https://dev.socrata.com/foundry/data.sfgov.org/vw6y-z8j6 """
""" e.g., city_id = data.sfgov.org """
""" e.g., dataset_id = vw6y-z8j6 """
self.city_id = city_id
self.dataset_id = dataset_id
self.limit = limit
def lazy_load(self) -> Iterator[Document]:
"""Lazy load records."""
from sodapy import Socrata
client = Socrata(self.city_id, None)
results = client.get(self.dataset_id, limit=self.limit)
for record in results:
yield Document(
page_content=str(record),
metadata={
"source": self.city_id + "_" + self.dataset_id,
},
)
def load(self) -> List[Document]:
"""Load records."""
return list(self.lazy_load())

File diff suppressed because it is too large Load Diff

View File

@@ -5,8 +5,6 @@ from langchain.schema import BaseLLMOutputParser, ChatGeneration, Generation
class OutputFunctionsParser(BaseLLMOutputParser[Any]):
args_only: bool = True
def parse_result(self, result: List[Generation]) -> Any:
generation = result[0]
if not isinstance(generation, ChatGeneration):
@@ -19,18 +17,13 @@ class OutputFunctionsParser(BaseLLMOutputParser[Any]):
except ValueError as exc:
raise ValueError(f"Could not parse function call: {exc}")
if self.args_only:
return func_call["arguments"]
return func_call
return func_call["arguments"]
class JsonOutputFunctionsParser(OutputFunctionsParser):
def parse_result(self, result: List[Generation]) -> Any:
func = super().parse_result(result)
if self.args_only:
return json.loads(func)
func["arguments"] = json.loads(func["arguments"])
return func
_args = super().parse_result(result)
return json.loads(_args)
class JsonKeyOutputFunctionsParser(JsonOutputFunctionsParser):

View File

@@ -82,7 +82,7 @@ def _get_filtered_args(
"""Get the arguments from a function's signature."""
schema = inferred_model.schema()["properties"]
valid_keys = signature(func).parameters
return {k: schema[k] for k in valid_keys if k not in ("run_manager", "callbacks")}
return {k: schema[k] for k in valid_keys if k != "run_manager"}
class _SchemaConfig:
@@ -108,8 +108,6 @@ def create_schema_from_function(
inferred_model = validated.model # type: ignore
if "run_manager" in inferred_model.__fields__:
del inferred_model.__fields__["run_manager"]
if "callbacks" in inferred_model.__fields__:
del inferred_model.__fields__["callbacks"]
# Pydantic adds placeholder virtual fields we need to strip
valid_properties = _get_filtered_args(inferred_model, func)
return _create_subset_model(

View File

@@ -59,7 +59,7 @@ SCHEMA_TYPE = Union[str, Type, tuple, None, Enum]
class APIPropertyBase(BaseModel):
"""Base model for an API property."""
# The name of the parameter is required and is case-sensitive.
# The name of the parameter is required and is case sensitive.
# If "in" is "path", the "name" field must correspond to a template expression
# within the path field in the Paths Object.
# If "in" is "header" and the "name" field is "Accept", "Content-Type",

View File

@@ -1,2 +1,269 @@
"""Utility functions for parsing an OpenAPI spec. Kept for backwards compat."""
from langchain.utilities.openapi import HTTPVerb, OpenAPISpec # noqa: F401
"""Utility functions for parsing an OpenAPI spec."""
import copy
import json
import logging
import re
from enum import Enum
from pathlib import Path
from typing import Dict, List, Optional, Union
import requests
import yaml
from openapi_schema_pydantic import (
Components,
OpenAPI,
Operation,
Parameter,
PathItem,
Paths,
Reference,
RequestBody,
Schema,
)
from pydantic import ValidationError
logger = logging.getLogger(__name__)
class HTTPVerb(str, Enum):
"""HTTP verbs."""
GET = "get"
PUT = "put"
POST = "post"
DELETE = "delete"
OPTIONS = "options"
HEAD = "head"
PATCH = "patch"
TRACE = "trace"
@classmethod
def from_str(cls, verb: str) -> "HTTPVerb":
"""Parse an HTTP verb."""
try:
return cls(verb)
except ValueError:
raise ValueError(f"Invalid HTTP verb. Valid values are {cls.__members__}")
class OpenAPISpec(OpenAPI):
"""OpenAPI Model that removes misformatted parts of the spec."""
@property
def _paths_strict(self) -> Paths:
if not self.paths:
raise ValueError("No paths found in spec")
return self.paths
def _get_path_strict(self, path: str) -> PathItem:
path_item = self._paths_strict.get(path)
if not path_item:
raise ValueError(f"No path found for {path}")
return path_item
@property
def _components_strict(self) -> Components:
"""Get components or err."""
if self.components is None:
raise ValueError("No components found in spec. ")
return self.components
@property
def _parameters_strict(self) -> Dict[str, Union[Parameter, Reference]]:
"""Get parameters or err."""
parameters = self._components_strict.parameters
if parameters is None:
raise ValueError("No parameters found in spec. ")
return parameters
@property
def _schemas_strict(self) -> Dict[str, Schema]:
"""Get the dictionary of schemas or err."""
schemas = self._components_strict.schemas
if schemas is None:
raise ValueError("No schemas found in spec. ")
return schemas
@property
def _request_bodies_strict(self) -> Dict[str, Union[RequestBody, Reference]]:
"""Get the request body or err."""
request_bodies = self._components_strict.requestBodies
if request_bodies is None:
raise ValueError("No request body found in spec. ")
return request_bodies
def _get_referenced_parameter(self, ref: Reference) -> Union[Parameter, Reference]:
"""Get a parameter (or nested reference) or err."""
ref_name = ref.ref.split("/")[-1]
parameters = self._parameters_strict
if ref_name not in parameters:
raise ValueError(f"No parameter found for {ref_name}")
return parameters[ref_name]
def _get_root_referenced_parameter(self, ref: Reference) -> Parameter:
"""Get the root reference or err."""
parameter = self._get_referenced_parameter(ref)
while isinstance(parameter, Reference):
parameter = self._get_referenced_parameter(parameter)
return parameter
def get_referenced_schema(self, ref: Reference) -> Schema:
"""Get a schema (or nested reference) or err."""
ref_name = ref.ref.split("/")[-1]
schemas = self._schemas_strict
if ref_name not in schemas:
raise ValueError(f"No schema found for {ref_name}")
return schemas[ref_name]
def _get_root_referenced_schema(self, ref: Reference) -> Schema:
"""Get the root reference or err."""
schema = self.get_referenced_schema(ref)
while isinstance(schema, Reference):
schema = self.get_referenced_schema(schema)
return schema
def _get_referenced_request_body(
self, ref: Reference
) -> Optional[Union[Reference, RequestBody]]:
"""Get a request body (or nested reference) or err."""
ref_name = ref.ref.split("/")[-1]
request_bodies = self._request_bodies_strict
if ref_name not in request_bodies:
raise ValueError(f"No request body found for {ref_name}")
return request_bodies[ref_name]
def _get_root_referenced_request_body(
self, ref: Reference
) -> Optional[RequestBody]:
"""Get the root request Body or err."""
request_body = self._get_referenced_request_body(ref)
while isinstance(request_body, Reference):
request_body = self._get_referenced_request_body(request_body)
return request_body
@staticmethod
def _alert_unsupported_spec(obj: dict) -> None:
"""Alert if the spec is not supported."""
warning_message = (
" This may result in degraded performance."
+ " Convert your OpenAPI spec to 3.1.* spec"
+ " for better support."
)
swagger_version = obj.get("swagger")
openapi_version = obj.get("openapi")
if isinstance(openapi_version, str):
if openapi_version != "3.1.0":
logger.warning(
f"Attempting to load an OpenAPI {openapi_version}"
f" spec. {warning_message}"
)
else:
pass
elif isinstance(swagger_version, str):
logger.warning(
f"Attempting to load a Swagger {swagger_version}"
f" spec. {warning_message}"
)
else:
raise ValueError(
"Attempting to load an unsupported spec:"
f"\n\n{obj}\n{warning_message}"
)
@classmethod
def parse_obj(cls, obj: dict) -> "OpenAPISpec":
try:
cls._alert_unsupported_spec(obj)
return super().parse_obj(obj)
except ValidationError as e:
# We are handling possibly misconfigured specs and want to do a best-effort
# job to get a reasonable interface out of it.
new_obj = copy.deepcopy(obj)
for error in e.errors():
keys = error["loc"]
item = new_obj
for key in keys[:-1]:
item = item[key]
item.pop(keys[-1], None)
return cls.parse_obj(new_obj)
@classmethod
def from_spec_dict(cls, spec_dict: dict) -> "OpenAPISpec":
"""Get an OpenAPI spec from a dict."""
return cls.parse_obj(spec_dict)
@classmethod
def from_text(cls, text: str) -> "OpenAPISpec":
"""Get an OpenAPI spec from a text."""
try:
spec_dict = json.loads(text)
except json.JSONDecodeError:
spec_dict = yaml.safe_load(text)
return cls.from_spec_dict(spec_dict)
@classmethod
def from_file(cls, path: Union[str, Path]) -> "OpenAPISpec":
"""Get an OpenAPI spec from a file path."""
path_ = path if isinstance(path, Path) else Path(path)
if not path_.exists():
raise FileNotFoundError(f"{path} does not exist")
with path_.open("r") as f:
return cls.from_text(f.read())
@classmethod
def from_url(cls, url: str) -> "OpenAPISpec":
"""Get an OpenAPI spec from a URL."""
response = requests.get(url)
return cls.from_text(response.text)
@property
def base_url(self) -> str:
"""Get the base url."""
return self.servers[0].url
def get_methods_for_path(self, path: str) -> List[str]:
"""Return a list of valid methods for the specified path."""
path_item = self._get_path_strict(path)
results = []
for method in HTTPVerb:
operation = getattr(path_item, method.value, None)
if isinstance(operation, Operation):
results.append(method.value)
return results
def get_operation(self, path: str, method: str) -> Operation:
"""Get the operation object for a given path and HTTP method."""
path_item = self._get_path_strict(path)
operation_obj = getattr(path_item, method, None)
if not isinstance(operation_obj, Operation):
raise ValueError(f"No {method} method found for {path}")
return operation_obj
def get_parameters_for_operation(self, operation: Operation) -> List[Parameter]:
"""Get the components for a given operation."""
parameters = []
if operation.parameters:
for parameter in operation.parameters:
if isinstance(parameter, Reference):
parameter = self._get_root_referenced_parameter(parameter)
parameters.append(parameter)
return parameters
def get_request_body_for_operation(
self, operation: Operation
) -> Optional[RequestBody]:
"""Get the request body for a given operation."""
request_body = operation.requestBody
if isinstance(request_body, Reference):
request_body = self._get_root_referenced_request_body(request_body)
return request_body
@staticmethod
def get_cleaned_operation_id(operation: Operation, path: str, method: str) -> str:
"""Get a cleaned operation id from an operation id."""
operation_id = operation.operationId
if operation_id is None:
# Replace all punctuation of any kind with underscore
path = re.sub(r"[^a-zA-Z0-9]", "_", path.lstrip("/"))
operation_id = f"{path}_{method}"
return operation_id.replace("-", "_").replace(".", "_").replace("/", "_")

View File

@@ -1,285 +0,0 @@
"""Utility functions for parsing an OpenAPI spec."""
import copy
import json
import logging
import re
from enum import Enum
from pathlib import Path
from typing import Dict, List, Optional, Union
import requests
import yaml
from openapi_schema_pydantic import (
Components,
OpenAPI,
Operation,
Parameter,
PathItem,
Paths,
Reference,
RequestBody,
Schema,
)
from pydantic import ValidationError
logger = logging.getLogger(__name__)
class HTTPVerb(str, Enum):
"""HTTP verbs."""
GET = "get"
PUT = "put"
POST = "post"
DELETE = "delete"
OPTIONS = "options"
HEAD = "head"
PATCH = "patch"
TRACE = "trace"
@classmethod
def from_str(cls, verb: str) -> "HTTPVerb":
"""Parse an HTTP verb."""
try:
return cls(verb)
except ValueError:
raise ValueError(f"Invalid HTTP verb. Valid values are {cls.__members__}")
class OpenAPISpec(OpenAPI):
"""OpenAPI Model that removes misformatted parts of the spec."""
@property
def _paths_strict(self) -> Paths:
if not self.paths:
raise ValueError("No paths found in spec")
return self.paths
def _get_path_strict(self, path: str) -> PathItem:
path_item = self._paths_strict.get(path)
if not path_item:
raise ValueError(f"No path found for {path}")
return path_item
@property
def _components_strict(self) -> Components:
"""Get components or err."""
if self.components is None:
raise ValueError("No components found in spec. ")
return self.components
@property
def _parameters_strict(self) -> Dict[str, Union[Parameter, Reference]]:
"""Get parameters or err."""
parameters = self._components_strict.parameters
if parameters is None:
raise ValueError("No parameters found in spec. ")
return parameters
@property
def _schemas_strict(self) -> Dict[str, Schema]:
"""Get the dictionary of schemas or err."""
schemas = self._components_strict.schemas
if schemas is None:
raise ValueError("No schemas found in spec. ")
return schemas
@property
def _request_bodies_strict(self) -> Dict[str, Union[RequestBody, Reference]]:
"""Get the request body or err."""
request_bodies = self._components_strict.requestBodies
if request_bodies is None:
raise ValueError("No request body found in spec. ")
return request_bodies
def _get_referenced_parameter(self, ref: Reference) -> Union[Parameter, Reference]:
"""Get a parameter (or nested reference) or err."""
ref_name = ref.ref.split("/")[-1]
parameters = self._parameters_strict
if ref_name not in parameters:
raise ValueError(f"No parameter found for {ref_name}")
return parameters[ref_name]
def _get_root_referenced_parameter(self, ref: Reference) -> Parameter:
"""Get the root reference or err."""
parameter = self._get_referenced_parameter(ref)
while isinstance(parameter, Reference):
parameter = self._get_referenced_parameter(parameter)
return parameter
def get_referenced_schema(self, ref: Reference) -> Schema:
"""Get a schema (or nested reference) or err."""
ref_name = ref.ref.split("/")[-1]
schemas = self._schemas_strict
if ref_name not in schemas:
raise ValueError(f"No schema found for {ref_name}")
return schemas[ref_name]
def get_schema(self, schema: Union[Reference, Schema]) -> Schema:
if isinstance(schema, Reference):
return self.get_referenced_schema(schema)
return schema
def _get_root_referenced_schema(self, ref: Reference) -> Schema:
"""Get the root reference or err."""
schema = self.get_referenced_schema(ref)
while isinstance(schema, Reference):
schema = self.get_referenced_schema(schema)
return schema
def _get_referenced_request_body(
self, ref: Reference
) -> Optional[Union[Reference, RequestBody]]:
"""Get a request body (or nested reference) or err."""
ref_name = ref.ref.split("/")[-1]
request_bodies = self._request_bodies_strict
if ref_name not in request_bodies:
raise ValueError(f"No request body found for {ref_name}")
return request_bodies[ref_name]
def _get_root_referenced_request_body(
self, ref: Reference
) -> Optional[RequestBody]:
"""Get the root request Body or err."""
request_body = self._get_referenced_request_body(ref)
while isinstance(request_body, Reference):
request_body = self._get_referenced_request_body(request_body)
return request_body
@staticmethod
def _alert_unsupported_spec(obj: dict) -> None:
"""Alert if the spec is not supported."""
warning_message = (
" This may result in degraded performance."
+ " Convert your OpenAPI spec to 3.1.* spec"
+ " for better support."
)
swagger_version = obj.get("swagger")
openapi_version = obj.get("openapi")
if isinstance(openapi_version, str):
if openapi_version != "3.1.0":
logger.warning(
f"Attempting to load an OpenAPI {openapi_version}"
f" spec. {warning_message}"
)
else:
pass
elif isinstance(swagger_version, str):
logger.warning(
f"Attempting to load a Swagger {swagger_version}"
f" spec. {warning_message}"
)
else:
raise ValueError(
"Attempting to load an unsupported spec:"
f"\n\n{obj}\n{warning_message}"
)
@classmethod
def parse_obj(cls, obj: dict) -> "OpenAPISpec":
try:
cls._alert_unsupported_spec(obj)
return super().parse_obj(obj)
except ValidationError as e:
# We are handling possibly misconfigured specs and want to do a best-effort
# job to get a reasonable interface out of it.
new_obj = copy.deepcopy(obj)
for error in e.errors():
keys = error["loc"]
item = new_obj
for key in keys[:-1]:
item = item[key]
item.pop(keys[-1], None)
return cls.parse_obj(new_obj)
@classmethod
def from_spec_dict(cls, spec_dict: dict) -> "OpenAPISpec":
"""Get an OpenAPI spec from a dict."""
return cls.parse_obj(spec_dict)
@classmethod
def from_text(cls, text: str) -> "OpenAPISpec":
"""Get an OpenAPI spec from a text."""
try:
spec_dict = json.loads(text)
except json.JSONDecodeError:
spec_dict = yaml.safe_load(text)
return cls.from_spec_dict(spec_dict)
@classmethod
def from_file(cls, path: Union[str, Path]) -> "OpenAPISpec":
"""Get an OpenAPI spec from a file path."""
path_ = path if isinstance(path, Path) else Path(path)
if not path_.exists():
raise FileNotFoundError(f"{path} does not exist")
with path_.open("r") as f:
return cls.from_text(f.read())
@classmethod
def from_url(cls, url: str) -> "OpenAPISpec":
"""Get an OpenAPI spec from a URL."""
response = requests.get(url)
return cls.from_text(response.text)
@property
def base_url(self) -> str:
"""Get the base url."""
return self.servers[0].url
def get_methods_for_path(self, path: str) -> List[str]:
"""Return a list of valid methods for the specified path."""
path_item = self._get_path_strict(path)
results = []
for method in HTTPVerb:
operation = getattr(path_item, method.value, None)
if isinstance(operation, Operation):
results.append(method.value)
return results
def get_parameters_for_path(self, path: str) -> List[Parameter]:
path_item = self._get_path_strict(path)
parameters = []
if not path_item.parameters:
return []
for parameter in path_item.parameters:
if isinstance(parameter, Reference):
parameter = self._get_root_referenced_parameter(parameter)
parameters.append(parameter)
return parameters
def get_operation(self, path: str, method: str) -> Operation:
"""Get the operation object for a given path and HTTP method."""
path_item = self._get_path_strict(path)
operation_obj = getattr(path_item, method, None)
if not isinstance(operation_obj, Operation):
raise ValueError(f"No {method} method found for {path}")
return operation_obj
def get_parameters_for_operation(self, operation: Operation) -> List[Parameter]:
"""Get the components for a given operation."""
parameters = []
if operation.parameters:
for parameter in operation.parameters:
if isinstance(parameter, Reference):
parameter = self._get_root_referenced_parameter(parameter)
parameters.append(parameter)
return parameters
def get_request_body_for_operation(
self, operation: Operation
) -> Optional[RequestBody]:
"""Get the request body for a given operation."""
request_body = operation.requestBody
if isinstance(request_body, Reference):
request_body = self._get_root_referenced_request_body(request_body)
return request_body
@staticmethod
def get_cleaned_operation_id(operation: Operation, path: str, method: str) -> str:
"""Get a cleaned operation id from an operation id."""
operation_id = operation.operationId
if operation_id is None:
# Replace all punctuation of any kind with underscore
path = re.sub(r"[^a-zA-Z0-9]", "_", path.lstrip("/"))
operation_id = f"{path}_{method}"
return operation_id.replace("-", "_").replace(".", "_").replace("/", "_")

10
poetry.lock generated
View File

@@ -4362,13 +4362,13 @@ tests = ["doctest", "pytest", "pytest-mock"]
[[package]]
name = "langchainplus-sdk"
version = "0.0.17"
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
version = "0.0.15"
description = "Client library to connect to the LangChainPlus LLM Tracing and Evaluation Platform."
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
{file = "langchainplus_sdk-0.0.17-py3-none-any.whl", hash = "sha256:899675fe850bb0829691ce7643d5c3b4425de1535b6f2d6ce1e5f5457ffb05bf"},
{file = "langchainplus_sdk-0.0.17.tar.gz", hash = "sha256:6520c864a23dcadbe6fb7233a117347f6acc32725a97758e59354704c50de303"},
{file = "langchainplus_sdk-0.0.15-py3-none-any.whl", hash = "sha256:e69bdbc8af6007ef2f774248d2483bbaf2d75712b1acc9ea50eda3b9f6dc567d"},
{file = "langchainplus_sdk-0.0.15.tar.gz", hash = "sha256:ce40e9e3b6d42741f0a2aa89f83a12f2648f38690a9dd57e5fe3a56f2f232908"},
]
[package.dependencies]
@@ -11771,4 +11771,4 @@ text-helpers = ["chardet"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0"
content-hash = "6e495e4f58127a5d2001385404b973896e275f5ca71a6ebe856cb114977189d1"
content-hash = "09d46ad12369c6a16513558618553623cd520c2855bff3b8fe8248e1b18cbb94"

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain"
version = "0.0.211"
version = "0.0.209"
description = "Building applications with LLMs through composability"
authors = []
license = "MIT"
@@ -106,7 +106,7 @@ pyspark = {version = "^3.4.0", optional = true}
clarifai = {version = "9.1.0", optional = true}
tigrisdb = {version = "^1.0.0b6", optional = true}
nebula3-python = {version = "^3.4.0", optional = true}
langchainplus-sdk = ">=0.0.17"
langchainplus-sdk = ">=0.0.13"
awadb = {version = "^0.3.3", optional = true}
azure-search-documents = {version = "11.4.0a20230509004", source = "azure-sdk-dev", optional = true}
openllm = {version = ">=0.1.6", optional = true}

60
scripts/clean_file.py Normal file
View File

@@ -0,0 +1,60 @@
import argparse
from typing import Optional, Sequence
from langchain.base_language import BaseLanguageModel
from langchain.chains.llm import LLMChain
from langchain.chat_models import ChatAnthropic, ChatOpenAI
def parse_args(src: Optional[Sequence[str]] = None) -> argparse.Namespace:
"""Parse arguments."""
parser = argparse.ArgumentParser(
description="Apply an LLM to a code file to proofread docstrings and edit grammar.",
)
parser.add_argument("file", type=str, help="File to proofread.")
parser.add_argument(
"--model",
type=str,
default="auto",
help="Model to use.",
choices=["anthropic", "openai", "auto"],
)
return parser.parse_args(src)
def select_model(text: str, model: str) -> BaseLanguageModel:
if model == "anthropic":
return ChatAnthropic(model="claude-v1-100k", temperature=0)
elif model == "openai":
return ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=0)
elif model == "auto":
import tiktoken
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo-16k")
num_tokens = len(encoding.encode(text))
if num_tokens > 15800:
return ChatAnthropic(model="claude-v1-100k", temperature=0)
else:
return ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=0)
else:
raise ValueError(f"Invalid model {model}")
def main(file: str, model: str) -> str:
"""Run the llm."""
with open(file, "r") as f:
text = f.read()
model_ = select_model(text, model)
template = """Please review the following code and improve the docstrings to make our documentation clean. Update the docstrings and descriptions as needed in the format of scikit-learn. Provide examples if necessary.
```
{code}
```"""
chain = LLMChain.from_string(llm=model_, template=template)
cleaned = chain(text, return_only_outputs=True)["text"]
return cleaned.strip().strip("`")
if __name__ == "__main__":
args = parse_args()
cleaned = main(args.file, args.model)
print(cleaned)

View File

@@ -176,7 +176,7 @@ async def test_arun_on_dataset(monkeypatch: pytest.MonkeyPatch) -> None:
{"result": f"Result for example {example.id}"} for _ in range(n_repetitions)
]
def mock_create_project(*args: Any, **kwargs: Any) -> None:
def mock_create_session(*args: Any, **kwargs: Any) -> None:
pass
with mock.patch.object(
@@ -186,7 +186,7 @@ async def test_arun_on_dataset(monkeypatch: pytest.MonkeyPatch) -> None:
), mock.patch(
"langchain.client.runner_utils._arun_llm_or_chain", new=mock_arun_chain
), mock.patch.object(
LangChainPlusClient, "create_project", new=mock_create_project
LangChainPlusClient, "create_session", new=mock_create_session
):
client = LangChainPlusClient(api_url="http://localhost:1984", api_key="123")
chain = mock.MagicMock()
@@ -195,7 +195,7 @@ async def test_arun_on_dataset(monkeypatch: pytest.MonkeyPatch) -> None:
dataset_name="test",
llm_or_chain_factory=lambda: chain,
concurrency_level=2,
project_name="test_project",
session_name="test_session",
num_repetitions=num_repetitions,
client=client,
)

View File

@@ -19,7 +19,6 @@ from langchain.tools.base import (
StructuredTool,
ToolException,
)
from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler
def test_unnamed_decorator() -> None:
@@ -394,64 +393,6 @@ def test_empty_args_decorator() -> None:
assert empty_tool_input.run({}) == "the empty result"
def test_tool_from_function_with_run_manager() -> None:
"""Test run of tool when using run_manager."""
def foo(bar: str, callbacks: Optional[CallbackManagerForToolRun] = None) -> str:
"""Docstring
Args:
bar: str
"""
assert callbacks is not None
return "foo" + bar
handler = FakeCallbackHandler()
tool = Tool.from_function(foo, name="foo", description="Docstring")
assert tool.run(tool_input={"bar": "bar"}, run_manager=[handler]) == "foobar"
assert tool.run("baz", run_manager=[handler]) == "foobaz"
def test_structured_tool_from_function_with_run_manager() -> None:
"""Test args and schema of structured tool when using callbacks."""
def foo(
bar: int, baz: str, callbacks: Optional[CallbackManagerForToolRun] = None
) -> str:
"""Docstring
Args:
bar: int
baz: str
"""
assert callbacks is not None
return str(bar) + baz
handler = FakeCallbackHandler()
structured_tool = StructuredTool.from_function(foo)
assert structured_tool.args == {
"bar": {"title": "Bar", "type": "integer"},
"baz": {"title": "Baz", "type": "string"},
}
assert structured_tool.args_schema.schema() == {
"properties": {
"bar": {"title": "Bar", "type": "integer"},
"baz": {"title": "Baz", "type": "string"},
},
"title": "fooSchemaSchema",
"type": "object",
"required": ["bar", "baz"],
}
assert (
structured_tool.run(
tool_input={"bar": "10", "baz": "baz"}, run_manger=[handler]
)
== "10baz"
)
def test_named_tool_decorator() -> None:
"""Test functionality when arguments are provided as input to decorator."""