Compare commits


1 Commit

Author SHA1 Message Date
Bagatur a990b63443 rfc 2023-11-22 19:25:15 -08:00
86 changed files with 3805 additions and 2699 deletions

View File

@@ -34,12 +34,12 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": null,
"id": "5740fc70-c513-4ff4-9d72-cfc098f85fef",
"metadata": {},
"outputs": [],
"source": [
"! pip install langchain docugami==0.0.8 dgml-utils==0.3.0 pydantic langchainhub chromadb hnswlib --upgrade --quiet"
"! pip install langchain docugami==0.0.4 dgml-utils==0.2.0 pydantic langchainhub chromadb --upgrade --quiet"
]
},
{
@@ -76,7 +76,98 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 45,
"id": "fc0767d4-9155-4591-855c-ef2e14e0e10f",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import tempfile\n",
"from pathlib import Path\n",
"from pprint import pprint\n",
"from time import sleep\n",
"from typing import Dict, List\n",
"\n",
"import requests\n",
"from docugami import Docugami\n",
"from docugami.types import Document as DocugamiDocument\n",
"\n",
"api_key = os.environ.get(\"DOCUGAMI_API_KEY\")\n",
"if not api_key:\n",
" raise Exception(\"Please set Docugami API key environment variable\")\n",
"\n",
"client = Docugami()\n",
"\n",
"\n",
"def upload_files(local_paths: List[str], docset_name: str) -> List[DocugamiDocument]:\n",
" docset_list_response = client.docsets.list(name=docset_name)\n",
" if docset_list_response and docset_list_response.docsets:\n",
" # Docset already exists with this name\n",
" docset_id = docset_list_response.docsets[0]\n",
" else:\n",
" dg_docset = client.docsets.create(name=docset_name)\n",
" docset_id = dg_docset.id\n",
"\n",
" document_list_response = client.documents.list(limit=int(1e5))\n",
" dg_docs: List[DocugamiDocument] = []\n",
" if document_list_response and document_list_response.documents:\n",
" new_names = [Path(f).name for f in local_paths]\n",
"\n",
" dg_docs = [\n",
" d\n",
" for d in document_list_response.documents\n",
" if Path(d.name).name in new_names\n",
" ]\n",
" existing_names = [Path(d.name).name for d in dg_docs]\n",
"\n",
" # Upload any files not previously uploaded\n",
" for f in local_paths:\n",
" if Path(f).name not in existing_names:\n",
" dg_docs.append(\n",
" client.documents.contents.upload(\n",
" file=Path(f).absolute(),\n",
" docset_id=docset_id,\n",
" )\n",
" )\n",
" return dg_docs\n",
"\n",
"\n",
"def wait_for_xml(dg_docs: List[DocugamiDocument]) -> dict[str, str]:\n",
" dgml_paths: dict[str, str] = {}\n",
" while len(dgml_paths) < len(dg_docs):\n",
" for doc in dg_docs:\n",
" doc = client.documents.retrieve(doc.id) # update with latest\n",
" current_status = doc.status\n",
" if current_status == \"Error\":\n",
" raise Exception(\n",
" \"Document could not be processed, please confirm it is not a zero length, corrupt or password protected file\"\n",
" )\n",
" elif current_status == \"Ready\":\n",
" dgml_url = doc.docset.url + f\"/documents/{doc.id}/dgml\"\n",
" headers = {\"Authorization\": f\"Bearer {api_key}\"}\n",
" dgml_response = requests.get(dgml_url, headers=headers)\n",
" if not dgml_response.ok:\n",
" raise Exception(\n",
" f\"Could not download DGML artifact {dgml_url}: {dgml_response.status_code}\"\n",
" )\n",
" dgml_contents = dgml_response.text\n",
" with tempfile.NamedTemporaryFile(delete=False, mode=\"w\") as temp_file:\n",
" temp_file.write(dgml_contents)\n",
" temp_file_path = temp_file.name\n",
" dgml_paths[doc.name] = temp_file_path\n",
"\n",
" print(f\"{len(dgml_paths)} docs done processing out of {len(dg_docs)}...\")\n",
"\n",
" if len(dgml_paths) == len(dg_docs):\n",
" # done\n",
" return dgml_paths\n",
" else:\n",
" sleep(30) # try again in a bit"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "ce0b2b21-7623-46e7-ae2c-3a9f67e8b9b9",
"metadata": {},
"outputs": [
@@ -84,22 +175,18 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'Report_CEN23LA277_192541.pdf': '/tmp/tmpa0c77x46',\n",
" 'Report_CEN23LA338_192753.pdf': '/tmp/tmpaftfld2w',\n",
" 'Report_CEN23LA363_192876.pdf': '/tmp/tmpn7gp6be2',\n",
" 'Report_CEN23LA394_192995.pdf': '/tmp/tmp9udymprf',\n",
" 'Report_ERA23LA114_106615.pdf': '/tmp/tmpxdjbh4r_',\n",
" 'Report_WPR23LA254_192532.pdf': '/tmp/tmpz6h75a0h'}\n"
"6 docs done processing out of 6...\n",
"{'Report_CEN23LA277_192541.pdf': '/var/folders/0h/6cchx4k528bdj8cfcsdm0dqr0000gn/T/tmpel3o0rpg',\n",
" 'Report_CEN23LA338_192753.pdf': '/var/folders/0h/6cchx4k528bdj8cfcsdm0dqr0000gn/T/tmpgugb9ut1',\n",
" 'Report_CEN23LA363_192876.pdf': '/var/folders/0h/6cchx4k528bdj8cfcsdm0dqr0000gn/T/tmp3_gf2sky',\n",
" 'Report_CEN23LA394_192995.pdf': '/var/folders/0h/6cchx4k528bdj8cfcsdm0dqr0000gn/T/tmpwmfgoxkl',\n",
" 'Report_ERA23LA114_106615.pdf': '/var/folders/0h/6cchx4k528bdj8cfcsdm0dqr0000gn/T/tmptibrz2yu',\n",
" 'Report_WPR23LA254_192532.pdf': '/var/folders/0h/6cchx4k528bdj8cfcsdm0dqr0000gn/T/tmpvazrbbsi'}\n"
]
}
],
"source": [
"from pprint import pprint\n",
"\n",
"from docugami import Docugami\n",
"from docugami.lib.upload import upload_to_named_docset, wait_for_dgml\n",
"\n",
"#### START DOCSET INFO (please change this values as needed)\n",
"#### START DOCSET INFO (please change)\n",
"DOCSET_NAME = \"NTSB Aviation Incident Reports\"\n",
"FILE_PATHS = [\n",
" \"/Users/tjaffri/ntsb/Report_CEN23LA277_192541.pdf\",\n",
@@ -110,15 +197,13 @@
" \"/Users/tjaffri/ntsb/Report_WPR23LA254_192532.pdf\",\n",
"]\n",
"\n",
"# Note: Please specify ~6 (or more!) similar files to process together as a document set\n",
"# This is currently a requirement for Docugami to automatically detect motifs\n",
"# across the document set to generate a semantic XML Knowledge Graph.\n",
"assert len(FILE_PATHS) > 5, \"Please provide at least 6 files\"\n",
"assert (\n",
" len(FILE_PATHS) > 5\n",
") # Please specify ~6 (or more!) similar files to process together as a document set\n",
"#### END DOCSET INFO\n",
"\n",
"dg_client = Docugami()\n",
"dg_docs = upload_to_named_docset(dg_client, FILE_PATHS, DOCSET_NAME)\n",
"dgml_paths = wait_for_dgml(dg_client, dg_docs)\n",
"dg_docs = upload_files(FILE_PATHS, DOCSET_NAME)\n",
"dgml_paths = wait_for_xml(dg_docs)\n",
"\n",
"pprint(dgml_paths)"
]
@@ -143,7 +228,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 47,
"id": "05fcdd57-090f-44bf-a1fb-2c3609c80e34",
"metadata": {},
"outputs": [
@@ -152,13 +237,13 @@
"output_type": "stream",
"text": [
"found 30 chunks, here are the first few\n",
"<AviationInvestigationFinalReport-section>Aviation </AviationInvestigationFinalReport-section>Investigation Final Report\n",
"<table><tbody><tr><td>Location: </td> <td><Location><TownName>Elbert</TownName>, <USState>Colorado </USState></Location></td> <td>Accident Number: </td> <td><AccidentNumber>CEN23LA277 </AccidentNumber></td></tr> <tr><td><LocationDateTime>Date &amp; Time: </LocationDateTime></td> <td><DateTime><EventDate>June 26, 2023</EventDate>, <EventTime>11:00 Local </EventTime></DateTime></td> <td><DateTimeAccidentNumber>Registration: </DateTimeAccidentNumber></td> <td><Registration>N23161 </Registration></td></tr> <tr><td><LocationAircraft>Aircraft: </LocationAircraft></td> <td><AircraftType>Piper <AircraftType>J3C-50 </AircraftType></AircraftType></td> <td><AircraftAccidentNumber>Aircraft Damage: </AircraftAccidentNumber></td> <td><AircraftDamage>Substantial </AircraftDamage></td></tr> <tr><td><LocationDefiningEvent>Defining Event: </LocationDefiningEvent></td> <td><DefiningEvent>Nose over/nose down </DefiningEvent></td> <td><DefiningEventAccidentNumber>Injuries: </DefiningEventAccidentNumber></td> <td><Injuries><Minor>1 </Minor>Minor </Injuries></td></tr> <tr><td><LocationFlightConductedUnder>Flight Conducted Under: </LocationFlightConductedUnder></td> <td><FlightConductedUnder><Part91-cell>Part <RegulationPart>91</RegulationPart>: General aviation - Personal </Part91-cell></FlightConductedUnder></td><td/><td><FlightConductedUnderCEN23LA277/></td></tr></tbody></table>\n",
"Aviation Investigation Final Report\n",
"<table><tbody><tr><td>Location: </td> <td><Location><TownName>Elbert</TownName>, <USState>Colorado </USState></Location></td> <td>Accident Number: </td> <td><AccidentNumber>CEN23LA277 </AccidentNumber></td></tr> <tr><td><LocationDateTime>Date &amp; Time: </LocationDateTime></td> <td><DateTime><EventDate>June 26, 2023</EventDate>, <EventTime>11:00 Local </EventTime></DateTime></td> <td><DateTimeAccidentNumber>Registration: </DateTimeAccidentNumber></td> <td><Registration>N23161 </Registration></td></tr> <tr><td><LocationAircraft>Aircraft: </LocationAircraft></td> <td><Aircraft>Piper <AircraftType>J3C-50 </AircraftType></Aircraft></td> <td><AircraftAccidentNumber>Aircraft Damage: </AircraftAccidentNumber></td> <td><AircraftDamage>Substantial </AircraftDamage></td></tr> <tr><td><LocationDefiningEvent>Defining Event: </LocationDefiningEvent></td> <td><DefiningEvent>Nose over/nose down </DefiningEvent></td> <td><DefiningEventAccidentNumber>Injuries: </DefiningEventAccidentNumber></td> <td><Injuries><Minor>1 </Minor>Minor </Injuries></td></tr> <tr><td><LocationFlightConductedUnder>Flight Conducted Under: </LocationFlightConductedUnder></td> <td><Part91-cell>Part <RegulationPart>91</RegulationPart>: General aviation - Personal </Part91-cell></td><td/><td><FlightConductedUnderCEN23LA277/></td></tr></tbody></table>\n",
"Analysis\n",
"<TakeoffAccident> <Analysis>The pilot reported that, as the tail lifted during takeoff, the airplane veered left. He attempted to correct with full right rudder and full brakes. However, the airplane subsequently nosed over resulting in substantial damage to the fuselage, lift struts, rudder, and vertical stabilizer. </Analysis></TakeoffAccident>\n",
"<TakeoffAccident> The pilot reported that, as the tail lifted during takeoff, the airplane veered left. He attempted to correct with full right rudder and full brakes. However, the airplane subsequently nosed over resulting in substantial damage to the fuselage, lift struts, rudder, and vertical stabilizer. </TakeoffAccident>\n",
"<AircraftCondition> The pilot reported that there were no preaccident mechanical malfunctions or anomalies with the airplane that would have precluded normal operation. </AircraftCondition>\n",
"<WindConditions> At about the time of the accident, wind was from <WindDirection>180</WindDirection>° at <WindConditions>5 </WindConditions>knots. The pilot decided to depart on runway <Runway>35 </Runway>due to the prevailing airport traffic. He stated that departing with “more favorable wind conditions” may have prevented the accident. </WindConditions>\n",
"<ProbableCauseAndFindings-section>Probable Cause and Findings </ProbableCauseAndFindings-section>\n",
"Probable Cause and Findings\n",
"<ProbableCause> The <ProbableCause>National Transportation Safety Board </ProbableCause>determines the probable cause(s) of this accident to be: </ProbableCause>\n",
"<AccidentCause> The pilot's loss of directional control during takeoff and subsequent excessive use of brakes which resulted in a nose-over. Contributing to the accident was his decision to takeoff downwind. </AccidentCause>\n",
"Page 1 of <PageNumber>5 </PageNumber>\n"
@@ -166,8 +251,6 @@
}
],
"source": [
"from pathlib import Path\n",
"\n",
"from dgml_utils.segmentation import get_chunks_str\n",
"\n",
"# Here we just read the first file, you can do the same for others\n",
@@ -200,7 +283,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 48,
"id": "8a4b49e0-de78-4790-a930-ad7cf324697a",
"metadata": {},
"outputs": [
@@ -260,7 +343,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 109,
"id": "7b697d30-1e94-47f0-87e8-f81d4b180da2",
"metadata": {},
"outputs": [
@@ -270,14 +353,12 @@
"39"
]
},
"execution_count": 6,
"execution_count": 109,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import requests\n",
"\n",
"# Download XML from known URL\n",
"dgml = requests.get(\n",
" \"https://raw.githubusercontent.com/docugami/dgml-utils/main/python/tests/test_data/article/Jane%20Doe.xml\"\n",
@@ -288,7 +369,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 98,
"id": "14714576-6e1d-499b-bcc8-39140bb2fd78",
"metadata": {},
"outputs": [
@@ -298,7 +379,7 @@
"{'h1': 9, 'div': 12, 'p': 3, 'lim h1': 9, 'lim': 1, 'table': 1, 'h1 div': 4}"
]
},
"execution_count": 7,
"execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
@@ -319,7 +400,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 99,
"id": "5462f29e-fd59-4e0e-9493-ea3b560e523e",
"metadata": {},
"outputs": [
@@ -352,7 +433,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 100,
"id": "2b4ece00-2e43-4254-adc9-66dbb79139a6",
"metadata": {},
"outputs": [
@@ -390,7 +471,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 101,
"id": "08350119-aa22-4ec1-8f65-b1316a0d4123",
"metadata": {},
"outputs": [
@@ -418,7 +499,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 112,
"id": "bcac8294-c54a-4b6e-af9d-3911a69620b2",
"metadata": {},
"outputs": [
@@ -465,7 +546,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 113,
"id": "8e275736-3408-4d7a-990e-4362c88e81f8",
"metadata": {},
"outputs": [],
@@ -496,7 +577,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 114,
"id": "1b12536a-1303-41ad-9948-4eb5a5f32614",
"metadata": {},
"outputs": [],
@@ -513,7 +594,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 115,
"id": "8d8b567c-b442-4bf0-b639-04bd89effc62",
"metadata": {},
"outputs": [],
@@ -538,7 +619,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 116,
"id": "346c3a02-8fea-4f75-a69e-fc9542b99dbc",
"metadata": {},
"outputs": [],
@@ -600,7 +681,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 117,
"id": "f2489de4-51e3-48b4-bbcd-ed9171deadf3",
"metadata": {},
"outputs": [],
@@ -644,17 +725,10 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 120,
"id": "636e992f-823b-496b-a082-8b4fcd479de5",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1\n"
]
},
{
"name": "stdout",
"output_type": "stream",
@@ -696,7 +770,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 121,
"id": "0e4a2f43-dd48-4ae3-8e27-7e87d169965f",
"metadata": {},
"outputs": [
@@ -706,7 +780,7 @@
"669"
]
},
"execution_count": 20,
"execution_count": 121,
"metadata": {},
"output_type": "execute_result"
}
@@ -721,7 +795,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 124,
"id": "56b78fb3-603d-4343-ae72-be54a3c5dd72",
"metadata": {},
"outputs": [
@@ -746,7 +820,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 125,
"id": "d3cc5ba9-8553-4eda-a5d1-b799751186af",
"metadata": {},
"outputs": [],
@@ -758,7 +832,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 126,
"id": "d7c73faf-74cb-400d-8059-b69e2493de38",
"metadata": {},
"outputs": [],
@@ -770,7 +844,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 127,
"id": "4c553722-be42-42ce-83b8-76a17f323f1c",
"metadata": {},
"outputs": [],
@@ -780,7 +854,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 128,
"id": "65dce40b-f1c3-494a-949e-69a9c9544ddb",
"metadata": {},
"outputs": [
@@ -790,7 +864,7 @@
"'The number of training tokens for LLaMA2 is 2.0T for all parameter sizes.'"
]
},
"execution_count": 25,
"execution_count": 128,
"metadata": {},
"output_type": "execute_result"
}
@@ -885,37 +959,14 @@
" </tr>\n",
" </tbody>\n",
"</table>\n",
"```"
"``"
]
},
{
"cell_type": "markdown",
"id": "867f8e11-384c-4aa1-8b3e-c59fb8d5fd7d",
"id": "0879349e-7298-4f2c-b246-f1142e97a8e5",
"metadata": {},
"source": [
"Finally, you can ask other questions that rely on more subtle parsing of the table, e.g.:"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "d38f1459-7d2b-40df-8dcd-e747f85eb144",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'The learning rate for LLaMA2 was 3.0 × 104 for the 7B and 13B models, and 1.5 × 104 for the 34B and 70B models.'"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"llama2_chain.invoke(\"What was the learning rate for LLaMA2?\")"
]
"source": []
}
],
"metadata": {

View File

@@ -1,118 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "6125a85e",
"metadata": {},
"source": [
"# Microsoft OneNote\n",
"\n",
"This notebook covers how to load documents from `OneNote`.\n",
"\n",
"## Prerequisites\n",
"1. Register an application with the [Microsoft identity platform](https://learn.microsoft.com/en-us/azure/active-directory/develop/quickstart-register-app) instructions.\n",
"2. When registration finishes, the Azure portal displays the app registration's Overview pane. You see the Application (client) ID. Also called the `client ID`, this value uniquely identifies your application in the Microsoft identity platform.\n",
"3. During the steps you will be following at **item 1**, you can set the redirect URI as `http://localhost:8000/callback`\n",
"4. During the steps you will be following at **item 1**, generate a new password (`client_secret`) under Application Secrets section.\n",
"5. Follow the instructions at this [document](https://learn.microsoft.com/en-us/azure/active-directory/develop/quickstart-configure-app-expose-web-apis#add-a-scope) to add the following `SCOPES` (`Notes.Read`) to your application.\n",
"6. You need to install the msal and bs4 packages using the commands `pip install msal` and `pip install beautifulsoup4`.\n",
"7. At the end of the steps you must have the following values: \n",
"- `CLIENT_ID`\n",
"- `CLIENT_SECRET`\n",
"\n",
"## 🧑 Instructions for ingesting your documents from OneNote\n",
"\n",
"### 🔑 Authentication\n",
"\n",
"By default, the `OneNoteLoader` expects that the values of `CLIENT_ID` and `CLIENT_SECRET` must be stored as environment variables named `MS_GRAPH_CLIENT_ID` and `MS_GRAPH_CLIENT_SECRET` respectively. You could pass those environment variables through a `.env` file at the root of your application or using the following command in your script.\n",
"\n",
"```python\n",
"os.environ['MS_GRAPH_CLIENT_ID'] = \"YOUR CLIENT ID\"\n",
"os.environ['MS_GRAPH_CLIENT_SECRET'] = \"YOUR CLIENT SECRET\"\n",
"```\n",
"\n",
"This loader uses an authentication called [*on behalf of a user*](https://learn.microsoft.com/en-us/graph/auth-v2-user?context=graph%2Fapi%2F1.0&view=graph-rest-1.0). It is a 2 step authentication with user consent. When you instantiate the loader, it will call will print a url that the user must visit to give consent to the app on the required permissions. The user must then visit this url and give consent to the application. Then the user must copy the resulting page url and paste it back on the console. The method will then return True if the login attempt was successful.\n",
"\n",
"\n",
"```python\n",
"from langchain.document_loaders.onenote import OneNoteLoader\n",
"\n",
"loader = OneNoteLoader(notebook_name=\"NOTEBOOK NAME\", section_name=\"SECTION NAME\", page_title=\"PAGE TITLE\")\n",
"```\n",
"\n",
"Once the authentication has been done, the loader will store a token (`onenote_graph_token.txt`) at `~/.credentials/` folder. This token could be used later to authenticate without the copy/paste steps explained earlier. To use this token for authentication, you need to change the `auth_with_token` parameter to True in the instantiation of the loader.\n",
"\n",
"```python\n",
"from langchain.document_loaders.onenote import OneNoteLoader\n",
"\n",
"loader = OneNoteLoader(notebook_name=\"NOTEBOOK NAME\", section_name=\"SECTION NAME\", page_title=\"PAGE TITLE\", auth_with_token=True)\n",
"```\n",
"\n",
"Alternatively, you can also pass the token directly to the loader. This is useful when you want to authenticate with a token that was generated by another application. For instance, you can use the [Microsoft Graph Explorer](https://developer.microsoft.com/en-us/graph/graph-explorer) to generate a token and then pass it to the loader.\n",
"\n",
"```python\n",
"from langchain.document_loaders.onenote import OneNoteLoader\n",
"\n",
"loader = OneNoteLoader(notebook_name=\"NOTEBOOK NAME\", section_name=\"SECTION NAME\", page_title=\"PAGE TITLE\", access_token=\"TOKEN\")\n",
"```\n",
"\n",
"### 🗂️ Documents loader\n",
"\n",
"#### 📑 Loading pages from a OneNote Notebook\n",
"\n",
"`OneNoteLoader` can load pages from OneNote notebooks stored in OneDrive. You can specify any combination of `notebook_name`, `section_name`, `page_title` to filter for pages under a specific notebook, under a specific section, or with a specific title respectively. For instance, you want to load all pages that are stored under a section called `Recipes` within any of your notebooks OneDrive.\n",
"\n",
"\n",
"```python\n",
"from langchain.document_loaders.onenote import OneNoteLoader\n",
"\n",
"loader = OneNoteLoader(section_name=\"Recipes\", auth_with_token=True)\n",
"documents = loader.load()\n",
"```\n",
"\n",
"#### 📑 Loading pages from a list of Page IDs\n",
"\n",
"Another possibility is to provide a list of `object_ids` for each page you want to load. For that, you will need to query the [Microsoft Graph API](https://developer.microsoft.com/en-us/graph/graph-explorer) to find all the documents ID that you are interested in. This [link](https://learn.microsoft.com/en-us/graph/onenote-get-content#page-collection) provides a list of endpoints that will be helpful to retrieve the documents ID.\n",
"\n",
"For instance, to retrieve information about all pages that are stored in your notebooks, you need make a request to: `https://graph.microsoft.com/v1.0/me/onenote/pages`. Once you have the list of IDs that you are interested in, then you can instantiate the loader with the following parameters.\n",
"\n",
"\n",
"```python\n",
"from langchain.document_loaders.onenote import OneNoteLoader\n",
"\n",
"loader = OneNoteLoader(object_ids=[\"ID_1\", \"ID_2\"], auth_with_token=True)\n",
"documents = loader.load()\n",
"```\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bb36fe41",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -550,7 +550,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"In the first example, supply the path to the specified `json.gbnf` file in order to produce JSON:"
"In the first example, supply the path to the specifed `json.gbnf` file in order to produce JSON:"
]
},
{

View File

@@ -912,7 +912,7 @@
"source": [
"## `Cassandra` caches\n",
"\n",
"You can use Cassandra / Astra DB through CQL for caching LLM responses, choosing from the exact-match `CassandraCache` or the (vector-similarity-based) `CassandraSemanticCache`.\n",
"You can use Cassandra / Astra DB for caching LLM responses, choosing from the exact-match `CassandraCache` or the (vector-similarity-based) `CassandraSemanticCache`.\n",
"\n",
"Let's see both in action in the following cells."
]
@@ -924,7 +924,7 @@
"source": [
"#### Connect to the DB\n",
"\n",
"First you need to establish a `Session` to the DB and to specify a _keyspace_ for the cache table(s). The following gets you connected to Astra DB through CQL (see e.g. [here](https://cassio.org/start_here/#vector-database) for more backends and connection options)."
"First you need to establish a `Session` to the DB and to specify a _keyspace_ for the cache table(s). The following gets you started with an Astra DB instance (see e.g. [here](https://cassio.org/start_here/#vector-database) for more backends and connection options)."
]
},
{
@@ -1132,214 +1132,6 @@
"print(llm(\"How come we always see one face of the moon?\"))"
]
},
{
"cell_type": "markdown",
"id": "8712f8fc-bb89-4164-beb9-c672778bbd91",
"metadata": {},
"source": [
"## `Astra DB` Caches"
]
},
{
"cell_type": "markdown",
"id": "173041d9-e4af-4f68-8461-d302bfc7e1bd",
"metadata": {},
"source": [
"You can easily use [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) as an LLM cache, with either the \"exact\" or the \"semantic-based\" cache.\n",
"\n",
"Make sure you have a running database (it must be a Vector-enabled database to use the Semantic cache) and get the required credentials on your Astra dashboard:\n",
"\n",
"- the API Endpoint looks like `https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com`\n",
"- the Token looks like `AstraCS:6gBhNmsk135....`"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "feb510b6-99a3-4228-8e11-563051f8178e",
"metadata": {},
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
"ASTRA_DB_API_ENDPOINT = https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com\n",
"ASTRA_DB_APPLICATION_TOKEN = ········\n"
]
}
],
"source": [
"import getpass\n",
"\n",
"ASTRA_DB_API_ENDPOINT = input(\"ASTRA_DB_API_ENDPOINT = \")\n",
"ASTRA_DB_APPLICATION_TOKEN = getpass.getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")"
]
},
{
"cell_type": "markdown",
"id": "ee6d587f-4b7c-43f4-9e90-5129c842a143",
"metadata": {},
"source": [
"### Astra DB exact LLM cache\n",
"\n",
"This will avoid invoking the LLM when the supplied prompt is _exactly_ the same as one encountered already:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "ad63c146-ee41-4896-90ee-29fcc39f0ed5",
"metadata": {},
"outputs": [],
"source": [
"from langchain.cache import AstraDBCache\n",
"from langchain.globals import set_llm_cache\n",
"\n",
"set_llm_cache(\n",
" AstraDBCache(\n",
" api_endpoint=ASTRA_DB_API_ENDPOINT,\n",
" token=ASTRA_DB_APPLICATION_TOKEN,\n",
" )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "83e0fb02-e8eb-4483-9eb1-55b5e14c4487",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"There is no definitive answer to this question as it depends on the interpretation of the terms \"true fakery\" and \"fake truth\". However, one possible interpretation is that a true fakery is a counterfeit or imitation that is intended to deceive, whereas a fake truth is a false statement that is presented as if it were true.\n",
"CPU times: user 70.8 ms, sys: 4.13 ms, total: 74.9 ms\n",
"Wall time: 2.06 s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"print(llm(\"Is a true fakery the same as a fake truth?\"))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "4d20d498-fe28-4e26-8531-2b31c52ee687",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"There is no definitive answer to this question as it depends on the interpretation of the terms \"true fakery\" and \"fake truth\". However, one possible interpretation is that a true fakery is a counterfeit or imitation that is intended to deceive, whereas a fake truth is a false statement that is presented as if it were true.\n",
"CPU times: user 15.1 ms, sys: 3.7 ms, total: 18.8 ms\n",
"Wall time: 531 ms\n"
]
}
],
"source": [
"%%time\n",
"\n",
"print(llm(\"Is a true fakery the same as a fake truth?\"))"
]
},
{
"cell_type": "markdown",
"id": "524b94fa-6162-4880-884d-d008749d14e2",
"metadata": {},
"source": [
"### Astra DB Semantic cache\n",
"\n",
"This cache will do a semantic similarity search and return a hit if it finds a cached entry that is similar enough, For this, you need to provide an `Embeddings` instance of your choice."
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "dc329c55-1cc4-4b74-94f9-61f8990fb214",
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings import OpenAIEmbeddings\n",
"\n",
"embedding = OpenAIEmbeddings()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "83952a90-ab14-4e59-87c0-d2bdc1d43e43",
"metadata": {},
"outputs": [],
"source": [
"from langchain.cache import AstraDBSemanticCache\n",
"\n",
"set_llm_cache(\n",
" AstraDBSemanticCache(\n",
" api_endpoint=ASTRA_DB_API_ENDPOINT,\n",
" token=ASTRA_DB_APPLICATION_TOKEN,\n",
" embedding=embedding,\n",
" collection_name=\"demo_semantic_cache\",\n",
" )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "d74b249a-94d5-42d0-af74-f7565a994dea",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"There is no definitive answer to this question since it presupposes a great deal about the nature of truth itself, which is a matter of considerable philosophical debate. It is possible, however, to construct scenarios in which something could be considered true despite being false, such as if someone sincerely believes something to be true even though it is not.\n",
"CPU times: user 65.6 ms, sys: 15.3 ms, total: 80.9 ms\n",
"Wall time: 2.72 s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"print(llm(\"Are there truths that are false?\"))"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "11973d73-d2f4-46bd-b229-1c589df9b788",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"There is no definitive answer to this question since it presupposes a great deal about the nature of truth itself, which is a matter of considerable philosophical debate. It is possible, however, to construct scenarios in which something could be considered true despite being false, such as if someone sincerely believes something to be true even though it is not.\n",
"CPU times: user 29.3 ms, sys: 6.21 ms, total: 35.5 ms\n",
"Wall time: 1.03 s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"print(llm(\"Is is possible that something false can be also true?\"))"
]
},
{
"cell_type": "markdown",
"id": "0c69d84d",

View File

@@ -1,147 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "90cd3ded",
"metadata": {},
"source": [
"# Astra DB \n",
"\n",
"> DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Cassandra and made conveniently available through an easy-to-use JSON API.\n",
"\n",
"This notebook goes over how to use Astra DB to store chat message history."
]
},
{
"cell_type": "markdown",
"id": "f507f58b-bf22-4a48-8daf-68d869bcd1ba",
"metadata": {},
"source": [
"## Setting up\n",
"\n",
"To run this notebook you need a running Astra DB. Get the connection secrets on your Astra dashboard:\n",
"\n",
"- the API Endpoint looks like `https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com`;\n",
"- the Token looks like `AstraCS:6gBhNmsk135...`."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d7092199",
"metadata": {},
"outputs": [],
"source": [
"!pip install --quiet \"astrapy>=0.6.2\""
]
},
{
"cell_type": "markdown",
"id": "e3d97b65",
"metadata": {},
"source": [
"### Set up the database connection parameters and secrets"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "163d97f0",
"metadata": {},
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
"ASTRA_DB_API_ENDPOINT = https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com\n",
"ASTRA_DB_APPLICATION_TOKEN = ········\n"
]
}
],
"source": [
"import getpass\n",
"\n",
"ASTRA_DB_API_ENDPOINT = input(\"ASTRA_DB_API_ENDPOINT = \")\n",
"ASTRA_DB_APPLICATION_TOKEN = getpass.getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")"
]
},
{
"cell_type": "markdown",
"id": "55860b2d",
"metadata": {},
"source": [
"Depending on whether local or cloud-based Astra DB, create the corresponding database connection \"Session\" object."
]
},
{
"cell_type": "markdown",
"id": "36c163e8",
"metadata": {},
"source": [
"## Example"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "d15e3302",
"metadata": {},
"outputs": [],
"source": [
"from langchain.memory import AstraDBChatMessageHistory\n",
"\n",
"message_history = AstraDBChatMessageHistory(\n",
" session_id=\"test-session\",\n",
" api_endpoint=ASTRA_DB_API_ENDPOINT,\n",
" token=ASTRA_DB_APPLICATION_TOKEN,\n",
")\n",
"\n",
"message_history.add_user_message(\"hi!\")\n",
"\n",
"message_history.add_ai_message(\"whats up?\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "64fc465e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[HumanMessage(content='hi!'), AIMessage(content='whats up?')]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"message_history.messages"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -29,47 +29,6 @@ vector_store = AstraDB(
Learn more in the [example notebook](/docs/integrations/vectorstores/astradb).
### LLM Cache
```python
from langchain.globals import set_llm_cache
from langchain.cache import AstraDBCache
set_llm_cache(AstraDBCache(
api_endpoint="...",
token="...",
))
```
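Once the cache is set, a repeated identical prompt is served from Astra DB instead of invoking the model again. A minimal sketch, assuming an `OpenAI` LLM is already configured:
```python
from langchain.llms import OpenAI

llm = OpenAI()
llm("Why is the sky blue?")  # first call invokes the model and populates the cache
llm("Why is the sky blue?")  # identical prompt is answered from the cache
```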
Learn more in the [example notebook](/docs/integrations/llms/llm_caching) (scroll to the Astra DB section).
### Semantic LLM Cache
```python
from langchain.globals import set_llm_cache
from langchain.cache import AstraDBSemanticCache
set_llm_cache(AstraDBSemanticCache(
embedding=my_embedding,
api_endpoint="...",
token="...",
))
```
Learn more in the [example notebook](/docs/integrations/llms/llm_caching) (scroll to the appropriate section).
### Chat message history
```python
from langchain.memory import AstraDBChatMessageHistory
message_history = AstraDBChatMessageHistory(
session_id="test-session"
api_endpoint="...",
token="...",
)
```
Learn more in the [example notebook](/docs/integrations/memory/astradb_chat_message_history).
## Apache Cassandra and Astra DB through CQL

View File

@@ -1,182 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Outline\n",
"\n",
">[Outline](https://www.getoutline.com/) is an open-source collaborative knowledge base platform designed for team information sharing.\n",
"\n",
"This notebook shows how to retrieve documents from your Outline instance into the Document format that is used downstream."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You first need to [create an api key](https://www.getoutline.com/developers#section/Authentication) for your Outline instance. Then you need to set the following environment variables:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"OUTLINE_API_KEY\"] = \"xxx\"\n",
"os.environ[\"OUTLINE_INSTANCE_URL\"] = \"https://app.getoutline.com\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`OutlineRetriever` has these arguments:\n",
"- optional `top_k_results`: default=3. Use it to limit number of documents retrieved.\n",
"- optional `load_all_available_meta`: default=False. By default only the most important fields retrieved: `title`, `source` (the url of the document). If True, other fields also retrieved.\n",
"- optional `doc_content_chars_max` default=4000. Use it to limit the number of characters for each document retrieved.\n",
"\n",
"`get_relevant_documents()` has one argument, `query`: free text which used to find documents in your Outline instance."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Examples"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Running retriever"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from langchain.retrievers import OutlineRetriever"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"retriever = OutlineRetriever()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='This walkthrough demonstrates how to use an agent optimized for conversation. Other agents are often optimized for using tools to figure out the best response, which is not ideal in a conversational setting where you may want the agent to be able to chat with the user as well.\\n\\nIf we compare it to the standard ReAct agent, the main difference is the prompt. We want it to be much more conversational.\\n\\nfrom langchain.agents import AgentType, Tool, initialize_agent\\n\\nfrom langchain.llms import OpenAI\\n\\nfrom langchain.memory import ConversationBufferMemory\\n\\nfrom langchain.utilities import SerpAPIWrapper\\n\\nsearch = SerpAPIWrapper() tools = \\\\[ Tool( name=\"Current Search\", func=search.run, description=\"useful for when you need to answer questions about current events or the current state of the world\", ), \\\\]\\n\\n\\\\\\nllm = OpenAI(temperature=0)\\n\\nUsing LCEL\\n\\nWe will first show how to create this agent using LCEL\\n\\nfrom langchain import hub\\n\\nfrom langchain.agents.format_scratchpad import format_log_to_str\\n\\nfrom langchain.agents.output_parsers import ReActSingleInputOutputParser\\n\\nfrom langchain.tools.render import render_text_description\\n\\nprompt = hub.pull(\"hwchase17/react-chat\")\\n\\nprompt = prompt.partial( tools=render_text_description(tools), tool_names=\", \".join(\\\\[[t.name](http://t.name) for t in tools\\\\]), )\\n\\nllm_with_stop = llm.bind(stop=\\\\[\"\\\\nObservation\"\\\\])\\n\\nagent = ( { \"input\": lambda x: x\\\\[\"input\"\\\\], \"agent_scratchpad\": lambda x: format_log_to_str(x\\\\[\"intermediate_steps\"\\\\]), \"chat_history\": lambda x: x\\\\[\"chat_history\"\\\\], } | prompt | llm_with_stop | ReActSingleInputOutputParser() )\\n\\nfrom langchain.agents import AgentExecutor\\n\\nmemory = ConversationBufferMemory(memory_key=\"chat_history\") agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, memory=memory)\\n\\nagent_executor.invoke({\"input\": \"hi, i am bob\"})\\\\[\"output\"\\\\]\\n\\n```\\n> Entering new AgentExecutor chain...\\n\\nThought: Do I need to use a tool? No\\nFinal Answer: Hi Bob, nice to meet you! How can I help you today?\\n\\n> Finished chain.\\n```\\n\\n\\\\\\n\\'Hi Bob, nice to meet you! How can I help you today?\\'\\n\\nagent_executor.invoke({\"input\": \"whats my name?\"})\\\\[\"output\"\\\\]\\n\\n```\\n> Entering new AgentExecutor chain...\\n\\nThought: Do I need to use a tool? No\\nFinal Answer: Your name is Bob.\\n\\n> Finished chain.\\n```\\n\\n\\\\\\n\\'Your name is Bob.\\'\\n\\nagent_executor.invoke({\"input\": \"what are some movies showing 9/21/2023?\"})\\\\[\"output\"\\\\]\\n\\n```\\n> Entering new AgentExecutor chain...\\n\\nThought: Do I need to use a tool? Yes\\nAction: Current Search\\nAction Input: Movies showing 9/21/2023[\\'September 2023 Movies: The Creator • Dumb Money • Expend4bles • The Kill Room • The Inventor • The Equalizer 3 • PAW Patrol: The Mighty Movie, ...\\'] Do I need to use a tool? 
No\\nFinal Answer: According to current search, some movies showing on 9/21/2023 are The Creator, Dumb Money, Expend4bles, The Kill Room, The Inventor, The Equalizer 3, and PAW Patrol: The Mighty Movie.\\n\\n> Finished chain.\\n```\\n\\n\\\\\\n\\'According to current search, some movies showing on 9/21/2023 are The Creator, Dumb Money, Expend4bles, The Kill Room, The Inventor, The Equalizer 3, and PAW Patrol: The Mighty Movie.\\'\\n\\n\\\\\\nUse the off-the-shelf agent\\n\\nWe can also create this agent using the off-the-shelf agent class\\n\\nagent_executor = initialize_agent( tools, llm, agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION, verbose=True, memory=memory, )\\n\\nUse a chat model\\n\\nWe can also use a chat model here. The main difference here is in the prompts used.\\n\\nfrom langchain import hub\\n\\nfrom langchain.chat_models import ChatOpenAI\\n\\nprompt = hub.pull(\"hwchase17/react-chat-json\") chat_model = ChatOpenAI(temperature=0, model=\"gpt-4\")\\n\\nprompt = prompt.partial( tools=render_text_description(tools), tool_names=\", \".join(\\\\[[t.name](http://t.name) for t in tools\\\\]), )\\n\\nchat_model_with_stop = chat_model.bind(stop=\\\\[\"\\\\nObservation\"\\\\])\\n\\nfrom langchain.agents.format_scratchpad import format_log_to_messages\\n\\nfrom langchain.agents.output_parsers import JSONAgentOutputParser\\n\\n# We need some extra steering, or the c', metadata={'title': 'Conversational', 'source': 'https://d01.getoutline.com/doc/conversational-B5dBkUgQ4b'}),\n",
" Document(page_content='Quickstart\\n\\nIn this quickstart we\\'ll show you how to:\\n\\nGet setup with LangChain, LangSmith and LangServe\\n\\nUse the most basic and common components of LangChain: prompt templates, models, and output parsers\\n\\nUse LangChain Expression Language, the protocol that LangChain is built on and which facilitates component chaining\\n\\nBuild a simple application with LangChain\\n\\nTrace your application with LangSmith\\n\\nServe your application with LangServe\\n\\nThat\\'s a fair amount to cover! Let\\'s dive in.\\n\\nSetup\\n\\nInstallation\\n\\nTo install LangChain run:\\n\\nPip\\n\\nConda\\n\\npip install langchain\\n\\nFor more details, see our Installation guide.\\n\\nEnvironment\\n\\nUsing LangChain will usually require integrations with one or more model providers, data stores, APIs, etc. For this example, we\\'ll use OpenAI\\'s model APIs.\\n\\nFirst we\\'ll need to install their Python package:\\n\\npip install openai\\n\\nAccessing the API requires an API key, which you can get by creating an account and heading here. Once we have a key we\\'ll want to set it as an environment variable by running:\\n\\nexport OPENAI_API_KEY=\"...\"\\n\\nIf you\\'d prefer not to set an environment variable you can pass the key in directly via the openai_api_key named parameter when initiating the OpenAI LLM class:\\n\\nfrom langchain.chat_models import ChatOpenAI\\n\\nllm = ChatOpenAI(openai_api_key=\"...\")\\n\\nLangSmith\\n\\nMany of the applications you build with LangChain will contain multiple steps with multiple invocations of LLM calls. As these applications get more and more complex, it becomes crucial to be able to inspect what exactly is going on inside your chain or agent. The best way to do this is with LangSmith.\\n\\nNote that LangSmith is not needed, but it is helpful. If you do want to use LangSmith, after you sign up at the link above, make sure to set your environment variables to start logging traces:\\n\\nexport LANGCHAIN_TRACING_V2=\"true\" export LANGCHAIN_API_KEY=...\\n\\nLangServe\\n\\nLangServe helps developers deploy LangChain chains as a REST API. You do not need to use LangServe to use LangChain, but in this guide we\\'ll show how you can deploy your app with LangServe.\\n\\nInstall with:\\n\\npip install \"langserve\\\\[all\\\\]\"\\n\\nBuilding with LangChain\\n\\nLangChain provides many modules that can be used to build language model applications. Modules can be used as standalones in simple applications and they can be composed for more complex use cases. Composition is powered by LangChain Expression Language (LCEL), which defines a unified Runnable interface that many modules implement, making it possible to seamlessly chain components.\\n\\nThe simplest and most common chain contains three things:\\n\\nLLM/Chat Model: The language model is the core reasoning engine here. In order to work with LangChain, you need to understand the different types of language models and how to work with them. Prompt Template: This provides instructions to the language model. This controls what the language model outputs, so understanding how to construct prompts and different prompting strategies is crucial. Output Parser: These translate the raw response from the language model to a more workable format, making it easy to use the output downstream. In this guide we\\'ll cover those three components individually, and then go over how to combine them. 
Understanding these concepts will set you up well for being able to use and customize LangChain applications. Most LangChain applications allow you to configure the model and/or the prompt, so knowing how to take advantage of this will be a big enabler.\\n\\nLLM / Chat Model\\n\\nThere are two types of language models:\\n\\nLLM: underlying model takes a string as input and returns a string\\n\\nChatModel: underlying model takes a list of messages as input and returns a message\\n\\nStrings are simple, but what exactly are messages? The base message interface is defined by BaseMessage, which has two required attributes:\\n\\ncontent: The content of the message. Usually a string. role: The entity from which the BaseMessage is coming. LangChain provides several ob', metadata={'title': 'Quick Start', 'source': 'https://d01.getoutline.com/doc/quick-start-jGuGGGOTuL'}),\n",
" Document(page_content='This walkthrough showcases using an agent to implement the [ReAct](https://react-lm.github.io/) logic.\\n\\n```javascript\\nfrom langchain.agents import AgentType, initialize_agent, load_tools\\nfrom langchain.llms import OpenAI\\n```\\n\\nFirst, let\\'s load the language model we\\'re going to use to control the agent.\\n\\n```javascript\\nllm = OpenAI(temperature=0)\\n```\\n\\nNext, let\\'s load some tools to use. Note that the llm-math tool uses an LLM, so we need to pass that in.\\n\\n```javascript\\ntools = load_tools([\"serpapi\", \"llm-math\"], llm=llm)\\n```\\n\\n## Using LCEL[\\u200b](https://python.langchain.com/docs/modules/agents/agent_types/react#using-lcel \"Direct link to Using LCEL\")\\n\\nWe will first show how to create the agent using LCEL\\n\\n```javascript\\nfrom langchain import hub\\nfrom langchain.agents.format_scratchpad import format_log_to_str\\nfrom langchain.agents.output_parsers import ReActSingleInputOutputParser\\nfrom langchain.tools.render import render_text_description\\n```\\n\\n```javascript\\nprompt = hub.pull(\"hwchase17/react\")\\nprompt = prompt.partial(\\n tools=render_text_description(tools),\\n tool_names=\", \".join([t.name for t in tools]),\\n)\\n```\\n\\n```javascript\\nllm_with_stop = llm.bind(stop=[\"\\\\nObservation\"])\\n```\\n\\n```javascript\\nagent = (\\n {\\n \"input\": lambda x: x[\"input\"],\\n \"agent_scratchpad\": lambda x: format_log_to_str(x[\"intermediate_steps\"]),\\n }\\n | prompt\\n | llm_with_stop\\n | ReActSingleInputOutputParser()\\n)\\n```\\n\\n```javascript\\nfrom langchain.agents import AgentExecutor\\n```\\n\\n```javascript\\nagent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)\\n```\\n\\n```javascript\\nagent_executor.invoke(\\n {\\n \"input\": \"Who is Leo DiCaprio\\'s girlfriend? What is her current age raised to the 0.43 power?\"\\n }\\n)\\n```\\n\\n```javascript\\n \\n \\n > Entering new AgentExecutor chain...\\n I need to find out who Leo DiCaprio\\'s girlfriend is and then calculate her age raised to the 0.43 power.\\n Action: Search\\n Action Input: \"Leo DiCaprio girlfriend\"model Vittoria Ceretti I need to find out Vittoria Ceretti\\'s age\\n Action: Search\\n Action Input: \"Vittoria Ceretti age\"25 years I need to calculate 25 raised to the 0.43 power\\n Action: Calculator\\n Action Input: 25^0.43Answer: 3.991298452658078 I now know the final answer\\n Final Answer: Leo DiCaprio\\'s girlfriend is Vittoria Ceretti and her current age raised to the 0.43 power is 3.991298452658078.\\n \\n > Finished chain.\\n\\n\\n\\n\\n\\n {\\'input\\': \"Who is Leo DiCaprio\\'s girlfriend? What is her current age raised to the 0.43 power?\",\\n \\'output\\': \"Leo DiCaprio\\'s girlfriend is Vittoria Ceretti and her current age raised to the 0.43 power is 3.991298452658078.\"}\\n```\\n\\n## Using ZeroShotReactAgent[\\u200b](https://python.langchain.com/docs/modules/agents/agent_types/react#using-zeroshotreactagent \"Direct link to Using ZeroShotReactAgent\")\\n\\nWe will now show how to use the agent with an off-the-shelf agent implementation\\n\\n```javascript\\nagent_executor = initialize_agent(\\n tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\\n)\\n```\\n\\n```javascript\\nagent_executor.invoke(\\n {\\n \"input\": \"Who is Leo DiCaprio\\'s girlfriend? 
What is her current age raised to the 0.43 power?\"\\n }\\n)\\n```\\n\\n```javascript\\n \\n \\n > Entering new AgentExecutor chain...\\n I need to find out who Leo DiCaprio\\'s girlfriend is and then calculate her age raised to the 0.43 power.\\n Action: Search\\n Action Input: \"Leo DiCaprio girlfriend\"\\n Observation: model Vittoria Ceretti\\n Thought: I need to find out Vittoria Ceretti\\'s age\\n Action: Search\\n Action Input: \"Vittoria Ceretti age\"\\n Observation: 25 years\\n Thought: I need to calculate 25 raised to the 0.43 power\\n Action: Calculator\\n Action Input: 25^0.43\\n Observation: Answer: 3.991298452658078\\n Thought: I now know the final answer\\n Final Answer: Leo DiCaprio\\'s girlfriend is Vittoria Ceretti and her current age raised to the 0.43 power is 3.991298452658078.\\n \\n > Finished chain.\\n\\n\\n\\n\\n\\n {\\'input\\': \"Who is L', metadata={'title': 'ReAct', 'source': 'https://d01.getoutline.com/doc/react-d6rxRS1MHk'})]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever.get_relevant_documents(query=\"LangChain\", doc_content_chars_max=100)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Answering Questions on Outline Documents"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from getpass import getpass\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass(\"OpenAI API Key:\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains import ConversationalRetrievalChain\n",
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"model = ChatOpenAI(model_name=\"gpt-3.5-turbo\")\n",
"qa = ConversationalRetrievalChain.from_llm(model, retriever=retriever)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'question': 'what is langchain?',\n",
" 'chat_history': {},\n",
" 'answer': \"LangChain is a framework for developing applications powered by language models. It provides a set of libraries and tools that enable developers to build context-aware and reasoning-based applications. LangChain allows you to connect language models to various sources of context, such as prompt instructions, few-shot examples, and content, to enhance the model's responses. It also supports the composition of multiple language model components using LangChain Expression Language (LCEL). Additionally, LangChain offers off-the-shelf chains, templates, and integrations for easy application development. LangChain can be used in conjunction with LangSmith for debugging and monitoring chains, and with LangServe for deploying applications as a REST API.\"}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"qa({\"question\": \"what is langchain?\", \"chat_history\": {}})"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -22,7 +22,7 @@
"metadata": {},
"outputs": [],
"source": [
"from pydantic.v1 import BaseModel, Field\n",
"import pydantic\n",
"from langchain.agents import AgentType, initialize_agent\n",
"from langchain.agents.tools import Tool\n",
"from langchain.chains import LLMMathChain\n",
@@ -65,12 +65,12 @@
"primes = {998: 7901, 999: 7907, 1000: 7919}\n",
"\n",
"\n",
"class CalculatorInput(BaseModel):\n",
" question: str = Field()\n",
"class CalculatorInput(pydantic.BaseModel):\n",
" question: str = pydantic.Field()\n",
"\n",
"\n",
"class PrimeInput(BaseModel):\n",
" n: int = Field()\n",
"class PrimeInput(pydantic.BaseModel):\n",
" n: int = pydantic.Field()\n",
"\n",
"\n",
"def is_prime(n: int) -> bool:\n",

View File

@@ -743,7 +743,7 @@
"- [Docs](/docs/modules/model_io/llms)\n",
"- [Integrations](/docs/integrations/llms): Explore over 75 `LLM` integrations.\n",
"\n",
"See a guide on RAG with locally-running models [here](/docs/use_cases/question_answering/local_retrieval_qa)."
"See a guide on RAG with locally-running models [here](/docs/modules/use_cases/question_answering/local_retrieval_qa)."
]
},
{

View File

@@ -13,7 +13,7 @@ tests:
poetry run pytest $(TEST_FILE)
test_watch:
poetry run ptw --snapshot-update --now . -- -vv -x tests/unit_tests
poetry run ptw --snapshot-update --now . -- -x tests/unit_tests
######################

View File

@@ -1,7 +1,6 @@
from langchain_core.language_models.base import (
BaseLanguageModel,
LanguageModelInput,
LanguageModelOutput,
get_tokenizer,
)
from langchain_core.language_models.chat_models import BaseChatModel, SimpleChatModel
@@ -15,5 +14,4 @@ __all__ = [
"LLM",
"LanguageModelInput",
"get_tokenizer",
"LanguageModelOutput",
]

View File

@@ -8,7 +8,6 @@ import yaml
from langchain_core.output_parsers.string import StrOutputParser
from langchain_core.prompts.base import BasePromptTemplate
from langchain_core.prompts.chat import ChatPromptTemplate
from langchain_core.prompts.few_shot import FewShotPromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.utils import try_load_from_hub
@@ -155,21 +154,7 @@ def _load_prompt_from_file(file: Union[str, Path]) -> BasePromptTemplate:
return load_prompt_from_config(config)
def _load_chat_prompt(config: Dict) -> ChatPromptTemplate:
"""Load chat prompt from config"""
messages = config.pop("messages")
template = messages[0]["prompt"].pop("template") if messages else None
config.pop("input_variables")
if not template:
raise ValueError("Can't load chat prompt without template")
return ChatPromptTemplate.from_template(template=template, **config)
type_to_loader_dict: Dict[str, Callable[[dict], BasePromptTemplate]] = {
"prompt": _load_prompt,
"few_shot": _load_few_shot_prompt,
"chat": _load_chat_prompt,
}

View File

@@ -1204,9 +1204,7 @@ class RunnableSerializable(Serializable, Runnable[Input, Output]):
def configurable_alternatives(
self,
which: ConfigurableField,
*,
default_key: str = "default",
prefix_keys: bool = False,
**kwargs: Union[Runnable[Input, Output], Callable[[], Runnable[Input, Output]]],
) -> RunnableSerializable[Input, Output]:
from langchain_core.runnables.configurable import (
@@ -1214,11 +1212,7 @@ class RunnableSerializable(Serializable, Runnable[Input, Output]):
)
return RunnableConfigurableAlternatives(
which=which,
default=self,
alternatives=kwargs,
default_key=default_key,
prefix_keys=prefix_keys,
which=which, default=self, alternatives=kwargs, default_key=default_key
)

View File

@@ -220,7 +220,6 @@ class RunnableConfigurableFields(DynamicRunnable[Input, Output]):
annotation=spec.annotation
or self.default.__fields__[field_name].annotation,
default=getattr(self.default, field_name),
is_shared=spec.is_shared,
)
if isinstance(spec, ConfigurableField)
else make_options_spec(
@@ -299,12 +298,6 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
]
default_key: str = "default"
"""The enum value to use for the default option. Defaults to "default"."""
prefix_keys: bool
"""Whether to prefix configurable fields of each alternative with a namespace
of the form <which.id>==<alternative_key>, eg. a key named "temperature" used by
the alternative named "gpt3" becomes "model==gpt3/temperature"."""
@property
def config_specs(self) -> List[ConfigurableFieldSpec]:
@@ -320,37 +313,21 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
),
)
_enums_for_spec[self.which] = cast(Type[StrEnum], which_enum)
return get_unique_config_specs(
# which alternative
[
ConfigurableFieldSpec(
id=self.which.id,
name=self.which.name,
description=self.which.description,
annotation=which_enum,
default=self.default_key,
is_shared=self.which.is_shared,
),
]
# config specs of the default option
+ (
[
prefix_config_spec(s, f"{self.which.id}=={self.default_key}")
for s in self.default.config_specs
]
if self.prefix_keys
else self.default.config_specs
)
# config specs of the alternatives
+ [
prefix_config_spec(s, f"{self.which.id}=={alt_key}")
if self.prefix_keys
else s
for alt_key, alt in self.alternatives.items()
if isinstance(alt, RunnableSerializable)
for s in alt.config_specs
]
)
return [
ConfigurableFieldSpec(
id=self.which.id,
name=self.which.name,
description=self.which.description,
annotation=which_enum,
default=self.default_key,
),
*self.default.config_specs,
] + [
s
for alt in self.alternatives.values()
if isinstance(alt, RunnableSerializable)
for s in alt.config_specs
]
def configurable_fields(
self, **kwargs: AnyConfigurableField
@@ -378,23 +355,6 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
raise ValueError(f"Unknown alternative: {which}")
def prefix_config_spec(
spec: ConfigurableFieldSpec, prefix: str
) -> ConfigurableFieldSpec:
return (
ConfigurableFieldSpec(
id=f"{prefix}/{spec.id}",
name=spec.name,
description=spec.description,
annotation=spec.annotation,
default=spec.default,
is_shared=spec.is_shared,
)
if not spec.is_shared
else spec
)
def make_options_spec(
spec: Union[ConfigurableFieldSingleOption, ConfigurableFieldMultiOption],
description: Optional[str],
@@ -417,7 +377,6 @@ def make_options_spec(
description=spec.description or description,
annotation=enum,
default=spec.default,
is_shared=spec.is_shared,
)
else:
return ConfigurableFieldSpec(
@@ -426,5 +385,4 @@ def make_options_spec(
description=spec.description or description,
annotation=Sequence[enum], # type: ignore[valid-type]
default=spec.default,
is_shared=spec.is_shared,
)
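For reference, a minimal sketch of the prefix_keys behavior this diff removes, using the fake models from langchain_core's unit tests (pre-revert API, as described by the removed docstring above):

from langchain_core.runnables import ConfigurableField

chat = FakeListChatModel(responses=["b"]).configurable_fields(
    responses=ConfigurableField(id="responses", name="Chat Responses")
)
llm = FakeListLLM(responses=["a"]).configurable_alternatives(
    ConfigurableField(id="llm", name="LLM"),
    chat=chat,
    prefix_keys=True,  # the flag being removed
)
# Each alternative's configurable fields get namespaced as
# "<which.id>==<alternative_key>/<field_id>", so the chat alternative's
# "responses" field is addressed as "llm==chat/responses":
llm.with_config(
    configurable={"llm": "chat", "llm==chat/responses": ["b"]}
).invoke("...")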

View File

@@ -169,7 +169,6 @@ class RunnableWithMessageHistory(RunnableBindingBase):
name="Session ID",
description="Unique identifier for a session.",
default="",
is_shared=True,
),
]
)
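The session_id spec above is what callers fill in at invocation time, e.g. (a sketch assuming with_history is a runnable already wrapped in RunnableWithMessageHistory):

with_history.invoke(
    {"input": "hi"},
    config={"configurable": {"session_id": "abc123"}},
)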

View File

@@ -62,11 +62,7 @@ class RunnablePassthrough(RunnableSerializable[Other, Other]):
.. code-block:: python
from langchain_core.runnables import (
RunnableLambda,
RunnableParallel,
RunnablePassthrough,
)
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
runnable = RunnableParallel(
origin=RunnablePassthrough(),
@@ -76,7 +72,7 @@ class RunnablePassthrough(RunnableSerializable[Other, Other]):
runnable.invoke(1) # {'origin': 1, 'modified': 2}
def fake_llm(prompt: str) -> str:  # Fake LLM for the example
return "completion"
chain = RunnableLambda(fake_llm) | {
@@ -93,7 +89,7 @@ class RunnablePassthrough(RunnableSerializable[Other, Other]):
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
def fake_llm(prompt: str) -> str:  # Fake LLM for the example
return "completion"
runnable = {

View File

@@ -257,7 +257,6 @@ class ConfigurableField(NamedTuple):
name: Optional[str] = None
description: Optional[str] = None
annotation: Optional[Any] = None
is_shared: bool = False
def __hash__(self) -> int:
return hash((self.id, self.annotation))
@@ -272,7 +271,6 @@ class ConfigurableFieldSingleOption(NamedTuple):
name: Optional[str] = None
description: Optional[str] = None
is_shared: bool = False
def __hash__(self) -> int:
return hash((self.id, tuple(self.options.keys()), self.default))
@@ -287,7 +285,6 @@ class ConfigurableFieldMultiOption(NamedTuple):
name: Optional[str] = None
description: Optional[str] = None
is_shared: bool = False
def __hash__(self) -> int:
return hash((self.id, tuple(self.options.keys()), tuple(self.default)))
@@ -302,12 +299,11 @@ class ConfigurableFieldSpec(NamedTuple):
"""A field that can be configured by the user. It is a specification of a field."""
id: str
annotation: Any
name: Optional[str]
description: Optional[str]
name: Optional[str] = None
description: Optional[str] = None
default: Any = None
is_shared: bool = False
default: Any
annotation: Any
def get_unique_config_specs(

libs/core/poetry.lock generated
View File

@@ -1147,16 +1147,6 @@ files = [
{file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"},
{file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"},
{file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"},
{file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"},
{file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"},
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"},
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"},
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"},
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"},
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"},
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"},
{file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"},
{file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"},
{file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"},
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"},
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"},
@@ -1922,7 +1912,6 @@ files = [
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
{file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
{file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
{file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
{file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
@@ -1930,15 +1919,8 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
{file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
{file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
{file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
{file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
@@ -1955,7 +1937,6 @@ files = [
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
{file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
{file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
{file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
{file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
@@ -1963,7 +1944,6 @@ files = [
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
{file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
{file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
{file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
{file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain-core"
version = "0.0.6"
version = "0.0.4"
description = "Building applications with LLMs through composability"
authors = []
license = "MIT"

File diff suppressed because it is too large.

View File

@@ -7,7 +7,6 @@ EXPECTED_ALL = [
"BaseLLM",
"LLM",
"LanguageModelInput",
"LanguageModelOutput",
"get_tokenizer",
]

View File

@@ -0,0 +1,59 @@
# serializer version: 1
# name: test_person
'''
{
"lc": 1,
"type": "constructor",
"id": [
"tests",
"unit_tests",
"load",
"test_dump",
"Person"
],
"kwargs": {
"secret": {
"lc": 1,
"type": "secret",
"id": [
"SECRET"
]
},
"you_can_see_me": "hello"
}
}
'''
# ---
# name: test_person.1
'''
{
"lc": 1,
"type": "constructor",
"id": [
"tests",
"unit_tests",
"load",
"test_dump",
"SpecialPerson"
],
"kwargs": {
"another_secret": {
"lc": 1,
"type": "secret",
"id": [
"ANOTHER_SECRET"
]
},
"secret": {
"lc": 1,
"type": "secret",
"id": [
"SECRET"
]
},
"another_visible": "bye",
"you_can_see_me": "hello"
}
}
'''
# ---

View File

@@ -0,0 +1,46 @@
from typing import Any, Dict
from langchain_core.load.dump import dumps
from langchain_core.load.serializable import Serializable
class Person(Serializable):
secret: str
you_can_see_me: str = "hello"
@classmethod
def is_lc_serializable(cls) -> bool:
return True
@property
def lc_secrets(self) -> Dict[str, str]:
return {"secret": "SECRET"}
@property
def lc_attributes(self) -> Dict[str, str]:
return {"you_can_see_me": self.you_can_see_me}
class SpecialPerson(Person):
another_secret: str
another_visible: str = "bye"
# Gets merged with parent class's secrets
@property
def lc_secrets(self) -> Dict[str, str]:
return {"another_secret": "ANOTHER_SECRET"}
# Gets merged with parent class's attributes
@property
def lc_attributes(self) -> Dict[str, str]:
return {"another_visible": self.another_visible}
def test_person(snapshot: Any) -> None:
p = Person(secret="hello")
assert dumps(p, pretty=True) == snapshot
sp = SpecialPerson(another_secret="Wooo", secret="Hmm")
assert dumps(sp, pretty=True) == snapshot
assert Person.lc_id() == ["tests", "unit_tests", "load", "test_dump", "Person"]
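The snapshots above pin the masking behavior: dumps never emits the secret value itself, only a lookup marker that load() can later resolve from a secrets map or environment variables. The same assertion in plain dict form:

import json

serialized = json.loads(dumps(Person(secret="hello"), pretty=True))
# Only the marker is present; the actual value is withheld:
assert serialized["kwargs"]["secret"] == {"lc": 1, "type": "secret", "id": ["SECRET"]}
assert serialized["kwargs"]["you_can_see_me"] == "hello"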

View File

@@ -1020,118 +1020,6 @@ def test_configurable_alts_factory() -> None:
assert fake_llm.with_config(configurable={"llm": "chat"}).invoke("...") == "b"
def test_configurable_fields_prefix_keys() -> None:
fake_chat = FakeListChatModel(responses=["b"]).configurable_fields(
responses=ConfigurableFieldMultiOption(
id="responses",
name="Chat Responses",
options={
"hello": "A good morning to you!",
"bye": "See you later!",
"helpful": "How can I help you?",
},
default=["hello", "bye"],
),
# (sleep is a configurable field in FakeListChatModel)
sleep=ConfigurableField(
id="chat_sleep",
is_shared=True,
),
)
fake_llm = (
FakeListLLM(responses=["a"])
.configurable_fields(
responses=ConfigurableField(
id="responses",
name="LLM Responses",
description="A list of fake responses for this LLM",
)
)
.configurable_alternatives(
ConfigurableField(id="llm", name="LLM"),
chat=fake_chat | StrOutputParser(),
prefix_keys=True,
)
)
prompt = PromptTemplate.from_template("Hello, {name}!").configurable_fields(
template=ConfigurableFieldSingleOption(
id="prompt_template",
name="Prompt Template",
description="The prompt template for this chain",
options={
"hello": "Hello, {name}!",
"good_morning": "A very good morning to you, {name}!",
},
default="hello",
)
)
chain = prompt | fake_llm
assert chain.config_schema().schema() == {
"title": "RunnableSequenceConfig",
"type": "object",
"properties": {"configurable": {"$ref": "#/definitions/Configurable"}},
"definitions": {
"LLM": {
"title": "LLM",
"description": "An enumeration.",
"enum": ["chat", "default"],
"type": "string",
},
"Chat_Responses": {
"title": "Chat Responses",
"description": "An enumeration.",
"enum": ["hello", "bye", "helpful"],
"type": "string",
},
"Prompt_Template": {
"title": "Prompt Template",
"description": "An enumeration.",
"enum": ["hello", "good_morning"],
"type": "string",
},
"Configurable": {
"title": "Configurable",
"type": "object",
"properties": {
"prompt_template": {
"title": "Prompt Template",
"description": "The prompt template for this chain",
"default": "hello",
"allOf": [{"$ref": "#/definitions/Prompt_Template"}],
},
"llm": {
"title": "LLM",
"default": "default",
"allOf": [{"$ref": "#/definitions/LLM"}],
},
# not prefixed because marked as shared
"chat_sleep": {
"title": "Chat Sleep",
"type": "number",
},
# prefixed for "chat" option
"llm==chat/responses": {
"title": "Chat Responses",
"default": ["hello", "bye"],
"type": "array",
"items": {"$ref": "#/definitions/Chat_Responses"},
},
# prefixed for "default" option
"llm==default/responses": {
"title": "LLM Responses",
"description": "A list of fake responses for this LLM",
"default": ["a"],
"type": "array",
"items": {"type": "string"},
},
},
},
},
}
def test_configurable_fields_example() -> None:
fake_chat = FakeListChatModel(responses=["b"]).configurable_fields(
responses=ConfigurableFieldMultiOption(

File diff suppressed because it is too large.

View File

@@ -0,0 +1,400 @@
import json
from typing import Any
from langchain_core.documents.base import Document
from langchain_core.load.load import load
from langchain_core.load.serializable import Serializable
from langchain_core.messages.ai import AIMessage, AIMessageChunk
from langchain_core.messages.base import BaseMessage, BaseMessageChunk
from langchain_core.messages.chat import ChatMessage, ChatMessageChunk
from langchain_core.messages.function import FunctionMessage, FunctionMessageChunk
from langchain_core.messages.human import HumanMessage, HumanMessageChunk
from langchain_core.messages.system import SystemMessage, SystemMessageChunk
from langchain_core.messages.tool import ToolMessage, ToolMessageChunk
from langchain_core.output_parsers.list import (
CommaSeparatedListOutputParser,
MarkdownListOutputParser,
NumberedListOutputParser,
)
from langchain_core.output_parsers.string import StrOutputParser
from langchain_core.outputs.chat_generation import ChatGeneration, ChatGenerationChunk
from langchain_core.outputs.generation import Generation, GenerationChunk
from langchain_core.prompts.chat import (
AIMessagePromptTemplate,
ChatMessagePromptTemplate,
ChatPromptTemplate,
HumanMessagePromptTemplate,
MessagesPlaceholder,
SystemMessagePromptTemplate,
)
from langchain_core.prompts.few_shot import (
FewShotChatMessagePromptTemplate,
FewShotPromptTemplate,
)
from langchain_core.prompts.few_shot_with_templates import FewShotPromptWithTemplates
from langchain_core.prompts.pipeline import PipelinePromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.runnables import ConfigurableField
from langchain_core.runnables.base import (
RunnableBinding,
RunnableBindingBase,
RunnableEach,
RunnableEachBase,
RunnableMap,
RunnableParallel,
RunnableSequence,
)
from langchain_core.runnables.branch import RunnableBranch
from langchain_core.runnables.configurable import (
RunnableConfigurableAlternatives,
RunnableConfigurableFields,
)
from langchain_core.runnables.fallbacks import RunnableWithFallbacks
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.runnables.passthrough import RunnableAssign, RunnablePassthrough
from langchain_core.runnables.retry import RunnableRetry
from langchain_core.runnables.router import RouterRunnable
from tests.unit_tests.fake.memory import ChatMessageHistory
with open("tests/unit_tests/serialization/v0_0_341/snapshot.ambr") as f:
SNAPSHOTS = f.read()
SNAPSHOT_MAP = {
x.split("\n")[0][15:]: json.loads(x.split("'''")[1])
for x in SNAPSHOTS.split("# name: ")
if not x.startswith("#")
}
def load_snapshot(snake_case_class: str) -> str:
return SNAPSHOT_MAP[snake_case_class]
def test_deserialize_system_message() -> None:
snapshot = load_snapshot("system_message")
obj: Any = SystemMessage(content="")
assert load(snapshot) == obj
def test_deserialize_system_message_chunk() -> None:
snapshot = load_snapshot("system_message_chunk")
obj: Any = SystemMessageChunk(content="")
assert load(snapshot) == obj
def test_deserialize_ai_message() -> None:
snapshot = load_snapshot("ai_message")
obj: Any = AIMessage(content="")
assert load(snapshot) == obj
def test_deserialize_ai_message_chunk() -> None:
snapshot = load_snapshot("ai_message_chunk")
obj: Any = AIMessageChunk(content="")
assert load(snapshot) == obj
def test_deserialize_human_message() -> None:
snapshot = load_snapshot("human_message")
obj: Any = HumanMessage(content="")
assert load(snapshot) == obj
def test_deserialize_human_message_chunk() -> None:
snapshot = load_snapshot("human_message_chunk")
obj: Any = HumanMessageChunk(content="")
assert load(snapshot) == obj
def test_deserialize_chat_message() -> None:
snapshot = load_snapshot("chat_message")
obj: Any = ChatMessage(content="", role="")
assert load(snapshot) == obj
def test_deserialize_chat_message_chunk() -> None:
snapshot = load_snapshot("chat_message_chunk")
obj: Any = ChatMessageChunk(content="", role="")
assert load(snapshot) == obj
def test_deserialize_tool_message() -> None:
snapshot = load_snapshot("tool_message")
obj: Any = ToolMessage(content="", tool_call_id="")
assert load(snapshot) == obj
def test_deserialize_tool_message_chunk() -> None:
snapshot = load_snapshot("tool_message_chunk")
obj: Any = ToolMessageChunk(content="", tool_call_id="")
assert load(snapshot) == obj
def test_deserialize_base_message() -> None:
snapshot = load_snapshot("base_message")
obj: Any = BaseMessage(content="", type="")
assert load(snapshot) == obj
def test_deserialize_base_message_chunk() -> None:
snapshot = load_snapshot("base_message_chunk")
obj: Any = BaseMessageChunk(content="", type="")
assert load(snapshot) == obj
def test_deserialize_function_message() -> None:
snapshot = load_snapshot("function_message")
obj: Any = FunctionMessage(content="", name="")
assert load(snapshot) == obj
def test_deserialize_function_message_chunk() -> None:
snapshot = load_snapshot("function_message_chunk")
obj: Any = FunctionMessageChunk(content="", name="")
assert load(snapshot) == obj
def test_deserialize_runnable_configurable_alternatives() -> None:
snapshot = load_snapshot("runnable_configurable_alternatives")
obj: Any = RunnableConfigurableAlternatives(
default=RunnablePassthrough(), which=ConfigurableField(id=""), alternatives={}
)
assert load(snapshot) == obj
def test_deserialize_runnable_configurable_fields() -> None:
snapshot = load_snapshot("runnable_configurable_fields")
obj: Any = RunnableConfigurableFields(default=RunnablePassthrough(), fields={})
assert load(snapshot) == obj
def test_deserialize_runnable_branch() -> None:
snapshot = load_snapshot("runnable_branch")
obj: Any = RunnableBranch(
(RunnablePassthrough(), RunnablePassthrough()), RunnablePassthrough()
)
assert load(snapshot) == obj
def test_deserialize_runnable_retry() -> None:
snapshot = load_snapshot("runnable_retry")
obj: Any = RunnableRetry(bound=RunnablePassthrough())
assert load(snapshot) == obj
def test_deserialize_runnable_with_fallbacks() -> None:
snapshot = load_snapshot("runnable_with_fallbacks")
obj: Any = RunnableWithFallbacks(
runnable=RunnablePassthrough(), fallbacks=(RunnablePassthrough(),)
)
assert load(snapshot) == obj
def test_deserialize_router_runnable() -> None:
snapshot = load_snapshot("router_runnable")
obj: Any = RouterRunnable({"": RunnablePassthrough()})
assert load(snapshot) == obj
def test_deserialize_runnable_assign() -> None:
snapshot = load_snapshot("runnable_assign")
obj: Any = RunnableAssign(mapper=RunnableParallel({}))
assert load(snapshot) == obj
def test_deserialize_runnable_passthrough() -> None:
snapshot = load_snapshot("runnable_passthrough")
obj: Any = RunnablePassthrough()
assert load(snapshot) == obj
def test_deserialize_runnable_binding() -> None:
snapshot = load_snapshot("runnable_binding")
obj: Any = RunnableBinding(bound=RunnablePassthrough())
assert load(snapshot) == obj
def test_deserialize_runnable_binding_base() -> None:
snapshot = load_snapshot("runnable_binding_base")
obj: Any = RunnableBindingBase(bound=RunnablePassthrough())
assert load(snapshot) == obj
def test_deserialize_runnable_each() -> None:
snapshot = load_snapshot("runnable_each")
obj: Any = RunnableEach(bound=RunnablePassthrough())
assert load(snapshot) == obj
def test_deserialize_runnable_each_base() -> None:
snapshot = load_snapshot("runnable_each_base")
obj: Any = RunnableEachBase(bound=RunnablePassthrough())
assert load(snapshot) == obj
def test_deserialize_runnable_map() -> None:
snapshot = load_snapshot("runnable_map")
obj: Any = RunnableMap()
assert load(snapshot) == obj
def test_deserialize_runnable_parallel() -> None:
snapshot = load_snapshot("runnable_parallel")
obj: Any = RunnableParallel()
assert load(snapshot) == obj
def test_deserialize_runnable_sequence() -> None:
snapshot = load_snapshot("runnable_sequence")
obj: Any = RunnableSequence(first=RunnablePassthrough(), last=RunnablePassthrough())
assert load(snapshot) == obj
def test_deserialize_runnable_with_message_history() -> None:
snapshot = load_snapshot("runnable_with_message_history")
def get_chat_history(session_id: str) -> ChatMessageHistory:
return ChatMessageHistory()
obj: Any = RunnableWithMessageHistory(RunnablePassthrough(), get_chat_history)
assert load(snapshot) == obj
def test_deserialize_serializable() -> None:
snapshot = load_snapshot("serializable")
obj = Serializable()
assert load(snapshot) == obj
def test_deserialize_comma_separated_list_output_parser() -> None:
snapshot = load_snapshot("comma_separated_list_output_parser")
obj = CommaSeparatedListOutputParser()
assert load(snapshot) == obj
def test_deserialize_markdown_list_output_parser() -> None:
snapshot = load_snapshot("markdown_list_output_parser")
obj = MarkdownListOutputParser()
assert load(snapshot) == obj
def test_deserialize_numbered_list_output_parser() -> None:
snapshot = load_snapshot("numbered_list_output_parser")
obj = NumberedListOutputParser()
assert load(snapshot) == obj
def test_deserialize_str_output_parser() -> None:
snapshot = load_snapshot("str_output_parser")
obj = StrOutputParser()
assert load(snapshot) == obj
def test_deserialize_few_shot_prompt_with_templates() -> None:
snapshot = load_snapshot("few_shot_prompt_with_templates")
obj: Any = FewShotPromptWithTemplates(
example_prompt=PromptTemplate.from_template(""),
suffix=PromptTemplate.from_template(""),
examples=[],
input_variables=[],
)
assert load(snapshot) == obj
def test_deserialize_few_shot_chat_message_prompt_template() -> None:
snapshot = load_snapshot("few_shot_chat_message_prompt_template")
obj: Any = FewShotChatMessagePromptTemplate(
example_prompt=HumanMessagePromptTemplate.from_template(""), examples=[]
)
assert load(snapshot) == obj
def test_deserialize_few_shot_prompt_template() -> None:
snapshot = load_snapshot("few_shot_prompt_template")
obj: Any = FewShotPromptTemplate(
example_prompt=PromptTemplate.from_template(""),
suffix="",
examples=[],
input_variables=[],
)
assert load(snapshot) == obj
def test_deserialize_ai_message_prompt_template() -> None:
snapshot = load_snapshot("ai_message_prompt_template")
obj: Any = AIMessagePromptTemplate.from_template("")
assert load(snapshot) == obj
def test_deserialize_chat_message_prompt_template() -> None:
snapshot = load_snapshot("chat_message_prompt_template")
obj: Any = ChatMessagePromptTemplate.from_template("", role="")
assert load(snapshot) == obj
def test_deserialize_chat_prompt_template() -> None:
snapshot = load_snapshot("chat_prompt_template")
obj: Any = ChatPromptTemplate.from_template("", role="")
assert load(snapshot) == obj
def test_deserialize_human_message_prompt_template() -> None:
snapshot = load_snapshot("human_message_prompt_template")
obj: Any = HumanMessagePromptTemplate.from_template("")
assert load(snapshot) == obj
def test_deserialize_messages_placeholder() -> None:
snapshot = load_snapshot("messages_placeholder")
obj: Any = MessagesPlaceholder(variable_name="")
assert load(snapshot) == obj
def test_deserialize_system_message_prompt_template() -> None:
snapshot = load_snapshot("system_message_prompt_template")
obj: Any = SystemMessagePromptTemplate.from_template("")
assert load(snapshot) == obj
def test_deserialize_pipeline_prompt_template() -> None:
snapshot = load_snapshot("pipeline_prompt_template")
obj: Any = PipelinePromptTemplate(
pipeline_prompts=[], final_prompt=PromptTemplate.from_template("")
)
assert load(snapshot) == obj
def test_deserialize_prompt_template() -> None:
snapshot = load_snapshot("prompt_template")
obj: Any = PromptTemplate.from_template("")
assert load(snapshot) == obj
def test_deserialize_document() -> None:
snapshot = load_snapshot("document")
obj: Any = Document(page_content="")
assert load(snapshot) == obj
def test_deserialize_generation() -> None:
snapshot = load_snapshot("generation")
obj: Any = Generation(text="")
assert load(snapshot) == obj
def test_deserialize_generation_chunk() -> None:
snapshot = load_snapshot("generation_chunk")
obj: Any = GenerationChunk(text="")
assert load(snapshot) == obj
def test_deserialize_chat_generation() -> None:
snapshot = load_snapshot("chat_generation")
obj: Any = ChatGeneration(message=AIMessage(content=""))
assert load(snapshot) == obj
def test_deserialize_chat_generation_chunk() -> None:
snapshot = load_snapshot("chat_generation_chunk")
obj: Any = ChatGenerationChunk(message=AIMessage(content=""))
assert load(snapshot) == obj
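These tests pin backward compatibility: payloads produced by langchain_core v0.0.341 must keep deserializing. The complementary forward invariant can be sketched as a roundtrip (loads lives alongside load in langchain_core.load.load):

from langchain_core.load.dump import dumps
from langchain_core.load.load import loads

msg = AIMessage(content="hello")
assert loads(dumps(msg)) == msg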

View File

@@ -0,0 +1,334 @@
from typing import Any
from langchain_core.documents.base import Document
from langchain_core.load.dump import dumps
from langchain_core.load.serializable import Serializable
from langchain_core.messages.ai import AIMessage, AIMessageChunk
from langchain_core.messages.base import BaseMessage, BaseMessageChunk
from langchain_core.messages.chat import ChatMessage, ChatMessageChunk
from langchain_core.messages.function import FunctionMessage, FunctionMessageChunk
from langchain_core.messages.human import HumanMessage, HumanMessageChunk
from langchain_core.messages.system import SystemMessage, SystemMessageChunk
from langchain_core.messages.tool import ToolMessage, ToolMessageChunk
from langchain_core.output_parsers.list import (
CommaSeparatedListOutputParser,
MarkdownListOutputParser,
NumberedListOutputParser,
)
from langchain_core.output_parsers.string import StrOutputParser
from langchain_core.outputs.chat_generation import ChatGeneration, ChatGenerationChunk
from langchain_core.outputs.generation import Generation, GenerationChunk
from langchain_core.prompts.chat import (
AIMessagePromptTemplate,
ChatMessagePromptTemplate,
ChatPromptTemplate,
HumanMessagePromptTemplate,
MessagesPlaceholder,
SystemMessagePromptTemplate,
)
from langchain_core.prompts.few_shot import (
FewShotChatMessagePromptTemplate,
FewShotPromptTemplate,
)
from langchain_core.prompts.few_shot_with_templates import FewShotPromptWithTemplates
from langchain_core.prompts.pipeline import PipelinePromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.runnables import ConfigurableField
from langchain_core.runnables.base import (
RunnableBinding,
RunnableBindingBase,
RunnableEach,
RunnableEachBase,
RunnableMap,
RunnableParallel,
RunnableSequence,
)
from langchain_core.runnables.branch import RunnableBranch
from langchain_core.runnables.configurable import (
RunnableConfigurableAlternatives,
RunnableConfigurableFields,
)
from langchain_core.runnables.fallbacks import RunnableWithFallbacks
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.runnables.passthrough import RunnableAssign, RunnablePassthrough
from langchain_core.runnables.retry import RunnableRetry
from langchain_core.runnables.router import RouterRunnable
from tests.unit_tests.fake.memory import ChatMessageHistory
def test_serialize_system_message(snapshot: Any) -> None:
obj: Any = SystemMessage(content="")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_system_message_chunk(snapshot: Any) -> None:
obj: Any = SystemMessageChunk(content="")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_ai_message(snapshot: Any) -> None:
obj: Any = AIMessage(content="")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_ai_message_chunk(snapshot: Any) -> None:
obj: Any = AIMessageChunk(content="")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_human_message(snapshot: Any) -> None:
obj: Any = HumanMessage(content="")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_human_message_chunk(snapshot: Any) -> None:
obj: Any = HumanMessageChunk(content="")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_chat_message(snapshot: Any) -> None:
obj: Any = ChatMessage(content="", role="")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_chat_message_chunk(snapshot: Any) -> None:
obj: Any = ChatMessageChunk(content="", role="")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_tool_message(snapshot: Any) -> None:
obj: Any = ToolMessage(content="", tool_call_id="")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_tool_message_chunk(snapshot: Any) -> None:
obj: Any = ToolMessageChunk(content="", tool_call_id="")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_base_message(snapshot: Any) -> None:
obj: Any = BaseMessage(content="", type="")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_base_message_chunk(snapshot: Any) -> None:
obj: Any = BaseMessageChunk(content="", type="")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_function_message(snapshot: Any) -> None:
obj: Any = FunctionMessage(content="", name="")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_function_message_chunk(snapshot: Any) -> None:
obj: Any = FunctionMessageChunk(content="", name="")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_runnable_configurable_alternatives(snapshot: Any) -> None:
obj: Any = RunnableConfigurableAlternatives(
default=RunnablePassthrough(), which=ConfigurableField(id=""), alternatives={}
)
assert dumps(obj, pretty=True) == snapshot
def test_serialize_runnable_configurable_fields(snapshot: Any) -> None:
obj: Any = RunnableConfigurableFields(default=RunnablePassthrough(), fields={})
assert dumps(obj, pretty=True) == snapshot
def test_serialize_runnable_branch(snapshot: Any) -> None:
obj: Any = RunnableBranch(
(RunnablePassthrough(), RunnablePassthrough()), RunnablePassthrough()
)
assert dumps(obj, pretty=True) == snapshot
def test_serialize_runnable_retry(snapshot: Any) -> None:
obj: Any = RunnableRetry(bound=RunnablePassthrough())
assert dumps(obj, pretty=True) == snapshot
def test_serialize_runnable_with_fallbacks(snapshot: Any) -> None:
obj: Any = RunnableWithFallbacks(
runnable=RunnablePassthrough(), fallbacks=(RunnablePassthrough(),)
)
assert dumps(obj, pretty=True) == snapshot
def test_serialize_router_runnable(snapshot: Any) -> None:
obj: Any = RouterRunnable({"": RunnablePassthrough()})
assert dumps(obj, pretty=True) == snapshot
def test_serialize_runnable_assign(snapshot: Any) -> None:
obj: Any = RunnableAssign(mapper=RunnableParallel({}))
assert dumps(obj, pretty=True) == snapshot
def test_serialize_runnable_passthrough(snapshot: Any) -> None:
obj: Any = RunnablePassthrough()
assert dumps(obj, pretty=True) == snapshot
def test_serialize_runnable_binding(snapshot: Any) -> None:
obj: Any = RunnableBinding(bound=RunnablePassthrough())
assert dumps(obj, pretty=True) == snapshot
def test_serialize_runnable_binding_base(snapshot: Any) -> None:
obj: Any = RunnableBindingBase(bound=RunnablePassthrough())
assert dumps(obj, pretty=True) == snapshot
def test_serialize_runnable_each(snapshot: Any) -> None:
obj: Any = RunnableEach(bound=RunnablePassthrough())
assert dumps(obj, pretty=True) == snapshot
def test_serialize_runnable_each_base(snapshot: Any) -> None:
obj: Any = RunnableEachBase(bound=RunnablePassthrough())
assert dumps(obj, pretty=True) == snapshot
def test_serialize_runnable_map(snapshot: Any) -> None:
obj: Any = RunnableMap()
assert dumps(obj, pretty=True) == snapshot
def test_serialize_runnable_parallel(snapshot: Any) -> None:
obj: Any = RunnableParallel()
assert dumps(obj, pretty=True) == snapshot
def test_serialize_runnable_sequence(snapshot: Any) -> None:
obj: Any = RunnableSequence(first=RunnablePassthrough(), last=RunnablePassthrough())
assert dumps(obj, pretty=True) == snapshot
def test_serialize_runnable_with_message_history(snapshot: Any) -> None:
def get_chat_history(session_id: str) -> ChatMessageHistory:
return ChatMessageHistory()
obj: Any = RunnableWithMessageHistory(RunnablePassthrough(), get_chat_history)
assert dumps(obj, pretty=True) == snapshot
def test_serialize_serializable(snapshot: Any) -> None:
obj: Any = Serializable()
assert dumps(obj, pretty=True) == snapshot
def test_serialize_comma_separated_list_output_parser(snapshot: Any) -> None:
obj: Any = CommaSeparatedListOutputParser()
assert dumps(obj, pretty=True) == snapshot
def test_serialize_markdown_list_output_parser(snapshot: Any) -> None:
obj: Any = MarkdownListOutputParser()
assert dumps(obj, pretty=True) == snapshot
def test_serialize_numbered_list_output_parser(snapshot: Any) -> None:
obj: Any = NumberedListOutputParser()
assert dumps(obj, pretty=True) == snapshot
def test_serialize_str_output_parser(snapshot: Any) -> None:
obj: Any = StrOutputParser()
assert dumps(obj, pretty=True) == snapshot
def test_serialize_few_shot_prompt_with_templates(snapshot: Any) -> None:
obj: Any = FewShotPromptWithTemplates(
example_prompt=PromptTemplate.from_template(""),
suffix=PromptTemplate.from_template(""),
examples=[],
input_variables=[],
)
assert dumps(obj, pretty=True) == snapshot
def test_serialize_few_shot_chat_message_prompt_template(snapshot: Any) -> None:
obj: Any = FewShotChatMessagePromptTemplate(
example_prompt=HumanMessagePromptTemplate.from_template(""), examples=[]
)
assert dumps(obj, pretty=True) == snapshot
def test_serialize_few_shot_prompt_template(snapshot: Any) -> None:
obj: Any = FewShotPromptTemplate(
example_prompt=PromptTemplate.from_template(""),
suffix="",
examples=[],
input_variables=[],
)
assert dumps(obj, pretty=True) == snapshot
def test_serialize_ai_message_prompt_template(snapshot: Any) -> None:
obj: Any = AIMessagePromptTemplate.from_template("")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_chat_message_prompt_template(snapshot: Any) -> None:
obj: Any = ChatMessagePromptTemplate.from_template("", role="")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_chat_prompt_template(snapshot: Any) -> None:
obj: Any = ChatPromptTemplate.from_template("", role="")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_human_message_prompt_template(snapshot: Any) -> None:
obj: Any = HumanMessagePromptTemplate.from_template("")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_messages_placeholder(snapshot: Any) -> None:
obj: Any = MessagesPlaceholder(variable_name="")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_system_message_prompt_template(snapshot: Any) -> None:
obj: Any = SystemMessagePromptTemplate.from_template("")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_pipeline_prompt_template(snapshot: Any) -> None:
obj: Any = PipelinePromptTemplate(
pipeline_prompts=[], final_prompt=PromptTemplate.from_template("")
)
assert dumps(obj, pretty=True) == snapshot
def test_serialize_prompt_template(snapshot: Any) -> None:
obj: Any = PromptTemplate.from_template("")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_document(snapshot: Any) -> None:
obj: Any = Document(page_content="")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_generation(snapshot: Any) -> None:
obj: Any = Generation(text="")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_generation_chunk(snapshot: Any) -> None:
obj: Any = GenerationChunk(text="")
assert dumps(obj, pretty=True) == snapshot
def test_serialize_chat_generation(snapshot: Any) -> None:
obj: Any = ChatGeneration(message=AIMessage(content=""))
assert dumps(obj, pretty=True) == snapshot
def test_serialize_chat_generation_chunk(snapshot: Any) -> None:
obj: Any = ChatGenerationChunk(message=AIMessage(content=""))
assert dumps(obj, pretty=True) == snapshot

View File

@@ -1736,16 +1736,6 @@ files = [
{file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"},
{file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"},
{file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"},
{file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"},
{file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"},
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"},
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"},
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"},
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"},
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"},
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"},
{file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"},
{file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"},
{file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"},
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"},
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"},
@@ -2920,7 +2910,6 @@ files = [
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
{file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
{file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
{file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
{file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
@@ -2928,15 +2917,8 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
{file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
{file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
{file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
{file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
@@ -2953,7 +2935,6 @@ files = [
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
{file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
{file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
{file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
{file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
@@ -2961,7 +2942,6 @@ files = [
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
{file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
{file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
{file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
{file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
@@ -3909,9 +3889,7 @@ python-versions = ">=3.7"
files = [
{file = "SQLAlchemy-2.0.23-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:638c2c0b6b4661a4fd264f6fb804eccd392745c5887f9317feb64bb7cb03b3ea"},
{file = "SQLAlchemy-2.0.23-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e3b5036aa326dc2df50cba3c958e29b291a80f604b1afa4c8ce73e78e1c9f01d"},
{file = "SQLAlchemy-2.0.23-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:787af80107fb691934a01889ca8f82a44adedbf5ef3d6ad7d0f0b9ac557e0c34"},
{file = "SQLAlchemy-2.0.23-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c14eba45983d2f48f7546bb32b47937ee2cafae353646295f0e99f35b14286ab"},
{file = "SQLAlchemy-2.0.23-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0666031df46b9badba9bed00092a1ffa3aa063a5e68fa244acd9f08070e936d3"},
{file = "SQLAlchemy-2.0.23-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:89a01238fcb9a8af118eaad3ffcc5dedaacbd429dc6fdc43fe430d3a941ff965"},
{file = "SQLAlchemy-2.0.23-cp310-cp310-win32.whl", hash = "sha256:cabafc7837b6cec61c0e1e5c6d14ef250b675fa9c3060ed8a7e38653bd732ff8"},
{file = "SQLAlchemy-2.0.23-cp310-cp310-win_amd64.whl", hash = "sha256:87a3d6b53c39cd173990de2f5f4b83431d534a74f0e2f88bd16eabb5667e65c6"},
@@ -3948,9 +3926,7 @@ files = [
{file = "SQLAlchemy-2.0.23-cp38-cp38-win_amd64.whl", hash = "sha256:964971b52daab357d2c0875825e36584d58f536e920f2968df8d581054eada4b"},
{file = "SQLAlchemy-2.0.23-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:616fe7bcff0a05098f64b4478b78ec2dfa03225c23734d83d6c169eb41a93e55"},
{file = "SQLAlchemy-2.0.23-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0e680527245895aba86afbd5bef6c316831c02aa988d1aad83c47ffe92655e74"},
{file = "SQLAlchemy-2.0.23-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9585b646ffb048c0250acc7dad92536591ffe35dba624bb8fd9b471e25212a35"},
{file = "SQLAlchemy-2.0.23-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4895a63e2c271ffc7a81ea424b94060f7b3b03b4ea0cd58ab5bb676ed02f4221"},
{file = "SQLAlchemy-2.0.23-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:cc1d21576f958c42d9aec68eba5c1a7d715e5fc07825a629015fe8e3b0657fb0"},
{file = "SQLAlchemy-2.0.23-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:967c0b71156f793e6662dd839da54f884631755275ed71f1539c95bbada9aaab"},
{file = "SQLAlchemy-2.0.23-cp39-cp39-win32.whl", hash = "sha256:0a8c6aa506893e25a04233bc721c6b6cf844bafd7250535abb56cb6cc1368884"},
{file = "SQLAlchemy-2.0.23-cp39-cp39-win_amd64.whl", hash = "sha256:f3420d00d2cb42432c1d0e44540ae83185ccbbc67a6054dcc8ab5387add6620b"},

View File

@@ -1239,318 +1239,3 @@ class SQLAlchemyMd5Cache(BaseCache):
@staticmethod
def get_md5(input_string: str) -> str:
return hashlib.md5(input_string.encode()).hexdigest()
ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME = "langchain_astradb_cache"
class AstraDBCache(BaseCache):
"""
Cache that uses Astra DB as a backend.
It uses a single collection as a kv store
The lookup keys, combined in the _id of the documents, are:
- prompt, a string
- llm_string, a deterministic str representation of the model parameters.
(needed to prevent same-prompt-different-model collisions)
"""
def __init__(
self,
*,
collection_name: str = ASTRA_DB_CACHE_DEFAULT_COLLECTION_NAME,
token: Optional[str] = None,
api_endpoint: Optional[str] = None,
astra_db_client: Optional[Any] = None, # 'astrapy.db.AstraDB' if passed
namespace: Optional[str] = None,
):
"""
Create an AstraDB cache using a collection for storage.
Args (only keyword-arguments accepted):
collection_name (str): name of the Astra DB collection to create/use.
token (Optional[str]): API token for Astra DB usage.
api_endpoint (Optional[str]): full URL to the API endpoint,
such as "https://<DB-ID>-us-east1.apps.astra.datastax.com".
astra_db_client (Optional[Any]): *alternative to token+api_endpoint*,
you can pass an already-created 'astrapy.db.AstraDB' instance.
namespace (Optional[str]): namespace (aka keyspace) where the
collection is created. Defaults to the database's "default namespace".
"""
try:
from astrapy.db import (
AstraDB as LibAstraDB,
)
except (ImportError, ModuleNotFoundError):
raise ImportError(
"Could not import a recent astrapy python package. "
"Please install it with `pip install --upgrade astrapy`."
)
# Conflicting-arg checks:
if astra_db_client is not None:
if token is not None or api_endpoint is not None:
raise ValueError(
"You cannot pass 'astra_db_client' to AstraDB together with "
"'token' or 'api_endpoint'."
)
self.collection_name = collection_name
self.token = token
self.api_endpoint = api_endpoint
self.namespace = namespace
if astra_db_client is not None:
self.astra_db = astra_db_client
else:
self.astra_db = LibAstraDB(
token=self.token,
api_endpoint=self.api_endpoint,
namespace=self.namespace,
)
self.collection = self.astra_db.create_collection(
collection_name=self.collection_name,
)
@staticmethod
def _make_id(prompt: str, llm_string: str) -> str:
return f"{_hash(prompt)}#{_hash(llm_string)}"
def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
"""Look up based on prompt and llm_string."""
doc_id = self._make_id(prompt, llm_string)
item = self.collection.find_one(
filter={
"_id": doc_id,
},
projection={
"body_blob": 1,
},
)["data"]["document"]
if item is not None:
generations = _loads_generations(item["body_blob"])
# this protects against malformed cached items:
if generations is not None:
return generations
else:
return None
else:
return None
def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
"""Update cache based on prompt and llm_string."""
doc_id = self._make_id(prompt, llm_string)
blob = _dumps_generations(return_val)
self.collection.upsert(
{
"_id": doc_id,
"body_blob": blob,
},
)
def delete_through_llm(
self, prompt: str, llm: LLM, stop: Optional[List[str]] = None
) -> None:
"""
A wrapper around `delete` with the LLM being passed.
In case the llm(prompt) calls have a `stop` param, you should pass it here
"""
llm_string = get_prompts(
{**llm.dict(), **{"stop": stop}},
[],
)[1]
return self.delete(prompt, llm_string=llm_string)
def delete(self, prompt: str, llm_string: str) -> None:
"""Evict from cache if there's an entry."""
doc_id = self._make_id(prompt, llm_string)
return self.collection.delete_one(doc_id)
def clear(self, **kwargs: Any) -> None:
"""Clear cache. This is for all LLMs at once."""
self.astra_db.truncate_collection(self.collection_name)
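# A usage sketch (assuming langchain's global set_llm_cache helper;
# endpoint and token values are placeholders):
#
#     from langchain.globals import set_llm_cache
#     set_llm_cache(AstraDBCache(
#         api_endpoint="https://<DB-ID>-us-east1.apps.astra.datastax.com",
#         token="AstraCS:...",
#     ))
#
# Identical (prompt, llm_string) calls are then served from Astra DB.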
ASTRA_DB_SEMANTIC_CACHE_DEFAULT_THRESHOLD = 0.85
ASTRA_DB_SEMANTIC_CACHE_DEFAULT_COLLECTION_NAME = "langchain_astradb_semantic_cache"
ASTRA_DB_SEMANTIC_CACHE_EMBEDDING_CACHE_SIZE = 16
class AstraDBSemanticCache(BaseCache):
"""
Cache that uses Astra DB as a vector-store backend for semantic
(i.e. similarity-based) lookup.
It uses a single (vector) collection and can store
cached values from several LLMs, so the LLM's 'llm_string' is stored
in the document metadata.
You can choose the preferred similarity (or use the API default) --
remember the threshold might require metric-dependend tuning.
"""
def __init__(
self,
*,
collection_name: str = ASTRA_DB_SEMANTIC_CACHE_DEFAULT_COLLECTION_NAME,
token: Optional[str] = None,
api_endpoint: Optional[str] = None,
astra_db_client: Optional[Any] = None, # 'astrapy.db.AstraDB' if passed
namespace: Optional[str] = None,
embedding: Embeddings,
metric: Optional[str] = None,
similarity_threshold: float = ASTRA_DB_SEMANTIC_CACHE_DEFAULT_THRESHOLD,
):
"""
Initialize the cache with all relevant parameters.
Args:
collection_name (str): name of the Astra DB collection to create/use.
token (Optional[str]): API token for Astra DB usage.
api_endpoint (Optional[str]): full URL to the API endpoint,
such as "https://<DB-ID>-us-east1.apps.astra.datastax.com".
astra_db_client (Optional[Any]): *alternative to token+api_endpoint*,
you can pass an already-created 'astrapy.db.AstraDB' instance.
namespace (Optional[str]): namespace (aka keyspace) where the
collection is created. Defaults to the database's "default namespace".
embedding (Embeddings): embedding provider for semantic
encoding and search.
metric (Optional[str]): the function to use for evaluating similarity
of text embeddings. Defaults to 'cosine' (alternatives: 'euclidean',
'dot_product').
similarity_threshold (float, optional): the minimum similarity
for accepting a (semantic-search) match.
The default score threshold is tuned to the default metric;
tune it carefully yourself if switching to another distance metric.
"""
try:
from astrapy.db import (
AstraDB as LibAstraDB,
)
except (ImportError, ModuleNotFoundError):
raise ImportError(
"Could not import a recent astrapy python package. "
"Please install it with `pip install --upgrade astrapy`."
)
# Conflicting-arg checks:
if astra_db_client is not None:
if token is not None or api_endpoint is not None:
raise ValueError(
"You cannot pass 'astra_db_client' to AstraDB together with "
"'token' or 'api_endpoint'."
)
self.embedding = embedding
self.metric = metric
self.similarity_threshold = similarity_threshold
# The contract for this class has separate lookup and update:
# in order to spare some embedding calculations we cache them between
# the two calls.
# Note: each instance of this class has its own `_get_embedding` with
# its own lru.
@lru_cache(maxsize=ASTRA_DB_SEMANTIC_CACHE_EMBEDDING_CACHE_SIZE)
def _cache_embedding(text: str) -> List[float]:
return self.embedding.embed_query(text=text)
self._get_embedding = _cache_embedding
self.embedding_dimension = self._get_embedding_dimension()
self.collection_name = collection_name
self.token = token
self.api_endpoint = api_endpoint
self.namespace = namespace
if astra_db_client is not None:
self.astra_db = astra_db_client
else:
self.astra_db = LibAstraDB(
token=self.token,
api_endpoint=self.api_endpoint,
namespace=self.namespace,
)
self.collection = self.astra_db.create_collection(
collection_name=self.collection_name,
dimension=self.embedding_dimension,
metric=self.metric,
)
def _get_embedding_dimension(self) -> int:
return len(self._get_embedding(text="This is a sample sentence."))
@staticmethod
def _make_id(prompt: str, llm_string: str) -> str:
return f"{_hash(prompt)}#{_hash(llm_string)}"
def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
"""Update cache based on prompt and llm_string."""
doc_id = self._make_id(prompt, llm_string)
llm_string_hash = _hash(llm_string)
embedding_vector = self._get_embedding(text=prompt)
body = _dumps_generations(return_val)
self.collection.upsert(
{
"_id": doc_id,
"body_blob": body,
"llm_string_hash": llm_string_hash,
"$vector": embedding_vector,
}
)
def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
"""Look up based on prompt and llm_string."""
hit_with_id = self.lookup_with_id(prompt, llm_string)
if hit_with_id is not None:
return hit_with_id[1]
else:
return None
def lookup_with_id(
self, prompt: str, llm_string: str
) -> Optional[Tuple[str, RETURN_VAL_TYPE]]:
"""
Look up based on prompt and llm_string.
If there are hits, return (document_id, cached_entry) for the top hit
"""
prompt_embedding: List[float] = self._get_embedding(text=prompt)
llm_string_hash = _hash(llm_string)
hit = self.collection.vector_find_one(
vector=prompt_embedding,
filter={
"llm_string_hash": llm_string_hash,
},
fields=["body_blob", "_id"],
include_similarity=True,
)
if hit is None or hit["$similarity"] < self.similarity_threshold:
return None
else:
generations = _loads_generations(hit["body_blob"])
if generations is not None:
# this protects against malformed cached items:
return (hit["_id"], generations)
else:
return None
def lookup_with_id_through_llm(
self, prompt: str, llm: LLM, stop: Optional[List[str]] = None
) -> Optional[Tuple[str, RETURN_VAL_TYPE]]:
llm_string = get_prompts(
{**llm.dict(), **{"stop": stop}},
[],
)[1]
return self.lookup_with_id(prompt, llm_string=llm_string)
def delete_by_document_id(self, document_id: str) -> None:
"""
Given this is a "similarity search" cache, an invalidation pattern
that makes sense is first a lookup to get an ID, and then deleting
with that ID. This is for the second step.
"""
self.collection.delete_one(document_id)
def clear(self, **kwargs: Any) -> None:
"""Clear the *whole* semantic cache."""
self.astra_db.truncate_collection(self.collection_name)
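
A minimal usage sketch for the semantic cache, under the same environment-variable assumptions; the embedding model here is illustrative (any Embeddings implementation works):

import os

from langchain.cache import AstraDBSemanticCache
from langchain.embeddings import OpenAIEmbeddings  # illustrative choice
from langchain.globals import set_llm_cache

set_llm_cache(
    AstraDBSemanticCache(
        token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
        api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"],
        embedding=OpenAIEmbeddings(),
        similarity_threshold=0.85,  # tune if you switch away from the cosine default
    )
)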

View File

@@ -14,9 +14,7 @@ class CypherQueryCorrector:
property_pattern = re.compile(r"\{.+?\}")
node_pattern = re.compile(r"\(.+?\)")
path_pattern = re.compile(
r"(\([^\,\(\)]*?(\{.+\})?[^\,\(\)]*?\))(<?-)(\[.*?\])?(->?)(\([^\,\(\)]*?(\{.+\})?[^\,\(\)]*?\))"
)
path_pattern = re.compile(r"\(.*\).*-.*-.*\(.*\)")
node_relation_node_pattern = re.compile(
r"(\()+(?P<left_node>[^()]*?)\)(?P<relation>.*?)\((?P<right_node>[^()]*?)(\))+"
)
@@ -64,17 +62,7 @@ class CypherQueryCorrector:
Args:
query: cypher query
"""
paths = []
idx = 0
while matched := self.path_pattern.findall(query[idx:]):
matched = matched[0]
matched = [
m for i, m in enumerate(matched) if i not in [1, len(matched) - 1]
]
path = "".join(matched)
idx = query.find(path) + len(path) - len(matched[-1])
paths.append(path)
return paths
return re.findall(self.path_pattern, query)
def judge_direction(self, relation: str) -> str:
"""

View File

@@ -317,8 +317,6 @@ class GoogleDriveLoader(BaseLoader, BaseModel):
docs = loader.load()
for doc in docs:
doc.metadata["source"] = f"https://drive.google.com/file/d/{id}/view"
if "title" not in doc.metadata:
doc.metadata["title"] = f"{file.get('name')}"
return docs
else:

View File

@@ -1,216 +0,0 @@
"""Loads data from OneNote Notebooks"""
from pathlib import Path
from typing import Dict, Iterator, List, Optional
import requests
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
from langchain.pydantic_v1 import BaseModel, BaseSettings, Field, FilePath, SecretStr
class _OneNoteGraphSettings(BaseSettings):
client_id: str = Field(..., env="MS_GRAPH_CLIENT_ID")
client_secret: SecretStr = Field(..., env="MS_GRAPH_CLIENT_SECRET")
class Config:
"""Config for OneNoteGraphSettings."""
env_prefix = ""
case_sensitive = False
env_file = ".env"
class OneNoteLoader(BaseLoader, BaseModel):
"""Load pages from OneNote notebooks."""
settings: _OneNoteGraphSettings = Field(default_factory=_OneNoteGraphSettings)
"""Settings for the Microsoft Graph API client."""
auth_with_token: bool = False
"""Whether to authenticate with a token or not. Defaults to False."""
access_token: str = ""
"""Personal access token"""
onenote_api_base_url: str = "https://graph.microsoft.com/v1.0/me/onenote"
"""URL of Microsoft Graph API for OneNote"""
authority_url = "https://login.microsoftonline.com/consumers/"
"""A URL that identifies a token authority"""
token_path: FilePath = Path.home() / ".credentials" / "onenote_graph_token.txt"
"""Path to the file where the access token is stored"""
notebook_name: Optional[str] = None
"""Filter on notebook name"""
section_name: Optional[str] = None
"""Filter on section name"""
page_title: Optional[str] = None
"""Filter on section name"""
object_ids: Optional[List[str]] = None
""" The IDs of the objects to load data from."""
def lazy_load(self) -> Iterator[Document]:
"""
Get pages from OneNote notebooks.
Returns:
An iterator over Documents with attributes:
- page_content
- metadata
- title
"""
self._auth()
try:
from bs4 import BeautifulSoup
except ImportError:
raise ImportError(
"beautifulsoup4 package not found, please install it with "
"`pip install bs4`"
)
if self.object_ids is not None:
for object_id in self.object_ids:
page_content_html = self._get_page_content(object_id)
soup = BeautifulSoup(page_content_html, "html.parser")
page_title = ""
title_tag = soup.title
if title_tag:
page_title = title_tag.get_text(strip=True)
page_content = soup.get_text(separator="\n", strip=True)
yield Document(
page_content=page_content, metadata={"title": page_title}
)
else:
request_url = self._url
while request_url != "":
response = requests.get(request_url, headers=self._headers, timeout=10)
response.raise_for_status()
pages = response.json()
for page in pages["value"]:
page_id = page["id"]
page_content_html = self._get_page_content(page_id)
soup = BeautifulSoup(page_content_html, "html.parser")
page_title = ""
title_tag = soup.title
if title_tag:
page_title = title_tag.get_text(strip=True)
page_content = soup.get_text(separator="\n", strip=True)
yield Document(
page_content=page_content, metadata={"title": page_title}
)
if "@odata.nextLink" in pages:
request_url = pages["@odata.nextLink"]
else:
request_url = ""
def load(self) -> List[Document]:
"""
Get pages from OneNote notebooks.
Returns:
A list of Documents with attributes:
- page_content
- metadata
- title
"""
return list(self.lazy_load())
def _get_page_content(self, page_id: str) -> str:
"""Get page content from OneNote API"""
request_url = self.onenote_api_base_url + f"/pages/{page_id}/content"
response = requests.get(request_url, headers=self._headers, timeout=10)
response.raise_for_status()
return response.text
@property
def _headers(self) -> Dict[str, str]:
"""Return headers for requests to OneNote API"""
return {
"Authorization": f"Bearer {self.access_token}",
}
@property
def _scopes(self) -> List[str]:
"""Return required scopes."""
return ["Notes.Read"]
def _auth(self) -> None:
"""Authenticate with Microsoft Graph API"""
if self.access_token != "":
return
if self.auth_with_token:
with self.token_path.open("r") as token_file:
self.access_token = token_file.read()
else:
try:
from msal import ConfidentialClientApplication
except ImportError as e:
raise ImportError(
"MSAL package not found, please install it with `pip install msal`"
) from e
client_instance = ConfidentialClientApplication(
client_id=self.settings.client_id,
client_credential=self.settings.client_secret.get_secret_value(),
authority=self.authority_url,
)
authorization_request_url = client_instance.get_authorization_request_url(
self._scopes
)
print("Visit the following url to give consent:")
print(authorization_request_url)
authorization_url = input("Paste the authenticated url here:\n")
authorization_code = authorization_url.split("code=")[1].split("&")[0]
access_token_json = client_instance.acquire_token_by_authorization_code(
code=authorization_code, scopes=self._scopes
)
self.access_token = access_token_json["access_token"]
try:
if not self.token_path.parent.exists():
self.token_path.parent.mkdir(parents=True)
except Exception as e:
raise Exception(
f"Could not create the folder {self.token_path.parent} "
+ "to store the access token."
) from e
with self.token_path.open("w") as token_file:
token_file.write(self.access_token)
@property
def _url(self) -> str:
"""Create URL for getting page ids from the OneNoteApi API."""
query_params_list = []
filter_list = []
expand_list = []
query_params_list.append("$select=id")
if self.notebook_name is not None:
filter_list.append(
"parentNotebook/displayName%20eq%20"
+ f"'{self.notebook_name.replace(' ', '%20')}'"
)
expand_list.append("parentNotebook")
if self.section_name is not None:
filter_list.append(
"parentSection/displayName%20eq%20"
+ f"'{self.section_name.replace(' ', '%20')}'"
)
expand_list.append("parentSection")
if self.page_title is not None:
filter_list.append(
"title%20eq%20" + f"'{self.page_title.replace(' ', '%20')}'"
)
if len(expand_list) > 0:
query_params_list.append("$expand=" + ",".join(expand_list))
if len(filter_list) > 0:
query_params_list.append("$filter=" + "%20and%20".join(filter_list))
query_params = "&".join(query_params_list)
if query_params != "":
query_params = "?" + query_params
return f"{self.onenote_api_base_url}/pages{query_params}"

View File

@@ -32,7 +32,6 @@ from langchain.memory.buffer import (
)
from langchain.memory.buffer_window import ConversationBufferWindowMemory
from langchain.memory.chat_message_histories import (
AstraDBChatMessageHistory,
CassandraChatMessageHistory,
ChatMessageHistory,
CosmosDBChatMessageHistory,
@@ -69,7 +68,6 @@ from langchain.memory.vectorstore import VectorStoreRetrieverMemory
from langchain.memory.zep_memory import ZepMemory
__all__ = [
"AstraDBChatMessageHistory",
"CassandraChatMessageHistory",
"ChatMessageHistory",
"CombinedMemory",

View File

@@ -1,6 +1,3 @@
from langchain.memory.chat_message_histories.astradb import (
AstraDBChatMessageHistory,
)
from langchain.memory.chat_message_histories.cassandra import (
CassandraChatMessageHistory,
)
@@ -34,7 +31,6 @@ from langchain.memory.chat_message_histories.xata import XataChatMessageHistory
from langchain.memory.chat_message_histories.zep import ZepChatMessageHistory
__all__ = [
"AstraDBChatMessageHistory",
"ChatMessageHistory",
"CassandraChatMessageHistory",
"CosmosDBChatMessageHistory",

View File

@@ -1,114 +0,0 @@
"""Astra DB - based chat message history, based on astrapy."""
from __future__ import annotations
import json
import time
import typing
from typing import List, Optional
if typing.TYPE_CHECKING:
from astrapy.db import AstraDB as LibAstraDB
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.messages import (
BaseMessage,
message_to_dict,
messages_from_dict,
)
DEFAULT_COLLECTION_NAME = "langchain_message_store"
class AstraDBChatMessageHistory(BaseChatMessageHistory):
"""Chat message history that stores history in Astra DB.
Args (only keyword-arguments accepted):
session_id: arbitrary key that is used to store the messages
of a single chat session.
collection_name (str): name of the Astra DB collection to create/use.
token (Optional[str]): API token for Astra DB usage.
api_endpoint (Optional[str]): full URL to the API endpoint,
such as "https://<DB-ID>-us-east1.apps.astra.datastax.com".
astra_db_client (Optional[Any]): *alternative to token+api_endpoint*,
you can pass an already-created 'astrapy.db.AstraDB' instance.
namespace (Optional[str]): namespace (aka keyspace) where the
collection is created. Defaults to the database's "default namespace".
"""
def __init__(
self,
*,
session_id: str,
collection_name: str = DEFAULT_COLLECTION_NAME,
token: Optional[str] = None,
api_endpoint: Optional[str] = None,
astra_db_client: Optional[LibAstraDB] = None, # type 'astrapy.db.AstraDB'
namespace: Optional[str] = None,
) -> None:
"""Create an Astra DB chat message history."""
try:
from astrapy.db import AstraDB as LibAstraDB
except (ImportError, ModuleNotFoundError):
raise ImportError(
"Could not import a recent astrapy python package. "
"Please install it with `pip install --upgrade astrapy`."
)
# Conflicting-arg checks:
if astra_db_client is not None:
if token is not None or api_endpoint is not None:
raise ValueError(
"You cannot pass 'astra_db_client' to AstraDB if passing "
"'token' and 'api_endpoint'."
)
self.session_id = session_id
self.collection_name = collection_name
self.token = token
self.api_endpoint = api_endpoint
self.namespace = namespace
if astra_db_client is not None:
self.astra_db = astra_db_client
else:
self.astra_db = LibAstraDB(
token=self.token,
api_endpoint=self.api_endpoint,
namespace=self.namespace,
)
self.collection = self.astra_db.create_collection(self.collection_name)
@property
def messages(self) -> List[BaseMessage]: # type: ignore
"""Retrieve all session messages from DB"""
message_blobs = [
doc["body_blob"]
for doc in sorted(
self.collection.paginated_find(
filter={
"session_id": self.session_id,
},
projection={
"timestamp": 1,
"body_blob": 1,
},
),
key=lambda _doc: _doc["timestamp"],
)
]
items = [json.loads(message_blob) for message_blob in message_blobs]
messages = messages_from_dict(items)
return messages
def add_message(self, message: BaseMessage) -> None:
"""Write a message to the table"""
self.collection.insert_one(
{
"timestamp": time.time(),
"session_id": self.session_id,
"body_blob": json.dumps(message_to_dict(message)),
}
)
def clear(self) -> None:
"""Clear session memory from DB"""
self.collection.delete_many(filter={"session_id": self.session_id})
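
A minimal usage sketch, again assuming the two Astra DB environment variables; the session id is an arbitrary, hypothetical key:

import os

from langchain.memory.chat_message_histories.astradb import AstraDBChatMessageHistory

history = AstraDBChatMessageHistory(
    session_id="session-demo",  # arbitrary key, hypothetical value
    token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
    api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"],
)
history.add_user_message("hello")  # helpers inherited from BaseChatMessageHistory
history.add_ai_message("hi there")
print(history.messages)  # sorted by the stored timestamp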

View File

@@ -7,9 +7,6 @@ from langchain_core.prompts.chat import (
ChatMessagePromptTemplate,
ChatPromptTemplate,
HumanMessagePromptTemplate,
MessageLike,
MessageLikeRepresentation,
MessagePromptTemplateT,
MessagesPlaceholder,
SystemMessagePromptTemplate,
_convert_to_message,
@@ -30,7 +27,4 @@ __all__ = [
"ChatPromptValueConcrete",
"_convert_to_message",
"_create_template_from_message_type",
"MessagePromptTemplateT",
"MessageLike",
"MessageLikeRepresentation",
]

View File

@@ -50,7 +50,6 @@ from langchain.retrievers.metal import MetalRetriever
from langchain.retrievers.milvus import MilvusRetriever
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.retrievers.outline import OutlineRetriever
from langchain.retrievers.parent_document_retriever import ParentDocumentRetriever
from langchain.retrievers.pinecone_hybrid_search import PineconeHybridSearchRetriever
from langchain.retrievers.pubmed import PubMedRetriever
@@ -93,7 +92,6 @@ __all__ = [
"MetalRetriever",
"MilvusRetriever",
"MultiQueryRetriever",
"OutlineRetriever",
"PineconeHybridSearchRetriever",
"PubMedRetriever",
"RemoteLangChainRetriever",

View File

@@ -1,20 +0,0 @@
from typing import List
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from langchain.utilities.outline import OutlineAPIWrapper
class OutlineRetriever(BaseRetriever, OutlineAPIWrapper):
"""Retriever for Outline API.
It wraps run() to get_relevant_documents().
It uses all OutlineAPIWrapper arguments without any change.
"""
def _get_relevant_documents(
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
) -> List[Document]:
return self.run(query=query)
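
A sketch of the retriever in use, assuming OUTLINE_INSTANCE_URL and OUTLINE_API_KEY are set (per the validator in OutlineAPIWrapper); the query is hypothetical:

from langchain.retrievers import OutlineRetriever

retriever = OutlineRetriever()  # picks up OUTLINE_INSTANCE_URL / OUTLINE_API_KEY
docs = retriever.get_relevant_documents("quarterly planning")  # hypothetical query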

View File

@@ -1,3 +1,3 @@
from langchain_core.caches import RETURN_VAL_TYPE, BaseCache
from langchain_core.caches import BaseCache
__all__ = ["BaseCache", "RETURN_VAL_TYPE"]
__all__ = ["BaseCache"]

View File

@@ -1,15 +1,4 @@
from langchain_core.language_models import (
BaseLanguageModel,
LanguageModelInput,
LanguageModelOutput,
get_tokenizer,
)
from langchain_core.language_models import BaseLanguageModel, get_tokenizer
from langchain_core.language_models.base import _get_token_ids_default_method
__all__ = [
"get_tokenizer",
"BaseLanguageModel",
"_get_token_ids_default_method",
"LanguageModelInput",
"LanguageModelOutput",
]
__all__ = ["get_tokenizer", "BaseLanguageModel", "_get_token_ids_default_method"]

View File

@@ -1,7 +1,6 @@
from langchain_core.messages import (
AIMessage,
AIMessageChunk,
AnyMessage,
BaseMessage,
BaseMessageChunk,
ChatMessage,
@@ -47,5 +46,4 @@ __all__ = [
"_message_to_dict",
"_message_from_dict",
"message_to_dict",
"AnyMessage",
]

View File

@@ -7,7 +7,6 @@ from langchain_core.output_parsers import (
BaseTransformOutputParser,
StrOutputParser,
)
from langchain_core.output_parsers.base import T
# Backwards compatibility.
NoOpOutputParser = StrOutputParser
@@ -21,5 +20,4 @@ __all__ = [
"NoOpOutputParser",
"StrOutputParser",
"OutputParserException",
"T",
]

View File

@@ -1,5 +1,4 @@
from langchain_core.runnables.base import (
Other,
Runnable,
RunnableBinding,
RunnableBindingBase,
@@ -7,22 +6,16 @@ from langchain_core.runnables.base import (
RunnableEachBase,
RunnableGenerator,
RunnableLambda,
RunnableLike,
RunnableParallel,
RunnableSequence,
RunnableSerializable,
coerce_to_runnable,
)
from langchain_core.runnables.utils import Input, Output
# Backwards compatibility.
RunnableMap = RunnableParallel
__all__ = [
"Input",
"Output",
"RunnableLike",
"Other",
"Runnable",
"RunnableSerializable",
"RunnableSequence",

View File

@@ -1,11 +1,3 @@
from langchain_core.runnables.history import (
GetSessionHistoryCallable,
MessagesOrDictWithMessages,
RunnableWithMessageHistory,
)
from langchain_core.runnables.history import RunnableWithMessageHistory
__all__ = [
"RunnableWithMessageHistory",
"GetSessionHistoryCallable",
"MessagesOrDictWithMessages",
]
__all__ = ["RunnableWithMessageHistory"]

View File

@@ -1,3 +1,3 @@
from langchain_core.runnables.retry import RunnableRetry, U
from langchain_core.runnables.retry import RunnableRetry
__all__ = ["RunnableRetry", "U"]
__all__ = ["RunnableRetry"]

View File

@@ -1,7 +1,5 @@
from langchain_core.runnables.utils import (
Addable,
AddableDict,
AnyConfigurableField,
ConfigurableField,
ConfigurableFieldMultiOption,
ConfigurableFieldSingleOption,
@@ -46,6 +44,4 @@ __all__ = [
"gather_with_concurrency",
"Input",
"Output",
"Addable",
"AnyConfigurableField",
]

View File

@@ -1,3 +1,3 @@
from langchain_core.stores import BaseStore, K, V
from langchain_core.stores import BaseStore
__all__ = ["BaseStore", "K", "V"]
__all__ = ["BaseStore"]

View File

@@ -1,3 +1,3 @@
from langchain_core.vectorstores import VST, VectorStore, VectorStoreRetriever
from langchain_core.vectorstores import VectorStore, VectorStoreRetriever
__all__ = ["VectorStore", "VectorStoreRetriever", "VST"]
__all__ = ["VectorStore", "VectorStoreRetriever"]

View File

@@ -122,12 +122,6 @@ def _import_openweathermap() -> Any:
return OpenWeatherMapAPIWrapper
def _import_outline() -> Any:
from langchain.utilities.outline import OutlineAPIWrapper
return OutlineAPIWrapper
def _import_portkey() -> Any:
from langchain.utilities.portkey import Portkey
@@ -257,8 +251,6 @@ def __getattr__(name: str) -> Any:
return _import_metaphor_search()
elif name == "OpenWeatherMapAPIWrapper":
return _import_openweathermap()
elif name == "OutlineAPIWrapper":
return _import_outline()
elif name == "Portkey":
return _import_portkey()
elif name == "PowerBIDataset":
@@ -313,7 +305,6 @@ __all__ = [
"MaxComputeAPIWrapper",
"MetaphorSearchAPIWrapper",
"OpenWeatherMapAPIWrapper",
"OutlineAPIWrapper",
"Portkey",
"PowerBIDataset",
"PubMedAPIWrapper",

View File

@@ -1,96 +0,0 @@
"""Util that calls Outline."""
import logging
from typing import Any, Dict, List, Optional
import requests
from langchain_core.documents import Document
from langchain_core.pydantic_v1 import BaseModel, root_validator
from langchain.utils import get_from_dict_or_env
logger = logging.getLogger(__name__)
OUTLINE_MAX_QUERY_LENGTH = 300
class OutlineAPIWrapper(BaseModel):
"""Wrapper around OutlineAPI.
This wrapper uses the Outline API to search the documents of your instance.
By default it returns the document content of the top-k results,
truncated to doc_content_chars_max characters.
"""
top_k_results: int = 3
load_all_available_meta: bool = False
doc_content_chars_max: int = 4000
outline_instance_url: Optional[str] = None
outline_api_key: Optional[str] = None
outline_search_endpoint: str = "/api/documents.search"
@root_validator()
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that instance url and api key exists in environment."""
outline_instance_url = get_from_dict_or_env(
values, "outline_instance_url", "OUTLINE_INSTANCE_URL"
)
values["outline_instance_url"] = outline_instance_url
outline_api_key = get_from_dict_or_env(
values, "outline_api_key", "OUTLINE_API_KEY"
)
values["outline_api_key"] = outline_api_key
return values
def _result_to_document(self, outline_res: Any) -> Document:
main_meta = {
"title": outline_res["document"]["title"],
"source": self.outline_instance_url + outline_res["document"]["url"],
}
add_meta = (
{
"id": outline_res["document"]["id"],
"ranking": outline_res["ranking"],
"collection_id": outline_res["document"]["collectionId"],
"parent_document_id": outline_res["document"]["parentDocumentId"],
"revision": outline_res["document"]["revision"],
"created_by": outline_res["document"]["createdBy"]["name"],
}
if self.load_all_available_meta
else {}
)
doc = Document(
page_content=outline_res["document"]["text"][: self.doc_content_chars_max],
metadata={
**main_meta,
**add_meta,
},
)
return doc
def _outline_api_query(self, query: str) -> List:
raw_result = requests.post(
f"{self.outline_instance_url}{self.outline_search_endpoint}",
data={"query": query, "limit": self.top_k_results},
headers={"Authorization": f"Bearer {self.outline_api_key}"},
)
if not raw_result.ok:
raise ValueError(f"Outline API returned an error: {raw_result.text}")
return raw_result.json()["data"]
def run(self, query: str) -> List[Document]:
"""
Run Outline search and get the document content plus the meta information.
Returns: a list of documents.
"""
results = self._outline_api_query(query[:OUTLINE_MAX_QUERY_LENGTH])
docs = []
for result in results[: self.top_k_results]:
if doc := self._result_to_document(result):
docs.append(doc)
return docs
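
A usage sketch for the wrapper, with credentials supplied explicitly rather than via the environment (the key value is hypothetical; the instance URL matches the one used in the integration tests):

from langchain.utilities import OutlineAPIWrapper

outline = OutlineAPIWrapper(
    outline_instance_url="https://app.getoutline.com",
    outline_api_key="<api-key>",  # hypothetical
    top_k_results=3,
)
docs = outline.run("search term")  # Documents with title/source metadata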

View File

@@ -4128,13 +4128,13 @@ tests = ["pandas (>=1.4)", "pytest", "pytest-asyncio", "pytest-mock"]
[[package]]
name = "langchain-core"
version = "0.0.6"
version = "0.0.4"
description = "Building applications with LLMs through composability"
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
{file = "langchain_core-0.0.6-py3-none-any.whl", hash = "sha256:dcc727ff811159e09fc1d72caae4aaea892611349d5c3fc1c18b3a19573faf27"},
{file = "langchain_core-0.0.6.tar.gz", hash = "sha256:cffd1031764d838ad2a2f3f64477b710923ddad58eb9fe3130ff94b3669e8dd8"},
{file = "langchain_core-0.0.4-py3-none-any.whl", hash = "sha256:f4c6812db462a298a5c2a59d5c041565e68ab5ed9e8790008fbb85cdc54e8667"},
{file = "langchain_core-0.0.4.tar.gz", hash = "sha256:8e50cc7cc76d577c1da41ea290e22ea893ccc4cd0e4486c9df4f0c01d434bb0e"},
]
[package.dependencies]
@@ -4848,13 +4848,13 @@ tests = ["pytest (>=4.6)"]
[[package]]
name = "msal"
version = "1.25.0"
version = "1.24.1"
description = "The Microsoft Authentication Library (MSAL) for Python library"
optional = true
python-versions = ">=2.7"
files = [
{file = "msal-1.25.0-py2.py3-none-any.whl", hash = "sha256:386df621becb506bc315a713ec3d4d5b5d6163116955c7dde23622f156b81af6"},
{file = "msal-1.25.0.tar.gz", hash = "sha256:f44329fdb59f4f044c779164a34474b8a44ad9e4940afbc4c3a3a2bbe90324d9"},
{file = "msal-1.24.1-py2.py3-none-any.whl", hash = "sha256:ce4320688f95c301ee74a4d0e9dbcfe029a63663a8cc61756f40d0d0d36574ad"},
{file = "msal-1.24.1.tar.gz", hash = "sha256:aa0972884b3c6fdec53d9a0bd15c12e5bd7b71ac1b66d746f54d128709f3f8f8"},
]
[package.dependencies]
@@ -11075,7 +11075,7 @@ cli = ["typer"]
cohere = ["cohere"]
docarray = ["docarray"]
embeddings = ["sentence-transformers"]
extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "dashvector", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "geopandas", "gitpython", "google-cloud-documentai", "gql", "html2text", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "openai", "openai", "openapi-pydantic", "pandas", "pdfminer-six", "pgvector", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict"]
extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "dashvector", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "geopandas", "gitpython", "google-cloud-documentai", "gql", "html2text", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "openai", "openai", "openapi-pydantic", "pandas", "pdfminer-six", "pgvector", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict"]
javascript = ["esprima"]
llms = ["clarifai", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "openlm", "torch", "transformers"]
openai = ["openai", "tiktoken"]
@@ -11085,4 +11085,4 @@ text-helpers = ["chardet"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0"
content-hash = "37e62f668e1acddc4e462fdac5f694af3916b6edbd1ccde0a54c9a57524d6c92"
content-hash = "56fad2f9566f5553affb797fc78041f15f2d4b95f35e69f79f8a7c0a3db384d4"

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain"
version = "0.0.341"
version = "0.0.339rc1"
description = "Building applications with LLMs through composability"
authors = []
license = "MIT"
@@ -12,7 +12,7 @@ langchain-server = "langchain.server:main"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
langchain-core = "^0.0.6"
langchain-core = "^0.0.4"
pydantic = ">=1,<3"
SQLAlchemy = ">=1.4,<3"
requests = "^2"
@@ -143,7 +143,6 @@ azure-ai-textanalytics = {version = "^5.3.0", optional = true}
google-cloud-documentai = {version = "^2.20.1", optional = true}
fireworks-ai = {version = "^0.6.0", optional = true, python = ">=3.9,<4.0"}
javelin-sdk = {version = "^0.1.8", optional = true}
msal = {version = "^1.25.0", optional = true}
[tool.poetry.group.test.dependencies]
@@ -342,7 +341,6 @@ extended_testing = [
"atlassian-python-api",
"mwparserfromhell",
"mwxml",
"msal",
"pandas",
"telethon",
"psychicapi",

View File

@@ -1,99 +0,0 @@
"""
Test AstraDB caches. Requires an Astra DB vector instance.
Required to run this test:
- a recent `astrapy` Python package available;
- an Astra DB instance;
- the two environment variables set:
export ASTRA_DB_API_ENDPOINT="https://<DB-ID>-us-east1.apps.astra.datastax.com"
export ASTRA_DB_APPLICATION_TOKEN="AstraCS:........."
- optionally this as well (otherwise defaults are used):
export ASTRA_DB_KEYSPACE="my_keyspace"
"""
import os
from typing import Iterator
import pytest
from langchain_core.outputs import Generation, LLMResult
from langchain.cache import AstraDBCache, AstraDBSemanticCache
from langchain.globals import get_llm_cache, set_llm_cache
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
from tests.unit_tests.llms.fake_llm import FakeLLM
def _has_env_vars() -> bool:
return all(
[
"ASTRA_DB_APPLICATION_TOKEN" in os.environ,
"ASTRA_DB_API_ENDPOINT" in os.environ,
]
)
@pytest.fixture(scope="module")
def astradb_cache() -> Iterator[AstraDBCache]:
cache = AstraDBCache(
collection_name="lc_integration_test_cache",
token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"],
namespace=os.environ.get("ASTRA_DB_KEYSPACE"),
)
yield cache
cache.astra_db.delete_collection("lc_integration_test_cache")
@pytest.fixture(scope="module")
def astradb_semantic_cache() -> Iterator[AstraDBSemanticCache]:
fake_embe = FakeEmbeddings()
sem_cache = AstraDBSemanticCache(
collection_name="lc_integration_test_sem_cache",
token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"],
namespace=os.environ.get("ASTRA_DB_KEYSPACE"),
embedding=fake_embe,
)
yield sem_cache
sem_cache.astra_db.delete_collection("lc_integration_test_sem_cache")
@pytest.mark.requires("astrapy")
@pytest.mark.skipif(not _has_env_vars(), reason="Missing Astra DB env. vars")
class TestAstraDBCaches:
def test_astradb_cache(self, astradb_cache: AstraDBCache) -> None:
set_llm_cache(astradb_cache)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
output = llm.generate(["foo"])
expected_output = LLMResult(
generations=[[Generation(text="fizz")]],
llm_output={},
)
assert output == expected_output
astradb_cache.clear()
def test_astradb_semantic_cache(
self, astradb_semantic_cache: AstraDBSemanticCache
) -> None:
set_llm_cache(astradb_semantic_cache)
llm = FakeLLM()
params = llm.dict()
params["stop"] = None
llm_string = str(sorted([(k, v) for k, v in params.items()]))
get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
output = llm.generate(["bar"]) # same embedding as 'foo'
expected_output = LLMResult(
generations=[[Generation(text="fizz")]],
llm_output={},
)
assert output == expected_output
# clear the cache
astradb_semantic_cache.clear()
output = llm.generate(["bar"]) # 'fizz' is erased away now
assert output != expected_output
astradb_semantic_cache.clear()

View File

@@ -1,104 +0,0 @@
import os
from typing import Iterable
import pytest
from langchain_core.messages import AIMessage, HumanMessage
from langchain.memory import ConversationBufferMemory
from langchain.memory.chat_message_histories.astradb import (
AstraDBChatMessageHistory,
)
def _has_env_vars() -> bool:
return all(
[
"ASTRA_DB_APPLICATION_TOKEN" in os.environ,
"ASTRA_DB_API_ENDPOINT" in os.environ,
]
)
@pytest.fixture(scope="function")
def history1() -> Iterable[AstraDBChatMessageHistory]:
history1 = AstraDBChatMessageHistory(
session_id="session-test-1",
collection_name="langchain_cmh_test",
token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"],
namespace=os.environ.get("ASTRA_DB_KEYSPACE"),
)
yield history1
history1.astra_db.delete_collection("langchain_cmh_test")
@pytest.fixture(scope="function")
def history2() -> Iterable[AstraDBChatMessageHistory]:
history2 = AstraDBChatMessageHistory(
session_id="session-test-2",
collection_name="langchain_cmh_test",
token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"],
namespace=os.environ.get("ASTRA_DB_KEYSPACE"),
)
yield history2
history2.astra_db.delete_collection("langchain_cmh_test")
@pytest.mark.requires("astrapy")
@pytest.mark.skipif(not _has_env_vars(), reason="Missing Astra DB env. vars")
def test_memory_with_message_store(history1: AstraDBChatMessageHistory) -> None:
"""Test the memory with a message store."""
memory = ConversationBufferMemory(
memory_key="baz",
chat_memory=history1,
return_messages=True,
)
assert memory.chat_memory.messages == []
# add some messages
memory.chat_memory.add_ai_message("This is me, the AI")
memory.chat_memory.add_user_message("This is me, the human")
messages = memory.chat_memory.messages
expected = [
AIMessage(content="This is me, the AI"),
HumanMessage(content="This is me, the human"),
]
assert messages == expected
# clear the store
memory.chat_memory.clear()
assert memory.chat_memory.messages == []
@pytest.mark.requires("astrapy")
@pytest.mark.skipif(not _has_env_vars(), reason="Missing Astra DB env. vars")
def test_memory_separate_session_ids(
history1: AstraDBChatMessageHistory, history2: AstraDBChatMessageHistory
) -> None:
"""Test that separate session IDs do not share entries."""
memory1 = ConversationBufferMemory(
memory_key="mk1",
chat_memory=history1,
return_messages=True,
)
memory2 = ConversationBufferMemory(
memory_key="mk2",
chat_memory=history2,
return_messages=True,
)
memory1.chat_memory.add_ai_message("Just saying.")
assert memory2.chat_memory.messages == []
memory2.chat_memory.clear()
assert memory1.chat_memory.messages != []
memory1.chat_memory.clear()
assert memory1.chat_memory.messages == []

View File

@@ -1,116 +0,0 @@
"""Integration test for Outline API Wrapper."""
from typing import List
import pytest
import responses
from langchain_core.documents import Document
from langchain.utilities import OutlineAPIWrapper
OUTLINE_INSTANCE_TEST_URL = "https://app.getoutline.com"
OUTLINE_SUCCESS_RESPONSE = {
"data": [
{
"ranking": 0.3911583,
"context": "Testing Context",
"document": {
"id": "abb2bf15-a597-4255-8b19-b742e3d037bf",
"url": "/doc/quick-start-jGuGGGOTuL",
"title": "Test Title",
"text": "Testing Content",
"createdBy": {"name": "John Doe"},
"revision": 3,
"collectionId": "93f182a4-a591-4d47-83f0-752e7bb2065c",
"parentDocumentId": None,
},
},
],
"status": 200,
"ok": True,
}
OUTLINE_EMPTY_RESPONSE = {
"data": [],
"status": 200,
"ok": True,
}
OUTLINE_ERROR_RESPONSE = {
"ok": False,
"error": "authentication_required",
"status": 401,
"message": "Authentication error",
}
@pytest.fixture
def api_client() -> OutlineAPIWrapper:
return OutlineAPIWrapper(
outline_api_key="api_key", outline_instance_url=OUTLINE_INSTANCE_TEST_URL
)
def assert_docs(docs: List[Document], all_meta: bool = False) -> None:
for doc in docs:
assert doc.page_content
assert doc.metadata
main_meta = {"title", "source"}
assert set(doc.metadata).issuperset(main_meta)
if all_meta:
assert len(set(doc.metadata)) > len(main_meta)
else:
assert len(set(doc.metadata)) == len(main_meta)
@responses.activate
def test_run_success(api_client: OutlineAPIWrapper) -> None:
responses.add(
responses.POST,
api_client.outline_instance_url + api_client.outline_search_endpoint,
json=OUTLINE_SUCCESS_RESPONSE,
status=200,
)
docs = api_client.run("Testing")
assert_docs(docs, all_meta=False)
@responses.activate
def test_run_success_all_meta(api_client: OutlineAPIWrapper) -> None:
api_client.load_all_available_meta = True
responses.add(
responses.POST,
api_client.outline_instance_url + api_client.outline_search_endpoint,
json=OUTLINE_SUCCESS_RESPONSE,
status=200,
)
docs = api_client.run("Testing")
assert_docs(docs, all_meta=True)
@responses.activate
def test_run_no_result(api_client: OutlineAPIWrapper) -> None:
responses.add(
responses.POST,
api_client.outline_instance_url + api_client.outline_search_endpoint,
json=OUTLINE_EMPTY_RESPONSE,
status=200,
)
docs = api_client.run("No Result Test")
assert not docs
@responses.activate
def test_run_error(api_client: OutlineAPIWrapper) -> None:
responses.add(
responses.POST,
api_client.outline_instance_url + api_client.outline_search_endpoint,
json=OUTLINE_ERROR_RESPONSE,
status=401,
)
with pytest.raises(ValueError, match="Outline API returned an error:"):
api_client.run("Testing")

View File

@@ -506,7 +506,3 @@ RETURN a.title AS title, c.text AS text, a.date AS date
ORDER BY date DESC LIMIT 3"
"MATCH (n:`Some Label`)-[:`SOME REL TYPE üäß`]->(m:`Sömé Øther Læbel`) RETURN n,m","(Some Label, SOME REL TYPE üäß, Sömé Øther Læbel)","MATCH (n:`Some Label`)-[:`SOME REL TYPE üäß`]->(m:`Sömé Øther Læbel`) RETURN n,m"
"MATCH (n:`Some Label`)<-[:`SOME REL TYPE üäß`]-(m:`Sömé Øther Læbel`) RETURN n,m","(Some Label, SOME REL TYPE üäß, Sömé Øther Læbel)","MATCH (n:`Some Label`)-[:`SOME REL TYPE üäß`]->(m:`Sömé Øther Læbel`) RETURN n,m"
"MATCH (a:Actor {name: 'Tom Hanks'})-[:ACTED_IN]->(m:Movie) RETURN count(m)","(Movie, IN_GENRE, Genre), (User, RATED, Movie), (Actor, ACTED_IN, Movie), (Actor, DIRECTED, Movie), (Director, DIRECTED, Movie), (Director, ACTED_IN, Movie), (Person, ACTED_IN, Movie), (Person, DIRECTED, Movie)","MATCH (a:Actor {name: 'Tom Hanks'})-[:ACTED_IN]->(m:Movie) RETURN count(m)"
"MATCH (a:Actor)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre), (a)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre) WHERE g1.name = 'Comedy' AND g2.name = 'Action' RETURN DISTINCT a.name","(Movie, IN_GENRE, Genre), (User, RATED, Movie), (Actor, ACTED_IN, Movie), (Actor, DIRECTED, Movie), (Director, DIRECTED, Movie), (Director, ACTED_IN, Movie), (Person, ACTED_IN, Movie), (Person, DIRECTED, Movie)","MATCH (a:Actor)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g1:Genre), (a)-[:ACTED_IN]->(:Movie)-[:IN_GENRE]->(g2:Genre) WHERE g1.name = 'Comedy' AND g2.name = 'Action' RETURN DISTINCT a.name"
"MATCH (a:Actor)-[:ACTED_IN]->(m:Movie) RETURN a.name, COUNT(m) AS movieCount ORDER BY movieCount DESC LIMIT 1","(Movie, IN_GENRE, Genre), (User, RATED, Movie), (Actor, ACTED_IN, Movie), (Actor, DIRECTED, Movie), (Director, DIRECTED, Movie), (Director, ACTED_IN, Movie), (Person, ACTED_IN, Movie), (Person, DIRECTED, Movie)","MATCH (a:Actor)-[:ACTED_IN]->(m:Movie) RETURN a.name, COUNT(m) AS movieCount ORDER BY movieCount DESC LIMIT 1"
"MATCH (g:Genre)<-[:IN_GENRE]-(m:Movie) RETURN g.name, COUNT(m) AS movieCount","(Movie, IN_GENRE, Genre), (User, RATED, Movie), (Actor, ACTED_IN, Movie), (Actor, DIRECTED, Movie), (Director, DIRECTED, Movie), (Director, ACTED_IN, Movie), (Person, ACTED_IN, Movie), (Person, DIRECTED, Movie)","MATCH (g:Genre)<-[:IN_GENRE]-(m:Movie) RETURN g.name, COUNT(m) AS movieCount"
(CSV header: statement, schema, correct_query)

View File

@@ -1,165 +0,0 @@
import os
from typing import Any
from unittest.mock import Mock
import pytest
from _pytest.monkeypatch import MonkeyPatch
from pytest_mock import MockerFixture
from langchain.docstore.document import Document
from langchain.document_loaders.onenote import OneNoteLoader
def test_initialization() -> None:
os.environ["MS_GRAPH_CLIENT_ID"] = "CLIENT_ID"
os.environ["MS_GRAPH_CLIENT_SECRET"] = "CLIENT_SECRET"
loader = OneNoteLoader(
notebook_name="test_notebook",
section_name="test_section",
page_title="test_title",
access_token="access_token",
)
assert loader.notebook_name == "test_notebook"
assert loader.section_name == "test_section"
assert loader.page_title == "test_title"
assert loader.access_token == "access_token"
assert loader._headers == {
"Authorization": "Bearer access_token",
}
@pytest.mark.requires("bs4")
def test_load(mocker: MockerFixture) -> None:
os.environ["MS_GRAPH_CLIENT_ID"] = "CLIENT_ID"
os.environ["MS_GRAPH_CLIENT_SECRET"] = "CLIENT_SECRET"
mocker.patch(
"requests.get",
return_value=mocker.MagicMock(json=lambda: {"value": []}, links=None),
)
loader = OneNoteLoader(
notebook_name="test_notebook",
section_name="test_section",
page_title="test_title",
access_token="access_token",
)
documents = loader.load()
assert documents == []
mocker.patch(
"langchain.document_loaders.onenote.OneNoteLoader._get_page_content",
return_value=(
"<html><head><title>Test Title</title></head>"
"<body><p>Test Content</p></body></html>"
),
)
loader = OneNoteLoader(object_ids=["test_id"], access_token="access_token")
documents = loader.load()
assert documents == [
Document(
page_content="Test Title\nTest Content", metadata={"title": "Test Title"}
)
]
class FakeConfidentialClientApplication(Mock):
def get_authorization_request_url(self, *args: Any, **kwargs: Any) -> str:
return "fake_authorization_url"
@pytest.mark.requires("msal")
def test_msal_import(monkeypatch: MonkeyPatch, mocker: MockerFixture) -> None:
os.environ["MS_GRAPH_CLIENT_ID"] = "CLIENT_ID"
os.environ["MS_GRAPH_CLIENT_SECRET"] = "CLIENT_SECRET"
monkeypatch.setattr("builtins.input", lambda _: "invalid_url")
mocker.patch(
"msal.ConfidentialClientApplication",
return_value=FakeConfidentialClientApplication(),
)
loader = OneNoteLoader(
notebook_name="test_notebook",
section_name="test_section",
page_title="test_title",
)
with pytest.raises(IndexError):
loader._auth()
def test_url() -> None:
os.environ["MS_GRAPH_CLIENT_ID"] = "CLIENT_ID"
os.environ["MS_GRAPH_CLIENT_SECRET"] = "CLIENT_SECRET"
loader = OneNoteLoader(
notebook_name="test_notebook",
section_name="test_section",
page_title="test_title",
access_token="access_token",
onenote_api_base_url="https://graph.microsoft.com/v1.0/me/onenote",
)
assert loader._url == (
"https://graph.microsoft.com/v1.0/me/onenote/pages?$select=id"
"&$expand=parentNotebook,parentSection"
"&$filter=parentNotebook/displayName%20eq%20'test_notebook'"
"%20and%20parentSection/displayName%20eq%20'test_section'"
"%20and%20title%20eq%20'test_title'"
)
loader = OneNoteLoader(
notebook_name="test_notebook",
section_name="test_section",
access_token="access_token",
onenote_api_base_url="https://graph.microsoft.com/v1.0/me/onenote",
)
assert loader._url == (
"https://graph.microsoft.com/v1.0/me/onenote/pages?$select=id"
"&$expand=parentNotebook,parentSection"
"&$filter=parentNotebook/displayName%20eq%20'test_notebook'"
"%20and%20parentSection/displayName%20eq%20'test_section'"
)
loader = OneNoteLoader(
notebook_name="test_notebook",
access_token="access_token",
onenote_api_base_url="https://graph.microsoft.com/v1.0/me/onenote",
)
assert loader._url == (
"https://graph.microsoft.com/v1.0/me/onenote/pages?$select=id"
"&$expand=parentNotebook"
"&$filter=parentNotebook/displayName%20eq%20'test_notebook'"
)
loader = OneNoteLoader(
section_name="test_section",
access_token="access_token",
onenote_api_base_url="https://graph.microsoft.com/v1.0/me/onenote",
)
assert loader._url == (
"https://graph.microsoft.com/v1.0/me/onenote/pages?$select=id"
"&$expand=parentSection"
"&$filter=parentSection/displayName%20eq%20'test_section'"
)
loader = OneNoteLoader(
section_name="test_section",
page_title="test_title",
access_token="access_token",
onenote_api_base_url="https://graph.microsoft.com/v1.0/me/onenote",
)
assert loader._url == (
"https://graph.microsoft.com/v1.0/me/onenote/pages?$select=id"
"&$expand=parentSection"
"&$filter=parentSection/displayName%20eq%20'test_section'"
"%20and%20title%20eq%20'test_title'"
)
loader = OneNoteLoader(
page_title="test_title",
access_token="access_token",
onenote_api_base_url="https://graph.microsoft.com/v1.0/me/onenote",
)
assert loader._url == (
"https://graph.microsoft.com/v1.0/me/onenote/pages?$select=id"
"&$filter=title%20eq%20'test_title'"
)

View File

@@ -1,7 +1,6 @@
from langchain.memory import __all__
EXPECTED_ALL = [
"AstraDBChatMessageHistory",
"CassandraChatMessageHistory",
"ChatMessageHistory",
"CombinedMemory",

View File

@@ -1,9 +1,6 @@
from langchain.prompts.chat import __all__
EXPECTED_ALL = [
"MessageLike",
"MessageLikeRepresentation",
"MessagePromptTemplateT",
"AIMessagePromptTemplate",
"BaseChatPromptTemplate",
"BaseMessagePromptTemplate",

View File

@@ -23,7 +23,6 @@ EXPECTED_ALL = [
"MetalRetriever",
"MilvusRetriever",
"MultiQueryRetriever",
"OutlineRetriever",
"PineconeHybridSearchRetriever",
"PubMedRetriever",
"RemoteLangChainRetriever",

View File

@@ -13,10 +13,6 @@ EXPECTED_ALL = [
"RunnableSequence",
"RunnableSerializable",
"coerce_to_runnable",
"Input",
"Output",
"Other",
"RunnableLike",
]

View File

@@ -1,10 +1,6 @@
from langchain.schema.runnable.history import __all__
EXPECTED_ALL = [
"RunnableWithMessageHistory",
"GetSessionHistoryCallable",
"MessagesOrDictWithMessages",
]
EXPECTED_ALL = ["RunnableWithMessageHistory"]
def test_all_imports() -> None:

View File

@@ -1,6 +1,6 @@
from langchain.schema.runnable.retry import __all__
EXPECTED_ALL = ["RunnableRetry", "U"]
EXPECTED_ALL = ["RunnableRetry"]
def test_all_imports() -> None:

View File

@@ -22,8 +22,6 @@ EXPECTED_ALL = [
"indent_lines_after_first",
"Input",
"Output",
"Addable",
"AnyConfigurableField",
]

View File

@@ -1,6 +1,6 @@
from langchain.schema.cache import __all__
EXPECTED_ALL = ["BaseCache", "RETURN_VAL_TYPE"]
EXPECTED_ALL = ["BaseCache"]
def test_all_imports() -> None:

View File

@@ -1,12 +1,6 @@
from langchain.schema.language_model import __all__
EXPECTED_ALL = [
"BaseLanguageModel",
"_get_token_ids_default_method",
"get_tokenizer",
"LanguageModelOutput",
"LanguageModelInput",
]
EXPECTED_ALL = ["BaseLanguageModel", "_get_token_ids_default_method", "get_tokenizer"]
def test_all_imports() -> None:

View File

@@ -22,7 +22,6 @@ EXPECTED_ALL = [
"merge_content",
"messages_from_dict",
"messages_to_dict",
"AnyMessage",
]

View File

@@ -9,7 +9,6 @@ EXPECTED_ALL = [
"NoOpOutputParser",
"OutputParserException",
"StrOutputParser",
"T",
]

View File

@@ -1,6 +1,6 @@
from langchain.schema.storage import __all__
EXPECTED_ALL = ["BaseStore", "K", "V"]
EXPECTED_ALL = ["BaseStore"]
def test_all_imports() -> None:

View File

@@ -1,6 +1,6 @@
from langchain.schema.vectorstore import __all__
EXPECTED_ALL = ["VectorStore", "VectorStoreRetriever", "VST"]
EXPECTED_ALL = ["VectorStore", "VectorStoreRetriever"]
def test_all_imports() -> None:

View File

@@ -20,7 +20,6 @@ EXPECTED_ALL = [
"MaxComputeAPIWrapper",
"MetaphorSearchAPIWrapper",
"OpenWeatherMapAPIWrapper",
"OutlineAPIWrapper",
"Portkey",
"PowerBIDataset",
"PubMedAPIWrapper",

poetry.lock generated
View File

@@ -987,7 +987,7 @@ files = [
{file = "greenlet-3.0.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0b72b802496cccbd9b31acea72b6f87e7771ccfd7f7927437d592e5c92ed703c"},
{file = "greenlet-3.0.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:527cd90ba3d8d7ae7dceb06fda619895768a46a1b4e423bdb24c1969823b8362"},
{file = "greenlet-3.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:37f60b3a42d8b5499be910d1267b24355c495064f271cfe74bf28b17b099133c"},
{file = "greenlet-3.0.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:1482fba7fbed96ea7842b5a7fc11d61727e8be75a077e603e8ab49d24e234383"},
{file = "greenlet-3.0.0-cp311-universal2-macosx_10_9_universal2.whl", hash = "sha256:c3692ecf3fe754c8c0f2c95ff19626584459eab110eaab66413b1e7425cd84e9"},
{file = "greenlet-3.0.0-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:be557119bf467d37a8099d91fbf11b2de5eb1fd5fc5b91598407574848dc910f"},
{file = "greenlet-3.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:73b2f1922a39d5d59cc0e597987300df3396b148a9bd10b76a058a2f2772fc04"},
{file = "greenlet-3.0.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d1e22c22f7826096ad503e9bb681b05b8c1f5a8138469b255eb91f26a76634f2"},
@@ -997,6 +997,7 @@ files = [
{file = "greenlet-3.0.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:952256c2bc5b4ee8df8dfc54fc4de330970bf5d79253c863fb5e6761f00dda35"},
{file = "greenlet-3.0.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:269d06fa0f9624455ce08ae0179430eea61085e3cf6457f05982b37fd2cefe17"},
{file = "greenlet-3.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9adbd8ecf097e34ada8efde9b6fec4dd2a903b1e98037adf72d12993a1c80b51"},
{file = "greenlet-3.0.0-cp312-universal2-macosx_10_9_universal2.whl", hash = "sha256:553d6fb2324e7f4f0899e5ad2c427a4579ed4873f42124beba763f16032959af"},
{file = "greenlet-3.0.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6b5ce7f40f0e2f8b88c28e6691ca6806814157ff05e794cdd161be928550f4c"},
{file = "greenlet-3.0.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ecf94aa539e97a8411b5ea52fc6ccd8371be9550c4041011a091eb8b3ca1d810"},
{file = "greenlet-3.0.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80dcd3c938cbcac986c5c92779db8e8ce51a89a849c135172c88ecbdc8c056b7"},
@@ -1029,7 +1030,6 @@ files = [
{file = "greenlet-3.0.0-cp39-cp39-win32.whl", hash = "sha256:0d3f83ffb18dc57243e0151331e3c383b05e5b6c5029ac29f754745c800f8ed9"},
{file = "greenlet-3.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:831d6f35037cf18ca5e80a737a27d822d87cd922521d18ed3dbc8a6967be50ce"},
{file = "greenlet-3.0.0-cp39-universal2-macosx_11_0_x86_64.whl", hash = "sha256:a048293392d4e058298710a54dfaefcefdf49d287cd33fb1f7d63d55426e4355"},
{file = "greenlet-3.0.0.tar.gz", hash = "sha256:19834e3f91f485442adc1ee440171ec5d9a4840a1f7bd5ed97833544719ce10b"},
]
[package.extras]
@@ -1627,7 +1627,7 @@ files = [
[[package]]
name = "langchain"
version = "0.0.339rc3"
version = "0.0.335"
description = "Building applications with LLMs through composability"
optional = false
python-versions = ">=3.8.1,<4.0"
@@ -1640,7 +1640,6 @@ anyio = "<4.0"
async-timeout = {version = "^4.0.0", markers = "python_version < \"3.11\""}
dataclasses-json = ">= 0.5.7, < 0.7"
jsonpatch = "^1.33"
langchain-core = "^0.0.6"
langsmith = "~0.0.63"
numpy = "^1"
pydantic = ">=1,<3"
@@ -1650,17 +1649,17 @@ SQLAlchemy = ">=1.4,<3"
tenacity = "^8.1.0"
[package.extras]
all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-textanalytics (>=5.3.0,<6.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.8.3,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.13.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<4)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (<2)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.6.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"]
azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-textanalytics (>=5.3.0,<6.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b8)", "openai (<2)"]
all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.8.3,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.13.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<4)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.6.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"]
azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b8)", "openai (>=0,<1)"]
clarifai = ["clarifai (>=9.1.0)"]
cli = ["typer (>=0.9.0,<0.10.0)"]
cohere = ["cohere (>=4,<5)"]
docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"]
embeddings = ["sentence-transformers (>=2,<3)"]
extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "dashvector (>=1.0.1,<2.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.6.0,<0.7.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (<2)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "dashvector (>=1.0.1,<2.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.6.0,<0.7.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (>=0,<1)", "openai (>=0,<1)", "openapi-pydantic (>=0.3.2,<0.4.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
javascript = ["esprima (>=4.0.1,<5.0.0)"]
llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (<2)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"]
openai = ["openai (<2)", "tiktoken (>=0.3.2,<0.6.0)"]
llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"]
openai = ["openai (>=0,<1)", "tiktoken (>=0.3.2,<0.6.0)"]
qdrant = ["qdrant-client (>=1.3.1,<2.0.0)"]
text-helpers = ["chardet (>=5.1.0,<6.0.0)"]
@@ -1668,23 +1667,6 @@ text-helpers = ["chardet (>=5.1.0,<6.0.0)"]
type = "directory"
url = "libs/langchain"
[[package]]
name = "langchain-core"
version = "0.0.6"
description = "Building applications with LLMs through composability"
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
{file = "langchain_core-0.0.6-py3-none-any.whl", hash = "sha256:dcc727ff811159e09fc1d72caae4aaea892611349d5c3fc1c18b3a19573faf27"},
{file = "langchain_core-0.0.6.tar.gz", hash = "sha256:cffd1031764d838ad2a2f3f64477b710923ddad58eb9fe3130ff94b3669e8dd8"},
]
[package.dependencies]
jsonpatch = ">=1.33,<2.0"
langsmith = ">=0.0.63,<0.1.0"
pydantic = ">=1,<3"
tenacity = ">=8.1.0,<9.0.0"
[[package]]
name = "langsmith"
version = "0.0.63"
@@ -1782,16 +1764,6 @@ files = [
{file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"},
{file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"},
{file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"},
{file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"},
{file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"},
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"},
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"},
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"},
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"},
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"},
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"},
{file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"},
{file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"},
{file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"},
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"},
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"},
@@ -2767,7 +2739,6 @@ files = [
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
{file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
{file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
{file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
{file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
@@ -2775,15 +2746,8 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
{file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
{file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
{file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
{file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
@@ -2800,7 +2764,6 @@ files = [
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
{file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
{file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
{file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
{file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
@@ -2808,7 +2771,6 @@ files = [
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
{file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
{file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
{file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
{file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
@@ -3488,14 +3450,6 @@ files = [
{file = "SQLAlchemy-2.0.21-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b69f1f754d92eb1cc6b50938359dead36b96a1dcf11a8670bff65fd9b21a4b09"},
{file = "SQLAlchemy-2.0.21-cp311-cp311-win32.whl", hash = "sha256:af520a730d523eab77d754f5cf44cc7dd7ad2d54907adeb3233177eeb22f271b"},
{file = "SQLAlchemy-2.0.21-cp311-cp311-win_amd64.whl", hash = "sha256:141675dae56522126986fa4ca713739d00ed3a6f08f3c2eb92c39c6dfec463ce"},
{file = "SQLAlchemy-2.0.21-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:56628ca27aa17b5890391ded4e385bf0480209726f198799b7e980c6bd473bd7"},
{file = "SQLAlchemy-2.0.21-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:db726be58837fe5ac39859e0fa40baafe54c6d54c02aba1d47d25536170b690f"},
{file = "SQLAlchemy-2.0.21-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7421c1bfdbb7214313919472307be650bd45c4dc2fcb317d64d078993de045b"},
{file = "SQLAlchemy-2.0.21-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:632784f7a6f12cfa0e84bf2a5003b07660addccf5563c132cd23b7cc1d7371a9"},
{file = "SQLAlchemy-2.0.21-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f6f7276cf26145a888f2182a98f204541b519d9ea358a65d82095d9c9e22f917"},
{file = "SQLAlchemy-2.0.21-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2a1f7ffac934bc0ea717fa1596f938483fb8c402233f9b26679b4f7b38d6ab6e"},
{file = "SQLAlchemy-2.0.21-cp312-cp312-win32.whl", hash = "sha256:bfece2f7cec502ec5f759bbc09ce711445372deeac3628f6fa1c16b7fb45b682"},
{file = "SQLAlchemy-2.0.21-cp312-cp312-win_amd64.whl", hash = "sha256:526b869a0f4f000d8d8ee3409d0becca30ae73f494cbb48801da0129601f72c6"},
{file = "SQLAlchemy-2.0.21-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7614f1eab4336df7dd6bee05bc974f2b02c38d3d0c78060c5faa4cd1ca2af3b8"},
{file = "SQLAlchemy-2.0.21-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d59cb9e20d79686aa473e0302e4a82882d7118744d30bb1dfb62d3c47141b3ec"},
{file = "SQLAlchemy-2.0.21-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a95aa0672e3065d43c8aa80080cdd5cc40fe92dc873749e6c1cf23914c4b83af"},


@@ -9,8 +9,6 @@ The package utilizes a full-text index for efficient mapping of text values to d
In the provided example, the full-text index is used to map names of people and movies from the user's query to corresponding database entries.
![Workflow diagram](https://raw.githubusercontent.com/langchain-ai/langchain/master/templates/neo4j-cypher-ft/static/workflow.png)
## Environment Setup
The following environment variables need to be set:
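As a sketch (the variable names below follow the usual Neo4j and OpenAI conventions and are assumptions on my part; the values are placeholders):

```shell
export OPENAI_API_KEY=<your-openai-api-key>
export NEO4J_URI=<your-neo4j-uri>
export NEO4J_USERNAME=<your-neo4j-username>
export NEO4J_PASSWORD=<your-neo4j-password>
```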


@@ -7,8 +7,6 @@ Additionally, it features a conversational memory module that stores the dialogu
The conversation memory is uniquely maintained for each user session, ensuring personalized interactions.
To facilitate this, please supply both the `user_id` and `session_id` when using the conversation chain.
![Workflow diagram](https://raw.githubusercontent.com/langchain-ai/langchain/master/templates/neo4j-cypher-memory/static/workflow.png)
## Environment Setup
Define the following environment variables:


@@ -5,8 +5,6 @@ This template allows you to interact with a Neo4j graph database in natural lang
It transforms a natural language question into a Cypher query (used to fetch data from Neo4j databases), executes the query, and provides a natural language response based on the query results.
[![Workflow diagram](https://raw.githubusercontent.com/langchain-ai/langchain/master/templates/neo4j-cypher/static/workflow.png)](https://medium.com/neo4j/langchain-cypher-search-tips-tricks-f7c9e9abca4d)
## Environment Setup
Define the following environment variables:


@@ -1 +0,0 @@
__pycache__


@@ -1,21 +0,0 @@
MIT License
Copyright (c) 2023 LangChain, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


@@ -1,70 +0,0 @@
# skeleton-of-thought
Implements "Skeleton of Thought" from [this](https://sites.google.com/view/sot-llm) paper.
This technique makes it possible to produce longer generations more quickly by first generating a skeleton of the answer, then expanding each point of the outline.
## Environment Setup
Set the `OPENAI_API_KEY` environment variable to access the OpenAI models.
To get your `OPENAI_API_KEY`, navigate to [API keys](https://platform.openai.com/account/api-keys) on your OpenAI account and create a new secret key.
## Usage
To use this package, you should first have the LangChain CLI installed:
```shell
pip install -U langchain-cli
```
To create a new LangChain project and install this as the only package, you can do:
```shell
langchain app new my-app --package skeleton-of-thought
```
If you want to add this to an existing project, you can just run:
```shell
langchain app add skeleton-of-thought
```
And add the following code to your `server.py` file:
```python
from skeleton_of_thought import chain as skeleton_of_thought_chain
add_routes(app, skeleton_of_thought_chain, path="/skeleton-of-thought")
```
(Optional) Let's now configure LangSmith.
LangSmith will help us trace, monitor, and debug LangChain applications.
LangSmith is currently in private beta; you can sign up [here](https://smith.langchain.com/).
If you don't have access, you can skip this section.
```shell
export LANGCHAIN_TRACING_V2=true
export LANGCHAIN_API_KEY=<your-api-key>
export LANGCHAIN_PROJECT=<your-project> # if not specified, defaults to "default"
```
If you are inside this directory, then you can spin up a LangServe instance directly by:
```shell
langchain serve
```
This will start the FastAPI app with a server running locally at
[http://localhost:8000](http://localhost:8000)
We can see all templates at [http://127.0.0.1:8000/docs](http://127.0.0.1:8000/docs)
We can access the playground at [http://127.0.0.1:8000/skeleton-of-thought/playground](http://127.0.0.1:8000/skeleton-of-thought/playground)
We can access the template from code with:
```python
from langserve.client import RemoteRunnable
runnable = RemoteRunnable("http://localhost:8000/skeleton-of-thought")
```
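Once connected, the runnable accepts the same input schema as the local chain: a single `question` string (see the `ChainInput` model in the template's `chain.py`). A minimal sketch, with an illustrative question:

```python
answer = runnable.invoke({"question": "How can remote teams stay productive?"})
print(answer)
```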


@@ -1,24 +0,0 @@
[tool.poetry]
name = "skeleton-of-thought"
version = "0.0.1"
description = ""
authors = []
readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
langchain = ">=0.0.313, <0.1"
openai = "^0.28.1"
[tool.poetry.group.dev.dependencies]
langchain-cli = ">=0.0.4"
fastapi = "^0.104.0"
sse-starlette = "^1.6.5"
[tool.langserve]
export_module = "skeleton_of_thought"
export_attr = "chain"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"


@@ -1,3 +0,0 @@
from skeleton_of_thought.chain import chain
__all__ = ["chain"]


@@ -1,96 +0,0 @@
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.pydantic_v1 import BaseModel
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

skeleton_generator_template = """[User:] You're an organizer responsible for only \
giving the skeleton (not the full content) for answering the question.
Provide the skeleton in a list of points (numbered 1., 2., 3., etc.) to answer \
the question. \
Instead of writing a full sentence, each skeleton point should be very short \
with only 3~5 words. \
Generally, the skeleton should have 3~10 points. Now, please provide the skeleton \
for the following question.
{question}
Skeleton:
[Assistant:] 1."""

skeleton_generator_prompt = ChatPromptTemplate.from_template(
    skeleton_generator_template
)

skeleton_generator_chain = (
    skeleton_generator_prompt | ChatOpenAI() | StrOutputParser() | (lambda x: "1. " + x)
)
point_expander_template = """[User:] You're responsible for continuing \
the writing of one and only one point in the overall answer to the following question.
{question}
The skeleton of the answer is
{skeleton}
Continue and only continue the writing of point {point_index}. \
Write it **very shortly** in 1~2 sentences and do not continue with other points!
[Assistant:] {point_index}. {point_skeleton}"""

point_expander_prompt = ChatPromptTemplate.from_template(point_expander_template)

point_expander_chain = RunnablePassthrough.assign(
    continuation=point_expander_prompt | ChatOpenAI() | StrOutputParser()
) | (lambda x: x["point_skeleton"].strip() + " " + x["continuation"])
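
# Illustrative behavior (hypothetical values): given an input such as
#   {"question": ..., "skeleton": ..., "point_index": 2, "point_skeleton": "Set deadlines"}
# the expander returns "Set deadlines <model continuation for point 2>".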
def parse_numbered_list(input_str):
    """Parses a numbered list into a list of dictionaries.

    Each element has two keys: 'point_index' for the index in the
    numbered list, and 'point_skeleton' for the content of that point.
    """
    # Split the input string into lines
    lines = input_str.split("\n")

    # Initialize an empty list to store the parsed items
    parsed_list = []

    for line in lines:
        # Split each line at the first period to separate the index from the content
        parts = line.split(". ", 1)

        if len(parts) == 2:
            # Convert the index part to an integer
            # and strip any whitespace from the content
            index = int(parts[0])
            point = parts[1].strip()

            # Add a dictionary to the parsed list
            parsed_list.append({"point_index": index, "point_skeleton": point})

    return parsed_list
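
# Illustrative example (hypothetical input):
#   parse_numbered_list("1. Define goals\n2. Set deadlines")
#   -> [{"point_index": 1, "point_skeleton": "Define goals"},
#       {"point_index": 2, "point_skeleton": "Set deadlines"}]
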
def create_list_elements(_input):
    skeleton = _input["skeleton"]
    numbered_list = parse_numbered_list(skeleton)
    for el in numbered_list:
        el["skeleton"] = skeleton
        el["question"] = _input["question"]
    return numbered_list


def get_final_answer(expanded_list):
    final_answer_str = "Here's a comprehensive answer:\n\n"
    for i, el in enumerate(expanded_list):
        final_answer_str += f"{i+1}. {el}\n\n"
    return final_answer_str
class ChainInput(BaseModel):
    question: str


chain = (
    RunnablePassthrough.assign(skeleton=skeleton_generator_chain)
    | create_list_elements
    | point_expander_chain.map()
    | get_final_answer
).with_types(input_type=ChainInput)
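
# Usage sketch (assumes OPENAI_API_KEY is set; the question is illustrative):
# the chain drafts a skeleton, expands each point via .map(), and assembles
# the final numbered answer.
# chain.invoke({"question": "How can remote teams stay productive?"})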