Mirror of https://github.com/hwchase17/langchain.git, synced 2026-02-06 01:00:22 +00:00.
Compare commits
47 Commits
| SHA1 |
|---|
| 50bb704da5 |
| e195b78e1d |
| 77a165e0d9 |
| 7608f85f13 |
| 0786395b56 |
| 9dd4cacae2 |
| 7f3f6097e7 |
| ccf71e23e8 |
| 49b65a1b57 |
| e1e01d6586 |
| 596f294b01 |
| cbb4860fcd |
| adabdfdfc7 |
| 0a0276bcdb |
| 2dc3c64386 |
| a34510536d |
| bcf130c07c |
| f4e6eac3b6 |
| 415d38ae62 |
| 49694f6a3f |
| 85e05fa5d6 |
| ac9609f58f |
| 201b61d5b3 |
| a43abf24e4 |
| f9636b6cd2 |
| d1f2075bde |
| 73b9ca54cb |
| db3369272a |
| 1835624bad |
| 303724980c |
| 79a567d885 |
| 97122fb577 |
| eaf916f999 |
| 7ecee7821a |
| 21fbbe83a7 |
| 57e2de2077 |
| 69fe0621d4 |
| f23fed34e8 |
| ff1c6de86c |
| 868db99b17 |
| 7b7bea5424 |
| 882a588264 |
| 1b7caa1a29 |
| e9abe176bc |
| 6b9529e11a |
| c6149aacef |
| 800fe4a73f |
14  .github/PULL_REQUEST_TEMPLATE.md  (vendored)
@@ -1,11 +1,11 @@
<!-- Thank you for contributing to LangChain!

Replace this entire comment with:
- Description: a description of the change,
- Issue: the issue # it fixes (if applicable),
- Dependencies: any dependencies required for this change,
- Tag maintainer: for a quicker response, tag the relevant maintainer (see below),
- Twitter handle: we announce bigger features on Twitter. If your PR gets announced and you'd like a mention, we'll gladly shout you out!
- **Description:** a description of the change,
- **Issue:** the issue # it fixes (if applicable),
- **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` to check this locally.

@@ -14,7 +14,7 @@ https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on network access,
2. an example notebook showing its use. These live is docs/extras directory.
2. an example notebook showing its use. It lives in `docs/extras` directory.

If no one reviews your PR within a few days, please @-mention one of @baskaryan, @eyurtsev, @hwchase17, @rlancemartin.
If no one reviews your PR within a few days, please @-mention one of @baskaryan, @eyurtsev, @hwchase17.
-->
@@ -48,7 +48,7 @@
"First, configure your environment variables to tell LangChain to log traces. This is done by setting the `LANGCHAIN_TRACING_V2` environment variable to true.\n",
"You can tell LangChain which project to log to by setting the `LANGCHAIN_PROJECT` environment variable (if this isn't set, runs will be logged to the `default` project). This will automatically create the project for you if it doesn't exist. You must also set the `LANGCHAIN_ENDPOINT` and `LANGCHAIN_API_KEY` environment variables.\n",
"\n",
"For more information on other ways to set up tracing, please reference the [LangSmith documentation](https://docs.smith.langchain.com/docs/)\n",
"For more information on other ways to set up tracing, please reference the [LangSmith documentation](https://docs.smith.langchain.com/docs/).\n",
"\n",
"**NOTE:** You must also set your `OPENAI_API_KEY` and `SERPAPI_API_KEY` environment variables in order to run the following tutorial.\n",
"\n",
@@ -65,6 +65,17 @@
"However, in this example, we will use environment variables."
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "e4780363-f05a-4649-8b1a-9b449f960ce4",
"metadata": {},
"outputs": [],
"source": [
"# %pip install -U langchain langsmith --quiet\n",
"# %pip install google-search-results pandas --quiet"
]
},
{
"cell_type": "code",
"execution_count": 1,
@@ -81,7 +92,7 @@
"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"os.environ[\"LANGCHAIN_PROJECT\"] = f\"Tracing Walkthrough - {unique_id}\"\n",
"os.environ[\"LANGCHAIN_ENDPOINT\"] = \"https://api.smith.langchain.com\"\n",
"os.environ[\"LANGCHAIN_API_KEY\"] = \"\"  # Update to your API key\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = \"\"  # Update to your API key\n",
"\n",
"# Used by the agent in this tutorial\n",
"# os.environ[\"OPENAI_API_KEY\"] = \"<YOUR-OPENAI-API-KEY>\"\n",
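The environment-variable setup in this hunk is the whole of what enables tracing. As a minimal, self-contained sketch of the finished cell (how `unique_id` is derived is an assumption here, and the API key is a placeholder you must supply):

```python
import os
from uuid import uuid4

# Tag this walkthrough's runs with a unique project name so traces are easy to find.
unique_id = uuid4().hex[0:8]
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = f"Tracing Walkthrough - {unique_id}"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_API_KEY"] = "<your-langsmith-api-key>"  # placeholder, not a real key
```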
@@ -156,8 +167,6 @@
},
"outputs": [],
"source": [
"import asyncio\n",
"\n",
"inputs = [\n",
"    \"How many people live in canada as of 2023?\",\n",
"    \"who is dua lipa's boyfriend? what is his age raised to the .43 power?\",\n",
@@ -170,20 +179,8 @@
"    \"who is kendall jenner's boyfriend? what is his height (in inches) raised to .13 power?\",\n",
"    \"what is 1213 divided by 4345?\",\n",
"]\n",
"results = []\n",
"\n",
"\n",
"async def arun(agent, input_example):\n",
"    try:\n",
"        return await agent.arun(input_example)\n",
"    except Exception as e:\n",
"        # The agent sometimes makes mistakes! These will be captured by the tracing.\n",
"        return e\n",
"\n",
"\n",
"for input_example in inputs:\n",
"    results.append(arun(agent, input_example))\n",
"results = await asyncio.gather(*results)"
"results = agent.batch(inputs, return_exceptions=True)"
]
},
{
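This hunk replaces the hand-rolled `asyncio.gather` loop with a single `batch` call. A hedged sketch of the pattern, assuming `agent` is the AgentExecutor built earlier in the notebook:

```python
# `batch` runs the agent over all inputs and, with return_exceptions=True,
# returns failures in-place instead of raising them mid-run.
results = agent.batch(inputs, return_exceptions=True)
for question, result in zip(inputs, results):
    if isinstance(result, Exception):
        # Failures are captured here (and in the tracing UI) rather than aborting the batch.
        print(f"{question!r} failed: {result}")
```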
@@ -389,53 +386,30 @@
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"View the evaluation results for project '2023-07-17-11-25-20-AgentExecutor' at:\n",
"https://dev.smith.langchain.com/projects/p/1c9baec3-ae86-4fac-9e99-e1b9f8e7818c?eval=true\n",
"Processed examples: 1\r"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Chain failed for example 5a2ac8da-8c2b-4d12-acb9-5c4b0f47fe8a. Error: LLMMathChain._evaluate(\"\n",
"Chain failed for example f8dfff24-d288-4d8e-ba94-c3cc33dd10d0 with inputs {'input': \"what is dua lipa's boyfriend age raised to the .43 power?\"}\n",
"Error Type: ValueError, Message: LLMMathChain._evaluate(\"\n",
"age_of_Dua_Lipa_boyfriend ** 0.43\n",
"\") raised error: 'age_of_Dua_Lipa_boyfriend'. Please try again with a valid numerical expression\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Processed examples: 4\r"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Chain failed for example 91439261-1c86-4198-868b-a6c1cc8a051b. Error: Too many arguments to single-input tool Calculator. Args: ['height ^ 0.13', {'height': 68}]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Processed examples: 9\r"
"\") raised error: 'age_of_Dua_Lipa_boyfriend'. Please try again with a valid numerical expression\n",
"Chain failed for example 78c959a4-467d-4469-8bd7-c5f0b059bc4a with inputs {'input': \"who is dua lipa's boyfriend? what is his age raised to the .43 power?\"}\n",
"Error Type: ValueError, Message: LLMMathChain._evaluate(\"\n",
"age ** 0.43\n",
"\") raised error: 'age'. Please try again with a valid numerical expression\n",
"Chain failed for example 6de48a56-3f30-4aac-b6cf-eee4b05ad43f with inputs {'input': \"who is kendall jenner's boyfriend? what is his height (in inches) raised to .13 power?\"}\n",
"Error Type: ToolException, Message: Too many arguments to single-input tool Calculator. Args: ['height ^ 0.13', {'height': 72}]\n"
]
}
],
"source": [
"from langchain.smith import (\n",
"    arun_on_dataset,\n",
"    run_on_dataset,  # Available if your chain doesn't support async calls.\n",
"    run_on_dataset,\n",
")\n",
"\n",
"chain_results = await arun_on_dataset(\n",
"chain_results = run_on_dataset(\n",
"    client=client,\n",
"    dataset_name=dataset_name,\n",
"    llm_or_chain_factory=agent_factory,\n",
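The change above swaps the async `arun_on_dataset` for the synchronous `run_on_dataset`. A sketch of the resulting call; `client`, `dataset_name`, and `agent_factory` are defined earlier in the notebook, and any evaluation config outside this hunk is omitted here:

```python
from langchain.smith import run_on_dataset

# Synchronous evaluation over the dataset; per-example failures are logged
# as warnings and captured as errors in the tracing UI.
chain_results = run_on_dataset(
    client=client,
    dataset_name=dataset_name,
    llm_or_chain_factory=agent_factory,
)
```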
@@ -448,6 +422,218 @@
"# These are logged as warnings here and captured as errors in the tracing UI."
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "9da60638-5be8-4b5f-a721-2c6627aeaf0c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
"    .dataframe tbody tr th:only-of-type {\n",
"        vertical-align: middle;\n",
"    }\n",
"\n",
"    .dataframe tbody tr th {\n",
"        vertical-align: top;\n",
"    }\n",
"\n",
"    .dataframe thead th {\n",
"        text-align: right;\n",
"    }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>input</th>\n",
"      <th>output</th>\n",
"      <th>reference</th>\n",
"      <th>embedding_cosine_distance</th>\n",
"      <th>correctness</th>\n",
"      <th>helpfulness</th>\n",
"      <th>fifth-grader-score</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>78c959a4-467d-4469-8bd7-c5f0b059bc4a</th>\n",
"      <td>{'input': 'who is dua lipa's boyfriend? what i...</td>\n",
"      <td>{'Error': 'ValueError('LLMMathChain._evaluate(...</td>\n",
"      <td>{'output': 'Romain Gavras' age raised to the 0...</td>\n",
"      <td>NaN</td>\n",
"      <td>NaN</td>\n",
"      <td>NaN</td>\n",
"      <td>NaN</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>f8dfff24-d288-4d8e-ba94-c3cc33dd10d0</th>\n",
"      <td>{'input': 'what is dua lipa's boyfriend age ra...</td>\n",
"      <td>{'Error': 'ValueError('LLMMathChain._evaluate(...</td>\n",
"      <td>{'output': 'Approximately 4.9888126515157.'}</td>\n",
"      <td>NaN</td>\n",
"      <td>NaN</td>\n",
"      <td>NaN</td>\n",
"      <td>NaN</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>c78d5e84-3fbd-442f-affb-4b0e5806c439</th>\n",
"      <td>{'input': 'how far is it from paris to boston ...</td>\n",
"      <td>{'input': 'how far is it from paris to boston ...</td>\n",
"      <td>{'output': 'The distance from Paris to Boston ...</td>\n",
"      <td>0.007577</td>\n",
"      <td>1.0</td>\n",
"      <td>1.0</td>\n",
"      <td>1.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>02cadef9-5794-49a9-8e43-acca977cab60</th>\n",
"      <td>{'input': 'How many people live in canada as o...</td>\n",
"      <td>{'input': 'How many people live in canada as o...</td>\n",
"      <td>{'output': 'The current population of Canada a...</td>\n",
"      <td>0.016324</td>\n",
"      <td>1.0</td>\n",
"      <td>1.0</td>\n",
"      <td>1.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>e888a340-0486-4552-bb4b-911756e6bed7</th>\n",
"      <td>{'input': 'what was the total number of points...</td>\n",
"      <td>{'input': 'what was the total number of points...</td>\n",
"      <td>{'output': '3'}</td>\n",
"      <td>0.225076</td>\n",
"      <td>0.0</td>\n",
"      <td>0.0</td>\n",
"      <td>0.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1b1f655b-754c-474d-8832-e6ec6bad3943</th>\n",
"      <td>{'input': 'what was the total number of points...</td>\n",
"      <td>{'input': 'what was the total number of points...</td>\n",
"      <td>{'output': 'The total number of points scored ...</td>\n",
"      <td>0.011580</td>\n",
"      <td>0.0</td>\n",
"      <td>0.0</td>\n",
"      <td>0.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>51f1b1f1-3b51-400f-b871-65f8a3a3c2d4</th>\n",
"      <td>{'input': 'how many more points were scored in...</td>\n",
"      <td>{'input': 'how many more points were scored in...</td>\n",
"      <td>{'output': '15'}</td>\n",
"      <td>0.251002</td>\n",
"      <td>1.0</td>\n",
"      <td>1.0</td>\n",
"      <td>1.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>83339364-0135-4efd-a24a-f3bd2a85e33a</th>\n",
"      <td>{'input': 'what is 153 raised to .1312 power?'}</td>\n",
"      <td>{'input': 'what is 153 raised to .1312 power?'...</td>\n",
"      <td>{'output': '1.9347796717823205'}</td>\n",
"      <td>0.127441</td>\n",
"      <td>1.0</td>\n",
"      <td>1.0</td>\n",
"      <td>1.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>6de48a56-3f30-4aac-b6cf-eee4b05ad43f</th>\n",
"      <td>{'input': 'who is kendall jenner's boyfriend? ...</td>\n",
"      <td>{'Error': 'ToolException(\"Too many arguments t...</td>\n",
"      <td>{'output': 'Bad Bunny's height raised to the p...</td>\n",
"      <td>NaN</td>\n",
"      <td>NaN</td>\n",
"      <td>NaN</td>\n",
"      <td>NaN</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>0c41cc28-9c07-4550-8940-68b58cbc045e</th>\n",
"      <td>{'input': 'what is 1213 divided by 4345?'}</td>\n",
"      <td>{'input': 'what is 1213 divided by 4345?', 'ou...</td>\n",
"      <td>{'output': '0.2791714614499425'}</td>\n",
"      <td>0.144522</td>\n",
"      <td>1.0</td>\n",
"      <td>1.0</td>\n",
"      <td>1.0</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"                                      input  \\\n",
"78c959a4-467d-4469-8bd7-c5f0b059bc4a  {'input': 'who is dua lipa's boyfriend? what i...  \n",
"f8dfff24-d288-4d8e-ba94-c3cc33dd10d0  {'input': 'what is dua lipa's boyfriend age ra...  \n",
"c78d5e84-3fbd-442f-affb-4b0e5806c439  {'input': 'how far is it from paris to boston ...  \n",
"02cadef9-5794-49a9-8e43-acca977cab60  {'input': 'How many people live in canada as o...  \n",
"e888a340-0486-4552-bb4b-911756e6bed7  {'input': 'what was the total number of points...  \n",
"1b1f655b-754c-474d-8832-e6ec6bad3943  {'input': 'what was the total number of points...  \n",
"51f1b1f1-3b51-400f-b871-65f8a3a3c2d4  {'input': 'how many more points were scored in...  \n",
"83339364-0135-4efd-a24a-f3bd2a85e33a  {'input': 'what is 153 raised to .1312 power?'}  \n",
"6de48a56-3f30-4aac-b6cf-eee4b05ad43f  {'input': 'who is kendall jenner's boyfriend? ...  \n",
"0c41cc28-9c07-4550-8940-68b58cbc045e  {'input': 'what is 1213 divided by 4345?'}  \n",
"\n",
"                                      output  \\\n",
"78c959a4-467d-4469-8bd7-c5f0b059bc4a  {'Error': 'ValueError('LLMMathChain._evaluate(...  \n",
"f8dfff24-d288-4d8e-ba94-c3cc33dd10d0  {'Error': 'ValueError('LLMMathChain._evaluate(...  \n",
"c78d5e84-3fbd-442f-affb-4b0e5806c439  {'input': 'how far is it from paris to boston ...  \n",
"02cadef9-5794-49a9-8e43-acca977cab60  {'input': 'How many people live in canada as o...  \n",
"e888a340-0486-4552-bb4b-911756e6bed7  {'input': 'what was the total number of points...  \n",
"1b1f655b-754c-474d-8832-e6ec6bad3943  {'input': 'what was the total number of points...  \n",
"51f1b1f1-3b51-400f-b871-65f8a3a3c2d4  {'input': 'how many more points were scored in...  \n",
"83339364-0135-4efd-a24a-f3bd2a85e33a  {'input': 'what is 153 raised to .1312 power?'...  \n",
"6de48a56-3f30-4aac-b6cf-eee4b05ad43f  {'Error': 'ToolException(\"Too many arguments t...  \n",
"0c41cc28-9c07-4550-8940-68b58cbc045e  {'input': 'what is 1213 divided by 4345?', 'ou...  \n",
"\n",
"                                      reference  \\\n",
"78c959a4-467d-4469-8bd7-c5f0b059bc4a  {'output': 'Romain Gavras' age raised to the 0...  \n",
"f8dfff24-d288-4d8e-ba94-c3cc33dd10d0  {'output': 'Approximately 4.9888126515157.'}  \n",
"c78d5e84-3fbd-442f-affb-4b0e5806c439  {'output': 'The distance from Paris to Boston ...  \n",
"02cadef9-5794-49a9-8e43-acca977cab60  {'output': 'The current population of Canada a...  \n",
"e888a340-0486-4552-bb4b-911756e6bed7  {'output': '3'}  \n",
"1b1f655b-754c-474d-8832-e6ec6bad3943  {'output': 'The total number of points scored ...  \n",
"51f1b1f1-3b51-400f-b871-65f8a3a3c2d4  {'output': '15'}  \n",
"83339364-0135-4efd-a24a-f3bd2a85e33a  {'output': '1.9347796717823205'}  \n",
"6de48a56-3f30-4aac-b6cf-eee4b05ad43f  {'output': 'Bad Bunny's height raised to the p...  \n",
"0c41cc28-9c07-4550-8940-68b58cbc045e  {'output': '0.2791714614499425'}  \n",
"\n",
"                                      embedding_cosine_distance  correctness  \\\n",
"78c959a4-467d-4469-8bd7-c5f0b059bc4a  NaN  NaN  \n",
"f8dfff24-d288-4d8e-ba94-c3cc33dd10d0  NaN  NaN  \n",
"c78d5e84-3fbd-442f-affb-4b0e5806c439  0.007577  1.0  \n",
"02cadef9-5794-49a9-8e43-acca977cab60  0.016324  1.0  \n",
"e888a340-0486-4552-bb4b-911756e6bed7  0.225076  0.0  \n",
"1b1f655b-754c-474d-8832-e6ec6bad3943  0.011580  0.0  \n",
"51f1b1f1-3b51-400f-b871-65f8a3a3c2d4  0.251002  1.0  \n",
"83339364-0135-4efd-a24a-f3bd2a85e33a  0.127441  1.0  \n",
"6de48a56-3f30-4aac-b6cf-eee4b05ad43f  NaN  NaN  \n",
"0c41cc28-9c07-4550-8940-68b58cbc045e  0.144522  1.0  \n",
"\n",
"                                      helpfulness  fifth-grader-score  \n",
"78c959a4-467d-4469-8bd7-c5f0b059bc4a  NaN  NaN  \n",
"f8dfff24-d288-4d8e-ba94-c3cc33dd10d0  NaN  NaN  \n",
"c78d5e84-3fbd-442f-affb-4b0e5806c439  1.0  1.0  \n",
"02cadef9-5794-49a9-8e43-acca977cab60  1.0  1.0  \n",
"e888a340-0486-4552-bb4b-911756e6bed7  0.0  0.0  \n",
"1b1f655b-754c-474d-8832-e6ec6bad3943  0.0  0.0  \n",
"51f1b1f1-3b51-400f-b871-65f8a3a3c2d4  1.0  1.0  \n",
"83339364-0135-4efd-a24a-f3bd2a85e33a  1.0  1.0  \n",
"6de48a56-3f30-4aac-b6cf-eee4b05ad43f  NaN  NaN  \n",
"0c41cc28-9c07-4550-8940-68b58cbc045e  1.0  1.0  "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain_results.to_dataframe()"
]
},
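Once `chain_results.to_dataframe()` returns the table above, ordinary pandas operations can separate failed runs (NaN feedback) from scored ones. A hedged sketch, assuming pandas is available as installed earlier in the notebook:

```python
df = chain_results.to_dataframe()

# Rows whose chains raised (e.g. LLMMathChain / ToolException failures) have NaN feedback.
failed = df[df["correctness"].isna()]
scored = df.dropna(subset=["correctness"])
print(len(failed), "failed;", "mean correctness:", scored["correctness"].mean())
```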
{
"cell_type": "markdown",
"id": "cdacd159-eb4d-49e9-bb2a-c55322c40ed4",
@@ -474,7 +660,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 18,
"id": "33bfefde-d1bb-4f50-9f7a-fd572ee76820",
"metadata": {
"tags": []
@@ -483,22 +669,22 @@
{
"data": {
"text/plain": [
"Run(id=UUID('e39f310b-c5a8-4192-8a59-6a9498e1cb85'), name='AgentExecutor', start_time=datetime.datetime(2023, 7, 17, 18, 25, 30, 653872), run_type=<RunTypeEnum.chain: 'chain'>, end_time=datetime.datetime(2023, 7, 17, 18, 25, 35, 359642), extra={'runtime': {'library': 'langchain', 'runtime': 'python', 'platform': 'macOS-13.4.1-arm64-arm-64bit', 'sdk_version': '0.0.8', 'library_version': '0.0.231', 'runtime_version': '3.11.2'}, 'total_tokens': 512, 'prompt_tokens': 451, 'completion_tokens': 61}, error=None, serialized=None, events=[{'name': 'start', 'time': '2023-07-17T18:25:30.653872'}, {'name': 'end', 'time': '2023-07-17T18:25:35.359642'}], inputs={'input': 'what is 1213 divided by 4345?'}, outputs={'output': '1213 divided by 4345 is approximately 0.2792.'}, reference_example_id=UUID('a75cf754-4f73-46fd-b126-9bcd0695e463'), parent_run_id=None, tags=['openai-functions', 'testing-notebook'], execution_order=1, session_id=UUID('1c9baec3-ae86-4fac-9e99-e1b9f8e7818c'), child_run_ids=[UUID('40d0fdca-0b2b-47f4-a9da-f2b229aa4ed5'), UUID('cfa5130f-264c-4126-8950-ec1c4c31b800'), UUID('ba638a2f-2a57-45db-91e8-9a7a66a42c5a'), UUID('fcc29b5a-cdb7-4bcc-8194-47729bbdf5fb'), UUID('a6f92bf5-cfba-4747-9336-370cb00c928a'), UUID('65312576-5a39-4250-b820-4dfae7d73945')], child_runs=None, feedback_stats={'correctness': {'n': 1, 'avg': 1.0, 'mode': 1}, 'helpfulness': {'n': 1, 'avg': 1.0, 'mode': 1}, 'fifth-grader-score': {'n': 1, 'avg': 1.0, 'mode': 1}, 'embedding_cosine_distance': {'n': 1, 'avg': 0.144522385071361, 'mode': 0.144522385071361}})"
"Run(id=UUID('a6893e95-a9cc-43e0-b9fa-f471b0cfee83'), name='AgentExecutor', start_time=datetime.datetime(2023, 9, 13, 22, 34, 32, 177406), run_type='chain', end_time=datetime.datetime(2023, 9, 13, 22, 34, 37, 77740), extra={'runtime': {'cpu': {'time': {'sys': 3.153218304, 'user': 5.045262336}, 'percent': 0.0, 'ctx_switches': {'voluntary': 42164.0, 'involuntary': 0.0}}, 'mem': {'rss': 184205312.0}, 'library': 'langchain', 'runtime': 'python', 'platform': 'macOS-13.4.1-arm64-arm-64bit', 'sdk_version': '0.0.26', 'thread_count': 58.0, 'library_version': '0.0.286', 'runtime_version': '3.11.2', 'langchain_version': '0.0.286', 'py_implementation': 'CPython'}}, error=None, serialized=None, events=[{'name': 'start', 'time': '2023-09-13T22:34:32.177406'}, {'name': 'end', 'time': '2023-09-13T22:34:37.077740'}], inputs={'input': 'what is 1213 divided by 4345?'}, outputs={'output': '1213 divided by 4345 is approximately 0.2792.'}, reference_example_id=UUID('0c41cc28-9c07-4550-8940-68b58cbc045e'), parent_run_id=None, tags=['openai-functions', 'testing-notebook'], execution_order=1, session_id=UUID('7865a050-467e-4c58-9322-58a26f182ecb'), child_run_ids=[UUID('37faef05-b6b3-4cb7-a6db-471425e69b46'), UUID('2d6a895f-de2c-4f7f-b5f1-ca876d38e530'), UUID('e7d145e3-74b0-4f32-9240-3e370becdf8f'), UUID('10db62c9-fe4f-4aba-959a-ad02cfadfa20'), UUID('8dc46a27-8ab9-4f33-9ec1-660ca73ebb4f'), UUID('eccd042e-dde0-4425-b62f-e855e25d6b64')], child_runs=None, feedback_stats={'correctness': {'n': 1, 'avg': 1.0, 'mode': 1, 'is_all_model': True}, 'helpfulness': {'n': 1, 'avg': 1.0, 'mode': 1, 'is_all_model': True}, 'fifth-grader-score': {'n': 1, 'avg': 1.0, 'mode': 1, 'is_all_model': True}, 'embedding_cosine_distance': {'n': 1, 'avg': 0.144522385071361, 'mode': 0.144522385071361, 'is_all_model': True}}, app_path='/o/ebbaf2eb-769b-4505-aca2-d11de10372a4/projects/p/7865a050-467e-4c58-9322-58a26f182ecb/r/a6893e95-a9cc-43e0-b9fa-f471b0cfee83', manifest_id=None, status='success', prompt_tokens=None, completion_tokens=None, total_tokens=None, first_token_time=None, parent_run_ids=None)"
]
},
"execution_count": 10,
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"runs = list(client.list_runs(dataset_name=dataset_name))\n",
"runs = list(client.list_runs(project_name=chain_results[\"project_name\"], execution_order=1))\n",
"runs[0]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 22,
"id": "6595c888-1f5c-4ae3-9390-0a559f5575d1",
"metadata": {
"tags": []
@@ -507,21 +693,17 @@
{
"data": {
"text/plain": [
"{'correctness': {'n': 7, 'avg': 0.5714285714285714, 'mode': 1},\n",
" 'helpfulness': {'n': 7, 'avg': 0.7142857142857143, 'mode': 1},\n",
" 'fifth-grader-score': {'n': 7, 'avg': 0.7142857142857143, 'mode': 1},\n",
" 'embedding_cosine_distance': {'n': 7,\n",
"  'avg': 0.11462010799473926,\n",
"  'mode': 0.0130477459560272}}"
"TracerSessionResult(id=UUID('7865a050-467e-4c58-9322-58a26f182ecb'), start_time=datetime.datetime(2023, 9, 13, 22, 34, 10, 611846), name='test-dependable-stop-67', extra=None, tenant_id=UUID('ebbaf2eb-769b-4505-aca2-d11de10372a4'), run_count=None, latency_p50=None, latency_p99=None, total_tokens=None, prompt_tokens=None, completion_tokens=None, last_run_start_time=None, feedback_stats=None, reference_dataset_ids=None, run_facets=None)"
]
},
"execution_count": 11,
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"client.read_project(project_id=runs[0].session_id).feedback_stats"
"# After some time, these will be populated.\n",
"client.read_project(project_name=chain_results[\"project_name\"]).feedback_stats"
]
},
{
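The two hunks above switch run and project lookups from dataset/session identifiers to the project name. A hedged sketch of the resulting flow, assuming `client` is the LangSmith `Client` and `chain_results` comes from `run_on_dataset` above:

```python
# Fetch the top-level runs for the evaluation project (execution_order=1 means roots).
runs = list(client.list_runs(project_name=chain_results["project_name"], execution_order=1))
print(runs[0].feedback_stats)

# After some time, project-level feedback stats will be populated.
print(client.read_project(project_name=chain_results["project_name"]).feedback_stats)
```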
181  docs/extras/integrations/chat/baidu_qianfan_endpoint.ipynb  Normal file
@@ -0,0 +1,181 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Baidu Qianfan\n",
"\n",
"Baidu AI Cloud Qianfan Platform is a one-stop large model development and service operation platform for enterprise developers. Qianfan not only provides the Wenxin Yiyan (ERNIE-Bot) model and third-party open-source models, but also various AI development tools and a complete development environment, which makes it easy for customers to use and develop large model applications.\n",
"\n",
"Basically, these models are split into the following types:\n",
"\n",
"- Embedding\n",
"- Chat\n",
"- Completion\n",
"\n",
"In this notebook, we will introduce how to use langchain with [Qianfan](https://cloud.baidu.com/doc/WENXINWORKSHOP/index.html), mainly the `Chat` models corresponding\n",
" to the package `langchain/chat_models` in langchain:\n",
"\n",
"\n",
"## API Initialization\n",
"\n",
"To use the LLM services based on Baidu Qianfan, you have to initialize these parameters:\n",
"\n",
"You can either initialize the AK and SK in environment variables or pass them as init params:\n",
"\n",
"```bash\n",
"export QIANFAN_AK=XXX\n",
"export QIANFAN_SK=XXX\n",
"```\n",
"\n",
"## Current supported models:\n",
"\n",
"- ERNIE-Bot-turbo (default model)\n",
"- ERNIE-Bot\n",
"- BLOOMZ-7B\n",
"- Llama-2-7b-chat\n",
"- Llama-2-13b-chat\n",
"- Llama-2-70b-chat\n",
"- Qianfan-BLOOMZ-7B-compressed\n",
"- Qianfan-Chinese-Llama-2-7B\n",
"- ChatGLM2-6B-32K\n",
"- AquilaChat-7B"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"For basic init and call\"\"\"\n",
"from langchain.chat_models.baidu_qianfan_endpoint import QianfanChatEndpoint\n",
"from langchain.schema import HumanMessage\n",
"import os\n",
"\n",
"os.environ[\"QIANFAN_AK\"] = \"xxx\"\n",
"os.environ[\"QIANFAN_SK\"] = \"xxx\"\n",
"\n",
"\n",
"chat = QianfanChatEndpoint(\n",
"    qianfan_ak=\"xxx\",\n",
"    qianfan_sk=\"xxx\",\n",
"    streaming=True,\n",
")\n",
"res = chat([HumanMessage(content=\"write a funny joke\")])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_models.baidu_qianfan_endpoint import QianfanChatEndpoint\n",
"from langchain.schema import HumanMessage\n",
"import asyncio\n",
"\n",
"chatLLM = QianfanChatEndpoint(\n",
"    streaming=True,\n",
")\n",
"res = chatLLM.stream([HumanMessage(content=\"hi\")], streaming=True)\n",
"for r in res:\n",
"    print(\"chat resp1:\", r)\n",
"\n",
"\n",
"async def run_aio_generate():\n",
"    resp = await chatLLM.agenerate(messages=[[HumanMessage(content=\"write a 20 words sentence about sea.\")]])\n",
"    print(resp)\n",
"\n",
"await run_aio_generate()\n",
"\n",
"\n",
"async def run_aio_stream():\n",
"    async for res in chatLLM.astream([HumanMessage(content=\"write a 20 words sentence about sea.\")]):\n",
"        print(\"astream\", res)\n",
"\n",
"await run_aio_stream()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use different models in Qianfan\n",
"\n",
"If you want to deploy your own model based on ERNIE-Bot or third-party open-source models, you can follow these steps:\n",
"\n",
"- 1. (Optional: skip this if your model is included in the default models.) Deploy your model in the Qianfan Console and get your own customized deploy endpoint.\n",
"- 2. Set up the field called `endpoint` in the initialization:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"chatBloom = QianfanChatEndpoint(\n",
"    streaming=True,\n",
"    model=\"BLOOMZ-7B\",\n",
")\n",
"res = chatBloom([HumanMessage(content=\"hi\")])\n",
"print(res)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Model Params:\n",
"\n",
"For now, only `ERNIE-Bot` and `ERNIE-Bot-turbo` support the model params below; we might support more models in the future.\n",
"\n",
"- temperature\n",
"- top_p\n",
"- penalty_score\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"res = chat.stream([HumanMessage(content=\"hi\")], **{'top_p': 0.4, 'temperature': 0.1, 'penalty_score': 1})\n",
"\n",
"for r in res:\n",
"    print(r)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
},
"vscode": {
"interpreter": {
"hash": "2d8226dd90b7dc6e8932aea372a8bf9fc71abac4be3cdd5a63a36c2a19e3700f"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
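Pulling the pieces of the new chat notebook together, here is a minimal sketch of initializing the endpoint against a custom deployment. The `endpoint` value `"eb-instant"` is taken from the companion LLM notebook and is an assumption here:

```python
import os

from langchain.chat_models.baidu_qianfan_endpoint import QianfanChatEndpoint
from langchain.schema import HumanMessage

# Credentials can come from env vars or constructor kwargs, per the notebook.
os.environ["QIANFAN_AK"] = "xxx"
os.environ["QIANFAN_SK"] = "xxx"

chat = QianfanChatEndpoint(
    model="ERNIE-Bot-turbo",
    endpoint="eb-instant",  # only needed for a custom deployed model (assumption)
)
print(chat([HumanMessage(content="hi")]))
```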
@@ -23,9 +23,7 @@
"source": [
"from langchain.document_loaders import ArcGISLoader\n",
"\n",
"\n",
"url = \"https://maps1.vcgov.org/arcgis/rest/services/Beaches/MapServer/7\"\n",
"\n",
"loader = ArcGISLoader(url)"
]
},
@@ -39,8 +37,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 7.86 ms, sys: 0 ns, total: 7.86 ms\n",
"Wall time: 802 ms\n"
"CPU times: user 2.37 ms, sys: 5.83 ms, total: 8.19 ms\n",
"Wall time: 1.05 s\n"
]
}
],
@@ -59,7 +57,7 @@
{
"data": {
"text/plain": [
"{'accessed': '2023-08-15T04:30:41.689270+00:00Z',\n",
"{'accessed': '2023-09-13T19:58:32.546576+00:00Z',\n",
" 'name': 'Beach Ramps',\n",
" 'url': 'https://maps1.vcgov.org/arcgis/rest/services/Beaches/MapServer/7',\n",
" 'layer_description': '(Not Provided)',\n",
@@ -243,9 +241,76 @@
"docs[0].metadata"
]
},
{
"cell_type": "markdown",
"id": "a9687fb6-5016-41a1-b4e4-7a042aa5291e",
"metadata": {},
"source": [
"### Retrieving Geometries\n",
"\n",
"\n",
"If you want to retrieve feature geometries, you may do so with the `return_geometry` keyword.\n",
"\n",
"Each document's geometry will be stored in its metadata dictionary."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "680247b1-cb2f-4d76-ad56-75d0230c2f2a",
"metadata": {},
"outputs": [],
"source": [
"loader_geom = ArcGISLoader(url, return_geometry=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "93656a43-8c97-4e79-b4e1-be2e4eff98d5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 9.6 ms, sys: 5.84 ms, total: 15.4 ms\n",
"Wall time: 1.06 s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"docs = loader_geom.load()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "c02eca3b-634a-4d02-8ec0-ae29f5feac6b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'x': -81.01508803280349,\n",
" 'y': 29.24246579525828,\n",
" 'spatialReference': {'wkid': 4326, 'latestWkid': 4326}}"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs[0].metadata['geometry']"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "1d132b7d-5a13-4d66-98e8-785ffdf87af0",
"metadata": {},
"outputs": [
@@ -253,29 +318,29 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{\"OBJECTID\": 4, \"AccessName\": \"BEACHWAY AV\", \"AccessID\": \"NS-106\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1400 N ATLANTIC AV\", \"MilePost\": 1.57, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 5, \"AccessName\": \"SEABREEZE BLVD\", \"AccessID\": \"DB-051\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"500 BLK N ATLANTIC AV\", \"MilePost\": 14.24, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n",
"{\"OBJECTID\": 6, \"AccessName\": \"27TH AV\", \"AccessID\": \"NS-141\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3600 BLK S ATLANTIC AV\", \"MilePost\": 4.83, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n",
"{\"OBJECTID\": 11, \"AccessName\": \"INTERNATIONAL SPEEDWAY BLVD\", \"AccessID\": \"DB-059\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"300 BLK S ATLANTIC AV\", \"MilePost\": 15.27, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n",
"{\"OBJECTID\": 14, \"AccessName\": \"GRANADA BLVD\", \"AccessID\": \"OB-030\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"20 BLK OCEAN SHORE BLVD\", \"MilePost\": 10.02, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n",
"{\"OBJECTID\": 27, \"AccessName\": \"UNIVERSITY BLVD\", \"AccessID\": \"DB-048\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"900 BLK N ATLANTIC AV\", \"MilePost\": 13.74, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n",
"{\"OBJECTID\": 38, \"AccessName\": \"BEACH ST\", \"AccessID\": \"PI-097\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"4890 BLK S ATLANTIC AV\", \"MilePost\": 25.85, \"City\": \"PONCE INLET\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n",
"{\"OBJECTID\": 42, \"AccessName\": \"BOTEFUHR AV\", \"AccessID\": \"DBS-067\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1900 BLK S ATLANTIC AV\", \"MilePost\": 16.68, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 43, \"AccessName\": \"SILVER BEACH AV\", \"AccessID\": \"DB-064\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1000 BLK S ATLANTIC AV\", \"MilePost\": 15.98, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 45, \"AccessName\": \"MILSAP RD\", \"AccessID\": \"OB-037\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"700 BLK S ATLANTIC AV\", \"MilePost\": 11.52, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 56, \"AccessName\": \"3RD AV\", \"AccessID\": \"NS-118\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1200 BLK HILL ST\", \"MilePost\": 3.25, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 64, \"AccessName\": \"DUNLAWTON BLVD\", \"AccessID\": \"DBS-078\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3400 BLK S ATLANTIC AV\", \"MilePost\": 20.61, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 69, \"AccessName\": \"EMILIA AV\", \"AccessID\": \"DBS-082\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3790 BLK S ATLANTIC AV\", \"MilePost\": 21.38, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n",
"{\"OBJECTID\": 94, \"AccessName\": \"FLAGLER AV\", \"AccessID\": \"NS-110\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"500 BLK FLAGLER AV\", \"MilePost\": 2.57, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 96, \"AccessName\": \"CRAWFORD RD\", \"AccessID\": \"NS-108\", \"AccessType\": \"OPEN VEHICLE RAMP - PASS\", \"GeneralLoc\": \"800 BLK N ATLANTIC AV\", \"MilePost\": 2.19, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 124, \"AccessName\": \"HARTFORD AV\", \"AccessID\": \"DB-043\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1890 BLK N ATLANTIC AV\", \"MilePost\": 12.76, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 127, \"AccessName\": \"WILLIAMS AV\", \"AccessID\": \"DB-042\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"2200 BLK N ATLANTIC AV\", \"MilePost\": 12.5, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 136, \"AccessName\": \"CARDINAL DR\", \"AccessID\": \"OB-036\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"600 BLK S ATLANTIC AV\", \"MilePost\": 11.27, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 229, \"AccessName\": \"EL PORTAL ST\", \"AccessID\": \"DBS-076\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3200 BLK S ATLANTIC AV\", \"MilePost\": 20.04, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 230, \"AccessName\": \"HARVARD DR\", \"AccessID\": \"OB-038\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"900 BLK S ATLANTIC AV\", \"MilePost\": 11.72, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 232, \"AccessName\": \"VAN AV\", \"AccessID\": \"DBS-075\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3100 BLK S ATLANTIC AV\", \"MilePost\": 19.6, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 234, \"AccessName\": \"ROCKEFELLER DR\", \"AccessID\": \"OB-034\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"400 BLK S ATLANTIC AV\", \"MilePost\": 10.9, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 235, \"AccessName\": \"MINERVA RD\", \"AccessID\": \"DBS-069\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"2300 BLK S ATLANTIC AV\", \"MilePost\": 17.52, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n"
"{\"OBJECTID\": 4, \"AccessName\": \"UNIVERSITY BLVD\", \"AccessID\": \"DB-048\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"900 BLK N ATLANTIC AV\", \"MilePost\": 13.74, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694597536000, \"DrivingZone\": \"BOTH\"}\n",
"{\"OBJECTID\": 18, \"AccessName\": \"BEACHWAY AV\", \"AccessID\": \"NS-106\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1400 N ATLANTIC AV\", \"MilePost\": 1.57, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694600478000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 24, \"AccessName\": \"27TH AV\", \"AccessID\": \"NS-141\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3600 BLK S ATLANTIC AV\", \"MilePost\": 4.83, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED FOR HIGH TIDE\", \"Entry_Date_Time\": 1694619363000, \"DrivingZone\": \"BOTH\"}\n",
"{\"OBJECTID\": 26, \"AccessName\": \"SEABREEZE BLVD\", \"AccessID\": \"DB-051\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"500 BLK N ATLANTIC AV\", \"MilePost\": 14.24, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694597536000, \"DrivingZone\": \"BOTH\"}\n",
"{\"OBJECTID\": 30, \"AccessName\": \"INTERNATIONAL SPEEDWAY BLVD\", \"AccessID\": \"DB-059\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"300 BLK S ATLANTIC AV\", \"MilePost\": 15.27, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694598638000, \"DrivingZone\": \"BOTH\"}\n",
"{\"OBJECTID\": 33, \"AccessName\": \"GRANADA BLVD\", \"AccessID\": \"OB-030\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"20 BLK OCEAN SHORE BLVD\", \"MilePost\": 10.02, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1694595424000, \"DrivingZone\": \"BOTH\"}\n",
"{\"OBJECTID\": 39, \"AccessName\": \"BEACH ST\", \"AccessID\": \"PI-097\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"4890 BLK S ATLANTIC AV\", \"MilePost\": 25.85, \"City\": \"PONCE INLET\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1694596294000, \"DrivingZone\": \"BOTH\"}\n",
"{\"OBJECTID\": 44, \"AccessName\": \"SILVER BEACH AV\", \"AccessID\": \"DB-064\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1000 BLK S ATLANTIC AV\", \"MilePost\": 15.98, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694598638000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 45, \"AccessName\": \"BOTEFUHR AV\", \"AccessID\": \"DBS-067\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1900 BLK S ATLANTIC AV\", \"MilePost\": 16.68, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694598638000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 46, \"AccessName\": \"MINERVA RD\", \"AccessID\": \"DBS-069\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"2300 BLK S ATLANTIC AV\", \"MilePost\": 17.52, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694598638000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 56, \"AccessName\": \"3RD AV\", \"AccessID\": \"NS-118\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1200 BLK HILL ST\", \"MilePost\": 3.25, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694600478000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 65, \"AccessName\": \"MILSAP RD\", \"AccessID\": \"OB-037\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"700 BLK S ATLANTIC AV\", \"MilePost\": 11.52, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1694595749000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 72, \"AccessName\": \"ROCKEFELLER DR\", \"AccessID\": \"OB-034\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"400 BLK S ATLANTIC AV\", \"MilePost\": 10.9, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED - SEASONAL\", \"Entry_Date_Time\": 1694591351000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 74, \"AccessName\": \"DUNLAWTON BLVD\", \"AccessID\": \"DBS-078\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3400 BLK S ATLANTIC AV\", \"MilePost\": 20.61, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694601124000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 77, \"AccessName\": \"EMILIA AV\", \"AccessID\": \"DBS-082\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3790 BLK S ATLANTIC AV\", \"MilePost\": 21.38, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694601124000, \"DrivingZone\": \"BOTH\"}\n",
"{\"OBJECTID\": 84, \"AccessName\": \"VAN AV\", \"AccessID\": \"DBS-075\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3100 BLK S ATLANTIC AV\", \"MilePost\": 19.6, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694601124000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 104, \"AccessName\": \"HARVARD DR\", \"AccessID\": \"OB-038\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"900 BLK S ATLANTIC AV\", \"MilePost\": 11.72, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694597536000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 106, \"AccessName\": \"WILLIAMS AV\", \"AccessID\": \"DB-042\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"2200 BLK N ATLANTIC AV\", \"MilePost\": 12.5, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694597536000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 109, \"AccessName\": \"HARTFORD AV\", \"AccessID\": \"DB-043\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1890 BLK N ATLANTIC AV\", \"MilePost\": 12.76, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED - SEASONAL\", \"Entry_Date_Time\": 1694591351000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 138, \"AccessName\": \"CRAWFORD RD\", \"AccessID\": \"NS-108\", \"AccessType\": \"OPEN VEHICLE RAMP - PASS\", \"GeneralLoc\": \"800 BLK N ATLANTIC AV\", \"MilePost\": 2.19, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694600478000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 140, \"AccessName\": \"FLAGLER AV\", \"AccessID\": \"NS-110\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"500 BLK FLAGLER AV\", \"MilePost\": 2.57, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694600478000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 144, \"AccessName\": \"CARDINAL DR\", \"AccessID\": \"OB-036\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"600 BLK S ATLANTIC AV\", \"MilePost\": 11.27, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1694595749000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 174, \"AccessName\": \"EL PORTAL ST\", \"AccessID\": \"DBS-076\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3200 BLK S ATLANTIC AV\", \"MilePost\": 20.04, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694601124000, \"DrivingZone\": \"YES\"}\n"
]
}
],
@@ -301,7 +366,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
"version": "3.10.12"
}
},
"nbformat": 4,
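The geometry-retrieval flow added in this notebook condenses to a short script. A sketch under the notebook's own assumptions (the public Volusia County "Beach Ramps" layer, and an environment where the loader's dependencies are installed):

```python
from langchain.document_loaders import ArcGISLoader

# Same public feature layer the notebook queries; continued availability is assumed.
url = "https://maps1.vcgov.org/arcgis/rest/services/Beaches/MapServer/7"

# return_geometry=True stores each feature's geometry in the document metadata.
loader_geom = ArcGISLoader(url, return_geometry=True)
docs = loader_geom.load()
print(docs[0].metadata["geometry"])  # {'x': ..., 'y': ..., 'spatialReference': {'wkid': 4326, ...}}
```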
177  docs/extras/integrations/llms/baidu_qianfan_endpoint.ipynb  Normal file
@@ -0,0 +1,177 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Baidu Qianfan\n",
"\n",
"Baidu AI Cloud Qianfan Platform is a one-stop large model development and service operation platform for enterprise developers. Qianfan not only provides the Wenxin Yiyan (ERNIE-Bot) model and third-party open-source models, but also various AI development tools and a complete development environment, which makes it easy for customers to use and develop large model applications.\n",
"\n",
"Basically, these models are split into the following types:\n",
"\n",
"- Embedding\n",
"- Chat\n",
"- Completion\n",
"\n",
"In this notebook, we will introduce how to use langchain with [Qianfan](https://cloud.baidu.com/doc/WENXINWORKSHOP/index.html), mainly the `Completion` models corresponding\n",
" to the package `langchain/llms` in langchain:\n",
"\n",
"\n",
"\n",
"## API Initialization\n",
"\n",
"To use the LLM services based on Baidu Qianfan, you have to initialize these parameters:\n",
"\n",
"You can either initialize the AK and SK in environment variables or pass them as init params:\n",
"\n",
"```bash\n",
"export QIANFAN_AK=XXX\n",
"export QIANFAN_SK=XXX\n",
"```\n",
"\n",
"## Current supported models:\n",
"\n",
"- ERNIE-Bot-turbo (default model)\n",
"- ERNIE-Bot\n",
"- BLOOMZ-7B\n",
"- Llama-2-7b-chat\n",
"- Llama-2-13b-chat\n",
"- Llama-2-70b-chat\n",
"- Qianfan-BLOOMZ-7B-compressed\n",
"- Qianfan-Chinese-Llama-2-7B\n",
"- ChatGLM2-6B-32K\n",
"- AquilaChat-7B"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"For basic init and call\"\"\"\n",
"from langchain.llms.baidu_qianfan_endpoint import QianfanLLMEndpoint\n",
"\n",
"import os\n",
"\n",
"os.environ[\"QIANFAN_AK\"] = \"xx\"\n",
"os.environ[\"QIANFAN_SK\"] = \"xx\"\n",
"\n",
"llm = QianfanLLMEndpoint(streaming=True, ak=\"xx\", sk=\"xx\")\n",
"res = llm(\"hi\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"Test for llm generate\"\"\"\n",
"res = llm.generate(prompts=[\"hillo?\"])\n",
"import asyncio\n",
"\"\"\"Test for llm aio generate\"\"\"\n",
"async def run_aio_generate():\n",
"    resp = await llm.agenerate(prompts=[\"Write a 20-word article about rivers.\"])\n",
"    print(resp)\n",
"\n",
"await run_aio_generate()\n",
"\n",
"\"\"\"Test for llm stream\"\"\"\n",
"for res in llm.stream(\"write a joke.\"):\n",
"    print(res)\n",
"\n",
"\"\"\"Test for llm aio stream\"\"\"\n",
"async def run_aio_stream():\n",
"    async for res in llm.astream(\"Write a 20-word article about mountains\"):\n",
"        print(res)\n",
"\n",
"await run_aio_stream()\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use different models in Qianfan\n",
"\n",
"If you want to deploy your own model based on ERNIE-Bot or several open-source models, you can follow these steps:\n",
"\n",
"- 1. (Optional: skip this if your model is included in the default models.) Deploy your model in the Qianfan Console and get your own customized deploy endpoint.\n",
"- 2. Set up the field called `endpoint` in the initialization:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"llm = QianfanLLMEndpoint(qianfan_ak='xxx',\n",
"    qianfan_sk='xxx',\n",
"    streaming=True,\n",
"    model=\"ERNIE-Bot-turbo\",\n",
"    endpoint=\"eb-instant\",\n",
")\n",
"res = llm(\"hi\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Model Params:\n",
"\n",
"For now, only `ERNIE-Bot` and `ERNIE-Bot-turbo` support the model params below; we might support more models in the future.\n",
"\n",
"- temperature\n",
"- top_p\n",
"- penalty_score\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"res = llm.generate(prompts=[\"hi\"], streaming=True, **{'top_p': 0.4, 'temperature': 0.1, 'penalty_score': 1})\n",
"\n",
"for r in res.generations[0]:\n",
"    print(r.text)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "6fa70026b407ae751a5c9e6bd7f7d482379da8ad616f98512780b705c84ee157"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
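As a usage sketch of the model params listed above, streaming a completion with explicit `temperature`, `top_p`, and `penalty_score` (the values are illustrative, and passing these kwargs through `stream` mirrors the pattern in the chat notebook rather than a documented guarantee):

```python
from langchain.llms.baidu_qianfan_endpoint import QianfanLLMEndpoint

# Assumes QIANFAN_AK / QIANFAN_SK are set in the environment as shown above.
llm = QianfanLLMEndpoint(model="ERNIE-Bot-turbo")

for chunk in llm.stream("hi", top_p=0.4, temperature=0.1, penalty_score=1):
    print(chunk)
```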
@@ -96,7 +96,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 1,
"metadata": {
"tags": []
},
@@ -119,16 +119,16 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"1. Dogs do not have the ability to operate complex machinery like cars.\\n2. Dogs do not have the physical dexterity or coordination to manipulate the controls of a car.\\n3. Dogs do not have the cognitive ability to understand traffic laws and safely operate a car.\\n4. Therefore, no, a dog cannot drive a car.\\nAssistant, please provide the reasoning step by step.\\n\\nAssistant:\\n\\n1. Dogs do not have the ability to operate complex machinery like cars.\\n\\t* This is because dogs do not possess the necessary cognitive abilities to understand how to operate a car.\\n2. Dogs do not have the physical dexterity or coordination to manipulate the controls of a car.\\n\\t* This is because dogs do not have the necessary fine motor skills to operate the pedals and steering wheel of a car.\\n3. Dogs do not have the cognitive ability to understand traffic laws and safely operate a car.\\n\\t* This is because dogs do not have the ability to comprehend and interpret traffic signals, road signs, and other drivers' behaviors.\\n4. Therefore, no, a dog cannot drive a car.\""
"'1. Dogs do not have the ability to operate a vehicle.\\n2. Dogs do not have hands or fingers to manipulate the steering wheel and pedals.\\n3. Dogs do not have the cognitive ability to understand traffic laws and road signs.\\n4. Dogs do not have the physical strength to operate the pedals and steering wheel.\\n\\nTherefore, the answer is no, a dog cannot drive a car.\\n\\nThe reasoning steps are:\\n\\n1. Dogs do not have the ability to operate a vehicle.\\n2. Dogs do not have hands or fingers to manipulate the steering wheel and pedals.\\n3. Dogs do not have the cognitive ability to understand traffic laws and road signs.\\n4. Dogs do not have the physical strength to operate the pedals and steering wheel.\\n\\nThe answer is no, a dog cannot drive a car.'"
]
},
"execution_count": 12,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -164,7 +164,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 4,
"metadata": {
"tags": []
},
@@ -177,16 +177,16 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'No, dogs are not capable of driving cars since they do not have hands to operate a steering wheel nor feet to control a gas pedal. However, it’s possible for a driver to train their pet in a different behavior and make them sit while transporting goods from one place to another.\\n\\n'"
"'Yes, dogs can drive cars as long as they are trained to do so and under proper supervision. When driving a vehicle, dogs need sufficient rest periods and breaks to ensure their safety and well-being. The dog has to be able to see clearly out of the driver’s window, have access to water and food while driving and also be restrained in some way during these times. If necessary, two dogs can safely ride together in one car where neither dog drives (the passenger rides with someone who is qualified to supervise them). When it comes time for them to get home after a day of driving, most dogs want nothing more than to get back into their bed\\n\\n'"
]
},
"execution_count": 14,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -208,7 +208,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -220,16 +220,16 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'https://replicate.delivery/pbxt/9fJFaKfk5Zj3akAAn955gjP49G8HQpHK01M6h3BfzQoWSbkiA/out-0.png'"
"'https://pbxt.replicate.delivery/KhTfVxYI9nyf7UExThmESBaW7dYr2IqrFDg5rGEoULTdSSkRA/out-0.png'"
]
},
"execution_count": 16,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -292,7 +292,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -300,8 +300,8 @@
"output_type": "stream",
"text": [
"1. Dogs do not have the ability to operate complex machinery like cars.\n",
"2. Dogs do not have the physical dexterity to manipulate the controls of a car.\n",
"3. Dogs do not have the cognitive ability to understand traffic laws and drive safely.\n",
"2. Dogs do not have hands or fingers to grasp and manipulate objects like steering wheels and pedals.\n",
"3. Dogs do not have the cognitive ability to understand traffic laws and road signs.\n",
"\n",
"Therefore, the answer is no, a dog cannot drive a car."
]
@@ -333,7 +333,7 @@
},
{
"cell_type": "code",
"execution_count": 64,
"execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -352,14 +352,14 @@
"7. Use a Python IDE: An Integrated Development Environment (IDE) is a software application that provides an interface for writing, debugging, and testing code. Using a Python IDE such as PyCharm, VSCode, or Spyder can make writing and debugging Python code much easier.\n",
|
||||
"8. Learn by building: One of the best ways to learn Python is by building projects. Start with small projects and gradually work your way up to more complex ones.\n",
|
||||
"9. Learn from others: Look at other people's code, understand how it works and try to implement it in your own way.\n",
|
||||
"10. Be patient: Learning a programming language takes time and practice, so be patient with yourself and don't get discouraged if you don't understand something at first.\n",
|
||||
"10. Learn the basics of programming: Before diving into Python, it's important to understand the basics of programming. Learn the basic concepts such as data types, variables, loops, conditional statements, functions, etc.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Please let me know if you have any other questions or if there is anything\n",
|
||||
"Raw output runtime: 32.74260359999607 seconds\n",
|
||||
"Please let me know\n",
|
||||
"Raw output runtime: 26.067544750000003 seconds\n",
|
||||
"Stopped output:\n",
|
||||
" There are several ways to learn Python, and the best method for you will depend on your learning style and goals. Here are a few suggestions:\n",
|
||||
"Stopped output runtime: 3.2350128999969456 seconds\n"
|
||||
"Stopped output runtime: 25.669324958000004 seconds\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -398,7 +398,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -414,7 +414,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -435,7 +435,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -456,7 +456,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -476,7 +476,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -496,7 +496,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -506,16 +506,16 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new SimpleSequentialChain chain...\u001b[0m\n",
|
||||
"\u001b[36;1m\u001b[1;3mColorful socks could be named \"Dazzle Socks\"\n",
|
||||
"\u001b[36;1m\u001b[1;3mColorful socks could be named \"Dazzle Socks\" or \"Barefoot Innovation.\"\n",
|
||||
"\n",
|
||||
"\u001b[0m\n",
|
||||
"\u001b[33;1m\u001b[1;3mA logo featuring bright colorful socks could be named Dazzle Socks\n",
|
||||
"\u001b[33;1m\u001b[1;3mA colorful pair of socks could represent Dazzle Socks, a fun and innovative company! The color blue might symbolize skyrockets, while yellow might suggest sunshine. Red could refer to blood in the water, inspiring aggression and action. Orange might stand for golden opportunities in the sand - maybe even leading us to our breakthroughs. Green could connote nature and money, signifying wealth and sustainability. Finally, white suggests purity and cleanliness, keeping us on track without being overly restrictive. Together, these colors create a unique and lively palette from which to design a logo.\n",
|
||||
"\n",
|
||||
"\u001b[0m\n",
|
||||
"\u001b[38;5;200m\u001b[1;3mhttps://replicate.delivery/pbxt/682XgeUlFela7kmZgPOf39dDdGDDkwjsCIJ0aQ0AO5bTbbkiA/out-0.png\u001b[0m\n",
|
||||
"\u001b[38;5;200m\u001b[1;3mhttps://pbxt.replicate.delivery/WebsLVtue2rl9kXNJY6rxdfZqDcVq28bmjkcYuXKqSicnkIjA/out-0.png\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"https://replicate.delivery/pbxt/682XgeUlFela7kmZgPOf39dDdGDDkwjsCIJ0aQ0AO5bTbbkiA/out-0.png\n"
|
||||
"https://pbxt.replicate.delivery/WebsLVtue2rl9kXNJY6rxdfZqDcVq28bmjkcYuXKqSicnkIjA/out-0.png\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -544,9 +544,9 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "poetry-venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
"name": "poetry-venv"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@@ -558,7 +558,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.9.1"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install \"cassio>=0.0.7\""
|
||||
"!pip install \"cassio>=0.1.0\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -155,7 +155,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,15 +1,20 @@
|
||||
# Milvus
|
||||
|
||||
This page covers how to use the Milvus ecosystem within LangChain.
|
||||
It is broken into two parts: installation and setup, and then references to specific Milvus wrappers.
|
||||
>[Milvus](https://milvus.io/docs/overview.md) is a database that stores, indexes, and manages
|
||||
> massive embedding vectors generated by deep neural networks and other machine learning (ML) models.
|
||||
|
||||
|
||||
## Installation and Setup
|
||||
- Install the Python SDK with `pip install pymilvus`
|
||||
## Wrappers
|
||||
|
||||
### VectorStore
|
||||
Install the Python SDK:
|
||||
|
||||
There exists a wrapper around Milvus indexes, allowing you to use it as a vectorstore,
|
||||
```bash
|
||||
pip install pymilvus
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
There exists a wrapper around `Milvus` indexes, allowing you to use it as a vectorstore,
|
||||
whether for semantic search or example selection.
|
||||
|
||||
To import this vectorstore:
|
||||
@@ -17,4 +22,4 @@ To import this vectorstore:
|
||||
from langchain.vectorstores import Milvus
|
||||
```
|
||||
|
||||
For a more detailed walkthrough of the Miluvs wrapper, see [this notebook](/docs/integrations/vectorstores/milvus.html)
|
||||
For a more detailed walkthrough of the `Milvus` wrapper, see [this notebook](/docs/integrations/vectorstores/milvus.html)
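A minimal usage sketch of the wrapper (the connection parameters below are assumptions for a default local Milvus deployment, and the sample texts are placeholders):

```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Milvus

# Assumes a Milvus server is reachable at the default local address.
vector_store = Milvus.from_texts(
    texts=["harrison worked at kensho"],
    embedding=OpenAIEmbeddings(),
    connection_args={"host": "127.0.0.1", "port": "19530"},
)
docs = vector_store.similarity_search("Where did harrison work?")
```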
|
||||
|
||||
@@ -1,16 +1,18 @@
|
||||
# Pinecone
|
||||
|
||||
This page covers how to use the Pinecone ecosystem within LangChain.
|
||||
It is broken into two parts: installation and setup, and then references to specific Pinecone wrappers.
|
||||
>[Pinecone](https://docs.pinecone.io/docs/overview) is a vector database with broad functionality.
|
||||
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
Install the Python SDK:
|
||||
|
||||
```bash
|
||||
pip install pinecone-client
|
||||
```
|
||||
|
||||
|
||||
## Vectorstore
|
||||
## Vector store
|
||||
|
||||
There exists a wrapper around Pinecone indexes, allowing you to use it as a vectorstore,
|
||||
whether for semantic search or example selection.
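A minimal usage sketch, assuming an existing Pinecone index; the API key, environment, and index name below are placeholders:

```python
import pinecone
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Pinecone

# Placeholder credentials; substitute your own key, environment, and index name.
pinecone.init(api_key="YOUR_API_KEY", environment="us-west1-gcp")

vector_store = Pinecone.from_texts(
    texts=["harrison worked at kensho"],
    embedding=OpenAIEmbeddings(),
    index_name="langchain-demo",
)
docs = vector_store.similarity_search("Where did harrison work?")
```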
|
||||
|
||||
@@ -1,15 +1,22 @@
|
||||
# Qdrant
|
||||
|
||||
This page covers how to use the Qdrant ecosystem within LangChain.
|
||||
It is broken into two parts: installation and setup, and then references to specific Qdrant wrappers.
|
||||
>[Qdrant](https://qdrant.tech/documentation/) (read: quadrant) is a vector similarity search engine.
|
||||
> It provides a production-ready service with a convenient API to store, search, and manage
|
||||
> points - vectors with an additional payload. `Qdrant` is tailored to extended filtering support.
|
||||
|
||||
|
||||
## Installation and Setup
|
||||
- Install the Python SDK with `pip install qdrant-client`
|
||||
## Wrappers
|
||||
|
||||
### VectorStore
|
||||
Install the Python SDK:
|
||||
|
||||
There exists a wrapper around Qdrant indexes, allowing you to use it as a vectorstore,
|
||||
```bash
|
||||
pip install qdrant-client
|
||||
```
|
||||
|
||||
|
||||
## Vector Store
|
||||
|
||||
There exists a wrapper around `Qdrant` indexes, allowing you to use it as a vectorstore,
|
||||
whether for semantic search or example selection.
|
||||
|
||||
To import this vectorstore:
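As a minimal sketch of the import and a quick local test (the `:memory:` location runs the client in-process, and the collection name is a placeholder):

```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Qdrant

# ":memory:" is handy for experiments; point `location` (or `url`) at a real
# Qdrant server for production use.
vector_store = Qdrant.from_texts(
    texts=["harrison worked at kensho"],
    embedding=OpenAIEmbeddings(),
    location=":memory:",
    collection_name="demo",
)
docs = vector_store.similarity_search("Where did harrison work?")
```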
|
||||
|
||||
@@ -1,18 +1,26 @@
|
||||
# Redis
|
||||
|
||||
>[Redis](https://redis.com) is an open-source key-value store that can be used as a cache,
|
||||
> message broker, database, vector database and more.
|
||||
|
||||
This page covers how to use the [Redis](https://redis.com) ecosystem within LangChain.
|
||||
It is broken into two parts: installation and setup, and then references to specific Redis wrappers.
|
||||
|
||||
## Installation and Setup
|
||||
- Install the Redis Python SDK with `pip install redis`
|
||||
|
||||
Install the Python SDK:
|
||||
|
||||
```bash
|
||||
pip install redis
|
||||
```
|
||||
|
||||
## Wrappers
|
||||
|
||||
All wrappers needing a redis url connection string to connect to the database support either a stand alone Redis server
|
||||
All wrappers that need a redis url connection string to connect to the database support either a standalone Redis server
|
||||
or a High-Availability setup with Replication and Redis Sentinels.
|
||||
|
||||
### Redis Standalone connection url
|
||||
For standalone Redis server the official redis connection url formats can be used as describe in the python redis modules
|
||||
For a standalone `Redis` server, the official redis connection url formats can be used, as described in the python redis module's
|
||||
"from_url()" method [Redis.from_url](https://redis-py.readthedocs.io/en/stable/connections.html#redis.Redis.from_url)
|
||||
|
||||
Example: `redis_url = "redis://:secret-pass@localhost:6379/0"`
|
||||
@@ -20,7 +28,7 @@ Example: `redis_url = "redis://:secret-pass@localhost:6379/0"`
|
||||
### Redis Sentinel connection url
|
||||
|
||||
For [Redis sentinel setups](https://redis.io/docs/management/sentinel/) the connection scheme is "redis+sentinel".
|
||||
This is an un-offical extensions to the official IANA registered protocol schemes as long as there is no connection url
|
||||
This is an unofficial extension to the official IANA-registered protocol schemes, used as long as there is no connection url
|
||||
for Sentinels available.
|
||||
|
||||
Example: `redis_url = "redis+sentinel://:secret-pass@sentinel-host:26379/mymaster/0"`
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Vearch
|
||||
|
||||
Vearch is a scalable distributed system for efficient similarity search of deep learning vectors.
|
||||
[Vearch](https://github.com/vearch/vearch) is a scalable distributed system for efficient similarity search of deep learning vectors.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
|
||||
@@ -1,17 +1,18 @@
|
||||
# Vectara
|
||||
|
||||
|
||||
What is Vectara?
|
||||
>[Vectara](https://docs.vectara.com/docs/) is a GenAI platform for developers. It provides a simple API to build Grounded Generation
|
||||
>(aka Retrieval-augmented-generation or RAG) applications.
|
||||
|
||||
**Vectara Overview:**
|
||||
- Vectara is developer-first API platform for building GenAI applications
|
||||
- `Vectara` is a developer-first API platform for building GenAI applications
|
||||
- To use Vectara - first [sign up](https://console.vectara.com/signup) and create an account. Then create a corpus and an API key for indexing and searching.
|
||||
- You can use Vectara's [indexing API](https://docs.vectara.com/docs/indexing-apis/indexing) to add documents into Vectara's index
|
||||
- You can use Vectara's [Search API](https://docs.vectara.com/docs/search-apis/search) to query Vectara's index (which also supports Hybrid search implicitly).
|
||||
- You can use Vectara's integration with LangChain as a Vector store or using the Retriever abstraction.
|
||||
|
||||
## Installation and Setup
|
||||
To use Vectara with LangChain no special installation steps are required.
|
||||
|
||||
To use `Vectara` with LangChain, no special installation steps are required.
|
||||
To get started, follow our [quickstart](https://docs.vectara.com/docs/quickstart) guide to create an account, a corpus and an API key.
|
||||
Once you have these, you can provide them as arguments to the Vectara vectorstore, or you can set them as environment variables.
|
||||
|
||||
@@ -19,9 +20,8 @@ Once you have these, you can provide them as arguments to the Vectara vectorstor
|
||||
- export `VECTARA_CORPUS_ID`="your_corpus_id"
|
||||
- export `VECTARA_API_KEY`="your-vectara-api-key"
|
||||
|
||||
## Usage
|
||||
|
||||
### VectorStore
|
||||
## Vector Store
|
||||
|
||||
There exists a wrapper around the Vectara platform, allowing you to use it as a vectorstore, whether for semantic search or example selection.
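A minimal sketch, assuming the credentials above are at hand (the customer id parameter follows the same pattern as the corpus id and API key):

```python
from langchain.vectorstores import Vectara

# Placeholder credentials; these can instead be picked up from the
# corresponding VECTARA_* environment variables.
vectara = Vectara(
    vectara_customer_id="your_customer_id",
    vectara_corpus_id="your_corpus_id",
    vectara_api_key="your-vectara-api-key",
)
vectara.add_texts(["harrison worked at kensho"])
docs = vectara.similarity_search("Where did harrison work?")
```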
|
||||
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
# Weaviate
|
||||
|
||||
This page covers how to use the Weaviate ecosystem within LangChain.
|
||||
>[Weaviate](https://weaviate.io/) is an open-source vector database. It allows you to store data objects and vector embeddings from
|
||||
>your favorite ML models, and scale seamlessly into billions of data objects.
|
||||
|
||||
What is Weaviate?
|
||||
|
||||
**Weaviate in a nutshell:**
|
||||
What is `Weaviate`?
|
||||
- Weaviate is an open-source database of the vector-search-engine type.
|
||||
- Weaviate allows you to store JSON documents in a class property-like fashion while attaching machine learning vectors to these documents to represent them in vector space.
|
||||
- Weaviate can be used stand-alone (aka bring your vectors) or with a variety of modules that can do the vectorization for you and extend the core capabilities.
|
||||
@@ -14,15 +14,20 @@ What is Weaviate?
|
||||
|
||||
**Weaviate in detail:**
|
||||
|
||||
Weaviate is a low-latency vector search engine with out-of-the-box support for different media types (text, images, etc.). It offers Semantic Search, Question-Answer Extraction, Classification, Customizable Models (PyTorch/TensorFlow/Keras), etc. Built from scratch in Go, Weaviate stores both objects and vectors, allowing for combining vector search with structured filtering and the fault tolerance of a cloud-native database. It is all accessible through GraphQL, REST, and various client-side programming languages.
|
||||
`Weaviate` is a low-latency vector search engine with out-of-the-box support for different media types (text, images, etc.). It offers Semantic Search, Question-Answer Extraction, Classification, Customizable Models (PyTorch/TensorFlow/Keras), etc. Built from scratch in Go, Weaviate stores both objects and vectors, allowing for combining vector search with structured filtering and the fault tolerance of a cloud-native database. It is all accessible through GraphQL, REST, and various client-side programming languages.
|
||||
|
||||
## Installation and Setup
|
||||
- Install the Python SDK with `pip install weaviate-client`
|
||||
## Wrappers
|
||||
|
||||
### VectorStore
|
||||
Install the Python SDK:
|
||||
|
||||
There exists a wrapper around Weaviate indexes, allowing you to use it as a vectorstore,
|
||||
```bash
|
||||
pip install weaviate-client
|
||||
```
|
||||
|
||||
|
||||
## Vector Store
|
||||
|
||||
There exists a wrapper around `Weaviate` indexes, allowing you to use it as a vectorstore,
|
||||
whether for semantic search or example selection.
|
||||
|
||||
To import this vectorstore:
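A minimal sketch of the import and typical usage (the Weaviate url is a placeholder for your own instance):

```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Weaviate

# Assumes a Weaviate instance is reachable at the given url; the url can also
# be supplied via the WEAVIATE_URL environment variable.
vector_store = Weaviate.from_texts(
    texts=["harrison worked at kensho"],
    embedding=OpenAIEmbeddings(),
    weaviate_url="http://localhost:8080",
)
docs = vector_store.similarity_search("Where did harrison work?")
```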
|
||||
|
||||
@@ -36,7 +36,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"document = \"This is a content of the document\"\n",
|
||||
"query = \"What is the contnt of the document?\""
|
||||
"query = \"What is the content of the document?\""
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -0,0 +1,124 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Baidu Qianfan\n",
|
||||
"\n",
|
||||
"Baidu AI Cloud Qianfan Platform is a one-stop large model development and service operation platform for enterprise developers. Qianfan not only provides including the model of Wenxin Yiyan (ERNIE-Bot) and the third-party open source models, but also provides various AI development tools and the whole set of development environment, which facilitates customers to use and develop large model applications easily.\n",
|
||||
"\n",
|
||||
"Basically, those model are split into the following type:\n",
|
||||
"\n",
|
||||
"- Embedding\n",
|
||||
"- Chat\n",
|
||||
"- Completion\n",
|
||||
"\n",
|
||||
"In this notebook, we will introduce how to use langchain with [Qianfan](https://cloud.baidu.com/doc/WENXINWORKSHOP/index.html) mainly in `Embedding` corresponding\n",
|
||||
" to the package `langchain/embeddings` in langchain:\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## API Initialization\n",
|
||||
"\n",
|
||||
"To use the LLM services based on Baidu Qianfan, you have to initialize these parameters:\n",
|
||||
"\n",
|
||||
"You could either choose to init the AK,SK in enviroment variables or init params:\n",
|
||||
"\n",
|
||||
"```base\n",
|
||||
"export QIANFAN_AK=XXX\n",
|
||||
"export QIANFAN_SK=XXX\n",
|
||||
"```\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\"\"\"For basic init and call\"\"\"\n",
|
||||
"from langchain.embeddings.baidu_qianfan_endpoint import QianfanEmbeddingsEndpoint \n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"os.environ[\"QIANFAN_AK\"] = \"xx\"\n",
|
||||
"os.environ[\"QIANFAN_SK\"] = \"xx\"\n",
|
||||
"\n",
|
||||
"embed = QianfanEmbeddingsEndpoint(qianfan_ak='xxx', \n",
|
||||
" qianfan_sk='xxx')\n",
|
||||
"res = embed.embed_documents([\"hi\", \"world\"])\n",
|
||||
"\n",
|
||||
"import asyncio\n",
|
||||
"\n",
|
||||
"async def aioEmbed():\n",
|
||||
" res = await embed.aembed_query(\"qianfan\")\n",
|
||||
" print(res)\n",
|
||||
"await aioEmbed()\n",
|
||||
"\n",
|
||||
"import asyncio\n",
|
||||
"async def aioEmbedDocs():\n",
|
||||
" res = await embed.aembed_documents([\"hi\", \"world\"])\n",
|
||||
" for r in res:\n",
|
||||
" print(\"\", r[:8])\n",
|
||||
"await aioEmbedDocs()\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use different models in Qianfan\n",
|
||||
"\n",
|
||||
"In the case you want to deploy your own model based on Ernie Bot or third-party open sources model, you could follow these steps:\n",
|
||||
"\n",
|
||||
"- 1. (Optional, if the model are included in the default models, skip it)Deploy your model in Qianfan Console, get your own customized deploy endpoint.\n",
|
||||
"- 2. Set up the field called `endpoint` in the initlization:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embed = QianfanEmbeddingsEndpoint(qianfan_ak='xxx', \n",
|
||||
" qianfan_sk='xxx',\n",
|
||||
" model=\"bge_large_zh\",\n",
|
||||
" endpoint=\"bge_large_zh\")\n",
|
||||
"\n",
|
||||
"res = embed.embed_documents([\"hi\", \"world\"])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "6fa70026b407ae751a5c9e6bd7f7d482379da8ad616f98512780b705c84ee157"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
226
docs/extras/integrations/tools/eleven_labs_tts.ipynb
Normal file
@@ -0,0 +1,226 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a991a6f8-1897-4f49-a191-ae3bdaeda856",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Eleven Labs Text2Speech\n",
|
||||
"\n",
|
||||
"This notebook shows how to interact with the `ElevenLabs API` to achieve text-to-speech capabilities."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9eeb311e-e1bd-4959-8536-4d267f302eb3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, you need to set up an ElevenLabs account. You can follow the instructions [here](https://docs.elevenlabs.io/welcome/introduction)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "0a309c0e-5310-4eaa-8af9-bcbc252e45da",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# !pip install elevenlabs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f097c3b1-f761-43cb-aad0-8ba2e93e5f5f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"ELEVEN_API_KEY\"] = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "434b2454-2bff-484d-822c-4026a9dc1383",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "2f57a647-9214-4562-a8cf-f263a15d1f40",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'eleven_labs_text2speech'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.tools import ElevenLabsText2SpeechTool\n",
|
||||
"\n",
|
||||
"text_to_speak = \"Hello world! I am the real slim shady\"\n",
|
||||
"\n",
|
||||
"tts = ElevenLabsText2SpeechTool()\n",
|
||||
"tts.name"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d4613fed-66f0-47c6-be50-7e7670654427",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can generate audio, save it to the temporary file and then play it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "f1984844-aa75-4f83-9d42-1c8052d87cc0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"speech_file = tts.run(text_to_speak)\n",
|
||||
"tts.play(speech_file)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "42d89cd4-ac2a-4857-9787-c9018b4a8782",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Or stream audio directly."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "d72822f8-3223-47e2-8d2e-6ff46b8c8645",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tts.stream_speech(text_to_speak)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a152766d-5f06-48b1-ac89-b4e8d88d3c9f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an Agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "37626aea-0cf0-4849-9c00-c0f40515ffe0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import OpenAI\n",
|
||||
"from langchain.agents import initialize_agent, AgentType, load_tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "c168f28e-d5b7-4c93-bed8-0ab317b4a44b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"tools = load_tools([\"eleven_labs_text2speech\"])\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools=tools,\n",
|
||||
" llm=llm,\n",
|
||||
" agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "336bf95a-3ccb-4963-aac3-638a4df2ed78",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mAction:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"eleven_labs_text2speech\",\n",
|
||||
" \"action_input\": {\n",
|
||||
" \"query\": \"Why did the chicken cross the playground? To get to the other slide!\"\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m/tmp/tmpsfg783f1.wav\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I have the audio file ready to be sent to the human\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"/tmp/tmpsfg783f1.wav\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"audio_file = agent.run(\"Tell me a joke and read it out for me.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "f0aa7aa9-4682-4599-8cae-59347d9e5210",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tts.play(audio_file)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -23,7 +23,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install \"cassio>=0.0.7\""
|
||||
"!pip install \"cassio>=0.1.0\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -152,7 +152,9 @@
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"\n",
|
||||
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
||||
"SOURCE_FILE_NAME = \"../../modules/state_of_the_union.txt\"\n",
|
||||
"\n",
|
||||
"loader = TextLoader(SOURCE_FILE_NAME)\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
@@ -197,7 +199,7 @@
|
||||
"# table_name=table_name,\n",
|
||||
"# )\n",
|
||||
"\n",
|
||||
"# docsearch_preexisting.similarity_search(query, k=2)"
|
||||
"# docs = docsearch_preexisting.similarity_search(query, k=2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -253,6 +255,51 @@
|
||||
"for i, doc in enumerate(found_docs):\n",
|
||||
" print(f\"{i + 1}.\", doc.page_content, \"\\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "da791c5f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Metadata filtering\n",
|
||||
"\n",
|
||||
"You can specify filtering on metadata when running searches in the vector store. By default, when inserting documents, the only metadata is the `\"source\"` (but you can customize the metadata at insertion time).\n",
|
||||
"\n",
|
||||
"Since only one files was inserted, this is just a demonstration of how filters are passed:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "93f132fa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"filter = {\"source\": SOURCE_FILE_NAME}\n",
|
||||
"filtered_docs = docsearch.similarity_search(query, filter=filter, k=5)\n",
|
||||
"print(f\"{len(filtered_docs)} documents retrieved.\")\n",
|
||||
"print(f\"{filtered_docs[0].page_content[:64]} ...\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1b413ec4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"filter = {\"source\": \"nonexisting_file.txt\"}\n",
|
||||
"filtered_docs2 = docsearch.similarity_search(query, filter=filter)\n",
|
||||
"print(f\"{len(filtered_docs2)} documents retrieved.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a0fea764",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Please visit the [cassIO documentation](https://cassio.org/frameworks/langchain/about/) for more on using vector stores with Langchain."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -271,7 +318,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -10,7 +10,8 @@
|
||||
"\n",
|
||||
"It supports:\n",
|
||||
"- approximate nearest neighbor search\n",
|
||||
"- L2 distance and cosine distance\n",
|
||||
"- Euclidean similarity and cosine similarity\n",
|
||||
"- Hybrid search combining vector and keyword searches\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the Neo4j vector index (`Neo4jVector`)."
|
||||
]
|
||||
@@ -24,41 +25,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Requirement already satisfied: neo4j in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (5.11.0)\n",
|
||||
"Requirement already satisfied: pytz in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (from neo4j) (2023.3)\n",
|
||||
"Requirement already satisfied: openai in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (0.27.6)\n",
|
||||
"Requirement already satisfied: requests>=2.20 in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (from openai) (2.31.0)\n",
|
||||
"Requirement already satisfied: tqdm in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (from openai) (4.66.1)\n",
|
||||
"Requirement already satisfied: aiohttp in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (from openai) (3.8.5)\n",
|
||||
"Requirement already satisfied: charset-normalizer<4,>=2 in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (from requests>=2.20->openai) (3.2.0)\n",
|
||||
"Requirement already satisfied: idna<4,>=2.5 in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (from requests>=2.20->openai) (3.4)\n",
|
||||
"Requirement already satisfied: urllib3<3,>=1.21.1 in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (from requests>=2.20->openai) (2.0.4)\n",
|
||||
"Requirement already satisfied: certifi>=2017.4.17 in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (from requests>=2.20->openai) (2023.7.22)\n",
|
||||
"Requirement already satisfied: attrs>=17.3.0 in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (from aiohttp->openai) (23.1.0)\n",
|
||||
"Requirement already satisfied: multidict<7.0,>=4.5 in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (from aiohttp->openai) (6.0.4)\n",
|
||||
"Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (from aiohttp->openai) (4.0.3)\n",
|
||||
"Requirement already satisfied: yarl<2.0,>=1.0 in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (from aiohttp->openai) (1.9.2)\n",
|
||||
"Requirement already satisfied: frozenlist>=1.1.1 in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (from aiohttp->openai) (1.4.0)\n",
|
||||
"Requirement already satisfied: aiosignal>=1.1.2 in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (from aiohttp->openai) (1.3.1)\n",
|
||||
"Requirement already satisfied: tiktoken in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (0.4.0)\n",
|
||||
"Requirement already satisfied: regex>=2022.1.18 in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (from tiktoken) (2023.8.8)\n",
|
||||
"Requirement already satisfied: requests>=2.26.0 in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (from tiktoken) (2.31.0)\n",
|
||||
"Requirement already satisfied: charset-normalizer<4,>=2 in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (from requests>=2.26.0->tiktoken) (3.2.0)\n",
|
||||
"Requirement already satisfied: idna<4,>=2.5 in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (from requests>=2.26.0->tiktoken) (3.4)\n",
|
||||
"Requirement already satisfied: urllib3<3,>=1.21.1 in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (from requests>=2.26.0->tiktoken) (2.0.4)\n",
|
||||
"Requirement already satisfied: certifi>=2017.4.17 in /home/tomaz/anaconda3/envs/myenv/lib/python3.11/site-packages (from requests>=2.26.0->tiktoken) (2023.7.22)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Pip install necessary package\n",
|
||||
"!pip install neo4j\n",
|
||||
@@ -115,6 +86,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
||||
"\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
@@ -179,16 +151,6 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"Score: 0.9077161550521851\n",
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"Score: 0.9077161550521851\n",
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
@@ -214,18 +176,36 @@
|
||||
"We’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"Score: 0.891287088394165\n",
|
||||
"A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n",
|
||||
"Score: 0.8867912292480469\n",
|
||||
"And for our LGBTQ+ Americans, let’s finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \n",
|
||||
"\n",
|
||||
"And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n",
|
||||
"As I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \n",
|
||||
"\n",
|
||||
"We can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \n",
|
||||
"While it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \n",
|
||||
"\n",
|
||||
"We’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \n",
|
||||
"And soon, we’ll strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \n",
|
||||
"\n",
|
||||
"We’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \n",
|
||||
"So tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \n",
|
||||
"\n",
|
||||
"We’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.\n",
|
||||
"First, beat the opioid epidemic.\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"Score: 0.8866499662399292\n",
|
||||
"Tonight, I’m announcing a crackdown on these companies overcharging American businesses and consumers. \n",
|
||||
"\n",
|
||||
"And as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \n",
|
||||
"\n",
|
||||
"That ends on my watch. \n",
|
||||
"\n",
|
||||
"Medicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \n",
|
||||
"\n",
|
||||
"We’ll also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \n",
|
||||
"\n",
|
||||
"Let’s pass the Paycheck Fairness Act and paid leave. \n",
|
||||
"\n",
|
||||
"Raise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \n",
|
||||
"\n",
|
||||
"Let’s increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls America’s best-kept secret: community colleges.\n",
|
||||
"--------------------------------------------------------------------------------\n"
|
||||
]
|
||||
}
|
||||
@@ -281,7 +261,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['2f70679a-4416-11ee-b7c3-d46a6aa24f5b']"
|
||||
"['064c7032-5093-11ee-8041-3b350f274873']"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
@@ -328,14 +308,67 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retriever options\n",
|
||||
"## Hybrid search (vector + keyword)\n",
|
||||
"\n",
|
||||
"Neo4j integrates both vector and keyword indexes, which allows you to use a hybrid search approach"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# The Neo4jVector Module will connect to Neo4j and create a vector and keyword indices if needed.\n",
|
||||
"hybrid_db = Neo4jVector.from_documents(\n",
|
||||
" docs, \n",
|
||||
" OpenAIEmbeddings(), \n",
|
||||
" url=url, \n",
|
||||
" username=username, \n",
|
||||
" password=password,\n",
|
||||
" search_type=\"hybrid\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To load the hybrid search from existing indexes, you have to provide both the vector and keyword indices"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"index_name = \"vector\" # default index name\n",
|
||||
"keyword_index_name = \"keyword\" #default keyword index name\n",
|
||||
"\n",
|
||||
"store = Neo4jVector.from_existing_index(\n",
|
||||
" OpenAIEmbeddings(),\n",
|
||||
" url=url,\n",
|
||||
" username=username,\n",
|
||||
" password=password,\n",
|
||||
" index_name=index_name,\n",
|
||||
" keyword_index_name=keyword_index_name,\n",
|
||||
" search_type=\"hybrid\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Retriever options\n",
|
||||
"\n",
|
||||
"This section shows how to use `Neo4jVector` as a retriever."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -344,7 +377,7 @@
|
||||
"Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../modules/state_of_the_union.txt'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -365,7 +398,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -375,7 +408,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -386,7 +419,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -396,7 +429,7 @@
|
||||
" 'sources': '../../modules/state_of_the_union.txt'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -432,7 +465,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.8.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -158,7 +158,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -178,7 +178,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -242,7 +242,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
@@ -253,7 +253,7 @@
|
||||
"rds = Redis.from_texts(\n",
|
||||
" texts,\n",
|
||||
" embeddings,\n",
|
||||
" metadatas=metadats,\n",
|
||||
" metadatas=metadata,\n",
|
||||
" redis_url=\"redis://localhost:6379\",\n",
|
||||
" index_name=\"users\"\n",
|
||||
")"
|
||||
@@ -597,7 +597,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -607,7 +607,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1110,6 +1110,38 @@
|
||||
"retriever.get_relevant_documents(\"foo\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = rds.as_retriever(search_type=\"mmr\", search_kwargs={\"fetch_k\": 20, \"k\": 4, \"lambda_mult\": 0.1})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='foo', metadata={'id': 'doc:users:8f6b673b390647809d510112cde01a27', 'user': 'john', 'job': 'engineer', 'credit_score': 'high', 'age': '18'}),\n",
|
||||
" Document(page_content='bar', metadata={'id': 'doc:users:93521560735d42328b48c9c6f6418d6a', 'user': 'tyler', 'job': 'engineer', 'credit_score': 'high', 'age': '100'}),\n",
|
||||
" Document(page_content='foo', metadata={'id': 'doc:users:125ecd39d07845eabf1a699d44134a5b', 'user': 'nancy', 'job': 'doctor', 'credit_score': 'high', 'age': '94'}),\n",
|
||||
" Document(page_content='foo', metadata={'id': 'doc:users:d6200ab3764c466082fde3eaab972a2a', 'user': 'derrick', 'job': 'doctor', 'credit_score': 'low', 'age': '45'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"retriever.get_relevant_documents(\"foo\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -1227,7 +1259,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -272,15 +272,6 @@
|
||||
"Anything uploaded to weaviate is automatically persistent into the database. You do not need to call any specific method or pass any param for this to happen."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "05fd146c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Retriever options"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "503e2e75",
|
||||
|
||||
@@ -6,11 +6,14 @@
|
||||
"id": "13afcae7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Deep Lake self-querying \n",
|
||||
"# Deep Lake\n",
|
||||
"\n",
|
||||
">[Deep Lake](https://www.activeloop.ai) is a multimodal database for building AI applications.\n",
|
||||
">[Deep Lake](https://www.activeloop.ai) is a multimodal database for building AI applications\n",
|
||||
">[Deep Lake](https://github.com/activeloopai/deeplake) is a database for AI.\n",
|
||||
">Store Vectors, Images, Texts, Videos, etc. Use with LLMs/LangChain. Store, query, version,\n",
|
||||
"> & visualize any AI data. Stream data in real time to PyTorch/TensorFlow.\n",
|
||||
"\n",
|
||||
"In the notebook we'll demo the `SelfQueryRetriever` wrapped around a Deep Lake vector store. "
|
||||
"In the notebook, we'll demo the `SelfQueryRetriever` wrapped around a `Deep Lake` vector store. "
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -5,11 +5,11 @@
|
||||
"id": "13afcae7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Chroma self-querying \n",
|
||||
"# Chroma\n",
|
||||
"\n",
|
||||
">[Chroma](https://docs.trychroma.com/getting-started) is a database for building AI applications with embeddings.\n",
|
||||
"\n",
|
||||
"In the notebook we'll demo the `SelfQueryRetriever` wrapped around a Chroma vector store. "
|
||||
"In the notebook, we'll demo the `SelfQueryRetriever` wrapped around a `Chroma` vector store. "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -447,7 +447,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -2,20 +2,36 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# DashVector self-querying\n",
|
||||
"\n",
|
||||
"> [DashVector](https://help.aliyun.com/document_detail/2510225.html) is a fully-managed vectorDB service that supports high-dimension dense and sparse vectors, real-time insertion and filtered search. It is built to scale automatically and can adapt to different application requirements.\n",
|
||||
"\n",
|
||||
"In this notebook we'll demo the `SelfQueryRetriever` with a `DashVector` vector store."
|
||||
],
|
||||
"id": "59895c73d1a0f3ca",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"id": "59895c73d1a0f3ca"
|
||||
"source": [
|
||||
"# DashVector\n",
|
||||
"\n",
|
||||
"> [DashVector](https://help.aliyun.com/document_detail/2510225.html) is a fully managed vector DB service that supports high-dimension dense and sparse vectors, real-time insertion and filtered search. It is built to scale automatically and can adapt to different application requirements.\n",
|
||||
"> The vector retrieval service `DashVector` is based on the `Proxima` core of the efficient vector engine independently developed by `DAMO Academy`,\n",
|
||||
"> and provides a cloud-native, fully managed vector retrieval service with horizontal expansion capabilities.\n",
|
||||
"> `DashVector` exposes its powerful vector management, vector query and other diversified capabilities through a simple and\n",
|
||||
 easy-to-use SDK/API">
"> easy-to-use SDK/API interface, which can be quickly integrated by upper-layer AI applications, thereby providing\n",
|
||||
 including large model">
"> the efficient vector retrieval capabilities required by a variety of application scenarios, including the large model ecosystem,\n",
|
||||
 provide the required">
"> multi-modal AI search, and molecular structure analysis.\n",
|
||||
"\n",
|
||||
"In this notebook, we'll demo the `SelfQueryRetriever` with a `DashVector` vector store."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "539ae9367e45a178",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Create DashVector vectorstore\n",
|
||||
"\n",
|
||||
@@ -24,46 +40,55 @@
|
||||
"To use DashVector, you have to have `dashvector` package installed, and you must have an API key and an Environment. Here are the [installation instructions](https://help.aliyun.com/document_detail/2510223.html).\n",
|
||||
"\n",
|
||||
"NOTE: The self-query retriever requires you to have `lark` package installed."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "539ae9367e45a178"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "67df7e1f8dc8cdd0",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# !pip install lark dashvector"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "67df7e1f8dc8cdd0"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "ff61eaf13973b5fe",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-24T02:58:46.905337Z",
|
||||
"start_time": "2023-08-24T02:58:46.252566Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import dashvector\n",
|
||||
"\n",
|
||||
"client = dashvector.Client(api_key=os.environ[\"DASHVECTOR_API_KEY\"])"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-24T02:58:46.905337Z",
|
||||
"start_time": "2023-08-24T02:58:46.252566Z"
|
||||
}
|
||||
},
|
||||
"id": "ff61eaf13973b5fe"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "de5c77957ee42d14",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.schema import Document\n",
|
||||
@@ -74,15 +99,22 @@
|
||||
"\n",
|
||||
"# create DashVector collection\n",
|
||||
"client.create(\"langchain-self-retriever-demo\", dimension=1536)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "de5c77957ee42d14"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "8f40605548a4550",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-24T02:59:08.090031Z",
|
||||
"start_time": "2023-08-24T02:59:05.660295Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = [\n",
|
||||
@@ -119,31 +151,37 @@
|
||||
"vectorstore = DashVector.from_documents(\n",
|
||||
" docs, embeddings, collection_name=\"langchain-self-retriever-demo\"\n",
|
||||
")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-24T02:59:08.090031Z",
|
||||
"start_time": "2023-08-24T02:59:05.660295Z"
|
||||
}
|
||||
},
|
||||
"id": "8f40605548a4550"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eb1340adafac8993",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Create your self-querying retriever\n",
|
||||
"\n",
|
||||
"Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "eb1340adafac8993"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "d65233dc044f95a7",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-24T02:59:11.003940Z",
|
||||
"start_time": "2023-08-24T02:59:10.476722Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import Tongyi\n",
|
||||
@@ -175,31 +213,37 @@
|
||||
"retriever = SelfQueryRetriever.from_llm(\n",
|
||||
" llm, vectorstore, document_content_description, metadata_field_info, verbose=True\n",
|
||||
")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-24T02:59:11.003940Z",
|
||||
"start_time": "2023-08-24T02:59:10.476722Z"
|
||||
}
|
||||
},
|
||||
"id": "d65233dc044f95a7"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a54af0d67b473db6",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Testing it out\n",
|
||||
"\n",
|
||||
"And now we can try actually using our retriever!"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "a54af0d67b473db6"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "dad9da670a267fe7",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-24T02:59:28.577901Z",
|
||||
"start_time": "2023-08-24T02:59:26.780184Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -210,7 +254,12 @@
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'year': 1993, 'rating': 7.699999809265137, 'genre': 'action'}),\n Document(page_content='Toys come alive and have a blast doing so', metadata={'year': 1995, 'genre': 'animated'}),\n Document(page_content='Leo DiCaprio gets lost in a dream within a dream within a dream within a ...', metadata={'year': 2010, 'director': 'Christopher Nolan', 'rating': 8.199999809265137}),\n Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'year': 2006, 'director': 'Satoshi Kon', 'rating': 8.600000381469727})]"
|
||||
"text/plain": [
|
||||
"[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'year': 1993, 'rating': 7.699999809265137, 'genre': 'action'}),\n",
|
||||
" Document(page_content='Toys come alive and have a blast doing so', metadata={'year': 1995, 'genre': 'animated'}),\n",
|
||||
" Document(page_content='Leo DiCaprio gets lost in a dream within a dream within a dream within a ...', metadata={'year': 2010, 'director': 'Christopher Nolan', 'rating': 8.199999809265137}),\n",
|
||||
" Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'year': 2006, 'director': 'Satoshi Kon', 'rating': 8.600000381469727})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
@@ -220,19 +269,22 @@
|
||||
"source": [
|
||||
"# This example only specifies a relevant query\n",
|
||||
"retriever.get_relevant_documents(\"What are some movies about dinosaurs\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-24T02:59:28.577901Z",
|
||||
"start_time": "2023-08-24T02:59:26.780184Z"
|
||||
}
|
||||
},
|
||||
"id": "dad9da670a267fe7"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "d486a64316153d52",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-24T02:59:32.370774Z",
|
||||
"start_time": "2023-08-24T02:59:30.614252Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -243,7 +295,10 @@
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "[Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'year': 1979, 'director': 'Andrei Tarkovsky', 'rating': 9.899999618530273, 'genre': 'science fiction'}),\n Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'year': 2006, 'director': 'Satoshi Kon', 'rating': 8.600000381469727})]"
|
||||
"text/plain": [
|
||||
"[Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'year': 1979, 'director': 'Andrei Tarkovsky', 'rating': 9.899999618530273, 'genre': 'science fiction'}),\n",
|
||||
" Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'year': 2006, 'director': 'Satoshi Kon', 'rating': 8.600000381469727})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
@@ -253,19 +308,22 @@
|
||||
"source": [
|
||||
"# This example only specifies a filter\n",
|
||||
"retriever.get_relevant_documents(\"I want to watch a movie rated higher than 8.5\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-24T02:59:32.370774Z",
|
||||
"start_time": "2023-08-24T02:59:30.614252Z"
|
||||
}
|
||||
},
|
||||
"id": "d486a64316153d52"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "e05919cdead7bd4a",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-24T02:59:35.353439Z",
|
||||
"start_time": "2023-08-24T02:59:33.278255Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -276,7 +334,9 @@
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "[Document(page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them', metadata={'year': 2019, 'director': 'Greta Gerwig', 'rating': 8.300000190734863})]"
|
||||
"text/plain": [
|
||||
"[Document(page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them', metadata={'year': 2019, 'director': 'Greta Gerwig', 'rating': 8.300000190734863})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
@@ -286,19 +346,22 @@
|
||||
"source": [
|
||||
"# This example specifies a query and a filter\n",
|
||||
"retriever.get_relevant_documents(\"Has Greta Gerwig directed any movies about women\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-24T02:59:35.353439Z",
|
||||
"start_time": "2023-08-24T02:59:33.278255Z"
|
||||
}
|
||||
},
|
||||
"id": "e05919cdead7bd4a"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "ac2c7012379e918e",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-24T02:59:38.913707Z",
|
||||
"start_time": "2023-08-24T02:59:36.659271Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -309,7 +372,9 @@
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "[Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'year': 1979, 'director': 'Andrei Tarkovsky', 'rating': 9.899999618530273, 'genre': 'science fiction'})]"
|
||||
"text/plain": [
|
||||
"[Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'year': 1979, 'director': 'Andrei Tarkovsky', 'rating': 9.899999618530273, 'genre': 'science fiction'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
@@ -319,33 +384,39 @@
|
||||
"source": [
|
||||
"# This example specifies a composite filter\n",
|
||||
"retriever.get_relevant_documents(\"What's a highly rated (above 8.5) science fiction film?\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-24T02:59:38.913707Z",
|
||||
"start_time": "2023-08-24T02:59:36.659271Z"
|
||||
}
|
||||
},
|
||||
"id": "ac2c7012379e918e"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "af6aa93ae44af414",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Filter k\n",
|
||||
"\n",
|
||||
"We can also use the self query retriever to specify `k`: the number of documents to fetch.\n",
|
||||
"\n",
|
||||
"We can do this by passing `enable_limit=True` to the constructor."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "af6aa93ae44af414"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "a8c8f09bf5702767",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-24T02:59:41.594073Z",
|
||||
"start_time": "2023-08-24T02:59:41.563323Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = SelfQueryRetriever.from_llm(\n",
|
||||
@@ -356,19 +427,22 @@
|
||||
" enable_limit=True,\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-24T02:59:41.594073Z",
|
||||
"start_time": "2023-08-24T02:59:41.563323Z"
|
||||
}
|
||||
},
|
||||
"id": "a8c8f09bf5702767"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "b1089a6043980b84",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-24T02:59:48.450506Z",
|
||||
"start_time": "2023-08-24T02:59:46.252944Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -379,7 +453,10 @@
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'year': 1993, 'rating': 7.699999809265137, 'genre': 'action'}),\n Document(page_content='Toys come alive and have a blast doing so', metadata={'year': 1995, 'genre': 'animated'})]"
|
||||
"text/plain": [
|
||||
"[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'year': 1993, 'rating': 7.699999809265137, 'genre': 'action'}),\n",
|
||||
" Document(page_content='Toys come alive and have a blast doing so', metadata={'year': 1995, 'genre': 'animated'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
@@ -389,44 +466,39 @@
|
||||
"source": [
|
||||
"# This example only specifies a relevant query\n",
|
||||
"retriever.get_relevant_documents(\"what are two movies about dinosaurs\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-24T02:59:48.450506Z",
|
||||
"start_time": "2023-08-24T02:59:46.252944Z"
|
||||
}
|
||||
},
|
||||
"id": "b1089a6043980b84"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [],
|
||||
"id": "6d2d64e2ebb17d30",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"id": "6d2d64e2ebb17d30"
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
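The hunk markers above elide the notebook cell that defines the metadata schema passed to `SelfQueryRetriever.from_llm`. As a reference, here is a minimal sketch of what that elided cell typically contains, assuming the movie-demo schema shared by the other self-query notebooks (the field definitions are a hypothetical reconstruction, not verbatim from the diff):

from langchain.chains.query_constructor.base import AttributeInfo

# hypothetical reconstruction of the elided metadata schema
metadata_field_info = [
    AttributeInfo(name="genre", description="The genre of the movie", type="string"),
    AttributeInfo(name="year", description="The year the movie was released", type="integer"),
    AttributeInfo(name="director", description="The name of the movie director", type="string"),
    AttributeInfo(name="rating", description="A 1-10 rating for the movie", type="float"),
]
document_content_description = "Brief summary of a movie"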
@@ -5,7 +5,13 @@
"id": "13afcae7",
"metadata": {},
"source": [
"# Elasticsearch self-querying "
"# Elasticsearch\n",
"\n",
"> [Elasticsearch](https://www.elastic.co/elasticsearch/) is a distributed, RESTful search and analytics engine.\n",
"> It provides a distributed, multi-tenant-capable full-text search engine with an HTTP web interface and schema-free\n",
"> JSON documents.\n",
"\n",
"In this notebook, we'll demo the `SelfQueryRetriever` with an `Elasticsearch` vector store."
]
},
{
@@ -13,8 +19,9 @@
"id": "68e75fb9",
"metadata": {},
"source": [
"## Creating a Elasticsearch vector store\n",
"First we'll want to create a Elasticsearch vector store and seed it with some data. We've created a small demo set of documents that contain summaries of movies.\n",
"## Creating an Elasticsearch vector store\n",
"\n",
"First, we'll want to create an `Elasticsearch` vector store and seed it with some data. We've created a small demo set of documents that contain summaries of movies.\n",
"\n",
"**Note:** The self-query retriever requires you to have `lark` installed (`pip install lark`). We also need the `elasticsearch` package."
]
@@ -354,7 +361,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.3"
"version": "3.10.12"
}
},
"nbformat": 4,
@@ -4,9 +4,11 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Self-querying with Milvus\n",
"# Milvus\n",
"\n",
"In the walkthrough we'll demo the `SelfQueryRetriever` with a `Milvus` vector store."
">[Milvus](https://milvus.io/docs/overview.md) is a database that stores, indexes, and manages massive embedding vectors generated by deep neural networks and other machine learning (ML) models.\n",
"\n",
"In the walkthrough, we'll demo the `SelfQueryRetriever` with a `Milvus` vector store."
]
},
{
@@ -352,7 +354,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -366,10 +368,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
},
"orig_nbformat": 4
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
@@ -5,12 +5,15 @@
"id": "13afcae7",
"metadata": {},
"source": [
"# Self-querying with MyScale\n",
"# MyScale\n",
"\n",
">[MyScale](https://docs.myscale.com/en/) is an integrated vector database. You can access your database in SQL and also from here, LangChain. MyScale can make a use of [various data types and functions for filters](https://blog.myscale.com/2023/06/06/why-integrated-database-solution-can-boost-your-llm-apps/#filter-on-anything-without-constraints). It will boost up your LLM app no matter if you are scaling up your data or expand your system to broader application.\n",
">[MyScale](https://docs.myscale.com/en/) is an integrated vector database. You can access your database in SQL and also from here, LangChain.\n",
">`MyScale` can make use of [various data types and functions for filters](https://blog.myscale.com/2023/06/06/why-integrated-database-solution-can-boost-your-llm-apps/#filter-on-anything-without-constraints). It will boost your LLM app whether you are scaling up your data or expanding your system to broader applications.\n",
"\n",
"In the notebook we'll demo the `SelfQueryRetriever` wrapped around a MyScale vector store with some extra pieces we contributed to LangChain. In short, it can be condensed into 4 points:\n",
"1. Add `contain` comparator to match list of any if there is more than one element matched\n",
"In the notebook, we'll demo the `SelfQueryRetriever` wrapped around a `MyScale` vector store with some extra pieces we contributed to LangChain. \n",
"\n",
"In short, it can be condensed into 4 points (a sketch follows the list):\n",
"1. Add `contain` comparator to match the list of any if there is more than one element matched\n",
"2. Add `timestamp` data type for datetime match (ISO-format, or YYYY-MM-DD)\n",
"3. Add `like` comparator for string pattern search\n",
"4. Add arbitrary function capability"
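To make the four MyScale extensions above concrete, here is an illustrative metadata schema that would exercise them; the field names are hypothetical and not taken from the notebook:

from langchain.chains.query_constructor.base import AttributeInfo

metadata_field_info = [
    # a list[string] field can be matched with the new `contain` comparator
    AttributeInfo(name="genre", description="Genres of the movie", type="list[string]"),
    # the `timestamp` type enables datetime filters (ISO format or YYYY-MM-DD)
    AttributeInfo(name="date", description="Release date of the movie", type="timestamp"),
    # plain string fields can be pattern-matched with the new `like` comparator
    AttributeInfo(name="director", description="Name of the director", type="string"),
]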
@@ -221,9 +224,7 @@
"cell_type": "code",
"execution_count": null,
"id": "fc3f1e6e",
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"# This example only specifies a filter\n",
@@ -384,7 +385,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
"version": "3.10.12"
}
},
"nbformat": 4,
@@ -5,9 +5,11 @@
"id": "13afcae7",
"metadata": {},
"source": [
"# Self-querying with Pinecone\n",
"# Pinecone\n",
"\n",
"In the walkthrough we'll demo the `SelfQueryRetriever` with a `Pinecone` vector store."
">[Pinecone](https://docs.pinecone.io/docs/overview) is a vector database with broad functionality.\n",
"\n",
"In the walkthrough, we'll demo the `SelfQueryRetriever` with a `Pinecone` vector store."
]
},
{
@@ -395,7 +397,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
"version": "3.10.12"
}
},
"nbformat": 4,
@@ -6,11 +6,11 @@
"id": "13afcae7",
"metadata": {},
"source": [
"# Qdrant self-querying \n",
"# Qdrant\n",
"\n",
">[Qdrant](https://qdrant.tech/documentation/) (read: quadrant) is a vector similarity search engine. It provides a production-ready service with a convenient API to store, search, and manage points - vectors with an additional payload. `Qdrant` is tailored to extended filtering support.\n",
"\n",
"In the notebook we'll demo the `SelfQueryRetriever` wrapped around a Qdrant vector store. "
"In the notebook, we'll demo the `SelfQueryRetriever` wrapped around a `Qdrant` vector store. "
]
},
{
@@ -419,7 +419,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.10.12"
}
},
"nbformat": 4,
@@ -5,11 +5,11 @@
"id": "13afcae7",
"metadata": {},
"source": [
"# Redis self-querying \n",
"# Redis\n",
"\n",
">[Redis](https://redis.com) is an open-source key-value store that can be used as a cache, message broker, database, vector database and more.\n",
"\n",
"In the notebook we'll demo the `SelfQueryRetriever` wrapped around a Redis vector store. "
"In the notebook, we'll demo the `SelfQueryRetriever` wrapped around a `Redis` vector store. "
]
},
{
@@ -450,9 +450,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "poetry-venv",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "poetry-venv"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -464,7 +464,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.12"
}
},
"nbformat": 4,
@@ -5,19 +5,22 @@
"id": "13afcae7",
"metadata": {},
"source": [
"# Supabase Vector self-querying \n",
"# Supabase\n",
"\n",
">[Supabase](https://supabase.com/docs) is an open source `Firebase` alternative. \n",
">[Supabase](https://supabase.com/docs) is an open-source `Firebase` alternative. \n",
"> `Supabase` is built on top of `PostgreSQL`, which offers strong `SQL` \n",
"> querying capabilities and enables a simple interface with already-existing tools and frameworks.\n",
"\n",
">[PostgreSQL](https://en.wikipedia.org/wiki/PostgreSQL) also known as `Postgres`,\n",
"> is a free and open-source relational database management system (RDBMS) \n",
"> emphasizing extensibility and `SQL` compliance.\n",
">\n",
">[Supabase](https://supabase.com/docs/guides/ai) provides an open-source toolkit for developing AI applications\n",
">using Postgres and pgvector. Use the Supabase client libraries to store, index, and query your vector embeddings at scale.\n",
"\n",
"In the notebook we'll demo the `SelfQueryRetriever` wrapped around a Supabase vector store.\n",
"In the notebook, we'll demo the `SelfQueryRetriever` wrapped around a `Supabase` vector store.\n",
"\n",
"Specifically we will:\n",
"Specifically, we will:\n",
"1. Create a Supabase database\n",
"2. Enable the `pgvector` extension\n",
"3. Create a `documents` table and `match_documents` function that will be used by `SupabaseVectorStore`\n",
@@ -569,7 +572,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.12"
}
},
"nbformat": 4,
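Steps 1-3 above happen in the Supabase dashboard or via SQL; on the Python side the wiring is roughly the following sketch (it assumes `SUPABASE_URL` and `SUPABASE_SERVICE_KEY` are exported, and mirrors the table and function names from step 3):

import os
from supabase.client import create_client
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import SupabaseVectorStore

supabase = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_SERVICE_KEY"])
vectorstore = SupabaseVectorStore(
    client=supabase,
    embedding=OpenAIEmbeddings(),
    table_name="documents",        # table created in step 3
    query_name="match_documents",  # function created in step 3
)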
@@ -5,11 +5,12 @@
"id": "13afcae7",
"metadata": {},
"source": [
"# Vectara self-querying \n",
"# Vectara\n",
"\n",
">[Vectara](https://docs.vectara.com/docs/) is a GenAI platform for developers. It provides a simple API to build Grounded Generation (aka Retrieval-augmented-generation) applications.\n",
">[Vectara](https://docs.vectara.com/docs/) is a GenAI platform for developers. It provides a simple API to build Grounded Generation\n",
">(aka Retrieval-augmented-generation or RAG) applications.\n",
"\n",
"In the notebook we'll demo the `SelfQueryRetriever` wrapped around a Vectara vector store. "
"In the notebook, we'll demo the `SelfQueryRetriever` wrapped around a Vectara vector store. "
]
},
{
@@ -432,7 +433,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.10.12"
}
},
"nbformat": 4,
@@ -5,7 +5,12 @@
"id": "13afcae7",
"metadata": {},
"source": [
"# Weaviate self-querying "
"# Weaviate\n",
"\n",
">[Weaviate](https://weaviate.io/) is an open-source vector database. It allows you to store data objects and vector embeddings from\n",
">your favorite ML models, and scale seamlessly into billions of data objects.\n",
"\n",
"In the notebook, we'll demo the `SelfQueryRetriever` wrapped around a `Weaviate` vector store. "
]
},
{
@@ -293,7 +298,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.10.12"
}
},
"nbformat": 4,
@@ -32,6 +32,7 @@ from langchain.tools.requests.tool import (
    RequestsPostTool,
    RequestsPutTool,
)
from langchain.tools.eleven_labs.text2speech import ElevenLabsText2SpeechTool
from langchain.tools.scenexplain.tool import SceneXplainTool
from langchain.tools.searx_search.tool import SearxSearchResults, SearxSearchRun
from langchain.tools.shell.tool import ShellTool
@@ -285,6 +286,10 @@ def _get_dataforseo_api_search_json(**kwargs: Any) -> BaseTool:
    return DataForSeoAPISearchResults(api_wrapper=DataForSeoAPIWrapper(**kwargs))


def _get_eleven_labs_text2speech(**kwargs: Any) -> BaseTool:
    return ElevenLabsText2SpeechTool(**kwargs)


_EXTRA_LLM_TOOLS: Dict[
    str,
    Tuple[Callable[[Arg(BaseLanguageModel, "llm"), KwArg(Any)], BaseTool], List[str]],
@@ -340,6 +345,7 @@ _EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[st
        _get_dataforseo_api_search_json,
        ["api_login", "api_password", "aiosession"],
    ),
    "eleven_labs_text2speech": (_get_eleven_labs_text2speech, ["eleven_api_key"]),
}
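With the registration above, the new tool becomes loadable by name. A brief usage sketch (it assumes `ELEVEN_API_KEY` is set in the environment, matching the `eleven_api_key` extra argument registered above):

from langchain.agents import load_tools

tools = load_tools(["eleven_labs_text2speech"])
tts = tools[0]
# tts.run("Hello!") would synthesize speech via the ElevenLabs API and
# return a path to the generated audio file.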
@@ -127,7 +127,7 @@ def _parse_ai_message(message: BaseMessage) -> Union[AgentAction, AgentFinish]:
    else:
        tool_input = _tool_input

    content_msg = "responded: {content}\n" if message.content else "\n"
    content_msg = f"responded: {message.content}\n" if message.content else "\n"

    return _FunctionsAgentAction(
        tool=function_name,
@@ -129,7 +129,7 @@ def _parse_ai_message(message: BaseMessage) -> Union[List[AgentAction], AgentFin
    else:
        tool_input = _tool_input

    content_msg = "responded: {content}\n" if message.content else "\n"
    content_msg = f"responded: {message.content}\n" if message.content else "\n"
    log = f"\nInvoking: `{function_name}` with `{tool_input}`\n{content_msg}\n"
    _tool = _FunctionsAgentAction(
        tool=function_name,
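Both hunks fix the same bug: the old string lacked the `f` prefix, so the literal text `{content}` ended up in the agent log instead of the model's message content. A tiny self-contained demonstration of the difference:

message_content = "I'll look that up."
broken = "responded: {content}\n"           # braces are logged verbatim
fixed = f"responded: {message_content}\n"   # actual content is interpolated
assert broken == "responded: {content}\n"
assert fixed == "responded: I'll look that up.\n"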
@@ -80,6 +80,8 @@ def _dump_generations_to_json(generations: RETURN_VAL_TYPE) -> str:

    Returns:
        str: Json representing a list of generations.

    Warning: would not work well with arbitrary subclasses of `Generation`
    """
    return json.dumps([generation.dict() for generation in generations])

@@ -95,6 +97,8 @@ def _load_generations_from_json(generations_json: str) -> RETURN_VAL_TYPE:

    Returns:
        RETURN_VAL_TYPE: A list of generations.

    Warning: would not work well with arbitrary subclasses of `Generation`
    """
    try:
        results = json.loads(generations_json)
@@ -105,6 +109,65 @@ def _load_generations_from_json(generations_json: str) -> RETURN_VAL_TYPE:
        )


def _dumps_generations(generations: RETURN_VAL_TYPE) -> str:
    """
    Serialization for generic RETURN_VAL_TYPE, i.e. sequence of `Generation`

    Args:
        generations (RETURN_VAL_TYPE): A list of language model generations.

    Returns:
        str: a single string representing a list of generations.

    This function (+ its counterpart `_loads_generations`) rely on
    the dumps/loads pair with Reviver, so are able to deal
    with all subclasses of Generation.

    Each item in the list can be `dumps`ed to a string,
    then we make the whole list of strings into a json-dumped.
    """
    return json.dumps([dumps(_item) for _item in generations])


def _loads_generations(generations_str: str) -> Union[RETURN_VAL_TYPE, None]:
    """
    Deserialization of a string into a generic RETURN_VAL_TYPE
    (i.e. a sequence of `Generation`).

    See `_dumps_generations`, the inverse of this function.

    Args:
        generations_str (str): A string representing a list of generations.

    Compatible with the legacy cache-blob format
    Does not raise exceptions for malformed entries, just logs a warning
    and returns none: the caller should be prepared for such a cache miss.

    Returns:
        RETURN_VAL_TYPE: A list of generations.
    """
    try:
        generations = [loads(_item_str) for _item_str in json.loads(generations_str)]
        return generations
    except (json.JSONDecodeError, TypeError):
        # deferring the (soft) handling to after the legacy-format attempt
        pass

    try:
        gen_dicts = json.loads(generations_str)
        # not relying on `_load_generations_from_json` (which could disappear):
        generations = [Generation(**generation_dict) for generation_dict in gen_dicts]
        logger.warning(
            f"Legacy 'Generation' cached blob encountered: '{generations_str}'"
        )
        return generations
    except (json.JSONDecodeError, TypeError):
        logger.warning(
            f"Malformed/unparsable cached blob encountered: '{generations_str}'"
        )
        return None


class InMemoryCache(BaseCache):
    """Cache that stores things in memory."""

@@ -733,10 +796,11 @@ class CassandraCache(BaseCache):

    def __init__(
        self,
        session: CassandraSession,
        keyspace: str,
        session: Optional[CassandraSession] = None,
        keyspace: Optional[str] = None,
        table_name: str = CASSANDRA_CACHE_DEFAULT_TABLE_NAME,
        ttl_seconds: Optional[int] = CASSANDRA_CACHE_DEFAULT_TTL_SECONDS,
        skip_provisioning: bool = False,
    ):
        """
        Initialize with a ready session and a keyspace name.
@@ -767,6 +831,7 @@ class CassandraCache(BaseCache):
            keys=["llm_string", "prompt"],
            primary_key_type=["TEXT", "TEXT"],
            ttl_seconds=self.ttl_seconds,
            skip_provisioning=skip_provisioning,
        )

    def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
@@ -775,14 +840,19 @@ class CassandraCache(BaseCache):
            llm_string=_hash(llm_string),
            prompt=_hash(prompt),
        )
        if item:
            return _load_generations_from_json(item["body_blob"])
        if item is not None:
            generations = _loads_generations(item["body_blob"])
            # this protects against malformed cached items:
            if generations is not None:
                return generations
            else:
                return None
        else:
            return None

    def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
        """Update cache based on prompt and llm_string."""
        blob = _dump_generations_to_json(return_val)
        blob = _dumps_generations(return_val)
        self.kv_cache.put(
            llm_string=_hash(llm_string),
            prompt=_hash(prompt),
@@ -836,13 +906,14 @@ class CassandraSemanticCache(BaseCache):

    def __init__(
        self,
        session: CassandraSession,
        keyspace: str,
        session: Optional[CassandraSession],
        keyspace: Optional[str],
        embedding: Embeddings,
        table_name: str = CASSANDRA_SEMANTIC_CACHE_DEFAULT_TABLE_NAME,
        distance_metric: str = CASSANDRA_SEMANTIC_CACHE_DEFAULT_DISTANCE_METRIC,
        score_threshold: float = CASSANDRA_SEMANTIC_CACHE_DEFAULT_SCORE_THRESHOLD,
        ttl_seconds: Optional[int] = CASSANDRA_SEMANTIC_CACHE_DEFAULT_TTL_SECONDS,
        skip_provisioning: bool = False,
    ):
        """
        Initialize the cache with all relevant parameters.
@@ -897,6 +968,7 @@ class CassandraSemanticCache(BaseCache):
            vector_dimension=self.embedding_dimension,
            ttl_seconds=self.ttl_seconds,
            metadata_indexing=("allow", {"_llm_string_hash"}),
            skip_provisioning=skip_provisioning,
        )

    def _get_embedding_dimension(self) -> int:
@@ -906,7 +978,7 @@ class CassandraSemanticCache(BaseCache):
        """Update cache based on prompt and llm_string."""
        embedding_vector = self._get_embedding(text=prompt)
        llm_string_hash = _hash(llm_string)
        body = _dump_generations_to_json(return_val)
        body = _dumps_generations(return_val)
        metadata = {
            "_prompt": prompt,
            "_llm_string_hash": llm_string_hash,
@@ -947,11 +1019,15 @@ class CassandraSemanticCache(BaseCache):
        )
        if hits:
            hit = hits[0]
            generations_str = hit["body_blob"]
            return (
                hit["row_id"],
                _load_generations_from_json(generations_str),
            )
            generations = _loads_generations(hit["body_blob"])
            if generations is not None:
                # this protects against malformed cached items:
                return (
                    hit["row_id"],
                    generations,
                )
            else:
                return None
        else:
            return None
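The `_dumps_generations`/`_loads_generations` pair introduced above round-trips `Generation` objects (including subclasses, via `dumps`/`loads` with its Reviver) and degrades malformed blobs to a cache miss. A minimal sketch of the behavior the Cassandra caches now rely on (these are module-private names, imported here purely for illustration):

from langchain.cache import _dumps_generations, _loads_generations  # private helpers
from langchain.schema import Generation

blob = _dumps_generations([Generation(text="hello")])
restored = _loads_generations(blob)
assert restored is not None and restored[0].text == "hello"
# malformed blobs log a warning and become a cache miss instead of raising:
assert _loads_generations("not-json-at-all") is None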
@@ -7,7 +7,7 @@ from typing import Any, Dict, List, Optional, Sequence, Set, Union
from uuid import UUID

import langsmith
from langsmith import schemas as langsmith_schemas
from langsmith.evaluation.evaluator import EvaluationResult

from langchain.callbacks import manager
from langchain.callbacks.tracers import langchain as langchain_tracer
@@ -76,7 +76,7 @@ class EvaluatorCallbackHandler(BaseTracer):
        self.futures: Set[Future] = set()
        self.skip_unfinished = skip_unfinished
        self.project_name = project_name
        self.logged_feedback: Dict[str, List[langsmith_schemas.Feedback]] = {}
        self.logged_eval_results: Dict[str, List[EvaluationResult]] = {}

    def _evaluate_in_project(self, run: Run, evaluator: langsmith.RunEvaluator) -> None:
        """Evaluate the run in the project.
@@ -91,11 +91,11 @@ class EvaluatorCallbackHandler(BaseTracer):
        """
        try:
            if self.project_name is None:
                feedback = self.client.evaluate_run(run, evaluator)
                eval_result = self.client.evaluate_run(run, evaluator)
            with manager.tracing_v2_enabled(
                project_name=self.project_name, tags=["eval"], client=self.client
            ):
                feedback = self.client.evaluate_run(run, evaluator)
                eval_result = self.client.evaluate_run(run, evaluator)
        except Exception as e:
            logger.error(
                f"Error evaluating run {run.id} with "
@@ -104,7 +104,7 @@ class EvaluatorCallbackHandler(BaseTracer):
            )
            raise e
        example_id = str(run.reference_example_id)
        self.logged_feedback.setdefault(example_id, []).append(feedback)
        self.logged_eval_results.setdefault(example_id, []).append(eval_result)

    def _persist_run(self, run: Run) -> None:
        """Run the evaluator on the run.
@@ -38,7 +38,7 @@ class StuffDocumentsChain(BaseCombineDocumentsChain):
            # details.
            document_prompt = PromptTemplate(
                input_variables=["page_content"],
                template="{page_content}"
                template="{page_content}"
            )
            document_variable_name = "context"
            llm = OpenAI()
@@ -20,6 +20,8 @@ an interface where "chat messages" are the inputs and outputs.
from langchain.chat_models.anthropic import ChatAnthropic
from langchain.chat_models.anyscale import ChatAnyscale
from langchain.chat_models.azure_openai import AzureChatOpenAI
from langchain.chat_models.baidu_qianfan_endpoint import QianfanChatEndpoint
from langchain.chat_models.bedrock import BedrockChat
from langchain.chat_models.ernie import ErnieBotChat
from langchain.chat_models.fake import FakeListChatModel
from langchain.chat_models.google_palm import ChatGooglePalm
@@ -35,6 +37,7 @@ from langchain.chat_models.vertexai import ChatVertexAI

__all__ = [
    "ChatOpenAI",
    "BedrockChat",
    "AzureChatOpenAI",
    "FakeListChatModel",
    "PromptLayerChatOpenAI",
@@ -49,4 +52,5 @@ __all__ = [
    "ChatLiteLLM",
    "ErnieBotChat",
    "ChatKonko",
    "QianfanChatEndpoint",
]
293
libs/langchain/langchain/chat_models/baidu_qianfan_endpoint.py
Normal file
@@ -0,0 +1,293 @@
from __future__ import annotations

import logging
from typing import (
    Any,
    AsyncIterator,
    Dict,
    Iterator,
    List,
    Mapping,
    Optional,
)

from langchain.callbacks.manager import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain.chat_models.base import BaseChatModel
from langchain.pydantic_v1 import Field, root_validator
from langchain.schema import ChatGeneration, ChatResult
from langchain.schema.messages import (
    AIMessage,
    AIMessageChunk,
    BaseMessage,
    BaseMessageChunk,
    ChatMessage,
    FunctionMessage,
    HumanMessage,
)
from langchain.schema.output import ChatGenerationChunk
from langchain.utils import get_from_dict_or_env

logger = logging.getLogger(__name__)


def _convert_resp_to_message_chunk(resp: Mapping[str, Any]) -> BaseMessageChunk:
    return AIMessageChunk(
        content=resp["result"],
        role="assistant",
    )


def convert_message_to_dict(message: BaseMessage) -> dict:
    message_dict: Dict[str, Any]
    if isinstance(message, ChatMessage):
        message_dict = {"role": message.role, "content": message.content}
    elif isinstance(message, HumanMessage):
        message_dict = {"role": "user", "content": message.content}
    elif isinstance(message, AIMessage):
        message_dict = {"role": "assistant", "content": message.content}
        if "function_call" in message.additional_kwargs:
            message_dict["functions"] = message.additional_kwargs["function_call"]
            # If function call only, content is None not empty string
            if message_dict["content"] == "":
                message_dict["content"] = None
    elif isinstance(message, FunctionMessage):
        message_dict = {
            "role": "function",
            "content": message.content,
            "name": message.name,
        }
    else:
        raise TypeError(f"Got unknown type {message}")

    return message_dict


class QianfanChatEndpoint(BaseChatModel):
    """Baidu Qianfan chat models.

    To use, you should have the ``qianfan`` python package installed, and
    the environment variable ``qianfan_ak`` and ``qianfan_sk`` set with your
    API key and Secret Key.

    ak, sk are required parameters
    which you could get from https://cloud.baidu.com/product/wenxinworkshop

    Example:
        .. code-block:: python

            from langchain.chat_models import QianfanChatEndpoint
            qianfan_chat = QianfanChatEndpoint(model="ERNIE-Bot",
                endpoint="your_endpoint", ak="your_ak", sk="your_sk")
    """

    model_kwargs: Dict[str, Any] = Field(default_factory=dict)

    client: Any

    qianfan_ak: Optional[str] = None
    qianfan_sk: Optional[str] = None

    streaming: Optional[bool] = False
    """Whether to stream the results or not."""

    request_timeout: Optional[int] = 60
    """request timeout for chat http requests"""

    top_p: Optional[float] = 0.8
    temperature: Optional[float] = 0.95
    penalty_score: Optional[float] = 1
    """Model params, only supported in ERNIE-Bot and ERNIE-Bot-turbo.
    In the case of other models, passing these params will not affect the result.
    """

    model: str = "ERNIE-Bot-turbo"
    """Model name.
    you could get from https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu

    preset models are mapping to an endpoint.
    `model` will be ignored if `endpoint` is set
    """

    endpoint: Optional[str] = None
    """Endpoint of the Qianfan LLM, required if custom model used."""

    @root_validator()
    def validate_enviroment(cls, values: Dict) -> Dict:
        values["qianfan_ak"] = get_from_dict_or_env(
            values,
            "qianfan_ak",
            "QIANFAN_AK",
        )
        values["qianfan_sk"] = get_from_dict_or_env(
            values,
            "qianfan_sk",
            "QIANFAN_SK",
        )
        params = {
            "ak": values["qianfan_ak"],
            "sk": values["qianfan_sk"],
            "model": values["model"],
            "stream": values["streaming"],
        }
        if values["endpoint"] is not None and values["endpoint"] != "":
            params["endpoint"] = values["endpoint"]
        try:
            import qianfan

            values["client"] = qianfan.ChatCompletion(**params)
        except ImportError:
            raise ValueError(
                "qianfan package not found, please install it with "
                "`pip install qianfan`"
            )
        return values

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        return {
            **{"endpoint": self.endpoint, "model": self.model},
            **super()._identifying_params,
        }

    @property
    def _llm_type(self) -> str:
        """Return type of chat_model."""
        return "baidu-qianfan-chat"

    @property
    def _default_params(self) -> Dict[str, Any]:
        """Get the default parameters for calling the Qianfan API."""
        normal_params = {
            "stream": self.streaming,
            "request_timeout": self.request_timeout,
            "top_p": self.top_p,
            "temperature": self.temperature,
            "penalty_score": self.penalty_score,
        }

        return {**normal_params, **self.model_kwargs}

    def _convert_prompt_msg_params(
        self,
        messages: List[BaseMessage],
        **kwargs: Any,
    ) -> dict:
        return {
            **{"messages": [convert_message_to_dict(m) for m in messages]},
            **self._default_params,
            **kwargs,
        }

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Call out to a Qianfan models endpoint for each generation with a prompt.
        Args:
            messages: The messages to pass into the model.
            stop: Optional list of stop words to use when generating.
        Returns:
            The string generated by the model.

        Example:
            .. code-block:: python
                response = qianfan_model("Tell me a joke.")
        """
        if self.streaming:
            completion = ""
            for chunk in self._stream(messages, stop, run_manager, **kwargs):
                completion += chunk.text
            lc_msg = AIMessage(content=completion, additional_kwargs={})
            gen = ChatGeneration(
                message=lc_msg,
                generation_info=dict(finish_reason="finished"),
            )
            return ChatResult(
                generations=[gen],
                llm_output={"token_usage": {}, "model_name": self.model},
            )
        params = self._convert_prompt_msg_params(messages, **kwargs)
        response_payload = self.client.do(**params)
        lc_msg = AIMessage(content=response_payload["result"], additional_kwargs={})
        gen = ChatGeneration(
            message=lc_msg,
            generation_info=dict(finish_reason="finished"),
        )
        token_usage = response_payload.get("usage", {})
        llm_output = {"token_usage": token_usage, "model_name": self.model}
        return ChatResult(generations=[gen], llm_output=llm_output)

    async def _agenerate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        if self.streaming:
            completion = ""
            async for chunk in self._astream(messages, stop, run_manager, **kwargs):
                completion += chunk.text
            lc_msg = AIMessage(content=completion, additional_kwargs={})
            gen = ChatGeneration(
                message=lc_msg,
                generation_info=dict(finish_reason="finished"),
            )
            return ChatResult(
                generations=[gen],
                llm_output={"token_usage": {}, "model_name": self.model},
            )
        params = self._convert_prompt_msg_params(messages, **kwargs)
        response_payload = await self.client.ado(**params)
        lc_msg = AIMessage(content=response_payload["result"], additional_kwargs={})
        generations = []
        gen = ChatGeneration(
            message=lc_msg,
            generation_info=dict(finish_reason="finished"),
        )
        generations.append(gen)
        token_usage = response_payload.get("usage", {})
        llm_output = {"token_usage": token_usage, "model_name": self.model}
        return ChatResult(generations=generations, llm_output=llm_output)

    def _stream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[ChatGenerationChunk]:
        params = self._convert_prompt_msg_params(messages, **kwargs)
        for res in self.client.do(**params):
            if res:
                chunk = ChatGenerationChunk(
                    text=res["result"],
                    message=_convert_resp_to_message_chunk(res),
                    generation_info={"finish_reason": "finished"},
                )
                yield chunk
                if run_manager:
                    run_manager.on_llm_new_token(chunk.text)

    async def _astream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> AsyncIterator[ChatGenerationChunk]:
        params = self._convert_prompt_msg_params(messages, **kwargs)
        async for res in await self.client.ado(**params):
            if res:
                chunk = ChatGenerationChunk(
                    text=res["result"], message=_convert_resp_to_message_chunk(res)
                )
                yield chunk
                if run_manager:
                    await run_manager.on_llm_new_token(chunk.text)
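A short usage sketch of the new chat model (it assumes `QIANFAN_AK` and `QIANFAN_SK` are exported and that the runnable `.stream` interface of this LangChain version is available; the prompt is arbitrary):

from langchain.chat_models import QianfanChatEndpoint
from langchain.schema.messages import HumanMessage

chat = QianfanChatEndpoint(streaming=True)
for chunk in chat.stream([HumanMessage(content="Write a haiku about rivers.")]):
    print(chunk.content, end="", flush=True)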
@@ -28,6 +28,7 @@ class ArcGISLoader(BaseLoader):
        out_fields: Optional[Union[List[str], str]] = None,
        return_geometry: bool = False,
        return_all_records: bool = True,
        lyr_desc: Optional[str] = None,
        **kwargs: Any,
    ):
        try:
@@ -55,7 +56,7 @@ class ArcGISLoader(BaseLoader):
        self.url = layer.url
        self.layer = layer

        self.layer_properties = self._get_layer_properties()
        self.layer_properties = self._get_layer_properties(lyr_desc)

        self.where = where

@@ -70,21 +71,23 @@ class ArcGISLoader(BaseLoader):
        self.return_all_records = return_all_records
        self.kwargs = kwargs

    def _get_layer_properties(self) -> dict:
    def _get_layer_properties(self, lyr_desc: Optional[str] = None) -> dict:
        """Get the layer properties from the FeatureLayer."""
        import arcgis

        layer_number_pattern = re.compile(r"/\d+$")
        props = self.layer.properties

        try:
            if self.BEAUTIFULSOUP:
                lyr_desc = self.BEAUTIFULSOUP(props["description"]).text
            else:
                lyr_desc = props["description"]
            lyr_desc = lyr_desc or _NOT_PROVIDED
        except KeyError:
            lyr_desc = _NOT_PROVIDED
        if lyr_desc is None:
            # retrieve description from the FeatureLayer if not provided
            try:
                if self.BEAUTIFULSOUP:
                    lyr_desc = self.BEAUTIFULSOUP(props["description"]).text
                else:
                    lyr_desc = props["description"]
                lyr_desc = lyr_desc or _NOT_PROVIDED
            except KeyError:
                lyr_desc = _NOT_PROVIDED
        try:
            item_id = props["serviceItemId"]
            item = self.gis.content.get(item_id) or arcgis.features.FeatureLayer(
@@ -109,7 +112,6 @@ class ArcGISLoader(BaseLoader):

    def lazy_load(self) -> Iterator[Document]:
        """Lazy load records from FeatureLayer."""

        query_response = self.layer.query(
            where=self.where,
            out_fields=self.out_fields,
@@ -117,19 +119,30 @@ class ArcGISLoader(BaseLoader):
            return_all_records=self.return_all_records,
            **self.kwargs,
        )
        features = (feature.as_dict["attributes"] for feature in query_response)
        features = (feature.as_dict for feature in query_response)
        for feature in features:
            yield Document(
                page_content=json.dumps(feature),
                metadata={
                    "accessed": f"{datetime.now(timezone.utc).isoformat()}Z",
                    "name": self.layer_properties["layer_properties"]["name"],
                    "url": self.url,
                    "layer_description": self.layer_properties["layer_description"],
                    "item_description": self.layer_properties["item_description"],
                    "layer_properties": self.layer_properties["layer_properties"],
                },
            )
            attributes = feature["attributes"]
            page_content = json.dumps(attributes)

            metadata = {
                "accessed": f"{datetime.now(timezone.utc).isoformat()}Z",
                "name": self.layer_properties["layer_properties"]["name"],
                "url": self.url,
                "layer_description": self.layer_properties["layer_description"],
                "item_description": self.layer_properties["item_description"],
                "layer_properties": self.layer_properties["layer_properties"],
            }

            if self.return_geometry:
                try:
                    geometry = feature["geometry"]
                    metadata.update({"geometry": geometry})
                except KeyError:
                    warnings.warn(
                        "Geometry could not be retrieved from the feature layer."
                    )

            yield Document(page_content=page_content, metadata=metadata)

    def load(self) -> List[Document]:
        """Load all records from FeatureLayer."""
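A sketch of the two behaviors added above: a caller-supplied `lyr_desc` skips the BeautifulSoup description lookup, and `return_geometry=True` now places the feature geometry in the document metadata (the layer URL below is hypothetical):

from langchain.document_loaders import ArcGISLoader

loader = ArcGISLoader(
    layer="https://services.arcgis.com/<org>/arcgis/rest/services/<layer>/FeatureServer/0",
    lyr_desc="Manually supplied layer description",  # skips the HTML description lookup
    return_geometry=True,  # geometry ends up in doc.metadata["geometry"]
)
for doc in loader.lazy_load():
    print(doc.metadata.get("geometry"))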
@@ -6,7 +6,7 @@ import time
from abc import ABC
from io import StringIO
from pathlib import Path
from typing import Any, Iterator, List, Mapping, Optional, Sequence, Union
from typing import Any, Dict, Iterator, List, Mapping, Optional, Sequence, Union
from urllib.parse import urlparse

import requests
@@ -62,14 +62,20 @@ class UnstructuredPDFLoader(UnstructuredFileLoader):
class BasePDFLoader(BaseLoader, ABC):
    """Base Loader class for `PDF` files.

    Defaults to check for local file, but if the file is a web path, it will download it
    to a temporary file, use it, then clean up the temporary file after completion
    If the file is a web path, it will download it to a temporary file, use it, then
    clean up the temporary file after completion.
    """

    def __init__(self, file_path: str):
        """Initialize with a file path."""
    def __init__(self, file_path: str, *, headers: Optional[Dict] = None):
        """Initialize with a file path.

        Args:
            file_path: Either a local, S3 or web path to a PDF file.
            headers: Headers to use for GET request to download a file from a web path.
        """
        self.file_path = file_path
        self.web_path = None
        self.headers = headers
        if "~" in self.file_path:
            self.file_path = os.path.expanduser(self.file_path)

@@ -78,18 +84,15 @@ class BasePDFLoader(BaseLoader, ABC):
            self.temp_dir = tempfile.TemporaryDirectory()
            _, suffix = os.path.splitext(self.file_path)
            temp_pdf = os.path.join(self.temp_dir.name, f"tmp{suffix}")
            if self._is_s3_url(self.file_path):
                self.web_path = self.file_path
            else:
                r = requests.get(self.file_path)

            self.web_path = self.file_path
            if not self._is_s3_url(self.file_path):
                r = requests.get(self.file_path, headers=self.headers)
                if r.status_code != 200:
                    raise ValueError(
                        "Check the url of your file; returned status code %s"
                        % r.status_code
                    )

                self.web_path = self.file_path
                with open(temp_pdf, mode="wb") as f:
                    f.write(r.content)
                self.file_path = str(temp_pdf)
@@ -138,7 +141,10 @@ class PyPDFLoader(BasePDFLoader):
    """

    def __init__(
        self, file_path: str, password: Optional[Union[str, bytes]] = None
        self,
        file_path: str,
        password: Optional[Union[str, bytes]] = None,
        headers: Optional[Dict] = None,
    ) -> None:
        """Initialize with a file path."""
        try:
@@ -148,7 +154,7 @@ class PyPDFLoader(BasePDFLoader):
                "pypdf package not found, please install it with " "`pip install pypdf`"
            )
        self.parser = PyPDFParser(password=password)
        super().__init__(file_path)
        super().__init__(file_path, headers=headers)

    def load(self) -> List[Document]:
        """Load given path as pages."""
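A sketch of the new `headers` pass-through for web paths (the URL and token are hypothetical; the headers are forwarded to `requests.get` when the PDF is downloaded):

from langchain.document_loaders import PyPDFLoader

loader = PyPDFLoader(
    "https://example.com/protected/report.pdf",
    headers={"Authorization": "Bearer <token>"},
)
pages = loader.load()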
@@ -165,9 +171,9 @@
class PyPDFium2Loader(BasePDFLoader):
    """Load `PDF` using `pypdfium2` and chunks at character level."""

    def __init__(self, file_path: str):
    def __init__(self, file_path: str, *, headers: Optional[Dict] = None):
        """Initialize with a file path."""
        super().__init__(file_path)
        super().__init__(file_path, headers=headers)
        self.parser = PyPDFium2Parser()

    def load(self) -> List[Document]:
@@ -230,7 +236,7 @@ class PyPDFDirectoryLoader(BaseLoader):
class PDFMinerLoader(BasePDFLoader):
    """Load `PDF` files using `PDFMiner`."""

    def __init__(self, file_path: str) -> None:
    def __init__(self, file_path: str, *, headers: Optional[Dict] = None) -> None:
        """Initialize with file path."""
        try:
            from pdfminer.high_level import extract_text  # noqa:F401
@@ -240,7 +246,7 @@ class PDFMinerLoader(BasePDFLoader):
                "`pip install pdfminer.six`"
            )

        super().__init__(file_path)
        super().__init__(file_path, headers=headers)
        self.parser = PDFMinerParser()

    def load(self) -> List[Document]:
@@ -258,7 +264,7 @@ class PDFMinerPDFasHTMLLoader(BasePDFLoader):
class PDFMinerPDFasHTMLLoader(BasePDFLoader):
    """Load `PDF` files as HTML content using `PDFMiner`."""

    def __init__(self, file_path: str):
    def __init__(self, file_path: str, *, headers: Optional[Dict] = None):
        """Initialize with a file path."""
        try:
            from pdfminer.high_level import extract_text_to_fp  # noqa:F401
@@ -268,7 +274,7 @@ class PDFMinerPDFasHTMLLoader(BasePDFLoader):
                "`pip install pdfminer.six`"
            )

        super().__init__(file_path)
        super().__init__(file_path, headers=headers)

    def load(self) -> List[Document]:
        """Load file."""
@@ -292,7 +298,7 @@ class PyMuPDFLoader(BasePDFLoader):
class PyMuPDFLoader(BasePDFLoader):
    """Load `PDF` files using `PyMuPDF`."""

    def __init__(self, file_path: str) -> None:
    def __init__(self, file_path: str, *, headers: Optional[Dict] = None) -> None:
        """Initialize with a file path."""
        try:
            import fitz  # noqa:F401
@@ -302,7 +308,7 @@ class PyMuPDFLoader(BasePDFLoader):
                "`pip install pymupdf`"
            )

        super().__init__(file_path)
        super().__init__(file_path, headers=headers)

    def load(self, **kwargs: Optional[Any]) -> List[Document]:
        """Load file."""
@@ -335,19 +341,19 @@ class MathpixPDFLoader(BasePDFLoader):
            should_clean_pdf: a flag to clean the PDF file. Default is False.
            **kwargs: additional keyword arguments.
        """
        super().__init__(file_path)
        self.mathpix_api_key = get_from_dict_or_env(
            kwargs, "mathpix_api_key", "MATHPIX_API_KEY"
        )
        self.mathpix_api_id = get_from_dict_or_env(
            kwargs, "mathpix_api_id", "MATHPIX_API_ID"
        )
        super().__init__(file_path, **kwargs)
        self.processed_file_format = processed_file_format
        self.max_wait_time_seconds = max_wait_time_seconds
        self.should_clean_pdf = should_clean_pdf

    @property
    def headers(self) -> dict:
    def _mathpix_headers(self) -> Dict[str, str]:
        return {"app_id": self.mathpix_api_id, "app_key": self.mathpix_api_key}

    @property
@@ -363,7 +369,7 @@ class MathpixPDFLoader(BasePDFLoader):
        with open(self.file_path, "rb") as f:
            files = {"file": f}
            response = requests.post(
                self.url, headers=self.headers, files=files, data=self.data
                self.url, headers=self._mathpix_headers, files=files, data=self.data
            )
        response_data = response.json()
        if "pdf_id" in response_data:
@@ -441,6 +447,7 @@ class PDFPlumberLoader(BasePDFLoader):
        file_path: str,
        text_kwargs: Optional[Mapping[str, Any]] = None,
        dedupe: bool = False,
        headers: Optional[Dict] = None,
    ) -> None:
        """Initialize with a file path."""
        try:
@@ -451,7 +458,7 @@ class PDFPlumberLoader(BasePDFLoader):
                "`pip install pdfplumber`"
            )

        super().__init__(file_path)
        super().__init__(file_path, headers=headers)
        self.text_kwargs = text_kwargs or {}
        self.dedupe = dedupe

@@ -493,6 +500,7 @@ class AmazonTextractPDFLoader(BasePDFLoader):
        credentials_profile_name: Optional[str] = None,
        region_name: Optional[str] = None,
        endpoint_url: Optional[str] = None,
        headers: Optional[Dict] = None,
    ) -> None:
        """Initialize the loader.

@@ -507,7 +515,7 @@ class AmazonTextractPDFLoader(BasePDFLoader):
            endpoint_url: endpoint url for the textract service (Optional)

        """
        super().__init__(file_path)
        super().__init__(file_path, headers=headers)

        try:
            import textractcaller as tc  # noqa: F401
@@ -608,7 +616,11 @@ class DocumentIntelligenceLoader(BasePDFLoader):
    """Loads a PDF with Azure Document Intelligence"""

    def __init__(
        self, file_path: str, client: Any, model: str = "prebuilt-document"
        self,
        file_path: str,
        client: Any,
        model: str = "prebuilt-document",
        headers: Optional[Dict] = None,
    ) -> None:
        """
        Initialize the object for file processing with Azure Document Intelligence
|
||||
@@ -638,7 +650,7 @@ class DocumentIntelligenceLoader(BasePDFLoader):
|
||||
"""
|
||||
|
||||
self.parser = DocumentIntelligenceParser(client=client, model=model)
|
||||
super().__init__(file_path)
|
||||
super().__init__(file_path, headers=headers)
|
||||
|
||||
def load(self) -> List[Document]:
|
||||
"""Load given path as pages."""
|
||||
|
||||
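Taken together, these hunks thread an optional `headers` mapping from each loader constructor down to `BasePDFLoader`, which forwards it to `requests.get` when downloading a remote (non-S3) PDF. A minimal usage sketch, assuming a reachable URL and a bearer token (both placeholders, not from this diff):

from langchain.document_loaders import PyPDFLoader

# Hypothetical URL and token, for illustration only.
loader = PyPDFLoader(
    "https://example.com/protected/report.pdf",
    headers={"Authorization": "Bearer <token>"},
)
pages = loader.load()  # the headers are sent with the download request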
@@ -19,6 +19,7 @@ from langchain.embeddings.aleph_alpha import (
     AlephAlphaSymmetricSemanticEmbedding,
 )
 from langchain.embeddings.awa import AwaEmbeddings
+from langchain.embeddings.baidu_qianfan_endpoint import QianfanEmbeddingsEndpoint
 from langchain.embeddings.bedrock import BedrockEmbeddings
 from langchain.embeddings.cache import CacheBackedEmbeddings
 from langchain.embeddings.clarifai import ClarifaiEmbeddings
@@ -105,6 +106,7 @@ __all__ = [
     "AwaEmbeddings",
     "HuggingFaceBgeEmbeddings",
     "ErnieEmbeddings",
+    "QianfanEmbeddingsEndpoint",
 ]

libs/langchain/langchain/embeddings/baidu_qianfan_endpoint.py (new file, 138 lines)
@@ -0,0 +1,138 @@
from __future__ import annotations

import logging
from typing import Any, Dict, List, Optional

from langchain.embeddings.base import Embeddings
from langchain.pydantic_v1 import BaseModel, root_validator
from langchain.utils import get_from_dict_or_env

logger = logging.getLogger(__name__)


class QianfanEmbeddingsEndpoint(BaseModel, Embeddings):
    """`Baidu Qianfan Embeddings` embedding models."""

    qianfan_ak: Optional[str] = None
    """Qianfan application apikey"""

    qianfan_sk: Optional[str] = None
    """Qianfan application secretkey"""

    chunk_size: int = 16
    """Chunk size when multiple texts are input"""

    model: str = "Embedding-V1"
    """Model name
    you could get from https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu

    for now, we support Embedding-V1 and
    - Embedding-V1 (default model)
    - bge-large-en
    - bge-large-zh

    preset models are mapped to an endpoint.
    `model` will be ignored if `endpoint` is set
    """

    endpoint: str = ""
    """Endpoint of the Qianfan Embedding, required if a custom model is used."""

    client: Any
    """Qianfan client"""

    max_retries: int = 5
    """Maximum number of retries"""

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """
        Validate whether qianfan_ak and qianfan_sk are available in the environment
        variables or configuration file.

        Initializes the qianfan embedding client with `ak`, `sk`, `model`, `endpoint`.

        Args:

            values: a dictionary containing configuration information, must include the
                fields qianfan_ak and qianfan_sk
        Returns:

            a dictionary containing configuration information. If qianfan_ak and
            qianfan_sk are not provided in the environment variables or configuration
            file, the original values will be returned; otherwise, values containing
            qianfan_ak and qianfan_sk will be returned.
        Raises:

            ValueError: qianfan package not found, please install it with `pip install
            qianfan`
        """
        values["qianfan_ak"] = get_from_dict_or_env(
            values,
            "qianfan_ak",
            "QIANFAN_AK",
        )
        values["qianfan_sk"] = get_from_dict_or_env(
            values,
            "qianfan_sk",
            "QIANFAN_SK",
        )

        try:
            import qianfan

            params = {
                "ak": values["qianfan_ak"],
                "sk": values["qianfan_sk"],
                "model": values["model"],
            }
            if values["endpoint"] is not None and values["endpoint"] != "":
                params["endpoint"] = values["endpoint"]
            values["client"] = qianfan.Embedding(**params)
        except ImportError:
            raise ValueError(
                "qianfan package not found, please install it with "
                "`pip install qianfan`"
            )
        return values

    def embed_query(self, text: str) -> List[float]:
        resp = self.embed_documents([text])
        return resp[0]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """
        Embeds a list of text documents using the Qianfan Embeddings Endpoint.

        Args:
            texts (List[str]): A list of text documents to embed.

        Returns:
            List[List[float]]: A list of embeddings for each document in the input
                list. Each embedding is represented as a list of float values.
        """
        text_in_chunks = [
            texts[i : i + self.chunk_size]
            for i in range(0, len(texts), self.chunk_size)
        ]
        lst = []
        for chunk in text_in_chunks:
            resp = self.client.do(texts=chunk)
            lst.extend([res["embedding"] for res in resp["data"]])
        return lst

    async def aembed_query(self, text: str) -> List[float]:
        embeddings = await self.aembed_documents([text])
        return embeddings[0]

    async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
        text_in_chunks = [
            texts[i : i + self.chunk_size]
            for i in range(0, len(texts), self.chunk_size)
        ]
        lst = []
        for chunk in text_in_chunks:
            resp = await self.client.ado(texts=chunk)
            for res in resp["data"]:
                lst.extend([res["embedding"]])
        return lst
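A short usage sketch for the new embeddings class, assuming QIANFAN_AK and QIANFAN_SK are exported in the environment (the texts are placeholders):

from langchain.embeddings import QianfanEmbeddingsEndpoint

embeddings = QianfanEmbeddingsEndpoint()  # credentials read from QIANFAN_AK / QIANFAN_SK
doc_vectors = embeddings.embed_documents(["foo", "bar"])  # batched in chunks of 16
query_vector = embeddings.embed_query("foo")              # single-text convenience wrapper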
@@ -26,6 +26,7 @@ from langchain.llms.anthropic import Anthropic
 from langchain.llms.anyscale import Anyscale
 from langchain.llms.aviary import Aviary
 from langchain.llms.azureml_endpoint import AzureMLOnlineEndpoint
+from langchain.llms.baidu_qianfan_endpoint import QianfanLLMEndpoint
 from langchain.llms.bananadev import Banana
 from langchain.llms.base import BaseLLM
 from langchain.llms.baseten import Baseten
@@ -160,6 +161,7 @@ __all__ = [
     "Writer",
     "OctoAIEndpoint",
     "Xinference",
+    "QianfanLLMEndpoint",
 ]

 type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
@@ -228,4 +230,5 @@ type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
     "vllm_openai": VLLMOpenAI,
     "writer": Writer,
     "xinference": Xinference,
+    "qianfan_endpoint": QianfanLLMEndpoint,
 }
libs/langchain/langchain/llms/baidu_qianfan_endpoint.py (new file, 217 lines)
@@ -0,0 +1,217 @@
from __future__ import annotations

import logging
from typing import (
    Any,
    AsyncIterator,
    Dict,
    Iterator,
    List,
    Optional,
)

from langchain.callbacks.manager import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain.llms.base import LLM
from langchain.pydantic_v1 import Field, root_validator
from langchain.schema.output import GenerationChunk
from langchain.utils import get_from_dict_or_env

logger = logging.getLogger(__name__)


class QianfanLLMEndpoint(LLM):
    """Baidu Qianfan hosted open source or customized models.

    To use, you should have the ``qianfan`` python package installed, and
    the environment variables ``qianfan_ak`` and ``qianfan_sk`` set with
    your API key and Secret Key.

    ak, sk are required parameters which you could get from
    https://cloud.baidu.com/product/wenxinworkshop

    Example:
        .. code-block:: python

            from langchain.llms import QianfanLLMEndpoint
            qianfan_model = QianfanLLMEndpoint(model="ERNIE-Bot",
                endpoint="your_endpoint", ak="your_ak", sk="your_sk")
    """

    model_kwargs: Dict[str, Any] = Field(default_factory=dict)

    client: Any

    qianfan_ak: Optional[str] = None
    qianfan_sk: Optional[str] = None

    streaming: Optional[bool] = False
    """Whether to stream the results or not."""

    model: str = "ERNIE-Bot-turbo"
    """Model name.
    you could get from https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu

    preset models are mapped to an endpoint.
    `model` will be ignored if `endpoint` is set
    """

    endpoint: Optional[str] = None
    """Endpoint of the Qianfan LLM, required if a custom model is used."""

    request_timeout: Optional[int] = 60
    """request timeout for chat http requests"""

    top_p: Optional[float] = 0.8
    temperature: Optional[float] = 0.95
    penalty_score: Optional[float] = 1
    """Model params, only supported in ERNIE-Bot and ERNIE-Bot-turbo.
    In the case of other models, passing these params will not affect the result.
    """

    @root_validator()
    def validate_enviroment(cls, values: Dict) -> Dict:
        values["qianfan_ak"] = get_from_dict_or_env(
            values,
            "qianfan_ak",
            "QIANFAN_AK",
        )
        values["qianfan_sk"] = get_from_dict_or_env(
            values,
            "qianfan_sk",
            "QIANFAN_SK",
        )

        params = {
            "ak": values["qianfan_ak"],
            "sk": values["qianfan_sk"],
            "model": values["model"],
        }
        if values["endpoint"] is not None and values["endpoint"] != "":
            params["endpoint"] = values["endpoint"]
        try:
            import qianfan

            values["client"] = qianfan.Completion(**params)
        except ImportError:
            raise ValueError(
                "qianfan package not found, please install it with "
                "`pip install qianfan`"
            )
        return values

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        return {
            **{"endpoint": self.endpoint, "model": self.model},
            **super()._identifying_params,
        }

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "baidu-qianfan-endpoint"

    @property
    def _default_params(self) -> Dict[str, Any]:
        """Get the default parameters for calling the Qianfan API."""
        normal_params = {
            "stream": self.streaming,
            "request_timeout": self.request_timeout,
            "top_p": self.top_p,
            "temperature": self.temperature,
            "penalty_score": self.penalty_score,
        }

        return {**normal_params, **self.model_kwargs}

    def _convert_prompt_msg_params(
        self,
        prompt: str,
        **kwargs: Any,
    ) -> dict:
        return {
            **{"prompt": prompt, "model": self.model},
            **self._default_params,
            **kwargs,
        }

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call out to a Qianfan model endpoint for each generation with a prompt.
        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.
        Returns:
            The string generated by the model.

        Example:
            .. code-block:: python

                response = qianfan_model("Tell me a joke.")
        """
        if self.streaming:
            completion = ""
            for chunk in self._stream(prompt, stop, run_manager, **kwargs):
                completion += chunk.text
            return completion
        params = self._convert_prompt_msg_params(prompt, **kwargs)
        response_payload = self.client.do(**params)

        return response_payload["result"]

    async def _acall(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        if self.streaming:
            completion = ""
            async for chunk in self._astream(prompt, stop, run_manager, **kwargs):
                completion += chunk.text
            return completion

        params = self._convert_prompt_msg_params(prompt, **kwargs)
        response_payload = await self.client.ado(**params)

        return response_payload["result"]

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        params = self._convert_prompt_msg_params(prompt, **kwargs)

        for res in self.client.do(**params):
            if res:
                chunk = GenerationChunk(text=res["result"])
                yield chunk
                if run_manager:
                    run_manager.on_llm_new_token(chunk.text)

    async def _astream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> AsyncIterator[GenerationChunk]:
        params = self._convert_prompt_msg_params(prompt, **kwargs)
        async for res in await self.client.ado(**params):
            if res:
                chunk = GenerationChunk(text=res["result"])

                yield chunk
                if run_manager:
                    await run_manager.on_llm_new_token(chunk.text)
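A hedged usage sketch of the new LLM wrapper, again assuming QIANFAN_AK and QIANFAN_SK are set in the environment; with streaming=True, the `_call` above simply concatenates the streamed chunks:

from langchain.llms import QianfanLLMEndpoint

llm = QianfanLLMEndpoint(streaming=True)  # defaults to the ERNIE-Bot-turbo model
text = llm("Tell me a joke.")             # chunks are accumulated into one string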
@@ -265,10 +265,19 @@ class OpenLLM(LLM):
             self._identifying_params["model_name"], **copied
         )
         if self._client:
-            return self._client.query(prompt, **config.model_dump(flatten=True))
+            res = self._client.query(prompt, **config.model_dump(flatten=True))
         else:
             assert self._runner is not None
-            return self._runner(prompt, **config.model_dump(flatten=True))
+            res = self._runner(prompt, **config.model_dump(flatten=True))
+        if isinstance(res, dict) and "text" in res:
+            return res["text"]
+        elif isinstance(res, str):
+            return res
+        else:
+            raise ValueError(
+                "Expected result to be a dict with key 'text' or a string. "
+                f"Received {res}"
+            )

     async def _acall(
         self,
@@ -291,7 +300,7 @@ class OpenLLM(LLM):
             self._identifying_params["model_name"], **copied
         )
         if self._client:
-            return await self._client.acall(
+            res = await self._client.acall(
                 "generate", prompt, **config.model_dump(flatten=True)
             )
         else:
@@ -304,6 +313,16 @@ class OpenLLM(LLM):
             generated_result = await self._runner.generate.async_run(
                 prompt, **generate_kwargs
             )
-            return self._runner.llm.postprocess_generate(
+            res = self._runner.llm.postprocess_generate(
                 prompt, generated_result, **postprocess_kwargs
             )

+        if isinstance(res, dict) and "text" in res:
+            return res["text"]
+        elif isinstance(res, str):
+            return res
+        else:
+            raise ValueError(
+                "Expected result to be a dict with key 'text' or a string. "
+                f"Received {res}"
+            )
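The repeated dict-or-string handling in both `_call` and `_acall` amounts to a small normalization step; a standalone sketch of the same logic (the helper name is mine, not from the diff):

from typing import Any

def _coerce_generation(res: Any) -> str:
    # Some openllm backends return {"text": ...}; others return a bare string.
    if isinstance(res, dict) and "text" in res:
        return res["text"]
    if isinstance(res, str):
        return res
    raise ValueError(
        f"Expected result to be a dict with key 'text' or a string. Received {res}"
    )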
@@ -1,11 +1,17 @@
 from __future__ import annotations

 import logging
-from typing import Any, Dict, List, Mapping, Optional
+from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional

 from langchain.callbacks.manager import CallbackManagerForLLMRun
 from langchain.llms.base import LLM
 from langchain.pydantic_v1 import Extra, Field, root_validator
+from langchain.schema.output import GenerationChunk
 from langchain.utils import get_from_dict_or_env

+if TYPE_CHECKING:
+    from replicate.prediction import Prediction
+
 logger = logging.getLogger(__name__)

@@ -17,32 +23,42 @@ class Replicate(LLM):
     You can find your token here: https://replicate.com/account

     The model param is required, but any other model parameters can also
-    be passed in with the format input={model_param: value, ...}
+    be passed in with the format model_kwargs={model_param: value, ...}

     Example:
         .. code-block:: python

            from langchain.llms import Replicate
-           replicate = Replicate(model="stability-ai/stable-diffusion: \
-                                 27b93a2413e7f36cd83da926f365628\
-                                 0b2931564ff050bf9575f1fdf9bcd7478",
-                                 input={"image_dimensions": "512x512"})
+
+           replicate = Replicate(
+               model=(
+                   "stability-ai/stable-diffusion: "
+                   "27b93a2413e7f36cd83da926f3656280b2931564ff050bf9575f1fdf9bcd7478",
+               ),
+               model_kwargs={"image_dimensions": "512x512"}
+           )
     """

     model: str
-    input: Dict[str, Any] = Field(default_factory=dict)
-    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
+    model_kwargs: Dict[str, Any] = Field(default_factory=dict, alias="input")
     replicate_api_token: Optional[str] = None
     prompt_key: Optional[str] = None
+    version_obj: Any = Field(default=None, exclude=True)
+    """Optionally pass in the model version object during initialization to avoid
+    having to make an extra API call to retrieve it during streaming. NOTE: not
+    serializable, is excluded from serialization.
+    """

-    streaming: bool = Field(default=False)
+    streaming: bool = False
     """Whether to stream the results."""

-    stop: Optional[List[str]] = Field(default=[])
+    stop: List[str] = Field(default_factory=list)
     """Stop sequences to early-terminate generation."""

     class Config:
         """Configuration for this pydantic config."""

+        allow_population_by_field_name = True
         extra = Extra.forbid

     @property
@@ -58,7 +74,12 @@ class Replicate(LLM):
         """Build extra kwargs from additional params that were passed in."""
         all_required_field_names = {field.alias for field in cls.__fields__.values()}

-        extra = values.get("model_kwargs", {})
+        input = values.pop("input", {})
+        if input:
+            logger.warning(
+                "Init param `input` is deprecated, please use `model_kwargs` instead."
+            )
+        extra = {**values.get("model_kwargs", {}), **input}
         for field_name in list(values):
             if field_name not in all_required_field_names:
                 if field_name in extra:
@@ -81,11 +102,11 @@ class Replicate(LLM):
         return values

     @property
-    def _identifying_params(self) -> Mapping[str, Any]:
+    def _identifying_params(self) -> Dict[str, Any]:
         """Get the identifying parameters."""
         return {
             "model": self.model,
-            **{"model_kwargs": self.model_kwargs},
+            "model_kwargs": self.model_kwargs,
         }

     @property
@@ -101,6 +122,66 @@ class Replicate(LLM):
         **kwargs: Any,
     ) -> str:
         """Call to replicate endpoint."""
+        if self.streaming:
+            completion: Optional[str] = None
+            for chunk in self._stream(
+                prompt, stop=stop, run_manager=run_manager, **kwargs
+            ):
+                if completion is None:
+                    completion = chunk.text
+                else:
+                    completion += chunk.text
+        else:
+            prediction = self._create_prediction(prompt, **kwargs)
+            prediction.wait()
+            if prediction.status == "failed":
+                raise RuntimeError(prediction.error)
+            if isinstance(prediction.output, str):
+                completion = prediction.output
+            else:
+                completion = "".join(prediction.output)
+        assert completion is not None
+        stop_conditions = stop or self.stop
+        for s in stop_conditions:
+            if s in completion:
+                completion = completion[: completion.find(s)]
+        return completion
+
+    def _stream(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> Iterator[GenerationChunk]:
+        prediction = self._create_prediction(prompt, **kwargs)
+        stop_conditions = stop or self.stop
+        stop_condition_reached = False
+        current_completion: str = ""
+        for output in prediction.output_iterator():
+            current_completion += output
+            # test for stop conditions, if specified
+            for s in stop_conditions:
+                if s in current_completion:
+                    prediction.cancel()
+                    stop_condition_reached = True
+                    # Potentially some tokens that should still be yielded before
+                    # ending stream.
+                    stop_index = max(output.find(s), 0)
+                    output = output[:stop_index]
+                    if not output:
+                        break
+            if output:
+                yield GenerationChunk(text=output)
+                if run_manager:
+                    run_manager.on_llm_new_token(
+                        output,
+                        verbose=self.verbose,
+                    )
+            if stop_condition_reached:
+                break
+
+    def _create_prediction(self, prompt: str, **kwargs: Any) -> Prediction:
         try:
             import replicate as replicate_python
         except ImportError:
@@ -110,41 +191,27 @@ class Replicate(LLM):
             )

-        # get the model and version
-        model_str, version_str = self.model.split(":")
-        model = replicate_python.models.get(model_str)
-        version = model.versions.get(version_str)
+        if self.version_obj is None:
+            model_str, version_str = self.model.split(":")
+            model = replicate_python.models.get(model_str)
+            self.version_obj = model.versions.get(version_str)

-        # sort through the openapi schema to get the name of the first input
-        input_properties = sorted(
-            version.openapi_schema["components"]["schemas"]["Input"][
-                "properties"
-            ].items(),
-            key=lambda item: item[1].get("x-order", 0),
-        )
+        if self.prompt_key is None:
+            # sort through the openapi schema to get the name of the first input
+            input_properties = sorted(
+                self.version_obj.openapi_schema["components"]["schemas"]["Input"][
+                    "properties"
+                ].items(),
+                key=lambda item: item[1].get("x-order", 0),
+            )
+
+            self.prompt_key = input_properties[0][0]
+
+        input_: Dict = {
+            self.prompt_key: prompt,
+            **self.model_kwargs,
+            **kwargs,
+        }
+        return replicate_python.predictions.create(
+            version=self.version_obj, input=input_
+        )
-        first_input_name = input_properties[0][0]
-        inputs = {first_input_name: prompt, **self.input}
-
-        prediction = replicate_python.predictions.create(
-            version=version, input={**inputs, **kwargs}
-        )
-        current_completion: str = ""
-        stop_condition_reached = False
-        for output in prediction.output_iterator():
-            current_completion += output
-
-            # test for stop conditions, if specified
-            if stop:
-                for s in stop:
-                    if s in current_completion:
-                        prediction.cancel()
-                        stop_index = current_completion.find(s)
-                        current_completion = current_completion[:stop_index]
-                        stop_condition_reached = True
-                        break
-
-            if stop_condition_reached:
-                break
-
-            if self.streaming and run_manager:
-                run_manager.on_llm_new_token(output)
-        return current_completion
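A usage sketch reflecting the reworked interface: `model_kwargs` (with `input` kept as a deprecated alias), a real `_stream` implementation, and stop sequences honored on both paths. The model/version string below is a placeholder, not from this diff:

from langchain.llms import Replicate

llm = Replicate(
    model="replicate/vicuna-13b:<version-hash>",  # placeholder "owner/model:version"
    model_kwargs={"temperature": 0.75},           # replaces the deprecated `input=`
    streaming=True,
    stop=["\n\n"],
)
completion = llm("Explain maximal marginal relevance in one sentence.")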
@@ -156,7 +156,10 @@ class GoogleCloudEnterpriseSearchRetriever(BaseRetriever):
             else "extractive_segments"
         )

-        for chunk in derived_struct_data.get(chunk_type, []):
+        if chunk_type not in derived_struct_data:
+            continue
+
+        for chunk in derived_struct_data[chunk_type]:
             doc_metadata["source"] = derived_struct_data.get("link", "")

             if chunk_type == "extractive_answers":

@@ -82,6 +82,8 @@ class TestResult(dict):
         _quantiles = df[feedback_cols].quantile(
             quantiles or [0.25, 0.5, 0.75], numeric_only=True
         )
+        _quantiles.loc["mean"] = df[feedback_cols].mean()
+        _quantiles.loc["mode"] = df[feedback_cols].mode().iloc[0]
         return _quantiles.transpose()

     def to_dataframe(self) -> pd.DataFrame:
@@ -864,7 +866,8 @@ def _prepare_eval_run(
             f"Project {project_name} already exists. Please use a different name."
         )
     print(
-        f"View the evaluation results for project '{project_name}' at:\n{project.url}"
+        f"View the evaluation results for project '{project_name}' at:\n{project.url}",
+        flush=True,
     )
     dataset = client.read_dataset(dataset_name=dataset_name)
     examples = list(client.list_examples(dataset_id=dataset.id))
@@ -925,14 +928,14 @@ def _collect_test_results(
     project_name: str,
 ) -> TestResult:
     wait_for_all_tracers()
-    all_feedback = {}
+    all_eval_results = {}
     for c in configs:
         for callback in cast(list, c["callbacks"]):
             if isinstance(callback, EvaluatorCallbackHandler):
-                all_feedback.update(callback.logged_feedback)
+                all_eval_results.update(callback.logged_eval_results)
     results = {}
     for example, output in zip(examples, batch_results):
-        feedback = all_feedback.get(str(example.id), [])
+        feedback = all_eval_results.get(str(example.id), [])
         results[str(example.id)] = {
             "output": output,
             "input": example.inputs,

@@ -44,6 +44,7 @@ from langchain.tools.edenai import (
     EdenAiTextToSpeechTool,
     EdenaiTool,
 )
+from langchain.tools.eleven_labs.text2speech import ElevenLabsText2SpeechTool
 from langchain.tools.file_management import (
     CopyFileTool,
     DeleteFileTool,
@@ -167,6 +168,7 @@ __all__ = [
     "EdenAiSpeechToTextTool",
     "EdenAiTextModerationTool",
     "EdenaiTool",
+    "ElevenLabsText2SpeechTool",
     "ExtractHyperlinksTool",
     "ExtractTextTool",
     "FileSearchTool",

libs/langchain/langchain/tools/eleven_labs/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
"""Eleven Labs Services Tools."""

from langchain.tools.eleven_labs.text2speech import ElevenLabsText2SpeechTool

__all__ = ["ElevenLabsText2SpeechTool"]
libs/langchain/langchain/tools/eleven_labs/models.py (new file, 8 lines)
@@ -0,0 +1,8 @@
from enum import Enum


class ElevenLabsModel(str, Enum):
    """Models available for Eleven Labs Text2Speech."""

    MULTI_LINGUAL = "eleven_multilingual_v1"
    MONO_LINGUAL = "eleven_monolingual_v1"
libs/langchain/langchain/tools/eleven_labs/text2speech.py (new file, 80 lines)
@@ -0,0 +1,80 @@
import tempfile
from enum import Enum
from typing import Any, Dict, Optional, Union

from langchain.callbacks.manager import CallbackManagerForToolRun
from langchain.pydantic_v1 import root_validator
from langchain.tools.base import BaseTool
from langchain.utils import get_from_dict_or_env


def _import_elevenlabs() -> Any:
    try:
        import elevenlabs
    except ImportError as e:
        raise ImportError(
            "Cannot import elevenlabs, please install it with `pip install elevenlabs`."
        ) from e
    return elevenlabs


class ElevenLabsModel(str, Enum):
    """Models available for Eleven Labs Text2Speech."""

    MULTI_LINGUAL = "eleven_multilingual_v1"
    MONO_LINGUAL = "eleven_monolingual_v1"


class ElevenLabsText2SpeechTool(BaseTool):
    """Tool that queries the Eleven Labs Text2Speech API.

    In order to set this up, follow instructions at:
    https://docs.elevenlabs.io/welcome/introduction
    """

    model: Union[ElevenLabsModel, str] = ElevenLabsModel.MULTI_LINGUAL

    name: str = "eleven_labs_text2speech"
    description: str = (
        "A wrapper around Eleven Labs Text2Speech. "
        "Useful for when you need to convert text to speech. "
        "It supports multiple languages, including English, German, Polish, "
        "Spanish, Italian, French, Portuguese, and Hindi. "
    )

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the api key exists in the environment."""
        _ = get_from_dict_or_env(values, "eleven_api_key", "ELEVEN_API_KEY")

        return values

    def _run(
        self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None
    ) -> str:
        """Use the tool."""
        elevenlabs = _import_elevenlabs()
        try:
            speech = elevenlabs.generate(text=query, model=self.model)
            with tempfile.NamedTemporaryFile(
                mode="bx", suffix=".wav", delete=False
            ) as f:
                f.write(speech)
            return f.name
        except Exception as e:
            raise RuntimeError(f"Error while running ElevenLabsText2SpeechTool: {e}")

    def play(self, speech_file: str) -> None:
        """Play the text as speech."""
        elevenlabs = _import_elevenlabs()
        with open(speech_file, mode="rb") as f:
            speech = f.read()

        elevenlabs.play(speech)

    def stream_speech(self, query: str) -> None:
        """Stream the text as speech as it is generated.
        Play the text in your speakers."""
        elevenlabs = _import_elevenlabs()
        speech_stream = elevenlabs.generate(text=query, model=self.model, stream=True)
        elevenlabs.stream(speech_stream)
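A quick sketch of how the new tool might be driven directly, assuming ELEVEN_API_KEY is set in the environment:

from langchain.tools import ElevenLabsText2SpeechTool

tts = ElevenLabsText2SpeechTool()
speech_file = tts.run("Hello from LangChain!")  # writes a temp .wav, returns its path
tts.play(speech_file)                           # or: tts.stream_speech("Hello!")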
@@ -1,6 +1,7 @@
 """Generic utility functions."""
 import contextlib
 import datetime
+import functools
 import importlib
 import warnings
 from importlib.metadata import version
@@ -14,7 +15,8 @@ def xor_args(*arg_groups: Tuple[str, ...]) -> Callable:
     """Validate specified keyword args are mutually exclusive."""

     def decorator(func: Callable) -> Callable:
-        def wrapper(*args: Any, **kwargs: Any) -> Callable:
+        @functools.wraps(func)
+        def wrapper(*args: Any, **kwargs: Any) -> Any:
             """Validate exactly one arg in each group is not None."""
             counts = [
                 sum(1 for arg in arg_group if kwargs.get(arg) is not None)

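The `@functools.wraps(func)` addition matters because without it every function decorated by `xor_args` reports `wrapper` as its name and loses its docstring; a minimal illustration of the general pattern:

import functools

def decorator(func):
    @functools.wraps(func)  # copies __name__, __doc__, __module__, etc. from func
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)
    return wrapper

@decorator
def embed_query(text):
    """Embed a single query string."""
    return text

print(embed_query.__name__)  # prints "embed_query" instead of "wrapper"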
@@ -2,7 +2,18 @@ from __future__ import annotations

 import typing
 import uuid
-from typing import Any, Callable, Iterable, List, Optional, Tuple, Type, TypeVar
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Iterable,
+    List,
+    Optional,
+    Tuple,
+    Type,
+    TypeVar,
+    Union,
+)

 import numpy as np

@@ -18,11 +29,12 @@ CVST = TypeVar("CVST", bound="Cassandra")


 class Cassandra(VectorStore):
-    """`Cassandra` vector store.
+    """Wrapper around Apache Cassandra(R) for vector-store workloads.

-    It based on the Cassandra vector-store capabilities, based on cassIO.
-    There is no notion of a default table name, since each embedding
-    function implies its own vector dimension, which is part of the schema.
+    To use it, you need a recent installation of the `cassio` library
+    and a Cassandra cluster / Astra DB instance supporting vector capabilities.

+    Visit the cassio.org website for extensive quickstarts and code examples.

     Example:
         .. code-block:: python
@@ -31,12 +43,20 @@ class Cassandra(VectorStore):
             from langchain.embeddings.openai import OpenAIEmbeddings

             embeddings = OpenAIEmbeddings()
-            session = ...
-            keyspace = 'my_keyspace'
-            vectorstore = Cassandra(embeddings, session, keyspace, 'my_doc_archive')
+            session = ...             # create your Cassandra session object
+            keyspace = 'my_keyspace'  # the keyspace should exist already
+            table_name = 'my_vector_store'
+            vectorstore = Cassandra(embeddings, session, keyspace, table_name)
     """

-    _embedding_dimension: int | None
+    _embedding_dimension: Union[int, None]

+    @staticmethod
+    def _filter_to_metadata(filter_dict: Optional[Dict[str, str]]) -> Dict[str, Any]:
+        if filter_dict is None:
+            return {}
+        else:
+            return filter_dict
+
     def _get_embedding_dimension(self) -> int:
         if self._embedding_dimension is None:
@@ -81,8 +101,18 @@ class Cassandra(VectorStore):
     def embeddings(self) -> Embeddings:
         return self.embedding

+    @staticmethod
+    def _dont_flip_the_cos_score(distance: float) -> float:
+        # the identity
+        return distance
+
     def _select_relevance_score_fn(self) -> Callable[[float], float]:
-        return self._cosine_relevance_score_fn
+        """
+        The underlying VectorTable already returns a "score proper",
+        i.e. one in [0, 1] where higher means more *similar*,
+        so here the final score transformation is not reversing the interval:
+        """
+        return self._dont_flip_the_cos_score

     def delete_collection(self) -> None:
         """
@@ -172,22 +202,24 @@ class Cassandra(VectorStore):
         self,
         embedding: List[float],
         k: int = 4,
+        filter: Optional[Dict[str, str]] = None,
     ) -> List[Tuple[Document, float, str]]:
         """Return docs most similar to embedding vector.

-        No support for `filter` query (on metadata) along with vector search.
-
         Args:
             embedding (str): Embedding to look up documents similar to.
             k (int): Number of Documents to return. Defaults to 4.
         Returns:
             List of (Document, score, id), the most similar to the query vector.
         """
+        search_metadata = self._filter_to_metadata(filter)
+        #
         hits = self.table.search(
             embedding_vector=embedding,
             top_k=k,
             metric="cos",
             metric_threshold=None,
+            metadata=search_metadata,
         )
         # We stick to 'cos' distance as it can be normalized on a 0-1 axis
         # (1=most relevant), as required by this class' contract.
@@ -207,11 +239,13 @@ class Cassandra(VectorStore):
         self,
         query: str,
         k: int = 4,
+        filter: Optional[Dict[str, str]] = None,
     ) -> List[Tuple[Document, float, str]]:
         embedding_vector = self.embedding.embed_query(query)
         return self.similarity_search_with_score_id_by_vector(
             embedding=embedding_vector,
             k=k,
+            filter=filter,
         )

     # id-unaware search facilities
@@ -219,11 +253,10 @@ class Cassandra(VectorStore):
         self,
         embedding: List[float],
         k: int = 4,
+        filter: Optional[Dict[str, str]] = None,
     ) -> List[Tuple[Document, float]]:
         """Return docs most similar to embedding vector.

-        No support for `filter` query (on metadata) along with vector search.
-
         Args:
             embedding (str): Embedding to look up documents similar to.
             k (int): Number of Documents to return. Defaults to 4.
@@ -235,6 +268,7 @@ class Cassandra(VectorStore):
             for (doc, score, docId) in self.similarity_search_with_score_id_by_vector(
                 embedding=embedding,
                 k=k,
+                filter=filter,
             )
         ]

@@ -242,18 +276,21 @@ class Cassandra(VectorStore):
         self,
         query: str,
         k: int = 4,
+        filter: Optional[Dict[str, str]] = None,
         **kwargs: Any,
     ) -> List[Document]:
         embedding_vector = self.embedding.embed_query(query)
         return self.similarity_search_by_vector(
             embedding_vector,
             k,
+            filter=filter,
         )

     def similarity_search_by_vector(
         self,
         embedding: List[float],
         k: int = 4,
+        filter: Optional[Dict[str, str]] = None,
         **kwargs: Any,
     ) -> List[Document]:
         return [
@@ -261,6 +298,7 @@ class Cassandra(VectorStore):
             for doc, _ in self.similarity_search_with_score_by_vector(
                 embedding,
                 k,
+                filter=filter,
             )
         ]

@@ -268,11 +306,13 @@ class Cassandra(VectorStore):
         self,
         query: str,
         k: int = 4,
+        filter: Optional[Dict[str, str]] = None,
     ) -> List[Tuple[Document, float]]:
         embedding_vector = self.embedding.embed_query(query)
         return self.similarity_search_with_score_by_vector(
             embedding_vector,
             k,
+            filter=filter,
         )

     def max_marginal_relevance_search_by_vector(
@@ -281,6 +321,7 @@ class Cassandra(VectorStore):
         k: int = 4,
         fetch_k: int = 20,
         lambda_mult: float = 0.5,
+        filter: Optional[Dict[str, str]] = None,
         **kwargs: Any,
     ) -> List[Document]:
         """Return docs selected using the maximal marginal relevance.
@@ -296,11 +337,14 @@ class Cassandra(VectorStore):
         Returns:
             List of Documents selected by maximal marginal relevance.
         """
+        search_metadata = self._filter_to_metadata(filter)
+
         prefetchHits = self.table.search(
             embedding_vector=embedding,
             top_k=fetch_k,
             metric="cos",
             metric_threshold=None,
+            metadata=search_metadata,
         )
         # let the mmr utility pick the *indices* in the above array
         mmrChosenIndices = maximal_marginal_relevance(
@@ -328,6 +372,7 @@ class Cassandra(VectorStore):
         k: int = 4,
         fetch_k: int = 20,
         lambda_mult: float = 0.5,
+        filter: Optional[Dict[str, str]] = None,
         **kwargs: Any,
     ) -> List[Document]:
         """Return docs selected using the maximal marginal relevance.
@@ -350,6 +395,7 @@ class Cassandra(VectorStore):
             k,
             fetch_k,
             lambda_mult=lambda_mult,
+            filter=filter,
         )

     @classmethod
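With the `filter` parameter now threaded through every search method, metadata filtering can be combined with vector search; a sketch along the lines of the class docstring (the contact point, keyspace, and table name are placeholders):

from cassandra.cluster import Cluster
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Cassandra

session = Cluster(["127.0.0.1"]).connect()  # placeholder contact point
store = Cassandra(OpenAIEmbeddings(), session, "my_keyspace", "my_vector_store")
hits = store.similarity_search_with_score_id(
    "what is a vector index?", k=4, filter={"source": "docs"}
)
for doc, score, doc_id in hits:
    print(doc_id, round(score, 3))  # score is already in [0, 1]; no flipping needed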
@@ -541,19 +541,28 @@ class Chroma(VectorStore):
             document_id (str): ID of the document to update.
             document (Document): Document to update.
         """
-        text = document.page_content
-        metadata = document.metadata
+        return self.update_documents([document_id], [document])
+
+    def update_documents(self, ids: List[str], documents: List[Document]) -> None:
+        """Update a document in the collection.
+
+        Args:
+            ids (List[str]): List of ids of the document to update.
+            documents (List[Document]): List of documents to update.
+        """
+        text = [document.page_content for document in documents]
+        metadata = [document.metadata for document in documents]
         if self._embedding_function is None:
             raise ValueError(
                 "For update, you must specify an embedding function on creation."
             )
-        embeddings = self._embedding_function.embed_documents([text])
+        embeddings = self._embedding_function.embed_documents(text)

         self._collection.update(
-            ids=[document_id],
+            ids=ids,
             embeddings=embeddings,
-            documents=[text],
-            metadatas=[metadata],
+            documents=text,
+            metadatas=metadata,
         )

     @classmethod
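A sketch of the new batched update, assuming `db` is an existing Chroma instance created with an embedding function:

from langchain.schema import Document

db.update_documents(
    ids=["doc-1", "doc-2"],
    documents=[
        Document(page_content="updated text one", metadata={"rev": 2}),
        Document(page_content="updated text two", metadata={"rev": 2}),
    ],
)  # all texts are re-embedded in a single embed_documents() call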
@@ -1,5 +1,6 @@
 from __future__ import annotations

+import enum
 import logging
 import uuid
 from typing import (
@@ -20,13 +21,44 @@ from langchain.vectorstores.base import VectorStore
 from langchain.vectorstores.utils import DistanceStrategy

 DEFAULT_DISTANCE_STRATEGY = DistanceStrategy.COSINE

-distance_mapping = {
+DISTANCE_MAPPING = {
     DistanceStrategy.EUCLIDEAN_DISTANCE: "euclidean",
     DistanceStrategy.COSINE: "cosine",
 }


+class SearchType(str, enum.Enum):
+    """Enumerator of the search types."""
+
+    VECTOR = "vector"
+    HYBRID = "hybrid"
+
+
+DEFAULT_SEARCH_TYPE = SearchType.VECTOR
+
+
+def _get_search_index_query(search_type: SearchType) -> str:
+    type_to_query_map = {
+        SearchType.VECTOR: (
+            "CALL db.index.vector.queryNodes($index, $k, $embedding) YIELD node, score "
+        ),
+        SearchType.HYBRID: (
+            "CALL { "
+            "CALL db.index.vector.queryNodes($index, $k, $embedding) "
+            "YIELD node, score "
+            "RETURN node, score UNION "
+            "CALL db.index.fulltext.queryNodes($keyword_index, $query, {limit: $k}) "
+            "YIELD node, score "
+            "WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
+            "UNWIND nodes AS n "
+            "RETURN n.node AS node, (n.score / max) AS score "  # We use 0 as min
+            "} "
+            "WITH node, max(score) AS score ORDER BY score DESC LIMIT $k "  # dedup
+        ),
+    }
+    return type_to_query_map[search_type]
+
+
 def check_if_not_null(props: List[str], values: List[Any]) -> None:
     for prop, value in zip(props, values):
         if not value:
@@ -82,9 +114,11 @@ class Neo4jVector(VectorStore):
         self,
         embedding: Embeddings,
         *,
+        search_type: SearchType = SearchType.VECTOR,
         username: Optional[str] = None,
         password: Optional[str] = None,
         url: Optional[str] = None,
+        keyword_index_name: Optional[str] = "keyword",
         database: str = "neo4j",
         index_name: str = "vector",
         node_label: str = "Chunk",
@@ -153,12 +187,14 @@ class Neo4jVector(VectorStore):
         self.embedding = embedding
         self._distance_strategy = distance_strategy
         self.index_name = index_name
+        self.keyword_index_name = keyword_index_name
         self.node_label = node_label
         self.embedding_node_property = embedding_node_property
         self.text_node_property = text_node_property
         self.logger = logger or logging.getLogger(__name__)
         self.override_relevance_score_fn = relevance_score_fn
         self.retrieval_query = retrieval_query
+        self.search_type = search_type
         # Calculate embedding dimension
         self.embedding_dimension = len(embedding.embed_query("foo"))

@@ -263,6 +299,39 @@ class Neo4jVector(VectorStore):
         except IndexError:
             return None

+    def retrieve_existing_fts_index(self) -> Optional[str]:
+        """
+        Check if the fulltext index exists in the Neo4j database
+
+        This method queries the Neo4j database for existing fts indexes
+        with the specified name.
+
+        Returns:
+            (Tuple): keyword index information
+        """
+
+        index_information = self.query(
+            "SHOW INDEXES YIELD name, type, labelsOrTypes, properties, options "
+            "WHERE type = 'FULLTEXT' AND (name = $keyword_index_name "
+            "OR (labelsOrTypes = [$node_label] AND "
+            "properties = [$text_node_property])) "
+            "RETURN name, labelsOrTypes, properties, options ",
+            params={
+                "keyword_index_name": self.keyword_index_name,
+                "node_label": self.node_label,
+                "text_node_property": self.text_node_property,
+            },
+        )
+        # sort by index_name
+        index_information = sort_by_index_name(index_information, self.index_name)
+        try:
+            self.keyword_index_name = index_information[0]["name"]
+            self.text_node_property = index_information[0]["properties"][0]
+            node_label = index_information[0]["labelsOrTypes"][0]
+            return node_label
+        except IndexError:
+            return None
+
     def create_new_index(self) -> None:
         """
         This method constructs a Cypher query and executes it
@@ -282,10 +351,23 @@ class Neo4jVector(VectorStore):
             "node_label": self.node_label,
             "embedding_node_property": self.embedding_node_property,
             "embedding_dimension": self.embedding_dimension,
-            "similarity_metric": distance_mapping[self._distance_strategy],
+            "similarity_metric": DISTANCE_MAPPING[self._distance_strategy],
         }
         self.query(index_query, params=parameters)

+    def create_new_keyword_index(self) -> None:
+        """
+        This method constructs a Cypher query and executes it
+        to create a new full text index in Neo4j.
+        """
+        fts_index_query = (
+            f"CREATE FULLTEXT INDEX {self.keyword_index_name} "
+            f"FOR (n:`{self.node_label}`) ON EACH "
+            f"[n.`{self.text_node_property}`]"
+        )
+
+        self.query(fts_index_query)
+
     @property
     def embeddings(self) -> Embeddings:
         return self.embedding
@@ -299,6 +381,7 @@ class Neo4jVector(VectorStore):
         metadatas: Optional[List[dict]] = None,
         ids: Optional[List[str]] = None,
         create_id_index: bool = True,
+        search_type: SearchType = SearchType.VECTOR,
         **kwargs: Any,
     ) -> Neo4jVector:
         if ids is None:
@@ -309,13 +392,13 @@ class Neo4jVector(VectorStore):

         store = cls(
             embedding=embedding,
+            search_type=search_type,
             **kwargs,
         )

-        # Check if the index already exists
+        # Check if the vector index already exists
         embedding_dimension = store.retrieve_existing_index()

-        # If the index doesn't exist yet
+        # If the vector index doesn't exist yet
         if not embedding_dimension:
             store.create_new_index()
         # If the index already exists, check if embedding dimensions match
@@ -328,6 +411,17 @@ class Neo4jVector(VectorStore):
                 f"Vector index dimension: {embedding_dimension}"
             )

+        if search_type == SearchType.HYBRID:
+            fts_node_label = store.retrieve_existing_fts_index()
+            # If the FTS index doesn't exist yet
+            if not fts_node_label:
+                store.create_new_keyword_index()
+            else:  # Validate that FTS and Vector index use the same information
+                if not fts_node_label == store.node_label:
+                    raise ValueError(
+                        "Vector and keyword index don't index the same node label"
+                    )
+
         # Create unique constraint for faster import
         if create_id_index:
             store.query(
@@ -429,6 +523,7 @@ class Neo4jVector(VectorStore):
         return self.similarity_search_by_vector(
             embedding=embedding,
             k=k,
+            query=query,
         )

     def similarity_search_with_score(
@@ -444,11 +539,13 @@ class Neo4jVector(VectorStore):
             List of Documents most similar to the query and score for each
         """
         embedding = self.embedding.embed_query(query)
-        docs = self.similarity_search_with_score_by_vector(embedding=embedding, k=k)
+        docs = self.similarity_search_with_score_by_vector(
+            embedding=embedding, k=k, query=query
+        )
         return docs

     def similarity_search_with_score_by_vector(
-        self, embedding: List[float], k: int = 4
+        self, embedding: List[float], k: int = 4, **kwargs: Any
     ) -> List[Tuple[Document, float]]:
         """
         Perform a similarity search in the Neo4j database using a
@@ -478,12 +575,14 @@ class Neo4jVector(VectorStore):
             self.retrieval_query if self.retrieval_query else default_retrieval
         )

-        read_query = (
-            "CALL db.index.vector.queryNodes($index, $k, $embedding) "
-            "YIELD node, score "
-        ) + retrieval_query
-
-        parameters = {"index": self.index_name, "k": k, "embedding": embedding}
+        read_query = _get_search_index_query(self.search_type) + retrieval_query
+        parameters = {
+            "index": self.index_name,
+            "k": k,
+            "embedding": embedding,
+            "keyword_index": self.keyword_index_name,
+            "query": kwargs["query"],
+        }

         results = self.query(read_query, params=parameters)

@@ -517,7 +616,7 @@ class Neo4jVector(VectorStore):
             List of Documents most similar to the query vector.
         """
         docs_and_scores = self.similarity_search_with_score_by_vector(
-            embedding=embedding, k=k
+            embedding=embedding, k=k, **kwargs
         )
         return [doc for doc, _ in docs_and_scores]

@@ -596,6 +695,8 @@ class Neo4jVector(VectorStore):
         cls: Type[Neo4jVector],
         embedding: Embeddings,
         index_name: str,
+        search_type: SearchType = DEFAULT_SEARCH_TYPE,
+        keyword_index_name: Optional[str] = None,
         **kwargs: Any,
     ) -> Neo4jVector:
         """
@@ -607,9 +708,17 @@ class Neo4jVector(VectorStore):
         the `index_name` definition.
         """

+        if search_type == SearchType.HYBRID and not keyword_index_name:
+            raise ValueError(
+                "keyword_index name has to be specified "
+                "when using hybrid search option"
+            )
+
         store = cls(
             embedding=embedding,
             index_name=index_name,
+            keyword_index_name=keyword_index_name,
+            search_type=search_type,
             **kwargs,
         )

@@ -630,6 +739,20 @@ class Neo4jVector(VectorStore):
                 f"Vector index dimension: {embedding_dimension}"
             )

+        if search_type == SearchType.HYBRID:
+            fts_node_label = store.retrieve_existing_fts_index()
+            # If the FTS index doesn't exist yet
+            if not fts_node_label:
+                raise ValueError(
+                    "The specified keyword index name does not exist. "
+                    "Make sure to check if you spelled it correctly"
+                )
+            else:  # Validate that FTS and Vector index use the same information
+                if not fts_node_label == store.node_label:
+                    raise ValueError(
+                        "Vector and keyword index don't index the same node label"
+                    )
+
         return store

     @classmethod
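A sketch of opting into the new hybrid mode against an existing index; the connection details are placeholders, and `keyword_index_name` is mandatory here, as the validation above enforces:

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.neo4j_vector import Neo4jVector, SearchType

store = Neo4jVector.from_existing_index(
    OpenAIEmbeddings(),
    url="bolt://localhost:7687", username="neo4j", password="...",  # placeholders
    index_name="vector",
    keyword_index_name="keyword",  # required for SearchType.HYBRID
    search_type=SearchType.HYBRID,
)
docs = store.similarity_search("how do vector indexes work?", k=4)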
@@ -1220,7 +1220,7 @@ class Redis(VectorStore):
         )

     except ImportError:
-        raise ValueError(
+        raise ImportError(
             "Could not import redis python package. "
             "Please install it with `pip install redis`."
         )
@@ -1425,6 +1425,7 @@ class RedisVectorStoreRetriever(VectorStoreRetriever):
         "similarity",
         "similarity_distance_threshold",
         "similarity_score_threshold",
+        "mmr",
     ]
     """Allowed search types."""

@@ -1438,7 +1439,6 @@ class RedisVectorStoreRetriever(VectorStoreRetriever):
     ) -> List[Document]:
         if self.search_type == "similarity":
             docs = self.vectorstore.similarity_search(query, **self.search_kwargs)
-
         elif self.search_type == "similarity_distance_threshold":
             if self.search_kwargs["distance_threshold"] is None:
                 raise ValueError(
@@ -1454,6 +1454,10 @@ class RedisVectorStoreRetriever(VectorStoreRetriever):
                 )
             )
             docs = [doc for doc, _ in docs_and_similarities]
+        elif self.search_type == "mmr":
+            docs = self.vectorstore.max_marginal_relevance_search(
+                query, **self.search_kwargs
+            )
         else:
             raise ValueError(f"search_type of {self.search_type} not allowed.")
         return docs
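With "mmr" added to the allowed search types, the retriever can now route to `max_marginal_relevance_search`; a sketch assuming `rds` is an existing Redis vector store instance:

retriever = rds.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 4, "fetch_k": 20, "lambda_mult": 0.5},
)
docs = retriever.get_relevant_documents("my query")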
@@ -374,7 +374,9 @@ class SingleStoreDB(VectorStore):
                     FROM {} {} ORDER BY __score {} LIMIT %s""".format(
                 self.content_field,
                 self.metadata_field,
-                self.distance_strategy,
+                self.distance_strategy.name
+                if isinstance(self.distance_strategy, DistanceStrategy)
+                else self.distance_strategy,
                 self.vector_field,
                 self.table_name,
                 where_clause,

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langchain"
-version = "0.0.287"
+version = "0.0.290"
 description = "Building applications with LLMs through composability"
 authors = []
 license = "MIT"

@@ -38,7 +38,7 @@ def cassandra_connection() -> Iterator[Tuple[Any, str]]:

 def test_cassandra_cache(cassandra_connection: Tuple[Any, str]) -> None:
     session, keyspace = cassandra_connection
-    cache = CassandraCache(session, keyspace)
+    cache = CassandraCache(session=session, keyspace=keyspace)
     langchain.llm_cache = cache
     llm = FakeLLM()
     params = llm.dict()
@@ -58,7 +58,7 @@ def test_cassandra_cache(cassandra_connection: Tuple[Any, str]) -> None:

 def test_cassandra_cache_ttl(cassandra_connection: Tuple[Any, str]) -> None:
     session, keyspace = cassandra_connection
-    cache = CassandraCache(session, keyspace, ttl_seconds=2)
+    cache = CassandraCache(session=session, keyspace=keyspace, ttl_seconds=2)
     langchain.llm_cache = cache
     llm = FakeLLM()
     params = llm.dict()
@@ -80,7 +80,11 @@ def test_cassandra_cache_ttl(cassandra_connection: Tuple[Any, str]) -> None:

 def test_cassandra_semantic_cache(cassandra_connection: Tuple[Any, str]) -> None:
     session, keyspace = cassandra_connection
-    sem_cache = CassandraSemanticCache(session, keyspace, embedding=FakeEmbeddings())
+    sem_cache = CassandraSemanticCache(
+        session=session,
+        keyspace=keyspace,
+        embedding=FakeEmbeddings(),
+    )
     langchain.llm_cache = sem_cache
     llm = FakeLLM()
     params = llm.dict()

@@ -0,0 +1,85 @@
"""Test Baidu Qianfan Chat Endpoint."""

from langchain.callbacks.manager import CallbackManager
from langchain.chat_models.baidu_qianfan_endpoint import QianfanChatEndpoint
from langchain.schema import (
    AIMessage,
    BaseMessage,
    ChatGeneration,
    HumanMessage,
    LLMResult,
)
from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler


def test_default_call() -> None:
    """Test default model(`ERNIE-Bot`) call."""
    chat = QianfanChatEndpoint()
    response = chat(messages=[HumanMessage(content="Hello")])
    assert isinstance(response, BaseMessage)
    assert isinstance(response.content, str)


def test_model() -> None:
    """Test model kwarg works."""
    chat = QianfanChatEndpoint(model="BLOOMZ-7B")
    response = chat(messages=[HumanMessage(content="Hello")])
    assert isinstance(response, BaseMessage)
    assert isinstance(response.content, str)


def test_endpoint() -> None:
    """Test user custom model deployments like some open source models."""
    chat = QianfanChatEndpoint(endpoint="qianfan_bloomz_7b_compressed")
    response = chat(messages=[HumanMessage(content="Hello")])
    assert isinstance(response, BaseMessage)
    assert isinstance(response.content, str)


def test_multiple_history() -> None:
    """Tests multiple history works."""
    chat = QianfanChatEndpoint()

    response = chat(
        messages=[
            HumanMessage(content="Hello."),
            AIMessage(content="Hello!"),
            HumanMessage(content="How are you doing?"),
        ]
    )
    assert isinstance(response, BaseMessage)
    assert isinstance(response.content, str)


def test_stream() -> None:
    """Test that stream works."""
    chat = QianfanChatEndpoint(streaming=True)
    callback_handler = FakeCallbackHandler()
    callback_manager = CallbackManager([callback_handler])
    response = chat(
        messages=[
            HumanMessage(content="Hello."),
            AIMessage(content="Hello!"),
            HumanMessage(content="Who are you?"),
        ],
        stream=True,
        callbacks=callback_manager,
    )
    assert callback_handler.llm_streams > 0
    assert isinstance(response.content, str)


def test_multiple_messages() -> None:
    """Tests multiple messages works."""
    chat = QianfanChatEndpoint()
    message = HumanMessage(content="Hi, how are you.")
    response = chat.generate([[message], [message]])

    assert isinstance(response, LLMResult)
    assert len(response.generations) == 2
    for generations in response.generations:
        assert len(generations) == 1
        for generation in generations:
            assert isinstance(generation, ChatGeneration)
            assert isinstance(generation.text, str)
            assert generation.text == generation.message.content
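For context, a hedged sketch of how these chat tests would be exercised outside CI; the credential variable names are the ones the Qianfan SDK documents, and are an assumption here:

import os

from langchain.chat_models.baidu_qianfan_endpoint import QianfanChatEndpoint
from langchain.schema import HumanMessage

os.environ["QIANFAN_AK"] = "your-access-key"  # assumed credential env vars
os.environ["QIANFAN_SK"] = "your-secret-key"

chat = QianfanChatEndpoint()  # defaults to ERNIE-Bot, per test_default_call
reply = chat(messages=[HumanMessage(content="Hello")])
print(reply.content)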
@@ -0,0 +1,25 @@
"""Test Baidu Qianfan Embedding Endpoint."""
from langchain.embeddings.baidu_qianfan_endpoint import QianfanEmbeddingsEndpoint


def test_embedding_multiple_documents() -> None:
    documents = ["foo", "bar"]
    embedding = QianfanEmbeddingsEndpoint()
    output = embedding.embed_documents(documents)
    assert len(output) == 2
    assert len(output[0]) == 384
    assert len(output[1]) == 384


def test_embedding_query() -> None:
    query = "foo"
    embedding = QianfanEmbeddingsEndpoint()
    output = embedding.embed_query(query)
    assert len(output) == 384


def test_model() -> None:
    documents = ["hi", "qianfan"]
    embedding = QianfanEmbeddingsEndpoint(model="Embedding-V1")
    output = embedding.embed_documents(documents)
    assert len(output) == 2
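The embedding assertions pin the default model to 384-dimensional vectors. A short usage sketch, under the same credential assumption as above:

from langchain.embeddings.baidu_qianfan_endpoint import QianfanEmbeddingsEndpoint

embedder = QianfanEmbeddingsEndpoint()
vectors = embedder.embed_documents(["foo", "bar"])
# Per the tests above, each vector should have 384 dimensions.
assert all(len(v) == 384 for v in vectors)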
@@ -0,0 +1,37 @@
"""Test Baidu Qianfan LLM Endpoint."""
from typing import Generator

import pytest

from langchain.llms.baidu_qianfan_endpoint import QianfanLLMEndpoint
from langchain.schema import LLMResult


def test_call() -> None:
    """Test valid call to qianfan."""
    llm = QianfanLLMEndpoint()
    output = llm("write a joke")
    assert isinstance(output, str)


def test_generate() -> None:
    """Test valid call to qianfan."""
    llm = QianfanLLMEndpoint()
    output = llm.generate(["write a joke"])
    assert isinstance(output, LLMResult)
    assert isinstance(output.generations, list)


def test_generate_stream() -> None:
    """Test valid call to qianfan."""
    llm = QianfanLLMEndpoint()
    output = llm.stream("write a joke")
    assert isinstance(output, Generator)


@pytest.mark.asyncio
async def test_qianfan_aio() -> None:
    llm = QianfanLLMEndpoint(streaming=True)

    async for token in llm.astream("hi qianfan."):
        assert isinstance(token, str)
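The asyncio test above implies token-level async streaming. A minimal driver for it, assuming the same credentials are configured:

import asyncio

from langchain.llms.baidu_qianfan_endpoint import QianfanLLMEndpoint


async def main() -> None:
    llm = QianfanLLMEndpoint(streaming=True)
    # astream yields string tokens as they arrive.
    async for token in llm.astream("hi qianfan."):
        print(token, end="", flush=True)


asyncio.run(main())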
@@ -4,16 +4,15 @@ from langchain.callbacks.manager import CallbackManager
from langchain.llms.replicate import Replicate
from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler

-TEST_MODEL_NAME = "replicate/hello-world"
-TEST_MODEL_VER = "5c7d5dc6dd8bf75c1acaa8565735e7986bc5b66206b55cca93cb72c9bf15ccaa"
-TEST_MODEL = TEST_MODEL_NAME + ":" + TEST_MODEL_VER
+TEST_MODEL = "replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5"  # noqa: E501


def test_replicate_call() -> None:
    """Test simple non-streaming call to Replicate."""
    llm = Replicate(model=TEST_MODEL)
-   output = llm("LangChain")
-   assert output == "hello LangChain"
+   output = llm("What is LangChain")
+   assert output
+   assert isinstance(output, str)


def test_replicate_streaming_call() -> None:
@@ -22,13 +21,24 @@ def test_replicate_streaming_call() -> None:
    callback_manager = CallbackManager([callback_handler])

    llm = Replicate(streaming=True, callback_manager=callback_manager, model=TEST_MODEL)
-   output = llm("LangChain")
-   assert output == "hello LangChain"
-   assert callback_handler.llm_streams == 15
+   output = llm("What is LangChain")
+   assert output
+   assert isinstance(output, str)


-def test_replicate_stop_sequence() -> None:
-    """Test call to Replicate with a stop sequence."""
-    llm = Replicate(model=TEST_MODEL)
-    output = llm("one two three", stop=["two"])
-    assert output == "hello one "
+def test_replicate_model_kwargs() -> None:
+    """Test simple non-streaming call to Replicate."""
+    llm = Replicate(
+        model=TEST_MODEL, model_kwargs={"max_length": 100, "temperature": 0.01}
+    )
+    long_output = llm("What is LangChain")
+    llm = Replicate(
+        model=TEST_MODEL, model_kwargs={"max_length": 10, "temperature": 0.01}
+    )
+    short_output = llm("What is LangChain")
+    assert len(short_output) < len(long_output)


+def test_replicate_input() -> None:
+    llm = Replicate(model=TEST_MODEL, input={"max_length": 10})
+    assert llm.model_kwargs == {"max_length": 10}

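The updated tests swap the deterministic hello-world model for a real LLM and assert on output shape rather than exact strings. A sketch of the model_kwargs usage they cover (assumes REPLICATE_API_TOKEN is set in the environment):

from langchain.llms.replicate import Replicate

llm = Replicate(
    model=(
        "replicate/dolly-v2-12b:"
        "ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5"
    ),
    # Fewer tokens and near-zero temperature for short, stable answers.
    model_kwargs={"max_length": 100, "temperature": 0.01},
)
print(llm("What is LangChain"))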
@@ -1,4 +1,5 @@
"""Test Cassandra functionality."""
+import time
from typing import List, Optional, Type

from cassandra.cluster import Cluster
@@ -61,9 +62,9 @@ def test_cassandra_with_score() -> None:
    docs = [o[0] for o in output]
    scores = [o[1] for o in output]
    assert docs == [
-        Document(page_content="foo", metadata={"page": 0}),
-        Document(page_content="bar", metadata={"page": 1}),
-        Document(page_content="baz", metadata={"page": 2}),
+        Document(page_content="foo", metadata={"page": "0.0"}),
+        Document(page_content="bar", metadata={"page": "1.0"}),
+        Document(page_content="baz", metadata={"page": "2.0"}),
    ]
    assert scores[0] > scores[1] > scores[2]

@@ -76,10 +77,10 @@ def test_cassandra_max_marginal_relevance_search() -> None:

             ______ v2
            /      \
-          /        \  v1
+          /        |  v1
     v3   |    .    |  query
-          \        /  v0
-           \______/   (N.B. very crude drawing)
+          |        /  v0
+          |______/    (N.B. very crude drawing)

    With fetch_k==3 and k==2, when query is at (1, ),
    one expects that v2 and v0 are returned (in some order).
@@ -94,8 +95,8 @@ def test_cassandra_max_marginal_relevance_search() -> None:
        (mmr_doc.page_content, mmr_doc.metadata["page"]) for mmr_doc in output
    }
    assert output_set == {
-        ("+0.25", 2),
-        ("-0.124", 0),
+        ("+0.25", "2.0"),
+        ("-0.124", "0.0"),
    }


@@ -150,6 +151,7 @@ def test_cassandra_delete() -> None:
    assert len(output) == 1

    docsearch.clear()
+    time.sleep(0.3)
    output = docsearch.similarity_search("foo", k=10)
    assert len(output) == 0

@@ -3,7 +3,7 @@ import os
from typing import List

from langchain.docstore.document import Document
-from langchain.vectorstores import Neo4jVector
+from langchain.vectorstores.neo4j_vector import Neo4jVector, SearchType
from langchain.vectorstores.utils import DistanceStrategy
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings

@@ -26,7 +26,7 @@ def drop_vector_indexes(store: Neo4jVector) -> None:
    all_indexes = store.query(
        """
      SHOW INDEXES YIELD name, type
-      WHERE type = "VECTOR"
+      WHERE type IN ["VECTOR", "FULLTEXT"]
      RETURN name
      """
    )
@@ -331,3 +331,142 @@ def test_neo4jvector_prefer_indexname_insert() -> None:
        Document(page_content="foo", metadata={}),
    ]
    drop_vector_indexes(existing_index)


def test_neo4jvector_hybrid() -> None:
    """Test end to end construction with hybrid search."""
    text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    text_embedding_pairs = list(zip(texts, text_embeddings))
    docsearch = Neo4jVector.from_embeddings(
        text_embeddings=text_embedding_pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        url=url,
        username=username,
        password=password,
        pre_delete_collection=True,
        search_type=SearchType.HYBRID,
    )
    output = docsearch.similarity_search("foo", k=1)
    assert output == [Document(page_content="foo")]

    drop_vector_indexes(docsearch)


def test_neo4jvector_hybrid_deduplicate() -> None:
    """Test result deduplication with hybrid search."""
    text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    text_embedding_pairs = list(zip(texts, text_embeddings))
    docsearch = Neo4jVector.from_embeddings(
        text_embeddings=text_embedding_pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        url=url,
        username=username,
        password=password,
        pre_delete_collection=True,
        search_type=SearchType.HYBRID,
    )
    output = docsearch.similarity_search("foo", k=3)
    assert output == [
        Document(page_content="foo"),
        Document(page_content="bar"),
        Document(page_content="baz"),
    ]

    drop_vector_indexes(docsearch)


def test_neo4jvector_hybrid_retrieval_query() -> None:
    """Test custom retrieval_query with hybrid search."""
    text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    text_embedding_pairs = list(zip(texts, text_embeddings))
    docsearch = Neo4jVector.from_embeddings(
        text_embeddings=text_embedding_pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        url=url,
        username=username,
        password=password,
        pre_delete_collection=True,
        search_type=SearchType.HYBRID,
        retrieval_query="RETURN 'moo' AS text, score, {test: 'test'} AS metadata",
    )
    output = docsearch.similarity_search("foo", k=1)
    assert output == [Document(page_content="moo", metadata={"test": "test"})]

    drop_vector_indexes(docsearch)


def test_neo4jvector_hybrid_retrieval_query2() -> None:
    """Test custom retrieval_query with hybrid search."""
    text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    text_embedding_pairs = list(zip(texts, text_embeddings))
    docsearch = Neo4jVector.from_embeddings(
        text_embeddings=text_embedding_pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        url=url,
        username=username,
        password=password,
        pre_delete_collection=True,
        search_type=SearchType.HYBRID,
        retrieval_query="RETURN node.text AS text, score, {test: 'test'} AS metadata",
    )
    output = docsearch.similarity_search("foo", k=1)
    assert output == [Document(page_content="foo", metadata={"test": "test"})]

    drop_vector_indexes(docsearch)


def test_neo4jvector_missing_keyword() -> None:
    """Test hybrid search with missing keyword_index_search."""
    text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    text_embedding_pairs = list(zip(texts, text_embeddings))
    docsearch = Neo4jVector.from_embeddings(
        text_embeddings=text_embedding_pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        url=url,
        username=username,
        password=password,
        pre_delete_collection=True,
    )
    try:
        Neo4jVector.from_existing_index(
            embedding=FakeEmbeddingsWithOsDimension(),
            url=url,
            username=username,
            password=password,
            index_name="vector",
            search_type=SearchType.HYBRID,
        )
    except ValueError as e:
        assert str(e) == (
            "keyword_index name has to be specified when " "using hybrid search option"
        )
    drop_vector_indexes(docsearch)


def test_neo4jvector_hybrid_from_existing() -> None:
    """Test hybrid search with missing keyword_index_search."""
    text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    text_embedding_pairs = list(zip(texts, text_embeddings))
    Neo4jVector.from_embeddings(
        text_embeddings=text_embedding_pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        url=url,
        username=username,
        password=password,
        pre_delete_collection=True,
        search_type=SearchType.HYBRID,
    )
    existing = Neo4jVector.from_existing_index(
        embedding=FakeEmbeddingsWithOsDimension(),
        url=url,
        username=username,
        password=password,
        index_name="vector",
        keyword_index_name="keyword",
        search_type=SearchType.HYBRID,
    )

    output = existing.similarity_search("foo", k=1)
    assert output == [Document(page_content="foo")]

    drop_vector_indexes(existing)

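These tests introduce a hybrid (vector plus full-text) search mode for Neo4jVector. A sketch of attaching to pre-built indexes; the connection details are placeholders, and FakeEmbeddings stands in for a real embedding model:

from langchain.embeddings import FakeEmbeddings
from langchain.vectorstores.neo4j_vector import Neo4jVector, SearchType

store = Neo4jVector.from_existing_index(
    embedding=FakeEmbeddings(size=1536),
    url="bolt://localhost:7687",    # assumed connection details
    username="neo4j",
    password="password",
    index_name="vector",
    keyword_index_name="keyword",   # required when using SearchType.HYBRID
    search_type=SearchType.HYBRID,
)
docs = store.similarity_search("foo", k=1)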
@@ -0,0 +1,76 @@
import pytest

from langchain.agents.openai_functions_agent.base import (
    _FunctionsAgentAction,
    _parse_ai_message,
)
from langchain.schema import AgentFinish, OutputParserException
from langchain.schema.messages import AIMessage, SystemMessage


# Test: _parse_ai_message() function.
class TestParseAIMessage:
    # Test: Pass Non-AIMessage.
    def test_not_an_ai(self) -> None:
        err = f"Expected an AI message got {str(SystemMessage)}"
        with pytest.raises(TypeError, match=err):
            _parse_ai_message(SystemMessage(content="x"))

    # Test: Model response (not a function call).
    def test_model_response(self) -> None:
        msg = AIMessage(content="Model response.")
        result = _parse_ai_message(msg)

        assert isinstance(result, AgentFinish)
        assert result.return_values == {"output": "Model response."}
        assert result.log == "Model response."

    # Test: Model response with a function call.
    def test_func_call(self) -> None:
        msg = AIMessage(
            content="LLM thoughts.",
            additional_kwargs={
                "function_call": {"name": "foo", "arguments": '{"param": 42}'}
            },
        )
        result = _parse_ai_message(msg)

        assert isinstance(result, _FunctionsAgentAction)
        assert result.tool == "foo"
        assert result.tool_input == {"param": 42}
        assert result.log == (
            "\nInvoking: `foo` with `{'param': 42}`\nresponded: LLM thoughts.\n\n"
        )
        assert result.message_log == [msg]

    # Test: Model response with a function call (old style tools).
    def test_func_call_oldstyle(self) -> None:
        msg = AIMessage(
            content="LLM thoughts.",
            additional_kwargs={
                "function_call": {"name": "foo", "arguments": '{"__arg1": "42"}'}
            },
        )
        result = _parse_ai_message(msg)

        assert isinstance(result, _FunctionsAgentAction)
        assert result.tool == "foo"
        assert result.tool_input == "42"
        assert result.log == (
            "\nInvoking: `foo` with `42`\nresponded: LLM thoughts.\n\n"
        )
        assert result.message_log == [msg]

    # Test: Invalid function call args.
    def test_func_call_invalid(self) -> None:
        msg = AIMessage(
            content="LLM thoughts.",
            additional_kwargs={"function_call": {"name": "foo", "arguments": "{42]"}},
        )

        err = (
            "Could not parse tool input: {'name': 'foo', 'arguments': '{42]'} "
            "because the `arguments` is not valid JSON."
        )
        with pytest.raises(OutputParserException, match=err):
            _parse_ai_message(msg)
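The test class above pins down the message shape the agent parser consumes: OpenAI packs the call into additional_kwargs["function_call"], with arguments as a JSON string and a lone "__arg1" key marking an old-style single-input tool. A small sketch of building such a message:

import json

from langchain.schema.messages import AIMessage

# `arguments` is a JSON *string*, not a dict; _parse_ai_message decodes it.
msg = AIMessage(
    content="LLM thoughts.",
    additional_kwargs={
        "function_call": {"name": "foo", "arguments": json.dumps({"param": 42})}
    },
)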
@@ -0,0 +1,90 @@
import json

import pytest

from langchain.agents.openai_functions_multi_agent.base import (
    _FunctionsAgentAction,
    _parse_ai_message,
)
from langchain.schema import AgentFinish, OutputParserException
from langchain.schema.messages import AIMessage, SystemMessage


# Test: _parse_ai_message() function.
class TestParseAIMessage:
    # Test: Pass Non-AIMessage.
    def test_not_an_ai(self) -> None:
        err = f"Expected an AI message got {str(SystemMessage)}"
        with pytest.raises(TypeError, match=err):
            _parse_ai_message(SystemMessage(content="x"))

    # Test: Model response (not a function call).
    def test_model_response(self) -> None:
        msg = AIMessage(content="Model response.")
        result = _parse_ai_message(msg)

        assert isinstance(result, AgentFinish)
        assert result.return_values == {"output": "Model response."}
        assert result.log == "Model response."

    # Test: Model response with a function call.
    def test_func_call(self) -> None:
        act = json.dumps([{"action_name": "foo", "action": {"param": 42}}])

        msg = AIMessage(
            content="LLM thoughts.",
            additional_kwargs={
                "function_call": {"name": "foo", "arguments": f'{{"actions": {act}}}'}
            },
        )
        result = _parse_ai_message(msg)

        assert isinstance(result, list)
        assert len(result) == 1

        action = result[0]
        assert isinstance(action, _FunctionsAgentAction)
        assert action.tool == "foo"
        assert action.tool_input == {"param": 42}
        assert action.log == (
            "\nInvoking: `foo` with `{'param': 42}`\nresponded: LLM thoughts.\n\n"
        )
        assert action.message_log == [msg]

    # Test: Model response with a function call (old style tools).
    def test_func_call_oldstyle(self) -> None:
        act = json.dumps([{"action_name": "foo", "action": {"__arg1": "42"}}])

        msg = AIMessage(
            content="LLM thoughts.",
            additional_kwargs={
                "function_call": {"name": "foo", "arguments": f'{{"actions": {act}}}'}
            },
        )
        result = _parse_ai_message(msg)

        assert isinstance(result, list)
        assert len(result) == 1

        action = result[0]
        assert isinstance(action, _FunctionsAgentAction)
        assert action.tool == "foo"
        assert action.tool_input == "42"
        assert action.log == (
            "\nInvoking: `foo` with `42`\nresponded: LLM thoughts.\n\n"
        )
        assert action.message_log == [msg]

    # Test: Invalid function call args.
    def test_func_call_invalid(self) -> None:
        msg = AIMessage(
            content="LLM thoughts.",
            additional_kwargs={"function_call": {"name": "foo", "arguments": "{42]"}},
        )

        err = (
            "Could not parse tool input: {'name': 'foo', 'arguments': '{42]'} "
            "because the `arguments` is not valid JSON."
        )
        with pytest.raises(OutputParserException, match=err):
            _parse_ai_message(msg)
@@ -26,6 +26,7 @@ def mock_feature_layer():  # type: ignore
    feature_layer.properties = {
        "description": "<html><body>Some HTML content</body></html>",
        "name": "test",
+       "serviceItemId": "testItemId",
    }
    return feature_layer

@@ -46,3 +47,80 @@ def test_lazy_load(arcgis_mocks, mock_feature_layer, mock_gis):  # type: ignore
    assert len(documents) == 1
    assert documents[0].metadata["url"] == "https://example.com/layer_url"
    # Add more assertions based on your expected behavior


def test_initialization_with_string_layer(  # type: ignore
    arcgis_mocks, mock_feature_layer, mock_gis
):
    layer_url = "https://example.com/layer_url"

    with patch("arcgis.features.FeatureLayer", return_value=mock_feature_layer):
        loader = ArcGISLoader(layer=layer_url, gis=mock_gis)

    assert loader.url == layer_url


def test_layer_description_provided_by_user(  # type: ignore
    arcgis_mocks, mock_feature_layer, mock_gis
):
    custom_description = "Custom Layer Description"
    loader = ArcGISLoader(
        layer=mock_feature_layer, gis=mock_gis, lyr_desc=custom_description
    )

    layer_properties = loader._get_layer_properties(lyr_desc=custom_description)

    assert layer_properties["layer_description"] == custom_description


def test_initialization_without_arcgis(mock_feature_layer, mock_gis):  # type: ignore
    with patch.dict("sys.modules", {"arcgis": None}):
        with pytest.raises(
            ImportError, match="arcgis is required to use the ArcGIS Loader"
        ):
            ArcGISLoader(layer=mock_feature_layer, gis=mock_gis)


def test_get_layer_properties_with_description(  # type: ignore
    arcgis_mocks, mock_feature_layer, mock_gis
):
    loader = ArcGISLoader(
        layer=mock_feature_layer, gis=mock_gis, lyr_desc="Custom Description"
    )

    props = loader._get_layer_properties("Custom Description")

    assert props["layer_description"] == "Custom Description"


def test_load_method(arcgis_mocks, mock_feature_layer, mock_gis):  # type: ignore
    loader = ArcGISLoader(layer=mock_feature_layer, gis=mock_gis)

    documents = loader.load()

    assert len(documents) == 1


def test_geometry_returned(arcgis_mocks, mock_feature_layer, mock_gis):  # type: ignore
    mock_feature_layer.query.return_value = [
        MagicMock(
            as_dict={
                "attributes": {"field": "value"},
                "geometry": {"type": "point", "coordinates": [0, 0]},
            }
        )
    ]

    loader = ArcGISLoader(layer=mock_feature_layer, gis=mock_gis, return_geometry=True)

    documents = list(loader.lazy_load())
    assert "geometry" in documents[0].metadata


def test_geometry_not_returned(  # type: ignore
    arcgis_mocks, mock_feature_layer, mock_gis
):
    loader = ArcGISLoader(layer=mock_feature_layer, gis=mock_gis, return_geometry=False)

    documents = list(loader.lazy_load())
    assert "geometry" not in documents[0].metadata
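A brief usage sketch matching what these loader tests mock out; the URL is the placeholder from the tests, and the arcgis package must be installed:

from langchain.document_loaders import ArcGISLoader

loader = ArcGISLoader(
    layer="https://example.com/layer_url",  # placeholder layer URL
    return_geometry=True,                   # surfaces feature geometry in doc metadata
)
docs = loader.load()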
@@ -36,6 +36,7 @@ _EXPECTED = [
    "EdenAiTextModerationTool",
    "EdenAiTextToSpeechTool",
    "EdenaiTool",
+   "ElevenLabsText2SpeechTool",
    "ExtractHyperlinksTool",
    "ExtractTextTool",
    "FileSearchTool",
