mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-05 04:38:26 +00:00
Fix make docs_build
and related scripts (#7276)
**Description: a description of the change** Fixed `make docs_build` and related scripts, which caused errors. There are several changes. First, I split the build of the documentation and the API Reference into two separate commands, because building them separately takes less time. The commands for the documentation are `make docs_build`, `make docs_clean`, and `make docs_linkcheck`. The commands for the API Reference are `make api_docs_build`, `make api_docs_clean`, and `make api_docs_linkcheck`. It looked like `docs/.local_build.sh` could be used to build the documentation, so I used that. Since `.local_build.sh` was also building the API Reference internally, I removed that step. I also added some Bash options to `.local_build.sh` so that it stops on errors. Furthermore, I added `cd "${SCRIPT_DIR}"` at the beginning so that the script works no matter which directory it is executed from. `docs/api_reference/api_reference.rst` is removed because it is generated by `docs/api_reference/create_api_rst.py`, and it has been added to .gitignore. Finally, the description in CONTRIBUTING.md was modified. **Issue: the issue # it fixes (if applicable)** https://github.com/hwchase17/langchain/issues/6413 **Dependencies: any dependencies required for this change** `nbdoc` was missing from the docs group, so it was added. I installed it with the `poetry add --group docs nbdoc` command. I am concerned about whether any modifications are needed to poetry.lock. I would greatly appreciate it if you could pay close attention to this file during the review. **Tag maintainer** - General / Misc / if you don't know who to tag: @baskaryan If this PR needs any additional changes, I'll be happy to make them! --------- Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
74c28df363
commit
2667ddc686
24
.github/CONTRIBUTING.md
vendored
24
.github/CONTRIBUTING.md
vendored
@ -224,30 +224,38 @@ When you run `poetry install`, the `langchain` package is installed as editable
|
|||||||
|
|
||||||
### Contribute Documentation
|
### Contribute Documentation
|
||||||
|
|
||||||
Docs are largely autogenerated by [sphinx](https://www.sphinx-doc.org/en/master/) from the code.
|
The docs directory contains Documentation and API Reference.
|
||||||
|
|
||||||
|
Documentation is built using [Docusaurus 2](https://docusaurus.io/).
|
||||||
|
|
||||||
|
The API Reference is largely autogenerated by [sphinx](https://www.sphinx-doc.org/en/master/) from the code.
|
||||||
For that reason, we ask that you add good documentation to all classes and methods.
|
For that reason, we ask that you add good documentation to all classes and methods.
|
||||||
|
|
||||||
Similar to linting, we recognize documentation can be annoying. If you do not want to do it, please contact a project maintainer, and they can help you with it. We do not want this to be a blocker for good code getting contributed.
|
Similar to linting, we recognize documentation can be annoying. If you do not want to do it, please contact a project maintainer, and they can help you with it. We do not want this to be a blocker for good code getting contributed.
|
||||||
|
|
||||||
### Build Documentation Locally
|
### Build Documentation Locally
|
||||||
|
|
||||||
|
In the following commands, the prefix `api_` indicates that those are operations for the API Reference.
|
||||||
|
|
||||||
Before building the documentation, it is always a good idea to clean the build directory:
|
Before building the documentation, it is always a good idea to clean the build directory:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
make docs_clean
|
make docs_clean
|
||||||
|
make api_docs_clean
|
||||||
```
|
```
|
||||||
|
|
||||||
Next, you can run the linkchecker to make sure all links are valid:
|
Next, you can build the documentation as outlined below:
|
||||||
|
|
||||||
```bash
|
|
||||||
make docs_linkcheck
|
|
||||||
```
|
|
||||||
|
|
||||||
Finally, you can build the documentation as outlined below:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
make docs_build
|
make docs_build
|
||||||
|
make api_docs_build
|
||||||
|
```
|
||||||
|
|
||||||
|
Finally, you can run the linkchecker to make sure all links are valid:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
make docs_linkcheck
|
||||||
|
make api_docs_linkcheck
|
||||||
```
|
```
|
||||||
|
|
||||||
## 🏭 Release Process
|
## 🏭 Release Process
|
||||||
|
5
.gitignore
vendored
5
.gitignore
vendored
@ -161,7 +161,12 @@ docs/node_modules/
|
|||||||
docs/.docusaurus/
|
docs/.docusaurus/
|
||||||
docs/.cache-loader/
|
docs/.cache-loader/
|
||||||
docs/_dist
|
docs/_dist
|
||||||
|
docs/api_reference/api_reference.rst
|
||||||
docs/api_reference/_build
|
docs/api_reference/_build
|
||||||
|
docs/api_reference/*/
|
||||||
|
!docs/api_reference/_static/
|
||||||
|
!docs/api_reference/templates/
|
||||||
|
!docs/api_reference/themes/
|
||||||
docs/docs_skeleton/build
|
docs/docs_skeleton/build
|
||||||
docs/docs_skeleton/node_modules
|
docs/docs_skeleton/node_modules
|
||||||
docs/docs_skeleton/yarn.lock
|
docs/docs_skeleton/yarn.lock
|
||||||
|
45
Makefile
45
Makefile
@ -1,4 +1,4 @@
|
|||||||
.PHONY: all clean format lint test tests test_watch integration_tests docker_tests help extended_tests
|
.PHONY: all clean docs_build docs_clean docs_linkcheck api_docs_build api_docs_clean api_docs_linkcheck format lint test tests test_watch integration_tests docker_tests help extended_tests
|
||||||
|
|
||||||
# Default target executed when no arguments are given to make.
|
# Default target executed when no arguments are given to make.
|
||||||
all: help
|
all: help
|
||||||
@ -14,6 +14,33 @@ coverage:
|
|||||||
--cov-report xml \
|
--cov-report xml \
|
||||||
--cov-report term-missing:skip-covered
|
--cov-report term-missing:skip-covered
|
||||||
|
|
||||||
|
######################
|
||||||
|
# DOCUMENTATION
|
||||||
|
######################
|
||||||
|
|
||||||
|
clean: docs_clean api_docs_clean
|
||||||
|
|
||||||
|
|
||||||
|
docs_build:
|
||||||
|
docs/.local_build.sh
|
||||||
|
|
||||||
|
docs_clean:
|
||||||
|
rm -r docs/_dist
|
||||||
|
|
||||||
|
docs_linkcheck:
|
||||||
|
poetry run linkchecker docs/_dist/docs_skeleton/ --ignore-url node_modules
|
||||||
|
|
||||||
|
api_docs_build:
|
||||||
|
poetry run python docs/api_reference/create_api_rst.py
|
||||||
|
cd docs/api_reference && poetry run make html
|
||||||
|
|
||||||
|
api_docs_clean:
|
||||||
|
rm -f docs/api_reference/api_reference.rst
|
||||||
|
cd docs/api_reference && poetry run make clean
|
||||||
|
|
||||||
|
api_docs_linkcheck:
|
||||||
|
poetry run linkchecker docs/api_reference/_build/html/index.html
|
||||||
|
|
||||||
# Define a variable for the test file path.
|
# Define a variable for the test file path.
|
||||||
TEST_FILE ?= tests/unit_tests/
|
TEST_FILE ?= tests/unit_tests/
|
||||||
|
|
||||||
@ -36,22 +63,6 @@ docker_tests:
|
|||||||
docker build -t my-langchain-image:test .
|
docker build -t my-langchain-image:test .
|
||||||
docker run --rm my-langchain-image:test
|
docker run --rm my-langchain-image:test
|
||||||
|
|
||||||
######################
|
|
||||||
# DOCUMENTATION
|
|
||||||
######################
|
|
||||||
|
|
||||||
docs_compile:
|
|
||||||
poetry run nbdoc_build --srcdir $(srcdir)
|
|
||||||
|
|
||||||
docs_build:
|
|
||||||
cd docs && poetry run make html
|
|
||||||
|
|
||||||
docs_clean:
|
|
||||||
cd docs && poetry run make clean
|
|
||||||
|
|
||||||
docs_linkcheck:
|
|
||||||
poetry run linkchecker docs/_build/html/index.html
|
|
||||||
|
|
||||||
######################
|
######################
|
||||||
# LINTING AND FORMATTING
|
# LINTING AND FORMATTING
|
||||||
######################
|
######################
|
||||||
|
@ -1,10 +1,15 @@
|
|||||||
mkdir _dist
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -o errexit
|
||||||
|
set -o nounset
|
||||||
|
set -o pipefail
|
||||||
|
set -o xtrace
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "$0")"; pwd)"
|
||||||
|
cd "${SCRIPT_DIR}"
|
||||||
|
|
||||||
|
mkdir -p _dist/docs_skeleton
|
||||||
cp -r {docs_skeleton,snippets} _dist
|
cp -r {docs_skeleton,snippets} _dist
|
||||||
mkdir -p _dist/docs_skeleton/static/api_reference
|
|
||||||
cd api_reference
|
|
||||||
poetry run make html
|
|
||||||
cp -r _build/* ../_dist/docs_skeleton/static/api_reference
|
|
||||||
cd ..
|
|
||||||
cp -r extras/* _dist/docs_skeleton/docs
|
cp -r extras/* _dist/docs_skeleton/docs
|
||||||
cd _dist/docs_skeleton
|
cd _dist/docs_skeleton
|
||||||
poetry run nbdoc_build
|
poetry run nbdoc_build
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -120,7 +120,8 @@
|
|||||||
" history = []\n",
|
" history = []\n",
|
||||||
" while True:\n",
|
" while True:\n",
|
||||||
" user_input = input(\"\\n>>> input >>>\\n>>>: \")\n",
|
" user_input = input(\"\\n>>> input >>>\\n>>>: \")\n",
|
||||||
" if user_input == 'q': break\n",
|
" if user_input == \"q\":\n",
|
||||||
|
" break\n",
|
||||||
" history.append(HumanMessage(content=user_input))\n",
|
" history.append(HumanMessage(content=user_input))\n",
|
||||||
" history.append(llm(history))"
|
" history.append(llm(history))"
|
||||||
]
|
]
|
||||||
|
@ -117,11 +117,11 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Initialize the language model\n",
|
"# Initialize the language model\n",
|
||||||
"# You can add your own OpenAI API key by adding openai_api_key=\"<your_api_key>\" \n",
|
"# You can add your own OpenAI API key by adding openai_api_key=\"<your_api_key>\"\n",
|
||||||
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n",
|
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Initialize the SerpAPIWrapper for search functionality\n",
|
"# Initialize the SerpAPIWrapper for search functionality\n",
|
||||||
"#Replace <your_api_key> in openai_api_key=\"<your_api_key>\" with your actual SerpAPI key.\n",
|
"# Replace <your_api_key> in openai_api_key=\"<your_api_key>\" with your actual SerpAPI key.\n",
|
||||||
"search = SerpAPIWrapper()\n",
|
"search = SerpAPIWrapper()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Define a list of tools offered by the agent\n",
|
"# Define a list of tools offered by the agent\n",
|
||||||
@ -130,7 +130,7 @@
|
|||||||
" name=\"Search\",\n",
|
" name=\"Search\",\n",
|
||||||
" func=search.run,\n",
|
" func=search.run,\n",
|
||||||
" coroutine=search.arun,\n",
|
" coroutine=search.arun,\n",
|
||||||
" description=\"Useful when you need to answer questions about current events. You should ask targeted questions.\"\n",
|
" description=\"Useful when you need to answer questions about current events. You should ask targeted questions.\",\n",
|
||||||
" ),\n",
|
" ),\n",
|
||||||
"]"
|
"]"
|
||||||
]
|
]
|
||||||
@ -143,8 +143,12 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"functions_agent = initialize_agent(tools, llm, agent=AgentType.OPENAI_MULTI_FUNCTIONS, verbose=False)\n",
|
"functions_agent = initialize_agent(\n",
|
||||||
"conversations_agent = initialize_agent(tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=False)"
|
" tools, llm, agent=AgentType.OPENAI_MULTI_FUNCTIONS, verbose=False\n",
|
||||||
|
")\n",
|
||||||
|
"conversations_agent = initialize_agent(\n",
|
||||||
|
" tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=False\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -199,14 +203,14 @@
|
|||||||
"# This will lead to larger confidence intervals downstream.\n",
|
"# This will lead to larger confidence intervals downstream.\n",
|
||||||
"batch = []\n",
|
"batch = []\n",
|
||||||
"for example in tqdm(dataset[:20]):\n",
|
"for example in tqdm(dataset[:20]):\n",
|
||||||
" batch.extend([agent.acall(example['inputs']) for agent in agents])\n",
|
" batch.extend([agent.acall(example[\"inputs\"]) for agent in agents])\n",
|
||||||
" if len(batch) >= concurrency_level:\n",
|
" if len(batch) >= concurrency_level:\n",
|
||||||
" batch_results = await asyncio.gather(*batch, return_exceptions=True)\n",
|
" batch_results = await asyncio.gather(*batch, return_exceptions=True)\n",
|
||||||
" results.extend(list(zip(*[iter(batch_results)]*2)))\n",
|
" results.extend(list(zip(*[iter(batch_results)] * 2)))\n",
|
||||||
" batch = []\n",
|
" batch = []\n",
|
||||||
"if batch:\n",
|
"if batch:\n",
|
||||||
" batch_results = await asyncio.gather(*batch, return_exceptions=True)\n",
|
" batch_results = await asyncio.gather(*batch, return_exceptions=True)\n",
|
||||||
" results.extend(list(zip(*[iter(batch_results)]*2)))"
|
" results.extend(list(zip(*[iter(batch_results)] * 2)))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -230,11 +234,12 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"import random\n",
|
"import random\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"\n",
|
||||||
"def predict_preferences(dataset, results) -> list:\n",
|
"def predict_preferences(dataset, results) -> list:\n",
|
||||||
" preferences = []\n",
|
" preferences = []\n",
|
||||||
"\n",
|
"\n",
|
||||||
" for example, (res_a, res_b) in zip(dataset, results):\n",
|
" for example, (res_a, res_b) in zip(dataset, results):\n",
|
||||||
" input_ = example['inputs']\n",
|
" input_ = example[\"inputs\"]\n",
|
||||||
" # Flip a coin to reduce persistent position bias\n",
|
" # Flip a coin to reduce persistent position bias\n",
|
||||||
" if random.random() < 0.5:\n",
|
" if random.random() < 0.5:\n",
|
||||||
" pred_a, pred_b = res_a, res_b\n",
|
" pred_a, pred_b = res_a, res_b\n",
|
||||||
@ -243,9 +248,9 @@
|
|||||||
" pred_a, pred_b = res_b, res_a\n",
|
" pred_a, pred_b = res_b, res_a\n",
|
||||||
" a, b = \"b\", \"a\"\n",
|
" a, b = \"b\", \"a\"\n",
|
||||||
" eval_res = eval_chain.evaluate_string_pairs(\n",
|
" eval_res = eval_chain.evaluate_string_pairs(\n",
|
||||||
" prediction=pred_a['output'] if isinstance(pred_a, dict) else str(pred_a),\n",
|
" prediction=pred_a[\"output\"] if isinstance(pred_a, dict) else str(pred_a),\n",
|
||||||
" prediction_b=pred_b['output'] if isinstance(pred_b, dict) else str(pred_b),\n",
|
" prediction_b=pred_b[\"output\"] if isinstance(pred_b, dict) else str(pred_b),\n",
|
||||||
" input=input_\n",
|
" input=input_,\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
" if eval_res[\"value\"] == \"A\":\n",
|
" if eval_res[\"value\"] == \"A\":\n",
|
||||||
" preferences.append(a)\n",
|
" preferences.append(a)\n",
|
||||||
@ -298,10 +303,7 @@
|
|||||||
" \"b\": \"Structured Chat Agent\",\n",
|
" \"b\": \"Structured Chat Agent\",\n",
|
||||||
"}\n",
|
"}\n",
|
||||||
"counts = Counter(preferences)\n",
|
"counts = Counter(preferences)\n",
|
||||||
"pref_ratios = {\n",
|
"pref_ratios = {k: v / len(preferences) for k, v in counts.items()}\n",
|
||||||
" k: v/len(preferences) for k, v in\n",
|
|
||||||
" counts.items()\n",
|
|
||||||
"}\n",
|
|
||||||
"for k, v in pref_ratios.items():\n",
|
"for k, v in pref_ratios.items():\n",
|
||||||
" print(f\"{name_map.get(k)}: {v:.2%}\")"
|
" print(f\"{name_map.get(k)}: {v:.2%}\")"
|
||||||
]
|
]
|
||||||
@ -327,13 +329,16 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from math import sqrt\n",
|
"from math import sqrt\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def wilson_score_interval(preferences: list, which: str = \"a\", z: float = 1.96) -> tuple:\n",
|
"\n",
|
||||||
|
"def wilson_score_interval(\n",
|
||||||
|
" preferences: list, which: str = \"a\", z: float = 1.96\n",
|
||||||
|
") -> tuple:\n",
|
||||||
" \"\"\"Estimate the confidence interval using the Wilson score.\n",
|
" \"\"\"Estimate the confidence interval using the Wilson score.\n",
|
||||||
" \n",
|
"\n",
|
||||||
" See: https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval\n",
|
" See: https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval\n",
|
||||||
" for more details, including when to use it and when it should not be used.\n",
|
" for more details, including when to use it and when it should not be used.\n",
|
||||||
" \"\"\"\n",
|
" \"\"\"\n",
|
||||||
" total_preferences = preferences.count('a') + preferences.count('b')\n",
|
" total_preferences = preferences.count(\"a\") + preferences.count(\"b\")\n",
|
||||||
" n_s = preferences.count(which)\n",
|
" n_s = preferences.count(which)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" if total_preferences == 0:\n",
|
" if total_preferences == 0:\n",
|
||||||
@ -342,8 +347,11 @@
|
|||||||
" p_hat = n_s / total_preferences\n",
|
" p_hat = n_s / total_preferences\n",
|
||||||
"\n",
|
"\n",
|
||||||
" denominator = 1 + (z**2) / total_preferences\n",
|
" denominator = 1 + (z**2) / total_preferences\n",
|
||||||
" adjustment = (z / denominator) * sqrt(p_hat*(1-p_hat)/total_preferences + (z**2)/(4*total_preferences*total_preferences))\n",
|
" adjustment = (z / denominator) * sqrt(\n",
|
||||||
" center = (p_hat + (z**2) / (2*total_preferences)) / denominator\n",
|
" p_hat * (1 - p_hat) / total_preferences\n",
|
||||||
|
" + (z**2) / (4 * total_preferences * total_preferences)\n",
|
||||||
|
" )\n",
|
||||||
|
" center = (p_hat + (z**2) / (2 * total_preferences)) / denominator\n",
|
||||||
" lower_bound = min(max(center - adjustment, 0.0), 1.0)\n",
|
" lower_bound = min(max(center - adjustment, 0.0), 1.0)\n",
|
||||||
" upper_bound = min(max(center + adjustment, 0.0), 1.0)\n",
|
" upper_bound = min(max(center + adjustment, 0.0), 1.0)\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -369,7 +377,9 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"for which_, name in name_map.items():\n",
|
"for which_, name in name_map.items():\n",
|
||||||
" low, high = wilson_score_interval(preferences, which=which_)\n",
|
" low, high = wilson_score_interval(preferences, which=which_)\n",
|
||||||
" print(f'The \"{name}\" would be preferred between {low:.2%} and {high:.2%} percent of the time (with 95% confidence).')"
|
" print(\n",
|
||||||
|
" f'The \"{name}\" would be preferred between {low:.2%} and {high:.2%} percent of the time (with 95% confidence).'\n",
|
||||||
|
" )"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -398,13 +408,16 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"from scipy import stats\n",
|
"from scipy import stats\n",
|
||||||
|
"\n",
|
||||||
"preferred_model = max(pref_ratios, key=pref_ratios.get)\n",
|
"preferred_model = max(pref_ratios, key=pref_ratios.get)\n",
|
||||||
"successes = preferences.count(preferred_model)\n",
|
"successes = preferences.count(preferred_model)\n",
|
||||||
"n = len(preferences) - preferences.count(None)\n",
|
"n = len(preferences) - preferences.count(None)\n",
|
||||||
"p_value = stats.binom_test(successes, n, p=0.5, alternative='two-sided')\n",
|
"p_value = stats.binom_test(successes, n, p=0.5, alternative=\"two-sided\")\n",
|
||||||
"print(f\"\"\"The p-value is {p_value:.5f}. If the null hypothesis is true (i.e., if the selected eval chain actually has no preference between the models),\n",
|
"print(\n",
|
||||||
|
" f\"\"\"The p-value is {p_value:.5f}. If the null hypothesis is true (i.e., if the selected eval chain actually has no preference between the models),\n",
|
||||||
"then there is a {p_value:.5%} chance of observing the {name_map.get(preferred_model)} be preferred at least {successes}\n",
|
"then there is a {p_value:.5%} chance of observing the {name_map.get(preferred_model)} be preferred at least {successes}\n",
|
||||||
"times out of {n} trials.\"\"\")"
|
"times out of {n} trials.\"\"\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -54,7 +54,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"llm = ChatOpenAI(temperature=0)\n",
|
"llm = ChatOpenAI(temperature=0)\n",
|
||||||
"query=\"What's the origin of the term synecdoche?\"\n",
|
"query = \"What's the origin of the term synecdoche?\"\n",
|
||||||
"prediction = llm.predict(query)"
|
"prediction = llm.predict(query)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -151,19 +151,22 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"eval_chain = CriteriaEvalChain.from_llm(llm=llm, criteria=\"correctness\", requires_reference=True)\n",
|
"eval_chain = CriteriaEvalChain.from_llm(\n",
|
||||||
|
" llm=llm, criteria=\"correctness\", requires_reference=True\n",
|
||||||
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# We can even override the model's learned knowledge using ground truth labels\n",
|
"# We can even override the model's learned knowledge using ground truth labels\n",
|
||||||
"eval_result = eval_chain.evaluate_strings(\n",
|
"eval_result = eval_chain.evaluate_strings(\n",
|
||||||
" input=\"What is the capital of the US?\",\n",
|
" input=\"What is the capital of the US?\",\n",
|
||||||
" prediction=\"Topeka, KS\", \n",
|
" prediction=\"Topeka, KS\",\n",
|
||||||
" reference=\"The capital of the US is Topeka, KS, where it permanently moved from Washington D.C. on May 16, 2023\")\n",
|
" reference=\"The capital of the US is Topeka, KS, where it permanently moved from Washington D.C. on May 16, 2023\",\n",
|
||||||
|
")\n",
|
||||||
"print(f'With ground truth: {eval_result[\"score\"]}')\n",
|
"print(f'With ground truth: {eval_result[\"score\"]}')\n",
|
||||||
"\n",
|
"\n",
|
||||||
"eval_chain = CriteriaEvalChain.from_llm(llm=llm, criteria=\"correctness\")\n",
|
"eval_chain = CriteriaEvalChain.from_llm(llm=llm, criteria=\"correctness\")\n",
|
||||||
"eval_result = eval_chain.evaluate_strings(\n",
|
"eval_result = eval_chain.evaluate_strings(\n",
|
||||||
" input=\"What is the capital of the US?\",\n",
|
" input=\"What is the capital of the US?\",\n",
|
||||||
" prediction=\"Topeka, KS\", \n",
|
" prediction=\"Topeka, KS\",\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(f'Withoutg ground truth: {eval_result[\"score\"]}')"
|
"print(f'Withoutg ground truth: {eval_result[\"score\"]}')"
|
||||||
]
|
]
|
||||||
@ -230,9 +233,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"custom_criterion = {\n",
|
"custom_criterion = {\"numeric\": \"Does the output contain numeric information?\"}\n",
|
||||||
" \"numeric\": \"Does the output contain numeric information?\"\n",
|
|
||||||
"}\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"eval_chain = CriteriaEvalChain.from_llm(llm=llm, criteria=custom_criterion)\n",
|
"eval_chain = CriteriaEvalChain.from_llm(llm=llm, criteria=custom_criterion)\n",
|
||||||
"eval_result = eval_chain.evaluate_strings(prediction=prediction, input=query)\n",
|
"eval_result = eval_chain.evaluate_strings(prediction=prediction, input=query)\n",
|
||||||
@ -269,11 +270,17 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"# Example that complies\n",
|
"# Example that complies\n",
|
||||||
"query = \"What's the population of lagos?\"\n",
|
"query = \"What's the population of lagos?\"\n",
|
||||||
"eval_result = eval_chain.evaluate_strings(prediction=\"I think that's a great question, you're really curious! About 30 million people live in Lagos, Nigeria, as of 2023.\", input=query)\n",
|
"eval_result = eval_chain.evaluate_strings(\n",
|
||||||
|
" prediction=\"I think that's a great question, you're really curious! About 30 million people live in Lagos, Nigeria, as of 2023.\",\n",
|
||||||
|
" input=query,\n",
|
||||||
|
")\n",
|
||||||
"print(\"Meets criteria: \", eval_result[\"score\"])\n",
|
"print(\"Meets criteria: \", eval_result[\"score\"])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Example that does not comply\n",
|
"# Example that does not comply\n",
|
||||||
"eval_result = eval_chain.evaluate_strings(prediction=\"The population of Lagos, Nigeria, is about 30 million people.\", input=query)\n",
|
"eval_result = eval_chain.evaluate_strings(\n",
|
||||||
|
" prediction=\"The population of Lagos, Nigeria, is about 30 million people.\",\n",
|
||||||
|
" input=query,\n",
|
||||||
|
")\n",
|
||||||
"print(\"Does not meet criteria: \", eval_result[\"score\"])"
|
"print(\"Does not meet criteria: \", eval_result[\"score\"])"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -350,8 +357,13 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"eval_chain = CriteriaEvalChain.from_llm(llm=llm, criteria=[PRINCIPLES[\"harmful1\"], PRINCIPLES[\"harmful2\"]])\n",
|
"eval_chain = CriteriaEvalChain.from_llm(\n",
|
||||||
"eval_result = eval_chain.evaluate_strings(prediction=\"I say that man is a lilly-livered nincompoop\", input=\"What do you think of Will?\")\n",
|
" llm=llm, criteria=[PRINCIPLES[\"harmful1\"], PRINCIPLES[\"harmful2\"]]\n",
|
||||||
|
")\n",
|
||||||
|
"eval_result = eval_chain.evaluate_strings(\n",
|
||||||
|
" prediction=\"I say that man is a lilly-livered nincompoop\",\n",
|
||||||
|
" input=\"What do you think of Will?\",\n",
|
||||||
|
")\n",
|
||||||
"eval_result"
|
"eval_result"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -339,9 +339,9 @@
|
|||||||
" agent_trajectory=test_outputs_one[\"intermediate_steps\"],\n",
|
" agent_trajectory=test_outputs_one[\"intermediate_steps\"],\n",
|
||||||
" reference=(\n",
|
" reference=(\n",
|
||||||
" \"You need many more than 100,000 ping-pong balls in the empire state building.\"\n",
|
" \"You need many more than 100,000 ping-pong balls in the empire state building.\"\n",
|
||||||
" )\n",
|
" ),\n",
|
||||||
")\n",
|
")\n",
|
||||||
" \n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"Score from 1 to 5: \", evaluation[\"score\"])\n",
|
"print(\"Score from 1 to 5: \", evaluation[\"score\"])\n",
|
||||||
"print(\"Reasoning: \", evaluation[\"reasoning\"])"
|
"print(\"Reasoning: \", evaluation[\"reasoning\"])"
|
||||||
|
@ -47,7 +47,9 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"import os\n",
|
"import os\n",
|
||||||
"\n",
|
"\n",
|
||||||
"os.environ[\"SERPAPI_API_KEY\"] = \"897780527132b5f31d8d73c40c820d5ef2c2279687efa69f413a61f752027747\""
|
"os.environ[\n",
|
||||||
|
" \"SERPAPI_API_KEY\"\n",
|
||||||
|
"] = \"897780527132b5f31d8d73c40c820d5ef2c2279687efa69f413a61f752027747\""
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -58,11 +60,11 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Initialize the OpenAI language model\n",
|
"# Initialize the OpenAI language model\n",
|
||||||
"#Replace <your_api_key> in openai_api_key=\"<your_api_key>\" with your actual OpenAI key.\n",
|
"# Replace <your_api_key> in openai_api_key=\"<your_api_key>\" with your actual OpenAI key.\n",
|
||||||
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n",
|
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Initialize the SerpAPIWrapper for search functionality\n",
|
"# Initialize the SerpAPIWrapper for search functionality\n",
|
||||||
"#Replace <your_api_key> in openai_api_key=\"<your_api_key>\" with your actual SerpAPI key.\n",
|
"# Replace <your_api_key> in openai_api_key=\"<your_api_key>\" with your actual SerpAPI key.\n",
|
||||||
"search = SerpAPIWrapper()\n",
|
"search = SerpAPIWrapper()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Define a list of tools offered by the agent\n",
|
"# Define a list of tools offered by the agent\n",
|
||||||
@ -70,9 +72,9 @@
|
|||||||
" Tool(\n",
|
" Tool(\n",
|
||||||
" name=\"Search\",\n",
|
" name=\"Search\",\n",
|
||||||
" func=search.run,\n",
|
" func=search.run,\n",
|
||||||
" description=\"Useful when you need to answer questions about current events. You should ask targeted questions.\"\n",
|
" description=\"Useful when you need to answer questions about current events. You should ask targeted questions.\",\n",
|
||||||
" ),\n",
|
" ),\n",
|
||||||
"]\n"
|
"]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -82,7 +84,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"mrkl = initialize_agent(tools, llm, agent=AgentType.OPENAI_MULTI_FUNCTIONS, verbose=True)"
|
"mrkl = initialize_agent(\n",
|
||||||
|
" tools, llm, agent=AgentType.OPENAI_MULTI_FUNCTIONS, verbose=True\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -94,6 +98,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# Do this so we can see exactly what's going on under the hood\n",
|
"# Do this so we can see exactly what's going on under the hood\n",
|
||||||
"import langchain\n",
|
"import langchain\n",
|
||||||
|
"\n",
|
||||||
"langchain.debug = True"
|
"langchain.debug = True"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -206,9 +211,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"mrkl.run(\n",
|
"mrkl.run(\"What is the weather in LA and SF?\")"
|
||||||
" \"What is the weather in LA and SF?\"\n",
|
|
||||||
")"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -229,12 +232,12 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"mrkl = initialize_agent(\n",
|
"mrkl = initialize_agent(\n",
|
||||||
" tools, \n",
|
" tools,\n",
|
||||||
" llm, \n",
|
" llm,\n",
|
||||||
" agent=AgentType.OPENAI_FUNCTIONS, \n",
|
" agent=AgentType.OPENAI_FUNCTIONS,\n",
|
||||||
" verbose=True, \n",
|
" verbose=True,\n",
|
||||||
" max_iterations=2, \n",
|
" max_iterations=2,\n",
|
||||||
" early_stopping_method=\"generate\"\n",
|
" early_stopping_method=\"generate\",\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -410,9 +413,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"mrkl.run(\n",
|
"mrkl.run(\"What is the weather in NYC today, yesterday, and the day before?\")"
|
||||||
" \"What is the weather in NYC today, yesterday, and the day before?\"\n",
|
|
||||||
")"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -78,6 +78,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from langchain.prompts import MessagesPlaceholder\n",
|
"from langchain.prompts import MessagesPlaceholder\n",
|
||||||
"from langchain.memory import ConversationBufferMemory\n",
|
"from langchain.memory import ConversationBufferMemory\n",
|
||||||
|
"\n",
|
||||||
"agent_kwargs = {\n",
|
"agent_kwargs = {\n",
|
||||||
" \"extra_prompt_messages\": [MessagesPlaceholder(variable_name=\"memory\")],\n",
|
" \"extra_prompt_messages\": [MessagesPlaceholder(variable_name=\"memory\")],\n",
|
||||||
"}\n",
|
"}\n",
|
||||||
@ -92,12 +93,12 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"agent = initialize_agent(\n",
|
"agent = initialize_agent(\n",
|
||||||
" tools, \n",
|
" tools,\n",
|
||||||
" llm, \n",
|
" llm,\n",
|
||||||
" agent=AgentType.OPENAI_FUNCTIONS, \n",
|
" agent=AgentType.OPENAI_FUNCTIONS,\n",
|
||||||
" verbose=True, \n",
|
" verbose=True,\n",
|
||||||
" agent_kwargs=agent_kwargs, \n",
|
" agent_kwargs=agent_kwargs,\n",
|
||||||
" memory=memory\n",
|
" memory=memory,\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -42,15 +42,14 @@
|
|||||||
"import yfinance as yf\n",
|
"import yfinance as yf\n",
|
||||||
"from datetime import datetime, timedelta\n",
|
"from datetime import datetime, timedelta\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"\n",
|
||||||
"def get_current_stock_price(ticker):\n",
|
"def get_current_stock_price(ticker):\n",
|
||||||
" \"\"\"Method to get current stock price\"\"\"\n",
|
" \"\"\"Method to get current stock price\"\"\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
" ticker_data = yf.Ticker(ticker)\n",
|
" ticker_data = yf.Ticker(ticker)\n",
|
||||||
" recent = ticker_data.history(period='1d')\n",
|
" recent = ticker_data.history(period=\"1d\")\n",
|
||||||
" return {\n",
|
" return {\"price\": recent.iloc[0][\"Close\"], \"currency\": ticker_data.info[\"currency\"]}\n",
|
||||||
" 'price': recent.iloc[0]['Close'],\n",
|
"\n",
|
||||||
" 'currency': ticker_data.info['currency']\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"def get_stock_performance(ticker, days):\n",
|
"def get_stock_performance(ticker, days):\n",
|
||||||
" \"\"\"Method to get stock price change in percentage\"\"\"\n",
|
" \"\"\"Method to get stock price change in percentage\"\"\"\n",
|
||||||
@ -58,11 +57,9 @@
|
|||||||
" past_date = datetime.today() - timedelta(days=days)\n",
|
" past_date = datetime.today() - timedelta(days=days)\n",
|
||||||
" ticker_data = yf.Ticker(ticker)\n",
|
" ticker_data = yf.Ticker(ticker)\n",
|
||||||
" history = ticker_data.history(start=past_date)\n",
|
" history = ticker_data.history(start=past_date)\n",
|
||||||
" old_price = history.iloc[0]['Close']\n",
|
" old_price = history.iloc[0][\"Close\"]\n",
|
||||||
" current_price = history.iloc[-1]['Close']\n",
|
" current_price = history.iloc[-1][\"Close\"]\n",
|
||||||
" return {\n",
|
" return {\"percent_change\": ((current_price - old_price) / old_price) * 100}"
|
||||||
" 'percent_change': ((current_price - old_price)/old_price)*100\n",
|
|
||||||
" }"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -88,7 +85,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"get_current_stock_price('MSFT')"
|
"get_current_stock_price(\"MSFT\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -114,7 +111,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"get_stock_performance('MSFT', 30)"
|
"get_stock_performance(\"MSFT\", 30)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -138,10 +135,13 @@
|
|||||||
"from pydantic import BaseModel, Field\n",
|
"from pydantic import BaseModel, Field\n",
|
||||||
"from langchain.tools import BaseTool\n",
|
"from langchain.tools import BaseTool\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"\n",
|
||||||
"class CurrentStockPriceInput(BaseModel):\n",
|
"class CurrentStockPriceInput(BaseModel):\n",
|
||||||
" \"\"\"Inputs for get_current_stock_price\"\"\"\n",
|
" \"\"\"Inputs for get_current_stock_price\"\"\"\n",
|
||||||
|
"\n",
|
||||||
" ticker: str = Field(description=\"Ticker symbol of the stock\")\n",
|
" ticker: str = Field(description=\"Ticker symbol of the stock\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"\n",
|
||||||
"class CurrentStockPriceTool(BaseTool):\n",
|
"class CurrentStockPriceTool(BaseTool):\n",
|
||||||
" name = \"get_current_stock_price\"\n",
|
" name = \"get_current_stock_price\"\n",
|
||||||
" description = \"\"\"\n",
|
" description = \"\"\"\n",
|
||||||
@ -160,8 +160,10 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"class StockPercentChangeInput(BaseModel):\n",
|
"class StockPercentChangeInput(BaseModel):\n",
|
||||||
" \"\"\"Inputs for get_stock_performance\"\"\"\n",
|
" \"\"\"Inputs for get_stock_performance\"\"\"\n",
|
||||||
|
"\n",
|
||||||
" ticker: str = Field(description=\"Ticker symbol of the stock\")\n",
|
" ticker: str = Field(description=\"Ticker symbol of the stock\")\n",
|
||||||
" days: int = Field(description='Timedelta days to get past date from current date')\n",
|
" days: int = Field(description=\"Timedelta days to get past date from current date\")\n",
|
||||||
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"class StockPerformanceTool(BaseTool):\n",
|
"class StockPerformanceTool(BaseTool):\n",
|
||||||
" name = \"get_stock_performance\"\n",
|
" name = \"get_stock_performance\"\n",
|
||||||
@ -202,15 +204,9 @@
|
|||||||
"from langchain.chat_models import ChatOpenAI\n",
|
"from langchain.chat_models import ChatOpenAI\n",
|
||||||
"from langchain.agents import initialize_agent\n",
|
"from langchain.agents import initialize_agent\n",
|
||||||
"\n",
|
"\n",
|
||||||
"llm = ChatOpenAI(\n",
|
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0613\", temperature=0)\n",
|
||||||
" model=\"gpt-3.5-turbo-0613\",\n",
|
|
||||||
" temperature=0\n",
|
|
||||||
")\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"tools = [\n",
|
"tools = [CurrentStockPriceTool(), StockPerformanceTool()]\n",
|
||||||
" CurrentStockPriceTool(),\n",
|
|
||||||
" StockPerformanceTool()\n",
|
|
||||||
"]\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"agent = initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True)"
|
"agent = initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True)"
|
||||||
]
|
]
|
||||||
@ -261,7 +257,9 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"agent.run(\"What is the current price of Microsoft stock? How it has performed over past 6 months?\")"
|
"agent.run(\n",
|
||||||
|
" \"What is the current price of Microsoft stock? How it has performed over past 6 months?\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -355,7 +353,9 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"agent.run('In the past 3 months, which stock between Microsoft and Google has performed the best?')"
|
"agent.run(\n",
|
||||||
|
" \"In the past 3 months, which stock between Microsoft and Google has performed the best?\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
@ -79,10 +79,10 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n",
|
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n",
|
||||||
"agent = initialize_agent(\n",
|
"agent = initialize_agent(\n",
|
||||||
" toolkit.get_tools(), \n",
|
" toolkit.get_tools(),\n",
|
||||||
" llm, \n",
|
" llm,\n",
|
||||||
" agent=AgentType.OPENAI_FUNCTIONS, \n",
|
" agent=AgentType.OPENAI_FUNCTIONS,\n",
|
||||||
" verbose=True, \n",
|
" verbose=True,\n",
|
||||||
" agent_kwargs=agent_kwargs,\n",
|
" agent_kwargs=agent_kwargs,\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
|
@ -47,14 +47,14 @@
|
|||||||
"files = [\n",
|
"files = [\n",
|
||||||
" # https://abc.xyz/investor/static/pdf/2023Q1_alphabet_earnings_release.pdf\n",
|
" # https://abc.xyz/investor/static/pdf/2023Q1_alphabet_earnings_release.pdf\n",
|
||||||
" {\n",
|
" {\n",
|
||||||
" \"name\": \"alphabet-earnings\", \n",
|
" \"name\": \"alphabet-earnings\",\n",
|
||||||
" \"path\": \"/Users/harrisonchase/Downloads/2023Q1_alphabet_earnings_release.pdf\",\n",
|
" \"path\": \"/Users/harrisonchase/Downloads/2023Q1_alphabet_earnings_release.pdf\",\n",
|
||||||
" }, \n",
|
" },\n",
|
||||||
" # https://digitalassets.tesla.com/tesla-contents/image/upload/IR/TSLA-Q1-2023-Update\n",
|
" # https://digitalassets.tesla.com/tesla-contents/image/upload/IR/TSLA-Q1-2023-Update\n",
|
||||||
" {\n",
|
" {\n",
|
||||||
" \"name\": \"tesla-earnings\", \n",
|
" \"name\": \"tesla-earnings\",\n",
|
||||||
" \"path\": \"/Users/harrisonchase/Downloads/TSLA-Q1-2023-Update.pdf\"\n",
|
" \"path\": \"/Users/harrisonchase/Downloads/TSLA-Q1-2023-Update.pdf\",\n",
|
||||||
" }\n",
|
" },\n",
|
||||||
"]\n",
|
"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"for file in files:\n",
|
"for file in files:\n",
|
||||||
@ -64,14 +64,14 @@
|
|||||||
" docs = text_splitter.split_documents(pages)\n",
|
" docs = text_splitter.split_documents(pages)\n",
|
||||||
" embeddings = OpenAIEmbeddings()\n",
|
" embeddings = OpenAIEmbeddings()\n",
|
||||||
" retriever = FAISS.from_documents(docs, embeddings).as_retriever()\n",
|
" retriever = FAISS.from_documents(docs, embeddings).as_retriever()\n",
|
||||||
" \n",
|
"\n",
|
||||||
" # Wrap retrievers in a Tool\n",
|
" # Wrap retrievers in a Tool\n",
|
||||||
" tools.append(\n",
|
" tools.append(\n",
|
||||||
" Tool(\n",
|
" Tool(\n",
|
||||||
" args_schema=DocumentInput,\n",
|
" args_schema=DocumentInput,\n",
|
||||||
" name=file[\"name\"], \n",
|
" name=file[\"name\"],\n",
|
||||||
" description=f\"useful when you want to answer questions about {file['name']}\",\n",
|
" description=f\"useful when you want to answer questions about {file['name']}\",\n",
|
||||||
" func=RetrievalQA.from_chain_type(llm=llm, retriever=retriever)\n",
|
" func=RetrievalQA.from_chain_type(llm=llm, retriever=retriever),\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
" )"
|
" )"
|
||||||
]
|
]
|
||||||
@ -130,7 +130,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"llm = ChatOpenAI(\n",
|
"llm = ChatOpenAI(\n",
|
||||||
" temperature=0,\n",
|
" temperature=0,\n",
|
||||||
" model=\"gpt-3.5-turbo-0613\", \n",
|
" model=\"gpt-3.5-turbo-0613\",\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"agent = initialize_agent(\n",
|
"agent = initialize_agent(\n",
|
||||||
@ -161,6 +161,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import langchain\n",
|
"import langchain\n",
|
||||||
|
"\n",
|
||||||
"langchain.debug = True"
|
"langchain.debug = True"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -396,7 +397,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"llm = ChatOpenAI(\n",
|
"llm = ChatOpenAI(\n",
|
||||||
" temperature=0,\n",
|
" temperature=0,\n",
|
||||||
" model=\"gpt-3.5-turbo-0613\", \n",
|
" model=\"gpt-3.5-turbo-0613\",\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"agent = initialize_agent(\n",
|
"agent = initialize_agent(\n",
|
||||||
|
@ -136,9 +136,11 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"agent.run(\"Create an email draft for me to edit of a letter from the perspective of a sentient parrot\"\n",
|
"agent.run(\n",
|
||||||
|
" \"Create an email draft for me to edit of a letter from the perspective of a sentient parrot\"\n",
|
||||||
" \" who is looking to collaborate on some research with her\"\n",
|
" \" who is looking to collaborate on some research with her\"\n",
|
||||||
" \" estranged friend, a cat. Under no circumstances may you send the message, however.\")"
|
" \" estranged friend, a cat. Under no circumstances may you send the message, however.\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -160,7 +162,9 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"agent.run(\"Could you search in my drafts folder and let me know if any of them are about collaboration?\")"
|
"agent.run(\n",
|
||||||
|
" \"Could you search in my drafts folder and let me know if any of them are about collaboration?\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -190,7 +194,9 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"agent.run(\"Can you schedule a 30 minute meeting with a sentient parrot to discuss research collaborations on October 3, 2023 at 2 pm Easter Time?\")"
|
"agent.run(\n",
|
||||||
|
" \"Can you schedule a 30 minute meeting with a sentient parrot to discuss research collaborations on October 3, 2023 at 2 pm Easter Time?\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -210,7 +216,9 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"agent.run(\"Can you tell me if I have any events on October 3, 2023 in Eastern Time, and if so, tell me if any of them are with a sentient parrot?\")"
|
"agent.run(\n",
|
||||||
|
" \"Can you tell me if I have any events on October 3, 2023 in Eastern Time, and if so, tell me if any of them are with a sentient parrot?\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
@ -34,6 +34,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import os\n",
|
"import os\n",
|
||||||
|
"\n",
|
||||||
"os.environ[\"DATAFORSEO_LOGIN\"] = \"your_api_access_username\"\n",
|
"os.environ[\"DATAFORSEO_LOGIN\"] = \"your_api_access_username\"\n",
|
||||||
"os.environ[\"DATAFORSEO_PASSWORD\"] = \"your_api_access_password\"\n",
|
"os.environ[\"DATAFORSEO_PASSWORD\"] = \"your_api_access_password\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -88,7 +89,8 @@
|
|||||||
"json_wrapper = DataForSeoAPIWrapper(\n",
|
"json_wrapper = DataForSeoAPIWrapper(\n",
|
||||||
" json_result_types=[\"organic\", \"knowledge_graph\", \"answer_box\"],\n",
|
" json_result_types=[\"organic\", \"knowledge_graph\", \"answer_box\"],\n",
|
||||||
" json_result_fields=[\"type\", \"title\", \"description\", \"text\"],\n",
|
" json_result_fields=[\"type\", \"title\", \"description\", \"text\"],\n",
|
||||||
" top_count=3)"
|
" top_count=3,\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -119,7 +121,8 @@
|
|||||||
" top_count=10,\n",
|
" top_count=10,\n",
|
||||||
" json_result_types=[\"organic\", \"local_pack\"],\n",
|
" json_result_types=[\"organic\", \"local_pack\"],\n",
|
||||||
" json_result_fields=[\"title\", \"description\", \"type\"],\n",
|
" json_result_fields=[\"title\", \"description\", \"type\"],\n",
|
||||||
" params={\"location_name\": \"Germany\", \"language_code\": \"en\"})\n",
|
" params={\"location_name\": \"Germany\", \"language_code\": \"en\"},\n",
|
||||||
|
")\n",
|
||||||
"customized_wrapper.results(\"coffee near me\")"
|
"customized_wrapper.results(\"coffee near me\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -142,7 +145,8 @@
|
|||||||
" top_count=10,\n",
|
" top_count=10,\n",
|
||||||
" json_result_types=[\"organic\", \"local_pack\"],\n",
|
" json_result_types=[\"organic\", \"local_pack\"],\n",
|
||||||
" json_result_fields=[\"title\", \"description\", \"type\"],\n",
|
" json_result_fields=[\"title\", \"description\", \"type\"],\n",
|
||||||
" params={\"location_name\": \"Germany\", \"language_code\": \"en\", \"se_name\": \"bing\"})\n",
|
" params={\"location_name\": \"Germany\", \"language_code\": \"en\", \"se_name\": \"bing\"},\n",
|
||||||
|
")\n",
|
||||||
"customized_wrapper.results(\"coffee near me\")"
|
"customized_wrapper.results(\"coffee near me\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -164,7 +168,12 @@
|
|||||||
"maps_search = DataForSeoAPIWrapper(\n",
|
"maps_search = DataForSeoAPIWrapper(\n",
|
||||||
" top_count=10,\n",
|
" top_count=10,\n",
|
||||||
" json_result_fields=[\"title\", \"value\", \"address\", \"rating\", \"type\"],\n",
|
" json_result_fields=[\"title\", \"value\", \"address\", \"rating\", \"type\"],\n",
|
||||||
" params={\"location_coordinate\": \"52.512,13.36,12z\", \"language_code\": \"en\", \"se_type\": \"maps\"})\n",
|
" params={\n",
|
||||||
|
" \"location_coordinate\": \"52.512,13.36,12z\",\n",
|
||||||
|
" \"language_code\": \"en\",\n",
|
||||||
|
" \"se_type\": \"maps\",\n",
|
||||||
|
" },\n",
|
||||||
|
")\n",
|
||||||
"maps_search.results(\"coffee near me\")"
|
"maps_search.results(\"coffee near me\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -184,10 +193,12 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from langchain.agents import Tool\n",
|
"from langchain.agents import Tool\n",
|
||||||
|
"\n",
|
||||||
"search = DataForSeoAPIWrapper(\n",
|
"search = DataForSeoAPIWrapper(\n",
|
||||||
" top_count=3,\n",
|
" top_count=3,\n",
|
||||||
" json_result_types=[\"organic\"],\n",
|
" json_result_types=[\"organic\"],\n",
|
||||||
" json_result_fields=[\"title\", \"description\", \"type\"])\n",
|
" json_result_fields=[\"title\", \"description\", \"type\"],\n",
|
||||||
|
")\n",
|
||||||
"tool = Tool(\n",
|
"tool = Tool(\n",
|
||||||
" name=\"google-search-answer\",\n",
|
" name=\"google-search-answer\",\n",
|
||||||
" description=\"My new answer tool\",\n",
|
" description=\"My new answer tool\",\n",
|
||||||
|
@ -90,7 +90,12 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"search.results(\"The best blog post about AI safety is definitely this: \", 10, include_domains=[\"lesswrong.com\"], start_published_date=\"2019-01-01\")"
|
"search.results(\n",
|
||||||
|
" \"The best blog post about AI safety is definitely this: \",\n",
|
||||||
|
" 10,\n",
|
||||||
|
" include_domains=[\"lesswrong.com\"],\n",
|
||||||
|
" start_published_date=\"2019-01-01\",\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -341,7 +341,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"llm = OpenAI(temperature=0)\n",
|
"llm = OpenAI(temperature=0)\n",
|
||||||
"zapier = ZapierNLAWrapper(zapier_nla_oauth_access_token='<fill in access token here>')\n",
|
"zapier = ZapierNLAWrapper(zapier_nla_oauth_access_token=\"<fill in access token here>\")\n",
|
||||||
"toolkit = ZapierToolkit.from_zapier_nla_wrapper(zapier)\n",
|
"toolkit = ZapierToolkit.from_zapier_nla_wrapper(zapier)\n",
|
||||||
"agent = initialize_agent(\n",
|
"agent = initialize_agent(\n",
|
||||||
" toolkit.get_tools(), llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
|
" toolkit.get_tools(), llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
|
||||||
|
@ -57,6 +57,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"# Remove the (1) import sys and sys.path.append(..) and (2) uncomment `!pip install langchain` after merging the PR for Infino/LangChain integration.\n",
|
"# Remove the (1) import sys and sys.path.append(..) and (2) uncomment `!pip install langchain` after merging the PR for Infino/LangChain integration.\n",
|
||||||
"import sys\n",
|
"import sys\n",
|
||||||
|
"\n",
|
||||||
"sys.path.append(\"../../../../../langchain\")\n",
|
"sys.path.append(\"../../../../../langchain\")\n",
|
||||||
"#!pip install langchain\n",
|
"#!pip install langchain\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -120,9 +121,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# These are a subset of questions from Stanford's QA dataset - \n",
|
"# These are a subset of questions from Stanford's QA dataset -\n",
|
||||||
"# https://rajpurkar.github.io/SQuAD-explorer/\n",
|
"# https://rajpurkar.github.io/SQuAD-explorer/\n",
|
||||||
"data = '''In what country is Normandy located?\n",
|
"data = \"\"\"In what country is Normandy located?\n",
|
||||||
"When were the Normans in Normandy?\n",
|
"When were the Normans in Normandy?\n",
|
||||||
"From which countries did the Norse originate?\n",
|
"From which countries did the Norse originate?\n",
|
||||||
"Who was the Norse leader?\n",
|
"Who was the Norse leader?\n",
|
||||||
@ -141,9 +142,9 @@
|
|||||||
"What principality did William the conquerer found?\n",
|
"What principality did William the conquerer found?\n",
|
||||||
"What is the original meaning of the word Norman?\n",
|
"What is the original meaning of the word Norman?\n",
|
||||||
"When was the Latin version of the word Norman first recorded?\n",
|
"When was the Latin version of the word Norman first recorded?\n",
|
||||||
"What name comes from the English words Normans/Normanz?'''\n",
|
"What name comes from the English words Normans/Normanz?\"\"\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"questions = data.split('\\n')"
|
"questions = data.split(\"\\n\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -190,10 +191,12 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# Set your key here.\n",
|
"# Set your key here.\n",
|
||||||
"#os.environ[\"OPENAI_API_KEY\"] = \"YOUR_API_KEY\"\n",
|
"# os.environ[\"OPENAI_API_KEY\"] = \"YOUR_API_KEY\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Create callback handler. This logs latency, errors, token usage, prompts as well as prompt responses to Infino.\n",
|
"# Create callback handler. This logs latency, errors, token usage, prompts as well as prompt responses to Infino.\n",
|
||||||
"handler = InfinoCallbackHandler(model_id=\"test_openai\", model_version=\"0.1\", verbose=False)\n",
|
"handler = InfinoCallbackHandler(\n",
|
||||||
|
" model_id=\"test_openai\", model_version=\"0.1\", verbose=False\n",
|
||||||
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Create LLM.\n",
|
"# Create LLM.\n",
|
||||||
"llm = OpenAI(temperature=0.1)\n",
|
"llm = OpenAI(temperature=0.1)\n",
|
||||||
@ -285,16 +288,16 @@
|
|||||||
"\n",
|
"\n",
|
||||||
" # Extract x and y values from the data\n",
|
" # Extract x and y values from the data\n",
|
||||||
" timestamps = [item[\"time\"] for item in data]\n",
|
" timestamps = [item[\"time\"] for item in data]\n",
|
||||||
" dates=[dt.datetime.fromtimestamp(ts) for ts in timestamps]\n",
|
" dates = [dt.datetime.fromtimestamp(ts) for ts in timestamps]\n",
|
||||||
" y = [item[\"value\"] for item in data]\n",
|
" y = [item[\"value\"] for item in data]\n",
|
||||||
"\n",
|
"\n",
|
||||||
" plt.rcParams['figure.figsize'] = [6, 4]\n",
|
" plt.rcParams[\"figure.figsize\"] = [6, 4]\n",
|
||||||
" plt.subplots_adjust(bottom=0.2)\n",
|
" plt.subplots_adjust(bottom=0.2)\n",
|
||||||
" plt.xticks(rotation=25 )\n",
|
" plt.xticks(rotation=25)\n",
|
||||||
" ax=plt.gca()\n",
|
" ax = plt.gca()\n",
|
||||||
" xfmt = md.DateFormatter('%Y-%m-%d %H:%M:%S')\n",
|
" xfmt = md.DateFormatter(\"%Y-%m-%d %H:%M:%S\")\n",
|
||||||
" ax.xaxis.set_major_formatter(xfmt)\n",
|
" ax.xaxis.set_major_formatter(xfmt)\n",
|
||||||
" \n",
|
"\n",
|
||||||
" # Create the plot\n",
|
" # Create the plot\n",
|
||||||
" plt.plot(dates, y)\n",
|
" plt.plot(dates, y)\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -305,6 +308,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
" plt.show()\n",
|
" plt.show()\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"\n",
|
||||||
"response = client.search_ts(\"__name__\", \"latency\", 0, int(time.time()))\n",
|
"response = client.search_ts(\"__name__\", \"latency\", 0, int(time.time()))\n",
|
||||||
"plot(response.text, \"Latency\")\n",
|
"plot(response.text, \"Latency\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -318,7 +322,7 @@
|
|||||||
"plot(response.text, \"Completion Tokens\")\n",
|
"plot(response.text, \"Completion Tokens\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"response = client.search_ts(\"__name__\", \"total_tokens\", 0, int(time.time()))\n",
|
"response = client.search_ts(\"__name__\", \"total_tokens\", 0, int(time.time()))\n",
|
||||||
"plot(response.text, \"Total Tokens\")\n"
|
"plot(response.text, \"Total Tokens\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -356,7 +360,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"query = \"king charles III\"\n",
|
"query = \"king charles III\"\n",
|
||||||
"response = client.search_log(\"king charles III\", 0, int(time.time()))\n",
|
"response = client.search_log(\"king charles III\", 0, int(time.time()))\n",
|
||||||
"print(\"Results for\", query, \":\", response.text)\n"
|
"print(\"Results for\", query, \":\", response.text)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -326,8 +326,8 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# wait 20 secs to see display\n",
|
"# wait 20 secs to see display\n",
|
||||||
"cpal_chain.draw(path='web.svg')\n",
|
"cpal_chain.draw(path=\"web.svg\")\n",
|
||||||
"SVG('web.svg')"
|
"SVG(\"web.svg\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -351,7 +351,7 @@
|
|||||||
" \"Jan has three times the number of pets as Marcia.\"\n",
|
" \"Jan has three times the number of pets as Marcia.\"\n",
|
||||||
" \"Marcia has two more pets than Cindy.\"\n",
|
" \"Marcia has two more pets than Cindy.\"\n",
|
||||||
" \"If Cindy has ten pets, how many pets does Barak have?\"\n",
|
" \"If Cindy has ten pets, how many pets does Barak have?\"\n",
|
||||||
" )"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -456,7 +456,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"question = (\n",
|
"question = (\n",
|
||||||
" \"Jan has three times the number of pets as Marcia. \" \n",
|
" \"Jan has three times the number of pets as Marcia. \"\n",
|
||||||
" \"Marcia has two more pets than Cindy. \"\n",
|
" \"Marcia has two more pets than Cindy. \"\n",
|
||||||
" \"If Cindy has four pets, how many total pets do the three have?\"\n",
|
" \"If Cindy has four pets, how many total pets do the three have?\"\n",
|
||||||
")"
|
")"
|
||||||
@ -621,8 +621,8 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# wait 20 secs to see display\n",
|
"# wait 20 secs to see display\n",
|
||||||
"cpal_chain.draw(path='web.svg')\n",
|
"cpal_chain.draw(path=\"web.svg\")\n",
|
||||||
"SVG('web.svg')"
|
"SVG(\"web.svg\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -641,7 +641,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"question = (\n",
|
"question = (\n",
|
||||||
" \"Jan has the number of pets as Marcia plus the number of pets as Cindy. \" \n",
|
" \"Jan has the number of pets as Marcia plus the number of pets as Cindy. \"\n",
|
||||||
" \"Marcia has no pets. \"\n",
|
" \"Marcia has no pets. \"\n",
|
||||||
" \"If Cindy has four pets, how many total pets do the three have?\"\n",
|
" \"If Cindy has four pets, how many total pets do the three have?\"\n",
|
||||||
")"
|
")"
|
||||||
@ -748,8 +748,8 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# wait 20 secs to see display\n",
|
"# wait 20 secs to see display\n",
|
||||||
"cpal_chain.draw(path='web.svg')\n",
|
"cpal_chain.draw(path=\"web.svg\")\n",
|
||||||
"SVG('web.svg')"
|
"SVG(\"web.svg\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -768,7 +768,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"question = (\n",
|
"question = (\n",
|
||||||
" \"Jan has the number of pets as Marcia plus the number of pets as Cindy. \" \n",
|
" \"Jan has the number of pets as Marcia plus the number of pets as Cindy. \"\n",
|
||||||
" \"Marcia has two more pets than Cindy. \"\n",
|
" \"Marcia has two more pets than Cindy. \"\n",
|
||||||
" \"If Cindy has four pets, how many total pets do the three have?\"\n",
|
" \"If Cindy has four pets, how many total pets do the three have?\"\n",
|
||||||
")"
|
")"
|
||||||
@ -881,8 +881,8 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# wait 20 secs to see display\n",
|
"# wait 20 secs to see display\n",
|
||||||
"cpal_chain.draw(path='web.svg')\n",
|
"cpal_chain.draw(path=\"web.svg\")\n",
|
||||||
"SVG('web.svg')"
|
"SVG(\"web.svg\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -72,7 +72,10 @@
|
|||||||
"import numpy as np\n",
|
"import numpy as np\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from langchain.schema import BaseRetriever\n",
|
"from langchain.schema import BaseRetriever\n",
|
||||||
"from langchain.callbacks.manager import AsyncCallbackManagerForRetrieverRun, CallbackManagerForRetrieverRun\n",
|
"from langchain.callbacks.manager import (\n",
|
||||||
|
" AsyncCallbackManagerForRetrieverRun,\n",
|
||||||
|
" CallbackManagerForRetrieverRun,\n",
|
||||||
|
")\n",
|
||||||
"from langchain.utilities import GoogleSerperAPIWrapper\n",
|
"from langchain.utilities import GoogleSerperAPIWrapper\n",
|
||||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||||
"from langchain.chat_models import ChatOpenAI\n",
|
"from langchain.chat_models import ChatOpenAI\n",
|
||||||
@ -97,13 +100,15 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"class SerperSearchRetriever(BaseRetriever):\n",
|
"class SerperSearchRetriever(BaseRetriever):\n",
|
||||||
"\n",
|
|
||||||
" search: GoogleSerperAPIWrapper = None\n",
|
" search: GoogleSerperAPIWrapper = None\n",
|
||||||
"\n",
|
"\n",
|
||||||
" def _get_relevant_documents(self, query: str, *, run_manager: CallbackManagerForRetrieverRun, **kwargs: Any) -> List[Document]:\n",
|
" def _get_relevant_documents(\n",
|
||||||
|
" self, query: str, *, run_manager: CallbackManagerForRetrieverRun, **kwargs: Any\n",
|
||||||
|
" ) -> List[Document]:\n",
|
||||||
" return [Document(page_content=self.search.run(query))]\n",
|
" return [Document(page_content=self.search.run(query))]\n",
|
||||||
"\n",
|
"\n",
|
||||||
" async def _aget_relevant_documents(self,\n",
|
" async def _aget_relevant_documents(\n",
|
||||||
|
" self,\n",
|
||||||
" query: str,\n",
|
" query: str,\n",
|
||||||
" *,\n",
|
" *,\n",
|
||||||
" run_manager: AsyncCallbackManagerForRetrieverRun,\n",
|
" run_manager: AsyncCallbackManagerForRetrieverRun,\n",
|
||||||
|
@ -83,9 +83,15 @@
|
|||||||
"schema = client.schema()\n",
|
"schema = client.schema()\n",
|
||||||
"schema.propertyKey(\"name\").asText().ifNotExist().create()\n",
|
"schema.propertyKey(\"name\").asText().ifNotExist().create()\n",
|
||||||
"schema.propertyKey(\"birthDate\").asText().ifNotExist().create()\n",
|
"schema.propertyKey(\"birthDate\").asText().ifNotExist().create()\n",
|
||||||
"schema.vertexLabel(\"Person\").properties(\"name\", \"birthDate\").usePrimaryKeyId().primaryKeys(\"name\").ifNotExist().create()\n",
|
"schema.vertexLabel(\"Person\").properties(\n",
|
||||||
"schema.vertexLabel(\"Movie\").properties(\"name\").usePrimaryKeyId().primaryKeys(\"name\").ifNotExist().create()\n",
|
" \"name\", \"birthDate\"\n",
|
||||||
"schema.edgeLabel(\"ActedIn\").sourceLabel(\"Person\").targetLabel(\"Movie\").ifNotExist().create()"
|
").usePrimaryKeyId().primaryKeys(\"name\").ifNotExist().create()\n",
|
||||||
|
"schema.vertexLabel(\"Movie\").properties(\"name\").usePrimaryKeyId().primaryKeys(\n",
|
||||||
|
" \"name\"\n",
|
||||||
|
").ifNotExist().create()\n",
|
||||||
|
"schema.edgeLabel(\"ActedIn\").sourceLabel(\"Person\").targetLabel(\n",
|
||||||
|
" \"Movie\"\n",
|
||||||
|
").ifNotExist().create()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -124,7 +130,9 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"g.addEdge(\"ActedIn\", \"1:Al Pacino\", \"2:The Godfather\", {})\n",
|
"g.addEdge(\"ActedIn\", \"1:Al Pacino\", \"2:The Godfather\", {})\n",
|
||||||
"g.addEdge(\"ActedIn\", \"1:Al Pacino\", \"2:The Godfather Part II\", {})\n",
|
"g.addEdge(\"ActedIn\", \"1:Al Pacino\", \"2:The Godfather Part II\", {})\n",
|
||||||
"g.addEdge(\"ActedIn\", \"1:Al Pacino\", \"2:The Godfather Coda The Death of Michael Corleone\", {})\n",
|
"g.addEdge(\n",
|
||||||
|
" \"ActedIn\", \"1:Al Pacino\", \"2:The Godfather Coda The Death of Michael Corleone\", {}\n",
|
||||||
|
")\n",
|
||||||
"g.addEdge(\"ActedIn\", \"1:Robert De Niro\", \"2:The Godfather Part II\", {})"
|
"g.addEdge(\"ActedIn\", \"1:Robert De Niro\", \"2:The Godfather Part II\", {})"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -164,7 +172,7 @@
|
|||||||
" password=\"admin\",\n",
|
" password=\"admin\",\n",
|
||||||
" address=\"localhost\",\n",
|
" address=\"localhost\",\n",
|
||||||
" port=8080,\n",
|
" port=8080,\n",
|
||||||
" graph=\"hugegraph\"\n",
|
" graph=\"hugegraph\",\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -228,9 +236,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"chain = HugeGraphQAChain.from_llm(\n",
|
"chain = HugeGraphQAChain.from_llm(ChatOpenAI(temperature=0), graph=graph, verbose=True)"
|
||||||
" ChatOpenAI(temperature=0), graph=graph, verbose=True\n",
|
|
||||||
")"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -31,6 +31,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import kuzu\n",
|
"import kuzu\n",
|
||||||
|
"\n",
|
||||||
"db = kuzu.Database(\"test_db\")\n",
|
"db = kuzu.Database(\"test_db\")\n",
|
||||||
"conn = kuzu.Connection(db)"
|
"conn = kuzu.Connection(db)"
|
||||||
]
|
]
|
||||||
@ -61,7 +62,9 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"conn.execute(\"CREATE NODE TABLE Movie (name STRING, PRIMARY KEY(name))\")\n",
|
"conn.execute(\"CREATE NODE TABLE Movie (name STRING, PRIMARY KEY(name))\")\n",
|
||||||
"conn.execute(\"CREATE NODE TABLE Person (name STRING, birthDate STRING, PRIMARY KEY(name))\")\n",
|
"conn.execute(\n",
|
||||||
|
" \"CREATE NODE TABLE Person (name STRING, birthDate STRING, PRIMARY KEY(name))\"\n",
|
||||||
|
")\n",
|
||||||
"conn.execute(\"CREATE REL TABLE ActedIn (FROM Person TO Movie)\")"
|
"conn.execute(\"CREATE REL TABLE ActedIn (FROM Person TO Movie)\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -94,11 +97,21 @@
|
|||||||
"conn.execute(\"CREATE (:Person {name: 'Robert De Niro', birthDate: '1943-08-17'})\")\n",
|
"conn.execute(\"CREATE (:Person {name: 'Robert De Niro', birthDate: '1943-08-17'})\")\n",
|
||||||
"conn.execute(\"CREATE (:Movie {name: 'The Godfather'})\")\n",
|
"conn.execute(\"CREATE (:Movie {name: 'The Godfather'})\")\n",
|
||||||
"conn.execute(\"CREATE (:Movie {name: 'The Godfather: Part II'})\")\n",
|
"conn.execute(\"CREATE (:Movie {name: 'The Godfather: Part II'})\")\n",
|
||||||
"conn.execute(\"CREATE (:Movie {name: 'The Godfather Coda: The Death of Michael Corleone'})\")\n",
|
"conn.execute(\n",
|
||||||
"conn.execute(\"MATCH (p:Person), (m:Movie) WHERE p.name = 'Al Pacino' AND m.name = 'The Godfather' CREATE (p)-[:ActedIn]->(m)\")\n",
|
" \"CREATE (:Movie {name: 'The Godfather Coda: The Death of Michael Corleone'})\"\n",
|
||||||
"conn.execute(\"MATCH (p:Person), (m:Movie) WHERE p.name = 'Al Pacino' AND m.name = 'The Godfather: Part II' CREATE (p)-[:ActedIn]->(m)\")\n",
|
")\n",
|
||||||
"conn.execute(\"MATCH (p:Person), (m:Movie) WHERE p.name = 'Al Pacino' AND m.name = 'The Godfather Coda: The Death of Michael Corleone' CREATE (p)-[:ActedIn]->(m)\")\n",
|
"conn.execute(\n",
|
||||||
"conn.execute(\"MATCH (p:Person), (m:Movie) WHERE p.name = 'Robert De Niro' AND m.name = 'The Godfather: Part II' CREATE (p)-[:ActedIn]->(m)\")"
|
" \"MATCH (p:Person), (m:Movie) WHERE p.name = 'Al Pacino' AND m.name = 'The Godfather' CREATE (p)-[:ActedIn]->(m)\"\n",
|
||||||
|
")\n",
|
||||||
|
"conn.execute(\n",
|
||||||
|
" \"MATCH (p:Person), (m:Movie) WHERE p.name = 'Al Pacino' AND m.name = 'The Godfather: Part II' CREATE (p)-[:ActedIn]->(m)\"\n",
|
||||||
|
")\n",
|
||||||
|
"conn.execute(\n",
|
||||||
|
" \"MATCH (p:Person), (m:Movie) WHERE p.name = 'Al Pacino' AND m.name = 'The Godfather Coda: The Death of Michael Corleone' CREATE (p)-[:ActedIn]->(m)\"\n",
|
||||||
|
")\n",
|
||||||
|
"conn.execute(\n",
|
||||||
|
" \"MATCH (p:Person), (m:Movie) WHERE p.name = 'Robert De Niro' AND m.name = 'The Godfather: Part II' CREATE (p)-[:ActedIn]->(m)\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -137,9 +150,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"chain = KuzuQAChain.from_llm(\n",
|
"chain = KuzuQAChain.from_llm(ChatOpenAI(temperature=0), graph=graph, verbose=True)"
|
||||||
" ChatOpenAI(temperature=0), graph=graph, verbose=True\n",
|
|
||||||
")"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -235,7 +235,9 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"chain.run(\"Save that the person with the name 'Timothy Berners-Lee' has a work homepage at 'http://www.w3.org/foo/bar/'\")"
|
"chain.run(\n",
|
||||||
|
" \"Save that the person with the name 'Timothy Berners-Lee' has a work homepage at 'http://www.w3.org/foo/bar/'\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -38,7 +38,7 @@
|
|||||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||||
"texts = text_splitter.split_documents(documents)\n",
|
"texts = text_splitter.split_documents(documents)\n",
|
||||||
"for i, text in enumerate(texts):\n",
|
"for i, text in enumerate(texts):\n",
|
||||||
" text.metadata['source'] = f\"{i}-pl\"\n",
|
" text.metadata[\"source\"] = f\"{i}-pl\"\n",
|
||||||
"embeddings = OpenAIEmbeddings()\n",
|
"embeddings = OpenAIEmbeddings()\n",
|
||||||
"docsearch = Chroma.from_documents(texts, embeddings)"
|
"docsearch = Chroma.from_documents(texts, embeddings)"
|
||||||
]
|
]
|
||||||
@ -97,8 +97,8 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"final_qa_chain = StuffDocumentsChain(\n",
|
"final_qa_chain = StuffDocumentsChain(\n",
|
||||||
" llm_chain=qa_chain, \n",
|
" llm_chain=qa_chain,\n",
|
||||||
" document_variable_name='context',\n",
|
" document_variable_name=\"context\",\n",
|
||||||
" document_prompt=doc_prompt,\n",
|
" document_prompt=doc_prompt,\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
@ -111,8 +111,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"retrieval_qa = RetrievalQA(\n",
|
"retrieval_qa = RetrievalQA(\n",
|
||||||
" retriever=docsearch.as_retriever(),\n",
|
" retriever=docsearch.as_retriever(), combine_documents_chain=final_qa_chain\n",
|
||||||
" combine_documents_chain=final_qa_chain\n",
|
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -175,8 +174,8 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"final_qa_chain_pydantic = StuffDocumentsChain(\n",
|
"final_qa_chain_pydantic = StuffDocumentsChain(\n",
|
||||||
" llm_chain=qa_chain_pydantic, \n",
|
" llm_chain=qa_chain_pydantic,\n",
|
||||||
" document_variable_name='context',\n",
|
" document_variable_name=\"context\",\n",
|
||||||
" document_prompt=doc_prompt,\n",
|
" document_prompt=doc_prompt,\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
@ -189,8 +188,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"retrieval_qa_pydantic = RetrievalQA(\n",
|
"retrieval_qa_pydantic = RetrievalQA(\n",
|
||||||
" retriever=docsearch.as_retriever(),\n",
|
" retriever=docsearch.as_retriever(), combine_documents_chain=final_qa_chain_pydantic\n",
|
||||||
" combine_documents_chain=final_qa_chain_pydantic\n",
|
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -235,6 +233,7 @@
|
|||||||
"from langchain.chains import ConversationalRetrievalChain\n",
|
"from langchain.chains import ConversationalRetrievalChain\n",
|
||||||
"from langchain.memory import ConversationBufferMemory\n",
|
"from langchain.memory import ConversationBufferMemory\n",
|
||||||
"from langchain.chains import LLMChain\n",
|
"from langchain.chains import LLMChain\n",
|
||||||
|
"\n",
|
||||||
"memory = ConversationBufferMemory(memory_key=\"chat_history\", return_messages=True)\n",
|
"memory = ConversationBufferMemory(memory_key=\"chat_history\", return_messages=True)\n",
|
||||||
"_template = \"\"\"Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.\\\n",
|
"_template = \"\"\"Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.\\\n",
|
||||||
"Make sure to avoid using any unclear pronouns.\n",
|
"Make sure to avoid using any unclear pronouns.\n",
|
||||||
@ -258,10 +257,10 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"qa = ConversationalRetrievalChain(\n",
|
"qa = ConversationalRetrievalChain(\n",
|
||||||
" question_generator=condense_question_chain, \n",
|
" question_generator=condense_question_chain,\n",
|
||||||
" retriever=docsearch.as_retriever(),\n",
|
" retriever=docsearch.as_retriever(),\n",
|
||||||
" memory=memory, \n",
|
" memory=memory,\n",
|
||||||
" combine_docs_chain=final_qa_chain\n",
|
" combine_docs_chain=final_qa_chain,\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -389,7 +388,9 @@
|
|||||||
" \"\"\"An answer to the question being asked, with sources.\"\"\"\n",
|
" \"\"\"An answer to the question being asked, with sources.\"\"\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
" answer: str = Field(..., description=\"Answer to the question that was asked\")\n",
|
" answer: str = Field(..., description=\"Answer to the question that was asked\")\n",
|
||||||
" countries_referenced: List[str] = Field(..., description=\"All of the countries mentioned in the sources\")\n",
|
" countries_referenced: List[str] = Field(\n",
|
||||||
|
" ..., description=\"All of the countries mentioned in the sources\"\n",
|
||||||
|
" )\n",
|
||||||
" sources: List[str] = Field(\n",
|
" sources: List[str] = Field(\n",
|
||||||
" ..., description=\"List of sources used to answer the question\"\n",
|
" ..., description=\"List of sources used to answer the question\"\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
@ -405,20 +406,23 @@
|
|||||||
" HumanMessage(content=\"Answer question using the following context\"),\n",
|
" HumanMessage(content=\"Answer question using the following context\"),\n",
|
||||||
" HumanMessagePromptTemplate.from_template(\"{context}\"),\n",
|
" HumanMessagePromptTemplate.from_template(\"{context}\"),\n",
|
||||||
" HumanMessagePromptTemplate.from_template(\"Question: {question}\"),\n",
|
" HumanMessagePromptTemplate.from_template(\"Question: {question}\"),\n",
|
||||||
" HumanMessage(content=\"Tips: Make sure to answer in the correct format. Return all of the countries mentioned in the sources in uppercase characters.\"),\n",
|
" HumanMessage(\n",
|
||||||
|
" content=\"Tips: Make sure to answer in the correct format. Return all of the countries mentioned in the sources in uppercase characters.\"\n",
|
||||||
|
" ),\n",
|
||||||
"]\n",
|
"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"chain_prompt = ChatPromptTemplate(messages=prompt_messages)\n",
|
"chain_prompt = ChatPromptTemplate(messages=prompt_messages)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"qa_chain_pydantic = create_qa_with_structure_chain(llm, CustomResponseSchema, output_parser=\"pydantic\", prompt=chain_prompt)\n",
|
"qa_chain_pydantic = create_qa_with_structure_chain(\n",
|
||||||
|
" llm, CustomResponseSchema, output_parser=\"pydantic\", prompt=chain_prompt\n",
|
||||||
|
")\n",
|
||||||
"final_qa_chain_pydantic = StuffDocumentsChain(\n",
|
"final_qa_chain_pydantic = StuffDocumentsChain(\n",
|
||||||
" llm_chain=qa_chain_pydantic,\n",
|
" llm_chain=qa_chain_pydantic,\n",
|
||||||
" document_variable_name='context',\n",
|
" document_variable_name=\"context\",\n",
|
||||||
" document_prompt=doc_prompt,\n",
|
" document_prompt=doc_prompt,\n",
|
||||||
")\n",
|
")\n",
|
||||||
"retrieval_qa_pydantic = RetrievalQA(\n",
|
"retrieval_qa_pydantic = RetrievalQA(\n",
|
||||||
" retriever=docsearch.as_retriever(),\n",
|
" retriever=docsearch.as_retriever(), combine_documents_chain=final_qa_chain_pydantic\n",
|
||||||
" combine_documents_chain=final_qa_chain_pydantic\n",
|
|
||||||
")\n",
|
")\n",
|
||||||
"query = \"What did he say about russia\"\n",
|
"query = \"What did he say about russia\"\n",
|
||||||
"retrieval_qa_pydantic.run(query)"
|
"retrieval_qa_pydantic.run(query)"
|
||||||
|
@ -35,7 +35,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"chain = get_openapi_chain(\"https://www.klarna.com/us/shopping/public/openai/v0/api-docs/\")"
|
"chain = get_openapi_chain(\n",
|
||||||
|
" \"https://www.klarna.com/us/shopping/public/openai/v0/api-docs/\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -186,7 +188,9 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"chain = get_openapi_chain(\"https://gist.githubusercontent.com/roaldnefs/053e505b2b7a807290908fe9aa3e1f00/raw/0a212622ebfef501163f91e23803552411ed00e4/openapi.yaml\")"
|
"chain = get_openapi_chain(\n",
|
||||||
|
" \"https://gist.githubusercontent.com/roaldnefs/053e505b2b7a807290908fe9aa3e1f00/raw/0a212622ebfef501163f91e23803552411ed00e4/openapi.yaml\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -22,7 +22,8 @@
|
|||||||
"from typing import Optional\n",
|
"from typing import Optional\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from langchain.chains.openai_functions import (\n",
|
"from langchain.chains.openai_functions import (\n",
|
||||||
" create_openai_fn_chain, create_structured_output_chain\n",
|
" create_openai_fn_chain,\n",
|
||||||
|
" create_structured_output_chain,\n",
|
||||||
")\n",
|
")\n",
|
||||||
"from langchain.chat_models import ChatOpenAI\n",
|
"from langchain.chat_models import ChatOpenAI\n",
|
||||||
"from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate\n",
|
"from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate\n",
|
||||||
@ -58,8 +59,10 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from pydantic import BaseModel, Field\n",
|
"from pydantic import BaseModel, Field\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"\n",
|
||||||
"class Person(BaseModel):\n",
|
"class Person(BaseModel):\n",
|
||||||
" \"\"\"Identifying information about a person.\"\"\"\n",
|
" \"\"\"Identifying information about a person.\"\"\"\n",
|
||||||
|
"\n",
|
||||||
" name: str = Field(..., description=\"The person's name\")\n",
|
" name: str = Field(..., description=\"The person's name\")\n",
|
||||||
" age: int = Field(..., description=\"The person's age\")\n",
|
" age: int = Field(..., description=\"The person's age\")\n",
|
||||||
" fav_food: Optional[str] = Field(None, description=\"The person's favorite food\")"
|
" fav_food: Optional[str] = Field(None, description=\"The person's favorite food\")"
|
||||||
@ -106,10 +109,12 @@
|
|||||||
" SystemMessage(\n",
|
" SystemMessage(\n",
|
||||||
" content=\"You are a world class algorithm for extracting information in structured formats.\"\n",
|
" content=\"You are a world class algorithm for extracting information in structured formats.\"\n",
|
||||||
" ),\n",
|
" ),\n",
|
||||||
" HumanMessage(content=\"Use the given format to extract information from the following input:\"),\n",
|
" HumanMessage(\n",
|
||||||
|
" content=\"Use the given format to extract information from the following input:\"\n",
|
||||||
|
" ),\n",
|
||||||
" HumanMessagePromptTemplate.from_template(\"{input}\"),\n",
|
" HumanMessagePromptTemplate.from_template(\"{input}\"),\n",
|
||||||
" HumanMessage(content=\"Tips: Make sure to answer in the correct format\"),\n",
|
" HumanMessage(content=\"Tips: Make sure to answer in the correct format\"),\n",
|
||||||
" ]\n",
|
"]\n",
|
||||||
"prompt = ChatPromptTemplate(messages=prompt_msgs)\n",
|
"prompt = ChatPromptTemplate(messages=prompt_msgs)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"chain = create_structured_output_chain(Person, llm, prompt, verbose=True)\n",
|
"chain = create_structured_output_chain(Person, llm, prompt, verbose=True)\n",
|
||||||
@ -162,12 +167,17 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from typing import Sequence\n",
|
"from typing import Sequence\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"\n",
|
||||||
"class People(BaseModel):\n",
|
"class People(BaseModel):\n",
|
||||||
" \"\"\"Identifying information about all people in a text.\"\"\"\n",
|
" \"\"\"Identifying information about all people in a text.\"\"\"\n",
|
||||||
|
"\n",
|
||||||
" people: Sequence[Person] = Field(..., description=\"The people in the text\")\n",
|
" people: Sequence[Person] = Field(..., description=\"The people in the text\")\n",
|
||||||
" \n",
|
"\n",
|
||||||
|
"\n",
|
||||||
"chain = create_structured_output_chain(People, llm, prompt, verbose=True)\n",
|
"chain = create_structured_output_chain(People, llm, prompt, verbose=True)\n",
|
||||||
"chain.run(\"Sally is 13, Joey just turned 12 and loves spinach. Caroline is 10 years older than Sally, so she's 23.\")"
|
"chain.run(\n",
|
||||||
|
" \"Sally is 13, Joey just turned 12 and loves spinach. Caroline is 10 years older than Sally, so she's 23.\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -192,27 +202,16 @@
|
|||||||
" \"description\": \"Identifying information about a person.\",\n",
|
" \"description\": \"Identifying information about a person.\",\n",
|
||||||
" \"type\": \"object\",\n",
|
" \"type\": \"object\",\n",
|
||||||
" \"properties\": {\n",
|
" \"properties\": {\n",
|
||||||
" \"name\": {\n",
|
" \"name\": {\"title\": \"Name\", \"description\": \"The person's name\", \"type\": \"string\"},\n",
|
||||||
" \"title\": \"Name\",\n",
|
" \"age\": {\"title\": \"Age\", \"description\": \"The person's age\", \"type\": \"integer\"},\n",
|
||||||
" \"description\": \"The person's name\",\n",
|
|
||||||
" \"type\": \"string\"\n",
|
|
||||||
" },\n",
|
|
||||||
" \"age\": {\n",
|
|
||||||
" \"title\": \"Age\",\n",
|
|
||||||
" \"description\": \"The person's age\",\n",
|
|
||||||
" \"type\": \"integer\"\n",
|
|
||||||
" },\n",
|
|
||||||
" \"fav_food\": {\n",
|
" \"fav_food\": {\n",
|
||||||
" \"title\": \"Fav Food\",\n",
|
" \"title\": \"Fav Food\",\n",
|
||||||
" \"description\": \"The person's favorite food\",\n",
|
" \"description\": \"The person's favorite food\",\n",
|
||||||
" \"type\": \"string\"\n",
|
" \"type\": \"string\",\n",
|
||||||
" }\n",
|
|
||||||
" },\n",
|
" },\n",
|
||||||
" \"required\": [\n",
|
" },\n",
|
||||||
" \"name\",\n",
|
" \"required\": [\"name\", \"age\"],\n",
|
||||||
" \"age\"\n",
|
"}"
|
||||||
" ]\n",
|
|
||||||
"}\n"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -286,13 +285,15 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"class RecordPerson(BaseModel):\n",
|
"class RecordPerson(BaseModel):\n",
|
||||||
" \"\"\"Record some identifying information about a pe.\"\"\"\n",
|
" \"\"\"Record some identifying information about a pe.\"\"\"\n",
|
||||||
|
"\n",
|
||||||
" name: str = Field(..., description=\"The person's name\")\n",
|
" name: str = Field(..., description=\"The person's name\")\n",
|
||||||
" age: int = Field(..., description=\"The person's age\")\n",
|
" age: int = Field(..., description=\"The person's age\")\n",
|
||||||
" fav_food: Optional[str] = Field(None, description=\"The person's favorite food\")\n",
|
" fav_food: Optional[str] = Field(None, description=\"The person's favorite food\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
" \n",
|
"\n",
|
||||||
"class RecordDog(BaseModel):\n",
|
"class RecordDog(BaseModel):\n",
|
||||||
" \"\"\"Record some identifying information about a dog.\"\"\"\n",
|
" \"\"\"Record some identifying information about a dog.\"\"\"\n",
|
||||||
|
"\n",
|
||||||
" name: str = Field(..., description=\"The dog's name\")\n",
|
" name: str = Field(..., description=\"The dog's name\")\n",
|
||||||
" color: str = Field(..., description=\"The dog's color\")\n",
|
" color: str = Field(..., description=\"The dog's color\")\n",
|
||||||
" fav_food: Optional[str] = Field(None, description=\"The dog's favorite food\")"
|
" fav_food: Optional[str] = Field(None, description=\"The dog's favorite food\")"
|
||||||
@ -333,10 +334,10 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"prompt_msgs = [\n",
|
"prompt_msgs = [\n",
|
||||||
" SystemMessage(\n",
|
" SystemMessage(content=\"You are a world class algorithm for recording entities\"),\n",
|
||||||
" content=\"You are a world class algorithm for recording entities\"\n",
|
" HumanMessage(\n",
|
||||||
|
" content=\"Make calls to the relevant function to record the entities in the following input:\"\n",
|
||||||
" ),\n",
|
" ),\n",
|
||||||
" HumanMessage(content=\"Make calls to the relevant function to record the entities in the following input:\"),\n",
|
|
||||||
" HumanMessagePromptTemplate.from_template(\"{input}\"),\n",
|
" HumanMessagePromptTemplate.from_template(\"{input}\"),\n",
|
||||||
" HumanMessage(content=\"Tips: Make sure to answer in the correct format\"),\n",
|
" HumanMessage(content=\"Tips: Make sure to answer in the correct format\"),\n",
|
||||||
"]\n",
|
"]\n",
|
||||||
@ -393,11 +394,16 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"class OptionalFavFood(BaseModel):\n",
|
"class OptionalFavFood(BaseModel):\n",
|
||||||
" \"\"\"Either a food or null.\"\"\"\n",
|
" \"\"\"Either a food or null.\"\"\"\n",
|
||||||
" food: Optional[str] = Field(None, description=\"Either the name of a food or null. Should be null if the food isn't known.\")\n",
|
"\n",
|
||||||
|
" food: Optional[str] = Field(\n",
|
||||||
|
" None,\n",
|
||||||
|
" description=\"Either the name of a food or null. Should be null if the food isn't known.\",\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def record_person(name: str, age: int, fav_food: OptionalFavFood) -> str:\n",
|
"def record_person(name: str, age: int, fav_food: OptionalFavFood) -> str:\n",
|
||||||
" \"\"\"Record some basic identifying information about a person.\n",
|
" \"\"\"Record some basic identifying information about a person.\n",
|
||||||
" \n",
|
"\n",
|
||||||
" Args:\n",
|
" Args:\n",
|
||||||
" name: The person's name.\n",
|
" name: The person's name.\n",
|
||||||
" age: The person's age in years.\n",
|
" age: The person's age in years.\n",
|
||||||
@ -405,9 +411,11 @@
|
|||||||
" \"\"\"\n",
|
" \"\"\"\n",
|
||||||
" return f\"Recording person {name} of age {age} with favorite food {fav_food.food}!\"\n",
|
" return f\"Recording person {name} of age {age} with favorite food {fav_food.food}!\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
" \n",
|
"\n",
|
||||||
"chain = create_openai_fn_chain([record_person], llm, prompt, verbose=True)\n",
|
"chain = create_openai_fn_chain([record_person], llm, prompt, verbose=True)\n",
|
||||||
"chain.run(\"The most important thing to remember about Tommy, my 12 year old, is that he'll do anything for apple pie.\")"
|
"chain.run(\n",
|
||||||
|
" \"The most important thing to remember about Tommy, my 12 year old, is that he'll do anything for apple pie.\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -458,7 +466,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"def record_dog(name: str, color: str, fav_food: OptionalFavFood) -> str:\n",
|
"def record_dog(name: str, color: str, fav_food: OptionalFavFood) -> str:\n",
|
||||||
" \"\"\"Record some basic identifying information about a dog.\n",
|
" \"\"\"Record some basic identifying information about a dog.\n",
|
||||||
" \n",
|
"\n",
|
||||||
" Args:\n",
|
" Args:\n",
|
||||||
" name: The dog's name.\n",
|
" name: The dog's name.\n",
|
||||||
" color: The dog's color.\n",
|
" color: The dog's color.\n",
|
||||||
@ -468,7 +476,9 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"chain = create_openai_fn_chain([record_person, record_dog], llm, prompt, verbose=True)\n",
|
"chain = create_openai_fn_chain([record_person, record_dog], llm, prompt, verbose=True)\n",
|
||||||
"chain.run(\"I can't find my dog Henry anywhere, he's a small brown beagle. Could you send a message about him?\")"
|
"chain.run(\n",
|
||||||
|
" \"I can't find my dog Henry anywhere, he's a small brown beagle. Could you send a message about him?\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -77,7 +77,9 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"loader = BraveSearchLoader(query=\"obama middle name\", api_key=api_key, search_kwargs={\"count\": 3})\n",
|
"loader = BraveSearchLoader(\n",
|
||||||
|
" query=\"obama middle name\", api_key=api_key, search_kwargs={\"count\": 3}\n",
|
||||||
|
")\n",
|
||||||
"docs = loader.load()\n",
|
"docs = loader.load()\n",
|
||||||
"len(docs)"
|
"len(docs)"
|
||||||
]
|
]
|
||||||
|
@ -126,11 +126,11 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"file_id=\"1x9WBtFPWMEAdjcJzPScRsjpjQvpSo_kz\"\n",
|
"file_id = \"1x9WBtFPWMEAdjcJzPScRsjpjQvpSo_kz\"\n",
|
||||||
"loader = GoogleDriveLoader(\n",
|
"loader = GoogleDriveLoader(\n",
|
||||||
" file_ids=[file_id],\n",
|
" file_ids=[file_id],\n",
|
||||||
" file_loader_cls=UnstructuredFileIOLoader,\n",
|
" file_loader_cls=UnstructuredFileIOLoader,\n",
|
||||||
" file_loader_kwargs={\"mode\": \"elements\"}\n",
|
" file_loader_kwargs={\"mode\": \"elements\"},\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -180,11 +180,11 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"folder_id=\"1asMOHY1BqBS84JcRbOag5LOJac74gpmD\"\n",
|
"folder_id = \"1asMOHY1BqBS84JcRbOag5LOJac74gpmD\"\n",
|
||||||
"loader = GoogleDriveLoader(\n",
|
"loader = GoogleDriveLoader(\n",
|
||||||
" folder_id=folder_id,\n",
|
" folder_id=folder_id,\n",
|
||||||
" file_loader_cls=UnstructuredFileIOLoader,\n",
|
" file_loader_cls=UnstructuredFileIOLoader,\n",
|
||||||
" file_loader_kwargs={\"mode\": \"elements\"}\n",
|
" file_loader_kwargs={\"mode\": \"elements\"},\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -101,7 +101,7 @@
|
|||||||
" \"../Papers/\",\n",
|
" \"../Papers/\",\n",
|
||||||
" glob=\"*\",\n",
|
" glob=\"*\",\n",
|
||||||
" suffixes=[\".pdf\"],\n",
|
" suffixes=[\".pdf\"],\n",
|
||||||
" parser= GrobidParser(segment_sentences=False)\n",
|
" parser=GrobidParser(segment_sentences=False),\n",
|
||||||
")\n",
|
")\n",
|
||||||
"docs = loader.load()"
|
"docs = loader.load()"
|
||||||
]
|
]
|
||||||
|
@ -18,7 +18,10 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from langchain.document_loaders import WebBaseLoader\n",
|
"from langchain.document_loaders import WebBaseLoader\n",
|
||||||
"loader_web = WebBaseLoader(\"https://github.com/basecamp/handbook/blob/master/37signals-is-you.md\")"
|
"\n",
|
||||||
|
"loader_web = WebBaseLoader(\n",
|
||||||
|
" \"https://github.com/basecamp/handbook/blob/master/37signals-is-you.md\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -29,6 +32,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from langchain.document_loaders import PyPDFLoader\n",
|
"from langchain.document_loaders import PyPDFLoader\n",
|
||||||
|
"\n",
|
||||||
"loader_pdf = PyPDFLoader(\"../MachineLearning-Lecture01.pdf\")"
|
"loader_pdf = PyPDFLoader(\"../MachineLearning-Lecture01.pdf\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -40,7 +44,8 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from langchain.document_loaders.merge import MergedDataLoader\n",
|
"from langchain.document_loaders.merge import MergedDataLoader\n",
|
||||||
"loader_all=MergedDataLoader(loaders=[loader_web,loader_pdf])"
|
"\n",
|
||||||
|
"loader_all = MergedDataLoader(loaders=[loader_web, loader_pdf])"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -50,7 +55,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"docs_all=loader_all.load()"
|
"docs_all = loader_all.load()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -36,7 +36,9 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# Create a new loader object for the MHTML file\n",
|
"# Create a new loader object for the MHTML file\n",
|
||||||
"loader = MHTMLLoader(file_path='../../../../../../tests/integration_tests/examples/example.mht')\n",
|
"loader = MHTMLLoader(\n",
|
||||||
|
" file_path=\"../../../../../../tests/integration_tests/examples/example.mht\"\n",
|
||||||
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Load the document from the file\n",
|
"# Load the document from the file\n",
|
||||||
"documents = loader.load()\n",
|
"documents = loader.load()\n",
|
||||||
|
@ -55,9 +55,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"dataset = \"vw6y-z8j6\" # 311 data\n",
|
"dataset = \"vw6y-z8j6\" # 311 data\n",
|
||||||
"dataset = \"tmnf-yvry\" # crime data\n",
|
"dataset = \"tmnf-yvry\" # crime data\n",
|
||||||
"loader = OpenCityDataLoader(city_id=\"data.sfgov.org\",\n",
|
"loader = OpenCityDataLoader(city_id=\"data.sfgov.org\", dataset_id=dataset, limit=2000)"
|
||||||
" dataset_id=dataset,\n",
|
|
||||||
" limit=2000)"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -33,9 +33,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"loader = UnstructuredOrgModeLoader(\n",
|
"loader = UnstructuredOrgModeLoader(file_path=\"example_data/README.org\", mode=\"elements\")\n",
|
||||||
" file_path=\"example_data/README.org\", mode=\"elements\"\n",
|
|
||||||
")\n",
|
|
||||||
"docs = loader.load()"
|
"docs = loader.load()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -239,7 +239,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# Use lazy load for larger table, which won't read the full table into memory \n",
|
"# Use lazy load for larger table, which won't read the full table into memory\n",
|
||||||
"for i in loader.lazy_load():\n",
|
"for i in loader.lazy_load():\n",
|
||||||
" print(i)"
|
" print(i)"
|
||||||
]
|
]
|
||||||
|
@ -49,9 +49,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"url = 'https://js.langchain.com/docs/modules/memory/examples/'\n",
|
"url = \"https://js.langchain.com/docs/modules/memory/examples/\"\n",
|
||||||
"loader=RecursiveUrlLoader(url=url)\n",
|
"loader = RecursiveUrlLoader(url=url)\n",
|
||||||
"docs=loader.load()"
|
"docs = loader.load()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -138,10 +138,10 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"url = 'https://js.langchain.com/docs/'\n",
|
"url = \"https://js.langchain.com/docs/\"\n",
|
||||||
"exclude_dirs=['https://js.langchain.com/docs/api/']\n",
|
"exclude_dirs = [\"https://js.langchain.com/docs/api/\"]\n",
|
||||||
"loader=RecursiveUrlLoader(url=url,exclude_dirs=exclude_dirs)\n",
|
"loader = RecursiveUrlLoader(url=url, exclude_dirs=exclude_dirs)\n",
|
||||||
"docs=loader.load()"
|
"docs = loader.load()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -33,9 +33,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"loader = UnstructuredRSTLoader(\n",
|
"loader = UnstructuredRSTLoader(file_path=\"example_data/README.rst\", mode=\"elements\")\n",
|
||||||
" file_path=\"example_data/README.rst\", mode=\"elements\"\n",
|
|
||||||
")\n",
|
|
||||||
"docs = loader.load()"
|
"docs = loader.load()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -30,7 +30,8 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import warnings\n",
|
"import warnings\n",
|
||||||
"warnings.filterwarnings('ignore')\n",
|
"\n",
|
||||||
|
"warnings.filterwarnings(\"ignore\")\n",
|
||||||
"from pprint import pprint\n",
|
"from pprint import pprint\n",
|
||||||
"from langchain.text_splitter import Language\n",
|
"from langchain.text_splitter import Language\n",
|
||||||
"from langchain.document_loaders.generic import GenericLoader\n",
|
"from langchain.document_loaders.generic import GenericLoader\n",
|
||||||
@ -48,7 +49,7 @@
|
|||||||
" \"./example_data/source_code\",\n",
|
" \"./example_data/source_code\",\n",
|
||||||
" glob=\"*\",\n",
|
" glob=\"*\",\n",
|
||||||
" suffixes=[\".py\", \".js\"],\n",
|
" suffixes=[\".py\", \".js\"],\n",
|
||||||
" parser=LanguageParser()\n",
|
" parser=LanguageParser(),\n",
|
||||||
")\n",
|
")\n",
|
||||||
"docs = loader.load()"
|
"docs = loader.load()"
|
||||||
]
|
]
|
||||||
@ -200,7 +201,7 @@
|
|||||||
" \"./example_data/source_code\",\n",
|
" \"./example_data/source_code\",\n",
|
||||||
" glob=\"*\",\n",
|
" glob=\"*\",\n",
|
||||||
" suffixes=[\".py\"],\n",
|
" suffixes=[\".py\"],\n",
|
||||||
" parser=LanguageParser(language=Language.PYTHON, parser_threshold=1000)\n",
|
" parser=LanguageParser(language=Language.PYTHON, parser_threshold=1000),\n",
|
||||||
")\n",
|
")\n",
|
||||||
"docs = loader.load()"
|
"docs = loader.load()"
|
||||||
]
|
]
|
||||||
@ -281,7 +282,7 @@
|
|||||||
" \"./example_data/source_code\",\n",
|
" \"./example_data/source_code\",\n",
|
||||||
" glob=\"*\",\n",
|
" glob=\"*\",\n",
|
||||||
" suffixes=[\".js\"],\n",
|
" suffixes=[\".js\"],\n",
|
||||||
" parser=LanguageParser(language=Language.JS)\n",
|
" parser=LanguageParser(language=Language.JS),\n",
|
||||||
")\n",
|
")\n",
|
||||||
"docs = loader.load()"
|
"docs = loader.load()"
|
||||||
]
|
]
|
||||||
|
@ -46,7 +46,7 @@
|
|||||||
" Region=\"your cos region\",\n",
|
" Region=\"your cos region\",\n",
|
||||||
" SecretId=\"your cos secret_id\",\n",
|
" SecretId=\"your cos secret_id\",\n",
|
||||||
" SecretKey=\"your cos secret_key\",\n",
|
" SecretKey=\"your cos secret_key\",\n",
|
||||||
" )\n",
|
")\n",
|
||||||
"loader = TencentCOSDirectoryLoader(conf=conf, bucket=\"you_cos_bucket\")"
|
"loader = TencentCOSDirectoryLoader(conf=conf, bucket=\"you_cos_bucket\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -46,7 +46,7 @@
|
|||||||
" Region=\"your cos region\",\n",
|
" Region=\"your cos region\",\n",
|
||||||
" SecretId=\"your cos secret_id\",\n",
|
" SecretId=\"your cos secret_id\",\n",
|
||||||
" SecretKey=\"your cos secret_key\",\n",
|
" SecretKey=\"your cos secret_key\",\n",
|
||||||
" )\n",
|
")\n",
|
||||||
"loader = TencentCOSFileLoader(conf=conf, bucket=\"you_cos_bucket\", key=\"fake.docx\")"
|
"loader = TencentCOSFileLoader(conf=conf, bucket=\"you_cos_bucket\", key=\"fake.docx\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
File diff suppressed because one or more lines are too long
@ -155,9 +155,12 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"# Char-level splits\n",
|
"# Char-level splits\n",
|
||||||
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
||||||
|
"\n",
|
||||||
"chunk_size = 250\n",
|
"chunk_size = 250\n",
|
||||||
"chunk_overlap = 30\n",
|
"chunk_overlap = 30\n",
|
||||||
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)\n",
|
"text_splitter = RecursiveCharacterTextSplitter(\n",
|
||||||
|
" chunk_size=chunk_size, chunk_overlap=chunk_overlap\n",
|
||||||
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Split\n",
|
"# Split\n",
|
||||||
"splits = text_splitter.split_documents(md_header_splits)\n",
|
"splits = text_splitter.split_documents(md_header_splits)\n",
|
||||||
|
@ -28,14 +28,14 @@
|
|||||||
"# Load blog post\n",
|
"# Load blog post\n",
|
||||||
"loader = WebBaseLoader(\"https://lilianweng.github.io/posts/2023-06-23-agent/\")\n",
|
"loader = WebBaseLoader(\"https://lilianweng.github.io/posts/2023-06-23-agent/\")\n",
|
||||||
"data = loader.load()\n",
|
"data = loader.load()\n",
|
||||||
" \n",
|
"\n",
|
||||||
"# Split\n",
|
"# Split\n",
|
||||||
"text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 0)\n",
|
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
|
||||||
"splits = text_splitter.split_documents(data)\n",
|
"splits = text_splitter.split_documents(data)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# VectorDB\n",
|
"# VectorDB\n",
|
||||||
"embedding = OpenAIEmbeddings()\n",
|
"embedding = OpenAIEmbeddings()\n",
|
||||||
"vectordb = Chroma.from_documents(documents=splits,embedding=embedding)"
|
"vectordb = Chroma.from_documents(documents=splits, embedding=embedding)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -57,9 +57,12 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from langchain.chat_models import ChatOpenAI\n",
|
"from langchain.chat_models import ChatOpenAI\n",
|
||||||
"from langchain.retrievers.multi_query import MultiQueryRetriever\n",
|
"from langchain.retrievers.multi_query import MultiQueryRetriever\n",
|
||||||
"question=\"What are the approaches to Task Decomposition?\"\n",
|
"\n",
|
||||||
|
"question = \"What are the approaches to Task Decomposition?\"\n",
|
||||||
"llm = ChatOpenAI(temperature=0)\n",
|
"llm = ChatOpenAI(temperature=0)\n",
|
||||||
"retriever_from_llm = MultiQueryRetriever.from_llm(retriever=vectordb.as_retriever(),llm=llm)"
|
"retriever_from_llm = MultiQueryRetriever.from_llm(\n",
|
||||||
|
" retriever=vectordb.as_retriever(), llm=llm\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -71,8 +74,9 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# Set logging for the queries\n",
|
"# Set logging for the queries\n",
|
||||||
"import logging\n",
|
"import logging\n",
|
||||||
|
"\n",
|
||||||
"logging.basicConfig()\n",
|
"logging.basicConfig()\n",
|
||||||
"logging.getLogger('langchain.retrievers.multi_query').setLevel(logging.INFO)"
|
"logging.getLogger(\"langchain.retrievers.multi_query\").setLevel(logging.INFO)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -127,20 +131,24 @@
|
|||||||
"from langchain.prompts import PromptTemplate\n",
|
"from langchain.prompts import PromptTemplate\n",
|
||||||
"from langchain.output_parsers import PydanticOutputParser\n",
|
"from langchain.output_parsers import PydanticOutputParser\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"\n",
|
||||||
"# Output parser will split the LLM result into a list of queries\n",
|
"# Output parser will split the LLM result into a list of queries\n",
|
||||||
"class LineList(BaseModel):\n",
|
"class LineList(BaseModel):\n",
|
||||||
" # \"lines\" is the key (attribute name) of the parsed output\n",
|
" # \"lines\" is the key (attribute name) of the parsed output\n",
|
||||||
" lines: List[str] = Field(description=\"Lines of text\")\n",
|
" lines: List[str] = Field(description=\"Lines of text\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"\n",
|
||||||
"class LineListOutputParser(PydanticOutputParser):\n",
|
"class LineListOutputParser(PydanticOutputParser):\n",
|
||||||
" def __init__(self) -> None:\n",
|
" def __init__(self) -> None:\n",
|
||||||
" super().__init__(pydantic_object=LineList)\n",
|
" super().__init__(pydantic_object=LineList)\n",
|
||||||
|
"\n",
|
||||||
" def parse(self, text: str) -> LineList:\n",
|
" def parse(self, text: str) -> LineList:\n",
|
||||||
" lines = text.strip().split(\"\\n\")\n",
|
" lines = text.strip().split(\"\\n\")\n",
|
||||||
" return LineList(lines=lines)\n",
|
" return LineList(lines=lines)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"\n",
|
||||||
"output_parser = LineListOutputParser()\n",
|
"output_parser = LineListOutputParser()\n",
|
||||||
" \n",
|
"\n",
|
||||||
"QUERY_PROMPT = PromptTemplate(\n",
|
"QUERY_PROMPT = PromptTemplate(\n",
|
||||||
" input_variables=[\"question\"],\n",
|
" input_variables=[\"question\"],\n",
|
||||||
" template=\"\"\"You are an AI language model assistant. Your task is to generate five \n",
|
" template=\"\"\"You are an AI language model assistant. Your task is to generate five \n",
|
||||||
@ -153,10 +161,10 @@
|
|||||||
"llm = ChatOpenAI(temperature=0)\n",
|
"llm = ChatOpenAI(temperature=0)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Chain\n",
|
"# Chain\n",
|
||||||
"llm_chain = LLMChain(llm=llm,prompt=QUERY_PROMPT,output_parser=output_parser)\n",
|
"llm_chain = LLMChain(llm=llm, prompt=QUERY_PROMPT, output_parser=output_parser)\n",
|
||||||
" \n",
|
"\n",
|
||||||
"# Other inputs\n",
|
"# Other inputs\n",
|
||||||
"question=\"What are the approaches to Task Decomposition?\""
|
"question = \"What are the approaches to Task Decomposition?\""
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -185,12 +193,14 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# Run\n",
|
"# Run\n",
|
||||||
"retriever = MultiQueryRetriever(retriever=vectordb.as_retriever(), \n",
|
"retriever = MultiQueryRetriever(\n",
|
||||||
" llm_chain=llm_chain,\n",
|
" retriever=vectordb.as_retriever(), llm_chain=llm_chain, parser_key=\"lines\"\n",
|
||||||
" parser_key=\"lines\") # \"lines\" is the key (attribute name) of the parsed output\n",
|
") # \"lines\" is the key (attribute name) of the parsed output\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Results\n",
|
"# Results\n",
|
||||||
"unique_docs = retriever.get_relevant_documents(query=\"What does the course say about regression?\")\n",
|
"unique_docs = retriever.get_relevant_documents(\n",
|
||||||
|
" query=\"What does the course say about regression?\"\n",
|
||||||
|
")\n",
|
||||||
"len(unique_docs)"
|
"len(unique_docs)"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
@ -59,11 +59,11 @@
|
|||||||
"import os\n",
|
"import os\n",
|
||||||
"import getpass\n",
|
"import getpass\n",
|
||||||
"\n",
|
"\n",
|
||||||
"os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')\n",
|
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
|
||||||
"os.environ['MYSCALE_HOST'] = getpass.getpass('MyScale URL:')\n",
|
"os.environ[\"MYSCALE_HOST\"] = getpass.getpass(\"MyScale URL:\")\n",
|
||||||
"os.environ['MYSCALE_PORT'] = getpass.getpass('MyScale Port:')\n",
|
"os.environ[\"MYSCALE_PORT\"] = getpass.getpass(\"MyScale Port:\")\n",
|
||||||
"os.environ['MYSCALE_USERNAME'] = getpass.getpass('MyScale Username:')\n",
|
"os.environ[\"MYSCALE_USERNAME\"] = getpass.getpass(\"MyScale Username:\")\n",
|
||||||
"os.environ['MYSCALE_PASSWORD'] = getpass.getpass('MyScale Password:')"
|
"os.environ[\"MYSCALE_PASSWORD\"] = getpass.getpass(\"MyScale Password:\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -103,16 +103,40 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"docs = [\n",
|
"docs = [\n",
|
||||||
" Document(page_content=\"A bunch of scientists bring back dinosaurs and mayhem breaks loose\", metadata={\"date\": \"1993-07-02\", \"rating\": 7.7, \"genre\": [\"science fiction\"]}),\n",
|
" Document(\n",
|
||||||
" Document(page_content=\"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\", metadata={\"date\": \"2010-12-30\", \"director\": \"Christopher Nolan\", \"rating\": 8.2}),\n",
|
" page_content=\"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n",
|
||||||
" Document(page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\", metadata={\"date\": \"2006-04-23\", \"director\": \"Satoshi Kon\", \"rating\": 8.6}),\n",
|
" metadata={\"date\": \"1993-07-02\", \"rating\": 7.7, \"genre\": [\"science fiction\"]},\n",
|
||||||
" Document(page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\", metadata={\"date\": \"2019-08-22\", \"director\": \"Greta Gerwig\", \"rating\": 8.3}),\n",
|
" ),\n",
|
||||||
" Document(page_content=\"Toys come alive and have a blast doing so\", metadata={\"date\": \"1995-02-11\", \"genre\": [\"animated\"]}),\n",
|
" Document(\n",
|
||||||
" Document(page_content=\"Three men walk into the Zone, three men walk out of the Zone\", metadata={\"date\": \"1979-09-10\", \"rating\": 9.9, \"director\": \"Andrei Tarkovsky\", \"genre\": [\"science fiction\", \"adventure\"], \"rating\": 9.9})\n",
|
" page_content=\"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n",
|
||||||
|
" metadata={\"date\": \"2010-12-30\", \"director\": \"Christopher Nolan\", \"rating\": 8.2},\n",
|
||||||
|
" ),\n",
|
||||||
|
" Document(\n",
|
||||||
|
" page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n",
|
||||||
|
" metadata={\"date\": \"2006-04-23\", \"director\": \"Satoshi Kon\", \"rating\": 8.6},\n",
|
||||||
|
" ),\n",
|
||||||
|
" Document(\n",
|
||||||
|
" page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n",
|
||||||
|
" metadata={\"date\": \"2019-08-22\", \"director\": \"Greta Gerwig\", \"rating\": 8.3},\n",
|
||||||
|
" ),\n",
|
||||||
|
" Document(\n",
|
||||||
|
" page_content=\"Toys come alive and have a blast doing so\",\n",
|
||||||
|
" metadata={\"date\": \"1995-02-11\", \"genre\": [\"animated\"]},\n",
|
||||||
|
" ),\n",
|
||||||
|
" Document(\n",
|
||||||
|
" page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n",
|
||||||
|
" metadata={\n",
|
||||||
|
" \"date\": \"1979-09-10\",\n",
|
||||||
|
" \"rating\": 9.9,\n",
|
||||||
|
" \"director\": \"Andrei Tarkovsky\",\n",
|
||||||
|
" \"genre\": [\"science fiction\", \"adventure\"],\n",
|
||||||
|
" \"rating\": 9.9,\n",
|
||||||
|
" },\n",
|
||||||
|
" ),\n",
|
||||||
"]\n",
|
"]\n",
|
||||||
"vectorstore = MyScale.from_documents(\n",
|
"vectorstore = MyScale.from_documents(\n",
|
||||||
" docs, \n",
|
" docs,\n",
|
||||||
" embeddings, \n",
|
" embeddings,\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -138,39 +162,39 @@
|
|||||||
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
|
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
|
||||||
"from langchain.chains.query_constructor.base import AttributeInfo\n",
|
"from langchain.chains.query_constructor.base import AttributeInfo\n",
|
||||||
"\n",
|
"\n",
|
||||||
"metadata_field_info=[\n",
|
"metadata_field_info = [\n",
|
||||||
" AttributeInfo(\n",
|
" AttributeInfo(\n",
|
||||||
" name=\"genre\",\n",
|
" name=\"genre\",\n",
|
||||||
" description=\"The genres of the movie\", \n",
|
" description=\"The genres of the movie\",\n",
|
||||||
" type=\"list[string]\", \n",
|
" type=\"list[string]\",\n",
|
||||||
" ),\n",
|
" ),\n",
|
||||||
" # If you want to include length of a list, just define it as a new column\n",
|
" # If you want to include length of a list, just define it as a new column\n",
|
||||||
" # This will teach the LLM to use it as a column when constructing filter.\n",
|
" # This will teach the LLM to use it as a column when constructing filter.\n",
|
||||||
" AttributeInfo(\n",
|
" AttributeInfo(\n",
|
||||||
" name=\"length(genre)\",\n",
|
" name=\"length(genre)\",\n",
|
||||||
" description=\"The length of genres of the movie\", \n",
|
" description=\"The length of genres of the movie\",\n",
|
||||||
" type=\"integer\", \n",
|
" type=\"integer\",\n",
|
||||||
" ),\n",
|
" ),\n",
|
||||||
" # Now you can define a column as timestamp. By simply set the type to timestamp.\n",
|
" # Now you can define a column as timestamp. By simply set the type to timestamp.\n",
|
||||||
" AttributeInfo(\n",
|
" AttributeInfo(\n",
|
||||||
" name=\"date\",\n",
|
" name=\"date\",\n",
|
||||||
" description=\"The date the movie was released\", \n",
|
" description=\"The date the movie was released\",\n",
|
||||||
" type=\"timestamp\", \n",
|
" type=\"timestamp\",\n",
|
||||||
" ),\n",
|
" ),\n",
|
||||||
" AttributeInfo(\n",
|
" AttributeInfo(\n",
|
||||||
" name=\"director\",\n",
|
" name=\"director\",\n",
|
||||||
" description=\"The name of the movie director\", \n",
|
" description=\"The name of the movie director\",\n",
|
||||||
" type=\"string\", \n",
|
" type=\"string\",\n",
|
||||||
" ),\n",
|
" ),\n",
|
||||||
" AttributeInfo(\n",
|
" AttributeInfo(\n",
|
||||||
" name=\"rating\",\n",
|
" name=\"rating\", description=\"A 1-10 rating for the movie\", type=\"float\"\n",
|
||||||
" description=\"A 1-10 rating for the movie\",\n",
|
|
||||||
" type=\"float\"\n",
|
|
||||||
" ),\n",
|
" ),\n",
|
||||||
"]\n",
|
"]\n",
|
||||||
"document_content_description = \"Brief summary of a movie\"\n",
|
"document_content_description = \"Brief summary of a movie\"\n",
|
||||||
"llm = OpenAI(temperature=0)\n",
|
"llm = OpenAI(temperature=0)\n",
|
||||||
"retriever = SelfQueryRetriever.from_llm(llm, vectorstore, document_content_description, metadata_field_info, verbose=True)"
|
"retriever = SelfQueryRetriever.from_llm(\n",
|
||||||
|
" llm, vectorstore, document_content_description, metadata_field_info, verbose=True\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -225,7 +249,9 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# This example specifies a composite filter\n",
|
"# This example specifies a composite filter\n",
|
||||||
"retriever.get_relevant_documents(\"What's a highly rated (above 8.5) science fiction film?\")"
|
"retriever.get_relevant_documents(\n",
|
||||||
|
" \"What's a highly rated (above 8.5) science fiction film?\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -236,7 +262,9 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# This example specifies a query and composite filter\n",
|
"# This example specifies a query and composite filter\n",
|
||||||
"retriever.get_relevant_documents(\"What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated\")"
|
"retriever.get_relevant_documents(\n",
|
||||||
|
" \"What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -290,7 +318,9 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Contain works for lists: so you can match a list with contain comparator!\n",
|
"# Contain works for lists: so you can match a list with contain comparator!\n",
|
||||||
"retriever.get_relevant_documents(\"What's a movie who has genres science fiction and adventure?\")"
|
"retriever.get_relevant_documents(\n",
|
||||||
|
" \"What's a movie who has genres science fiction and adventure?\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -315,12 +345,12 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"retriever = SelfQueryRetriever.from_llm(\n",
|
"retriever = SelfQueryRetriever.from_llm(\n",
|
||||||
" llm, \n",
|
" llm,\n",
|
||||||
" vectorstore, \n",
|
" vectorstore,\n",
|
||||||
" document_content_description, \n",
|
" document_content_description,\n",
|
||||||
" metadata_field_info, \n",
|
" metadata_field_info,\n",
|
||||||
" enable_limit=True,\n",
|
" enable_limit=True,\n",
|
||||||
" verbose=True\n",
|
" verbose=True,\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -53,9 +53,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"retriever = AmazonKendraRetriever(\n",
|
"retriever = AmazonKendraRetriever(index_id=\"c0806df7-e76b-4bce-9b5c-d5582f6b1a03\")"
|
||||||
" index_id=\"c0806df7-e76b-4bce-9b5c-d5582f6b1a03\"\n",
|
|
||||||
")"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -111,10 +111,10 @@
|
|||||||
"db.index(\n",
|
"db.index(\n",
|
||||||
" [\n",
|
" [\n",
|
||||||
" MyDoc(\n",
|
" MyDoc(\n",
|
||||||
" title=f'My document {i}',\n",
|
" title=f\"My document {i}\",\n",
|
||||||
" title_embedding=embeddings.embed_query(f'query {i}'),\n",
|
" title_embedding=embeddings.embed_query(f\"query {i}\"),\n",
|
||||||
" year=i,\n",
|
" year=i,\n",
|
||||||
" color=random.choice(['red', 'green', 'blue']),\n",
|
" color=random.choice([\"red\", \"green\", \"blue\"]),\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
" for i in range(100)\n",
|
" for i in range(100)\n",
|
||||||
" ]\n",
|
" ]\n",
|
||||||
@ -142,15 +142,15 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# create a retriever\n",
|
"# create a retriever\n",
|
||||||
"retriever = DocArrayRetriever(\n",
|
"retriever = DocArrayRetriever(\n",
|
||||||
" index=db, \n",
|
" index=db,\n",
|
||||||
" embeddings=embeddings, \n",
|
" embeddings=embeddings,\n",
|
||||||
" search_field='title_embedding', \n",
|
" search_field=\"title_embedding\",\n",
|
||||||
" content_field='title',\n",
|
" content_field=\"title\",\n",
|
||||||
" filters=filter_query,\n",
|
" filters=filter_query,\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# find the relevant document\n",
|
"# find the relevant document\n",
|
||||||
"doc = retriever.get_relevant_documents('some query')\n",
|
"doc = retriever.get_relevant_documents(\"some query\")\n",
|
||||||
"print(doc)"
|
"print(doc)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -179,16 +179,16 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# initialize the index\n",
|
"# initialize the index\n",
|
||||||
"db = HnswDocumentIndex[MyDoc](work_dir='hnsw_index')\n",
|
"db = HnswDocumentIndex[MyDoc](work_dir=\"hnsw_index\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# index data\n",
|
"# index data\n",
|
||||||
"db.index(\n",
|
"db.index(\n",
|
||||||
" [\n",
|
" [\n",
|
||||||
" MyDoc(\n",
|
" MyDoc(\n",
|
||||||
" title=f'My document {i}',\n",
|
" title=f\"My document {i}\",\n",
|
||||||
" title_embedding=embeddings.embed_query(f'query {i}'),\n",
|
" title_embedding=embeddings.embed_query(f\"query {i}\"),\n",
|
||||||
" year=i,\n",
|
" year=i,\n",
|
||||||
" color=random.choice(['red', 'green', 'blue']),\n",
|
" color=random.choice([\"red\", \"green\", \"blue\"]),\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
" for i in range(100)\n",
|
" for i in range(100)\n",
|
||||||
" ]\n",
|
" ]\n",
|
||||||
@ -216,15 +216,15 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# create a retriever\n",
|
"# create a retriever\n",
|
||||||
"retriever = DocArrayRetriever(\n",
|
"retriever = DocArrayRetriever(\n",
|
||||||
" index=db, \n",
|
" index=db,\n",
|
||||||
" embeddings=embeddings, \n",
|
" embeddings=embeddings,\n",
|
||||||
" search_field='title_embedding', \n",
|
" search_field=\"title_embedding\",\n",
|
||||||
" content_field='title',\n",
|
" content_field=\"title\",\n",
|
||||||
" filters=filter_query,\n",
|
" filters=filter_query,\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# find the relevant document\n",
|
"# find the relevant document\n",
|
||||||
"doc = retriever.get_relevant_documents('some query')\n",
|
"doc = retriever.get_relevant_documents(\"some query\")\n",
|
||||||
"print(doc)"
|
"print(doc)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -249,11 +249,12 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# There's a small difference with the Weaviate backend compared to the others. \n",
|
"# There's a small difference with the Weaviate backend compared to the others.\n",
|
||||||
"# Here, you need to 'mark' the field used for vector search with 'is_embedding=True'. \n",
|
"# Here, you need to 'mark' the field used for vector search with 'is_embedding=True'.\n",
|
||||||
"# So, let's create a new schema for Weaviate that takes care of this requirement.\n",
|
"# So, let's create a new schema for Weaviate that takes care of this requirement.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from pydantic import Field \n",
|
"from pydantic import Field\n",
|
||||||
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"class WeaviateDoc(BaseDoc):\n",
|
"class WeaviateDoc(BaseDoc):\n",
|
||||||
" title: str\n",
|
" title: str\n",
|
||||||
@ -275,19 +276,17 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# initialize the index\n",
|
"# initialize the index\n",
|
||||||
"dbconfig = WeaviateDocumentIndex.DBConfig(\n",
|
"dbconfig = WeaviateDocumentIndex.DBConfig(host=\"http://localhost:8080\")\n",
|
||||||
" host=\"http://localhost:8080\"\n",
|
|
||||||
")\n",
|
|
||||||
"db = WeaviateDocumentIndex[WeaviateDoc](db_config=dbconfig)\n",
|
"db = WeaviateDocumentIndex[WeaviateDoc](db_config=dbconfig)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# index data\n",
|
"# index data\n",
|
||||||
"db.index(\n",
|
"db.index(\n",
|
||||||
" [\n",
|
" [\n",
|
||||||
" MyDoc(\n",
|
" MyDoc(\n",
|
||||||
" title=f'My document {i}',\n",
|
" title=f\"My document {i}\",\n",
|
||||||
" title_embedding=embeddings.embed_query(f'query {i}'),\n",
|
" title_embedding=embeddings.embed_query(f\"query {i}\"),\n",
|
||||||
" year=i,\n",
|
" year=i,\n",
|
||||||
" color=random.choice(['red', 'green', 'blue']),\n",
|
" color=random.choice([\"red\", \"green\", \"blue\"]),\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
" for i in range(100)\n",
|
" for i in range(100)\n",
|
||||||
" ]\n",
|
" ]\n",
|
||||||
@ -315,15 +314,15 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# create a retriever\n",
|
"# create a retriever\n",
|
||||||
"retriever = DocArrayRetriever(\n",
|
"retriever = DocArrayRetriever(\n",
|
||||||
" index=db, \n",
|
" index=db,\n",
|
||||||
" embeddings=embeddings, \n",
|
" embeddings=embeddings,\n",
|
||||||
" search_field='title_embedding', \n",
|
" search_field=\"title_embedding\",\n",
|
||||||
" content_field='title',\n",
|
" content_field=\"title\",\n",
|
||||||
" filters=filter_query,\n",
|
" filters=filter_query,\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# find the relevant document\n",
|
"# find the relevant document\n",
|
||||||
"doc = retriever.get_relevant_documents('some query')\n",
|
"doc = retriever.get_relevant_documents(\"some query\")\n",
|
||||||
"print(doc)"
|
"print(doc)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -353,18 +352,17 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"# initialize the index\n",
|
"# initialize the index\n",
|
||||||
"db = ElasticDocIndex[MyDoc](\n",
|
"db = ElasticDocIndex[MyDoc](\n",
|
||||||
" hosts=\"http://localhost:9200\", \n",
|
" hosts=\"http://localhost:9200\", index_name=\"docarray_retriever\"\n",
|
||||||
" index_name=\"docarray_retriever\"\n",
|
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# index data\n",
|
"# index data\n",
|
||||||
"db.index(\n",
|
"db.index(\n",
|
||||||
" [\n",
|
" [\n",
|
||||||
" MyDoc(\n",
|
" MyDoc(\n",
|
||||||
" title=f'My document {i}',\n",
|
" title=f\"My document {i}\",\n",
|
||||||
" title_embedding=embeddings.embed_query(f'query {i}'),\n",
|
" title_embedding=embeddings.embed_query(f\"query {i}\"),\n",
|
||||||
" year=i,\n",
|
" year=i,\n",
|
||||||
" color=random.choice(['red', 'green', 'blue']),\n",
|
" color=random.choice([\"red\", \"green\", \"blue\"]),\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
" for i in range(100)\n",
|
" for i in range(100)\n",
|
||||||
" ]\n",
|
" ]\n",
|
||||||
@ -392,15 +390,15 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# create a retriever\n",
|
"# create a retriever\n",
|
||||||
"retriever = DocArrayRetriever(\n",
|
"retriever = DocArrayRetriever(\n",
|
||||||
" index=db, \n",
|
" index=db,\n",
|
||||||
" embeddings=embeddings, \n",
|
" embeddings=embeddings,\n",
|
||||||
" search_field='title_embedding', \n",
|
" search_field=\"title_embedding\",\n",
|
||||||
" content_field='title',\n",
|
" content_field=\"title\",\n",
|
||||||
" filters=filter_query,\n",
|
" filters=filter_query,\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# find the relevant document\n",
|
"# find the relevant document\n",
|
||||||
"doc = retriever.get_relevant_documents('some query')\n",
|
"doc = retriever.get_relevant_documents(\"some query\")\n",
|
||||||
"print(doc)"
|
"print(doc)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -445,10 +443,10 @@
|
|||||||
"db.index(\n",
|
"db.index(\n",
|
||||||
" [\n",
|
" [\n",
|
||||||
" MyDoc(\n",
|
" MyDoc(\n",
|
||||||
" title=f'My document {i}',\n",
|
" title=f\"My document {i}\",\n",
|
||||||
" title_embedding=embeddings.embed_query(f'query {i}'),\n",
|
" title_embedding=embeddings.embed_query(f\"query {i}\"),\n",
|
||||||
" year=i,\n",
|
" year=i,\n",
|
||||||
" color=random.choice(['red', 'green', 'blue']),\n",
|
" color=random.choice([\"red\", \"green\", \"blue\"]),\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
" for i in range(100)\n",
|
" for i in range(100)\n",
|
||||||
" ]\n",
|
" ]\n",
|
||||||
@ -486,15 +484,15 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# create a retriever\n",
|
"# create a retriever\n",
|
||||||
"retriever = DocArrayRetriever(\n",
|
"retriever = DocArrayRetriever(\n",
|
||||||
" index=db, \n",
|
" index=db,\n",
|
||||||
" embeddings=embeddings, \n",
|
" embeddings=embeddings,\n",
|
||||||
" search_field='title_embedding', \n",
|
" search_field=\"title_embedding\",\n",
|
||||||
" content_field='title',\n",
|
" content_field=\"title\",\n",
|
||||||
" filters=filter_query,\n",
|
" filters=filter_query,\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# find the relevant document\n",
|
"# find the relevant document\n",
|
||||||
"doc = retriever.get_relevant_documents('some query')\n",
|
"doc = retriever.get_relevant_documents(\"some query\")\n",
|
||||||
"print(doc)"
|
"print(doc)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -552,7 +550,7 @@
|
|||||||
" \"director\": \"Francis Ford Coppola\",\n",
|
" \"director\": \"Francis Ford Coppola\",\n",
|
||||||
" \"rating\": 9.2,\n",
|
" \"rating\": 9.2,\n",
|
||||||
" },\n",
|
" },\n",
|
||||||
"]\n"
|
"]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -573,9 +571,9 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"import getpass\n",
|
"import getpass\n",
|
||||||
"import os \n",
|
"import os\n",
|
||||||
"\n",
|
"\n",
|
||||||
"os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')"
|
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -591,6 +589,7 @@
|
|||||||
"from docarray.typing import NdArray\n",
|
"from docarray.typing import NdArray\n",
|
||||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"\n",
|
||||||
"# define schema for your movie documents\n",
|
"# define schema for your movie documents\n",
|
||||||
"class MyDoc(BaseDoc):\n",
|
"class MyDoc(BaseDoc):\n",
|
||||||
" title: str\n",
|
" title: str\n",
|
||||||
@ -598,7 +597,7 @@
|
|||||||
" description_embedding: NdArray[1536]\n",
|
" description_embedding: NdArray[1536]\n",
|
||||||
" rating: float\n",
|
" rating: float\n",
|
||||||
" director: str\n",
|
" director: str\n",
|
||||||
" \n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"embeddings = OpenAIEmbeddings()\n",
|
"embeddings = OpenAIEmbeddings()\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -626,7 +625,7 @@
|
|||||||
"from docarray.index import HnswDocumentIndex\n",
|
"from docarray.index import HnswDocumentIndex\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# initialize the index\n",
|
"# initialize the index\n",
|
||||||
"db = HnswDocumentIndex[MyDoc](work_dir='movie_search')\n",
|
"db = HnswDocumentIndex[MyDoc](work_dir=\"movie_search\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# add data\n",
|
"# add data\n",
|
||||||
"db.index(docs)"
|
"db.index(docs)"
|
||||||
@ -663,14 +662,14 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"# create a retriever\n",
|
"# create a retriever\n",
|
||||||
"retriever = DocArrayRetriever(\n",
|
"retriever = DocArrayRetriever(\n",
|
||||||
" index=db, \n",
|
" index=db,\n",
|
||||||
" embeddings=embeddings, \n",
|
" embeddings=embeddings,\n",
|
||||||
" search_field='description_embedding', \n",
|
" search_field=\"description_embedding\",\n",
|
||||||
" content_field='description'\n",
|
" content_field=\"description\",\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# find the relevant document\n",
|
"# find the relevant document\n",
|
||||||
"doc = retriever.get_relevant_documents('movie about dreams')\n",
|
"doc = retriever.get_relevant_documents(\"movie about dreams\")\n",
|
||||||
"print(doc)"
|
"print(doc)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -703,16 +702,16 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"# create a retriever\n",
|
"# create a retriever\n",
|
||||||
"retriever = DocArrayRetriever(\n",
|
"retriever = DocArrayRetriever(\n",
|
||||||
" index=db, \n",
|
" index=db,\n",
|
||||||
" embeddings=embeddings, \n",
|
" embeddings=embeddings,\n",
|
||||||
" search_field='description_embedding', \n",
|
" search_field=\"description_embedding\",\n",
|
||||||
" content_field='description',\n",
|
" content_field=\"description\",\n",
|
||||||
" filters={'director': {'$eq': 'Christopher Nolan'}},\n",
|
" filters={\"director\": {\"$eq\": \"Christopher Nolan\"}},\n",
|
||||||
" top_k=2,\n",
|
" top_k=2,\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# find relevant documents\n",
|
"# find relevant documents\n",
|
||||||
"docs = retriever.get_relevant_documents('space travel')\n",
|
"docs = retriever.get_relevant_documents(\"space travel\")\n",
|
||||||
"print(docs)"
|
"print(docs)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -745,17 +744,17 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"# create a retriever\n",
|
"# create a retriever\n",
|
||||||
"retriever = DocArrayRetriever(\n",
|
"retriever = DocArrayRetriever(\n",
|
||||||
" index=db, \n",
|
" index=db,\n",
|
||||||
" embeddings=embeddings, \n",
|
" embeddings=embeddings,\n",
|
||||||
" search_field='description_embedding', \n",
|
" search_field=\"description_embedding\",\n",
|
||||||
" content_field='description',\n",
|
" content_field=\"description\",\n",
|
||||||
" filters={'rating': {'$gte': 8.7}},\n",
|
" filters={\"rating\": {\"$gte\": 8.7}},\n",
|
||||||
" search_type='mmr',\n",
|
" search_type=\"mmr\",\n",
|
||||||
" top_k=3,\n",
|
" top_k=3,\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# find relevant documents\n",
|
"# find relevant documents\n",
|
||||||
"docs = retriever.get_relevant_documents('action movies')\n",
|
"docs = retriever.get_relevant_documents(\"action movies\")\n",
|
||||||
"print(docs)"
|
"print(docs)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -26,7 +26,10 @@
|
|||||||
"from langchain.vectorstores import Chroma\n",
|
"from langchain.vectorstores import Chroma\n",
|
||||||
"from langchain.embeddings import HuggingFaceEmbeddings\n",
|
"from langchain.embeddings import HuggingFaceEmbeddings\n",
|
||||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||||
"from langchain.document_transformers import EmbeddingsRedundantFilter,EmbeddingsClusteringFilter\n",
|
"from langchain.document_transformers import (\n",
|
||||||
|
" EmbeddingsRedundantFilter,\n",
|
||||||
|
" EmbeddingsClusteringFilter,\n",
|
||||||
|
")\n",
|
||||||
"from langchain.retrievers.document_compressors import DocumentCompressorPipeline\n",
|
"from langchain.retrievers.document_compressors import DocumentCompressorPipeline\n",
|
||||||
"from langchain.retrievers import ContextualCompressionRetriever\n",
|
"from langchain.retrievers import ContextualCompressionRetriever\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -118,7 +121,7 @@
|
|||||||
" embeddings=filter_embeddings,\n",
|
" embeddings=filter_embeddings,\n",
|
||||||
" num_clusters=10,\n",
|
" num_clusters=10,\n",
|
||||||
" num_closest=1,\n",
|
" num_closest=1,\n",
|
||||||
" )\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# If you want the final document to be ordered by the original retriever scores\n",
|
"# If you want the final document to be ordered by the original retriever scores\n",
|
||||||
"# you need to add the \"sorted\" parameter.\n",
|
"# you need to add the \"sorted\" parameter.\n",
|
||||||
@ -126,13 +129,13 @@
|
|||||||
" embeddings=filter_embeddings,\n",
|
" embeddings=filter_embeddings,\n",
|
||||||
" num_clusters=10,\n",
|
" num_clusters=10,\n",
|
||||||
" num_closest=1,\n",
|
" num_closest=1,\n",
|
||||||
" sorted = True,\n",
|
" sorted=True,\n",
|
||||||
" )\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"pipeline = DocumentCompressorPipeline(transformers=[filter_ordered_by_retriever])\n",
|
"pipeline = DocumentCompressorPipeline(transformers=[filter_ordered_by_retriever])\n",
|
||||||
"compression_retriever = ContextualCompressionRetriever(\n",
|
"compression_retriever = ContextualCompressionRetriever(\n",
|
||||||
" base_compressor=pipeline, base_retriever=lotr\n",
|
" base_compressor=pipeline, base_retriever=lotr\n",
|
||||||
")\n"
|
")"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
@ -111,9 +111,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"# Set up Zep Chat History. We'll use this to add chat histories to the memory store\n",
|
"# Set up Zep Chat History. We'll use this to add chat histories to the memory store\n",
|
||||||
"zep_chat_history = ZepChatMessageHistory(\n",
|
"zep_chat_history = ZepChatMessageHistory(\n",
|
||||||
" session_id=session_id,\n",
|
" session_id=session_id, url=ZEP_API_URL, api_key=zep_api_key\n",
|
||||||
" url=ZEP_API_URL,\n",
|
|
||||||
" api_key=zep_api_key\n",
|
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -247,7 +245,7 @@
|
|||||||
" session_id=session_id, # Ensure that you provide the session_id when instantiating the Retriever\n",
|
" session_id=session_id, # Ensure that you provide the session_id when instantiating the Retriever\n",
|
||||||
" url=ZEP_API_URL,\n",
|
" url=ZEP_API_URL,\n",
|
||||||
" top_k=5,\n",
|
" top_k=5,\n",
|
||||||
" api_key=zep_api_key\n",
|
" api_key=zep_api_key,\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"await zep_retriever.aget_relevant_documents(\"Who wrote Parable of the Sower?\")"
|
"await zep_retriever.aget_relevant_documents(\"Who wrote Parable of the Sower?\")"
|
||||||
|
@ -65,7 +65,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# Please login and get your API key from https://clarifai.com/settings/security \n",
|
"# Please login and get your API key from https://clarifai.com/settings/security\n",
|
||||||
"from getpass import getpass\n",
|
"from getpass import getpass\n",
|
||||||
"\n",
|
"\n",
|
||||||
"CLARIFAI_PAT = getpass()"
|
"CLARIFAI_PAT = getpass()"
|
||||||
@ -130,9 +130,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"USER_ID = 'openai'\n",
|
"USER_ID = \"openai\"\n",
|
||||||
"APP_ID = 'embed'\n",
|
"APP_ID = \"embed\"\n",
|
||||||
"MODEL_ID = 'text-embedding-ada'\n",
|
"MODEL_ID = \"text-embedding-ada\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# You can provide a specific model version as the model_version_id arg.\n",
|
"# You can provide a specific model version as the model_version_id arg.\n",
|
||||||
"# MODEL_VERSION_ID = \"MODEL_VERSION_ID\""
|
"# MODEL_VERSION_ID = \"MODEL_VERSION_ID\""
|
||||||
@ -148,7 +148,9 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Initialize a Clarifai embedding model\n",
|
"# Initialize a Clarifai embedding model\n",
|
||||||
"embeddings = ClarifaiEmbeddings(pat=CLARIFAI_PAT, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID)"
|
"embeddings = ClarifaiEmbeddings(\n",
|
||||||
|
" pat=CLARIFAI_PAT, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -24,10 +24,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"\n",
|
"from langchain.embeddings.spacy_embeddings import SpacyEmbeddings"
|
||||||
"from langchain.embeddings.spacy_embeddings import SpacyEmbeddings\n",
|
|
||||||
"\n",
|
|
||||||
"\n"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -44,8 +41,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"\n",
|
"embedder = SpacyEmbeddings()"
|
||||||
"embedder = SpacyEmbeddings()\n"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -62,15 +58,12 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"texts = [\n",
|
"texts = [\n",
|
||||||
" \"The quick brown fox jumps over the lazy dog.\",\n",
|
" \"The quick brown fox jumps over the lazy dog.\",\n",
|
||||||
" \"Pack my box with five dozen liquor jugs.\",\n",
|
" \"Pack my box with five dozen liquor jugs.\",\n",
|
||||||
" \"How vexingly quick daft zebras jump!\",\n",
|
" \"How vexingly quick daft zebras jump!\",\n",
|
||||||
" \"Bright vixens jump; dozy fowl quack.\"\n",
|
" \"Bright vixens jump; dozy fowl quack.\",\n",
|
||||||
"]\n",
|
"]"
|
||||||
"\n"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -87,11 +80,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"\n",
|
|
||||||
"embeddings = embedder.embed_documents(texts)\n",
|
"embeddings = embedder.embed_documents(texts)\n",
|
||||||
"for i, embedding in enumerate(embeddings):\n",
|
"for i, embedding in enumerate(embeddings):\n",
|
||||||
" print(f\"Embedding for document {i+1}: {embedding}\")\n",
|
" print(f\"Embedding for document {i+1}: {embedding}\")"
|
||||||
"\n"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -108,7 +99,6 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"\n",
|
|
||||||
"query = \"Quick foxes and lazy dogs.\"\n",
|
"query = \"Quick foxes and lazy dogs.\"\n",
|
||||||
"query_embedding = embedder.embed_query(query)\n",
|
"query_embedding = embedder.embed_query(query)\n",
|
||||||
"print(f\"Embedding for query: {query_embedding}\")"
|
"print(f\"Embedding for query: {query_embedding}\")"
|
||||||
|
@ -62,7 +62,7 @@
|
|||||||
"import os\n",
|
"import os\n",
|
||||||
"import getpass\n",
|
"import getpass\n",
|
||||||
"\n",
|
"\n",
|
||||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n"
|
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
|
@ -40,7 +40,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
||||||
"documents = loader.load()\n",
|
"documents = loader.load()\n",
|
||||||
"text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)\n",
|
"text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)\n",
|
||||||
"docs = text_splitter.split_documents(documents)"
|
"docs = text_splitter.split_documents(documents)"
|
||||||
@ -154,10 +154,11 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"awadb_client = awadb.Client()\n",
|
"awadb_client = awadb.Client()\n",
|
||||||
"ret = awadb_client.Load('langchain_awadb')\n",
|
"ret = awadb_client.Load(\"langchain_awadb\")\n",
|
||||||
"if ret : print('awadb load table success')\n",
|
"if ret:\n",
|
||||||
|
" print(\"awadb load table success\")\n",
|
||||||
"else:\n",
|
"else:\n",
|
||||||
" print('awadb load table failed')"
|
" print(\"awadb load table failed\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -44,16 +44,18 @@
|
|||||||
"import os\n",
|
"import os\n",
|
||||||
"import getpass\n",
|
"import getpass\n",
|
||||||
"\n",
|
"\n",
|
||||||
"database_mode = (input('\\n(C)assandra or (A)stra DB? ')).upper()\n",
|
"database_mode = (input(\"\\n(C)assandra or (A)stra DB? \")).upper()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"keyspace_name = input('\\nKeyspace name? ')\n",
|
"keyspace_name = input(\"\\nKeyspace name? \")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"if database_mode == 'A':\n",
|
"if database_mode == \"A\":\n",
|
||||||
" ASTRA_DB_APPLICATION_TOKEN = getpass.getpass('\\nAstra DB Token (\"AstraCS:...\") ')\n",
|
" ASTRA_DB_APPLICATION_TOKEN = getpass.getpass('\\nAstra DB Token (\"AstraCS:...\") ')\n",
|
||||||
" #\n",
|
" #\n",
|
||||||
" ASTRA_DB_SECURE_BUNDLE_PATH = input('Full path to your Secure Connect Bundle? ')\n",
|
" ASTRA_DB_SECURE_BUNDLE_PATH = input(\"Full path to your Secure Connect Bundle? \")\n",
|
||||||
"elif database_mode == 'C':\n",
|
"elif database_mode == \"C\":\n",
|
||||||
" CASSANDRA_CONTACT_POINTS = input('Contact points? (comma-separated, empty for localhost) ').strip()"
|
" CASSANDRA_CONTACT_POINTS = input(\n",
|
||||||
|
" \"Contact points? (comma-separated, empty for localhost) \"\n",
|
||||||
|
" ).strip()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -74,17 +76,15 @@
|
|||||||
"from cassandra.cluster import Cluster\n",
|
"from cassandra.cluster import Cluster\n",
|
||||||
"from cassandra.auth import PlainTextAuthProvider\n",
|
"from cassandra.auth import PlainTextAuthProvider\n",
|
||||||
"\n",
|
"\n",
|
||||||
"if database_mode == 'C':\n",
|
"if database_mode == \"C\":\n",
|
||||||
" if CASSANDRA_CONTACT_POINTS:\n",
|
" if CASSANDRA_CONTACT_POINTS:\n",
|
||||||
" cluster = Cluster([\n",
|
" cluster = Cluster(\n",
|
||||||
" cp.strip()\n",
|
" [cp.strip() for cp in CASSANDRA_CONTACT_POINTS.split(\",\") if cp.strip()]\n",
|
||||||
" for cp in CASSANDRA_CONTACT_POINTS.split(',')\n",
|
" )\n",
|
||||||
" if cp.strip()\n",
|
|
||||||
" ])\n",
|
|
||||||
" else:\n",
|
" else:\n",
|
||||||
" cluster = Cluster()\n",
|
" cluster = Cluster()\n",
|
||||||
" session = cluster.connect()\n",
|
" session = cluster.connect()\n",
|
||||||
"elif database_mode == 'A':\n",
|
"elif database_mode == \"A\":\n",
|
||||||
" ASTRA_DB_CLIENT_ID = \"token\"\n",
|
" ASTRA_DB_CLIENT_ID = \"token\"\n",
|
||||||
" cluster = Cluster(\n",
|
" cluster = Cluster(\n",
|
||||||
" cloud={\n",
|
" cloud={\n",
|
||||||
@ -117,7 +117,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')"
|
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -151,7 +151,8 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from langchain.document_loaders import TextLoader\n",
|
"from langchain.document_loaders import TextLoader\n",
|
||||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
"\n",
|
||||||
|
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
||||||
"documents = loader.load()\n",
|
"documents = loader.load()\n",
|
||||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||||
"docs = text_splitter.split_documents(documents)\n",
|
"docs = text_splitter.split_documents(documents)\n",
|
||||||
@ -166,7 +167,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"table_name = 'my_vector_db_table'\n",
|
"table_name = \"my_vector_db_table\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"docsearch = Cassandra.from_documents(\n",
|
"docsearch = Cassandra.from_documents(\n",
|
||||||
" documents=docs,\n",
|
" documents=docs,\n",
|
||||||
|
@ -205,19 +205,25 @@
|
|||||||
"import chromadb\n",
|
"import chromadb\n",
|
||||||
"import uuid\n",
|
"import uuid\n",
|
||||||
"from chromadb.config import Settings\n",
|
"from chromadb.config import Settings\n",
|
||||||
"client = chromadb.Client(Settings(chroma_api_impl=\"rest\",\n",
|
"\n",
|
||||||
|
"client = chromadb.Client(\n",
|
||||||
|
" Settings(\n",
|
||||||
|
" chroma_api_impl=\"rest\",\n",
|
||||||
" chroma_server_host=\"localhost\",\n",
|
" chroma_server_host=\"localhost\",\n",
|
||||||
" chroma_server_http_port=\"8000\"\n",
|
" chroma_server_http_port=\"8000\",\n",
|
||||||
" ))\n",
|
" )\n",
|
||||||
|
")\n",
|
||||||
"client.reset() # resets the database\n",
|
"client.reset() # resets the database\n",
|
||||||
"collection = client.create_collection(\"my_collection\")\n",
|
"collection = client.create_collection(\"my_collection\")\n",
|
||||||
"for doc in docs:\n",
|
"for doc in docs:\n",
|
||||||
" collection.add(ids=[str(uuid.uuid1())], metadatas=doc.metadata, documents=doc.page_content)\n",
|
" collection.add(\n",
|
||||||
|
" ids=[str(uuid.uuid1())], metadatas=doc.metadata, documents=doc.page_content\n",
|
||||||
|
" )\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# tell LangChain to use our client and collection name\n",
|
"# tell LangChain to use our client and collection name\n",
|
||||||
"db4 = Chroma(client=client, collection_name=\"my_collection\")\n",
|
"db4 = Chroma(client=client, collection_name=\"my_collection\")\n",
|
||||||
"docs = db.similarity_search(query)\n",
|
"docs = db.similarity_search(query)\n",
|
||||||
"print(docs[0].page_content)\n"
|
"print(docs[0].page_content)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -262,7 +268,7 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# create simple ids\n",
|
"# create simple ids\n",
|
||||||
"ids = [str(i) for i in range(1, len(docs)+1)]\n",
|
"ids = [str(i) for i in range(1, len(docs) + 1)]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# add data\n",
|
"# add data\n",
|
||||||
"example_db = Chroma.from_documents(docs, embedding_function, ids=ids)\n",
|
"example_db = Chroma.from_documents(docs, embedding_function, ids=ids)\n",
|
||||||
@ -270,14 +276,17 @@
|
|||||||
"print(docs[0].metadata)\n",
|
"print(docs[0].metadata)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# update the metadata for a document\n",
|
"# update the metadata for a document\n",
|
||||||
"docs[0].metadata = {'source': '../../../state_of_the_union.txt', 'new_value': 'hello world'}\n",
|
"docs[0].metadata = {\n",
|
||||||
|
" \"source\": \"../../../state_of_the_union.txt\",\n",
|
||||||
|
" \"new_value\": \"hello world\",\n",
|
||||||
|
"}\n",
|
||||||
"example_db.update_document(ids[0], docs[0])\n",
|
"example_db.update_document(ids[0], docs[0])\n",
|
||||||
"print(example_db._collection.get(ids=[ids[0]]))\n",
|
"print(example_db._collection.get(ids=[ids[0]]))\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# delete the last document\n",
|
"# delete the last document\n",
|
||||||
"print(\"count before\", example_db._collection.count())\n",
|
"print(\"count before\", example_db._collection.count())\n",
|
||||||
"example_db._collection.delete(ids=[ids[-1]])\n",
|
"example_db._collection.delete(ids=[ids[-1]])\n",
|
||||||
"print(\"count after\", example_db._collection.count())\n"
|
"print(\"count after\", example_db._collection.count())"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -317,6 +326,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import os\n",
|
"import os\n",
|
||||||
|
"\n",
|
||||||
"os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY"
|
"os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -55,7 +55,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Please login and get your API key from https://clarifai.com/settings/security \n",
|
"# Please login and get your API key from https://clarifai.com/settings/security\n",
|
||||||
"from getpass import getpass\n",
|
"from getpass import getpass\n",
|
||||||
"\n",
|
"\n",
|
||||||
"CLARIFAI_PAT = getpass()"
|
"CLARIFAI_PAT = getpass()"
|
||||||
@ -104,8 +104,8 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"USER_ID = 'USERNAME_ID'\n",
|
"USER_ID = \"USERNAME_ID\"\n",
|
||||||
"APP_ID = 'APPLICATION_ID'\n",
|
"APP_ID = \"APPLICATION_ID\"\n",
|
||||||
"NUMBER_OF_DOCS = 4"
|
"NUMBER_OF_DOCS = 4"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -126,8 +126,13 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"texts = [\"I really enjoy spending time with you\", \"I hate spending time with my dog\", \"I want to go for a run\", \\\n",
|
"texts = [\n",
|
||||||
" \"I went to the movies yesterday\", \"I love playing soccer with my friends\"]\n",
|
" \"I really enjoy spending time with you\",\n",
|
||||||
|
" \"I hate spending time with my dog\",\n",
|
||||||
|
" \"I want to go for a run\",\n",
|
||||||
|
" \"I went to the movies yesterday\",\n",
|
||||||
|
" \"I love playing soccer with my friends\",\n",
|
||||||
|
"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"metadatas = [{\"id\": i, \"text\": text} for i, text in enumerate(texts)]"
|
"metadatas = [{\"id\": i, \"text\": text} for i, text in enumerate(texts)]"
|
||||||
]
|
]
|
||||||
@ -139,7 +144,14 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"clarifai_vector_db = Clarifai.from_texts(user_id=USER_ID, app_id=APP_ID, texts=texts, pat=CLARIFAI_PAT, number_of_docs=NUMBER_OF_DOCS, metadatas = metadatas)"
|
"clarifai_vector_db = Clarifai.from_texts(\n",
|
||||||
|
" user_id=USER_ID,\n",
|
||||||
|
" app_id=APP_ID,\n",
|
||||||
|
" texts=texts,\n",
|
||||||
|
" pat=CLARIFAI_PAT,\n",
|
||||||
|
" number_of_docs=NUMBER_OF_DOCS,\n",
|
||||||
|
" metadatas=metadatas,\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -184,7 +196,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
||||||
"documents = loader.load()\n",
|
"documents = loader.load()\n",
|
||||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||||
"docs = text_splitter.split_documents(documents)"
|
"docs = text_splitter.split_documents(documents)"
|
||||||
@ -221,8 +233,8 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"USER_ID = 'USERNAME_ID'\n",
|
"USER_ID = \"USERNAME_ID\"\n",
|
||||||
"APP_ID = 'APPLICATION_ID'\n",
|
"APP_ID = \"APPLICATION_ID\"\n",
|
||||||
"NUMBER_OF_DOCS = 4"
|
"NUMBER_OF_DOCS = 4"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -233,7 +245,13 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"clarifai_vector_db = Clarifai.from_documents(user_id=USER_ID, app_id=APP_ID, documents=docs, pat=CLARIFAI_PAT_KEY, number_of_docs=NUMBER_OF_DOCS)"
|
"clarifai_vector_db = Clarifai.from_documents(\n",
|
||||||
|
" user_id=USER_ID,\n",
|
||||||
|
" app_id=APP_ID,\n",
|
||||||
|
" documents=docs,\n",
|
||||||
|
" pat=CLARIFAI_PAT_KEY,\n",
|
||||||
|
" number_of_docs=NUMBER_OF_DOCS,\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -84,8 +84,8 @@
|
|||||||
"import os\n",
|
"import os\n",
|
||||||
"import getpass\n",
|
"import getpass\n",
|
||||||
"\n",
|
"\n",
|
||||||
"if not os.environ['OPENAI_API_KEY']:\n",
|
"if not os.environ[\"OPENAI_API_KEY\"]:\n",
|
||||||
" os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')"
|
" os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -120,7 +120,8 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from langchain.document_loaders import TextLoader\n",
|
"from langchain.document_loaders import TextLoader\n",
|
||||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
"\n",
|
||||||
|
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
||||||
"documents = loader.load()\n",
|
"documents = loader.load()\n",
|
||||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||||
"docs = text_splitter.split_documents(documents)\n",
|
"docs = text_splitter.split_documents(documents)\n",
|
||||||
@ -149,7 +150,7 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"for d in docs:\n",
|
"for d in docs:\n",
|
||||||
" d.metadata = {'some': 'metadata'}\n",
|
" d.metadata = {\"some\": \"metadata\"}\n",
|
||||||
"settings = ClickhouseSettings(table=\"clickhouse_vector_search_example\")\n",
|
"settings = ClickhouseSettings(table=\"clickhouse_vector_search_example\")\n",
|
||||||
"docsearch = Clickhouse.from_documents(docs, embeddings, config=settings)\n",
|
"docsearch = Clickhouse.from_documents(docs, embeddings, config=settings)\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -308,7 +309,7 @@
|
|||||||
"from langchain.vectorstores import Clickhouse, ClickhouseSettings\n",
|
"from langchain.vectorstores import Clickhouse, ClickhouseSettings\n",
|
||||||
"from langchain.document_loaders import TextLoader\n",
|
"from langchain.document_loaders import TextLoader\n",
|
||||||
"\n",
|
"\n",
|
||||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
||||||
"documents = loader.load()\n",
|
"documents = loader.load()\n",
|
||||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||||
"docs = text_splitter.split_documents(documents)\n",
|
"docs = text_splitter.split_documents(documents)\n",
|
||||||
@ -316,7 +317,7 @@
|
|||||||
"embeddings = OpenAIEmbeddings()\n",
|
"embeddings = OpenAIEmbeddings()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"for i, d in enumerate(docs):\n",
|
"for i, d in enumerate(docs):\n",
|
||||||
" d.metadata = {'doc_id': i}\n",
|
" d.metadata = {\"doc_id\": i}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"docsearch = Clickhouse.from_documents(docs, embeddings)"
|
"docsearch = Clickhouse.from_documents(docs, embeddings)"
|
||||||
]
|
]
|
||||||
@ -345,10 +346,13 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"meta = docsearch.metadata_column\n",
|
"meta = docsearch.metadata_column\n",
|
||||||
"output = docsearch.similarity_search_with_relevance_scores('What did the president say about Ketanji Brown Jackson?', \n",
|
"output = docsearch.similarity_search_with_relevance_scores(\n",
|
||||||
" k=4, where_str=f\"{meta}.doc_id<10\")\n",
|
" \"What did the president say about Ketanji Brown Jackson?\",\n",
|
||||||
|
" k=4,\n",
|
||||||
|
" where_str=f\"{meta}.doc_id<10\",\n",
|
||||||
|
")\n",
|
||||||
"for d, dist in output:\n",
|
"for d, dist in output:\n",
|
||||||
" print(dist, d.metadata, d.page_content[:20] + '...')"
|
" print(dist, d.metadata, d.page_content[:20] + \"...\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -51,7 +51,8 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from langchain.document_loaders import TextLoader\n",
|
"from langchain.document_loaders import TextLoader\n",
|
||||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
"\n",
|
||||||
|
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
||||||
"documents = loader.load()\n",
|
"documents = loader.load()\n",
|
||||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||||
"docs = text_splitter.split_documents(documents)"
|
"docs = text_splitter.split_documents(documents)"
|
||||||
@ -72,7 +73,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"import marqo \n",
|
"import marqo\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# initialize marqo\n",
|
"# initialize marqo\n",
|
||||||
"marqo_url = \"http://localhost:8882\" # if using marqo cloud replace with your endpoint (console.marqo.ai)\n",
|
"marqo_url = \"http://localhost:8882\" # if using marqo cloud replace with your endpoint (console.marqo.ai)\n",
|
||||||
@ -190,7 +191,6 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"\n",
|
|
||||||
"# use a new index\n",
|
"# use a new index\n",
|
||||||
"index_name = \"langchain-multimodal-demo\"\n",
|
"index_name = \"langchain-multimodal-demo\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -199,22 +199,22 @@
|
|||||||
" client.delete_index(index_name)\n",
|
" client.delete_index(index_name)\n",
|
||||||
"except Exception:\n",
|
"except Exception:\n",
|
||||||
" print(f\"Creating {index_name}\")\n",
|
" print(f\"Creating {index_name}\")\n",
|
||||||
" \n",
|
"\n",
|
||||||
"# This index could have been created by another system\n",
|
"# This index could have been created by another system\n",
|
||||||
"settings = {\"treat_urls_and_pointers_as_images\": True, \"model\": \"ViT-L/14\"}\n",
|
"settings = {\"treat_urls_and_pointers_as_images\": True, \"model\": \"ViT-L/14\"}\n",
|
||||||
"client.create_index(index_name, **settings)\n",
|
"client.create_index(index_name, **settings)\n",
|
||||||
"client.index(index_name).add_documents(\n",
|
"client.index(index_name).add_documents(\n",
|
||||||
" [ \n",
|
" [\n",
|
||||||
" # image of a bus\n",
|
" # image of a bus\n",
|
||||||
" {\n",
|
" {\n",
|
||||||
" \"caption\": \"Bus\",\n",
|
" \"caption\": \"Bus\",\n",
|
||||||
" \"image\": \"https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image4.jpg\"\n",
|
" \"image\": \"https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image4.jpg\",\n",
|
||||||
" },\n",
|
" },\n",
|
||||||
" # image of a plane\n",
|
" # image of a plane\n",
|
||||||
" { \n",
|
" {\n",
|
||||||
" \"caption\": \"Plane\", \n",
|
" \"caption\": \"Plane\",\n",
|
||||||
" \"image\": \"https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image2.jpg\"\n",
|
" \"image\": \"https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image2.jpg\",\n",
|
||||||
" }\n",
|
" },\n",
|
||||||
" ],\n",
|
" ],\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
@ -230,6 +230,7 @@
|
|||||||
" \"\"\"Helper to format Marqo's documents into text to be used as page_content\"\"\"\n",
|
" \"\"\"Helper to format Marqo's documents into text to be used as page_content\"\"\"\n",
|
||||||
" return f\"{res['caption']}: {res['image']}\"\n",
|
" return f\"{res['caption']}: {res['image']}\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"\n",
|
||||||
"docsearch = Marqo(client, index_name, page_content_builder=get_content)\n",
|
"docsearch = Marqo(client, index_name, page_content_builder=get_content)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -292,7 +293,6 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"\n",
|
|
||||||
"# use a new index\n",
|
"# use a new index\n",
|
||||||
"index_name = \"langchain-byo-index-demo\"\n",
|
"index_name = \"langchain-byo-index-demo\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -305,17 +305,17 @@
|
|||||||
"# This index could have been created by another system\n",
|
"# This index could have been created by another system\n",
|
||||||
"client.create_index(index_name)\n",
|
"client.create_index(index_name)\n",
|
||||||
"client.index(index_name).add_documents(\n",
|
"client.index(index_name).add_documents(\n",
|
||||||
" [ \n",
|
" [\n",
|
||||||
" {\n",
|
" {\n",
|
||||||
" \"Title\": \"Smartphone\",\n",
|
" \"Title\": \"Smartphone\",\n",
|
||||||
" \"Description\": \"A smartphone is a portable computer device that combines mobile telephone \"\n",
|
" \"Description\": \"A smartphone is a portable computer device that combines mobile telephone \"\n",
|
||||||
" \"functions and computing functions into one unit.\",\n",
|
" \"functions and computing functions into one unit.\",\n",
|
||||||
" },\n",
|
" },\n",
|
||||||
" { \n",
|
" {\n",
|
||||||
" \"Title\": \"Telephone\",\n",
|
" \"Title\": \"Telephone\",\n",
|
||||||
" \"Description\": \"A telephone is a telecommunications device that permits two or more users to\"\n",
|
" \"Description\": \"A telephone is a telecommunications device that permits two or more users to\"\n",
|
||||||
" \"conduct a conversation when they are too far apart to be easily heard directly.\",\n",
|
" \"conduct a conversation when they are too far apart to be easily heard directly.\",\n",
|
||||||
" }\n",
|
" },\n",
|
||||||
" ],\n",
|
" ],\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
@ -341,16 +341,17 @@
|
|||||||
"# Note text indexes retain the ability to use add_texts despite different field names in documents\n",
|
"# Note text indexes retain the ability to use add_texts despite different field names in documents\n",
|
||||||
"# this is because the page_content_builder callback lets you handle these document fields as required\n",
|
"# this is because the page_content_builder callback lets you handle these document fields as required\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"\n",
|
||||||
"def get_content(res):\n",
|
"def get_content(res):\n",
|
||||||
" \"\"\"Helper to format Marqo's documents into text to be used as page_content\"\"\"\n",
|
" \"\"\"Helper to format Marqo's documents into text to be used as page_content\"\"\"\n",
|
||||||
" if 'text' in res:\n",
|
" if \"text\" in res:\n",
|
||||||
" return res['text']\n",
|
" return res[\"text\"]\n",
|
||||||
" return res['Description']\n",
|
" return res[\"Description\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"docsearch = Marqo(client, index_name, page_content_builder=get_content)\n",
|
"docsearch = Marqo(client, index_name, page_content_builder=get_content)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"docsearch.add_texts([\"This is a document that is about elephants\"])\n"
|
"docsearch.add_texts([\"This is a document that is about elephants\"])"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -421,7 +422,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"query = {\"communications devices\" : 1.0}\n",
|
"query = {\"communications devices\": 1.0}\n",
|
||||||
"doc_results = docsearch.similarity_search(query)\n",
|
"doc_results = docsearch.similarity_search(query)\n",
|
||||||
"print(doc_results[0].page_content)"
|
"print(doc_results[0].page_content)"
|
||||||
]
|
]
|
||||||
@ -441,7 +442,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"query = {\"communications devices\" : 1.0, \"technology post 2000\": -1.0}\n",
|
"query = {\"communications devices\": 1.0, \"technology post 2000\": -1.0}\n",
|
||||||
"doc_results = docsearch.similarity_search(query)\n",
|
"doc_results = docsearch.similarity_search(query)\n",
|
||||||
"print(doc_results[0].page_content)"
|
"print(doc_results[0].page_content)"
|
||||||
]
|
]
|
||||||
|
@ -1,15 +1,11 @@
|
|||||||
{
|
{
|
||||||
"cells":[
|
"cells": [
|
||||||
{
|
{
|
||||||
"attachments":{
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
},
|
"id": "683953b3",
|
||||||
"cell_type":"markdown",
|
"metadata": {},
|
||||||
"id":"683953b3",
|
"source": [
|
||||||
"metadata":{
|
|
||||||
|
|
||||||
},
|
|
||||||
"source":[
|
|
||||||
"# MongoDB Atlas\n",
|
"# MongoDB Atlas\n",
|
||||||
"\n",
|
"\n",
|
||||||
">[MongoDB Atlas](https://www.mongodb.com/docs/atlas/) is a fully-managed cloud database available in AWS , Azure, and GCP. It now has support for native Vector Search on your MongoDB document data.\n",
|
">[MongoDB Atlas](https://www.mongodb.com/docs/atlas/) is a fully-managed cloud database available in AWS , Azure, and GCP. It now has support for native Vector Search on your MongoDB document data.\n",
|
||||||
@ -23,75 +19,55 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type":"code",
|
"cell_type": "code",
|
||||||
"execution_count":null,
|
"execution_count": null,
|
||||||
"id":"b4c41cad-08ef-4f72-a545-2151e4598efe",
|
"id": "b4c41cad-08ef-4f72-a545-2151e4598efe",
|
||||||
"metadata":{
|
"metadata": {
|
||||||
"tags":[
|
"tags": []
|
||||||
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"outputs":[
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
],
|
|
||||||
"source":[
|
|
||||||
"!pip install pymongo"
|
"!pip install pymongo"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type":"code",
|
"cell_type": "code",
|
||||||
"execution_count":null,
|
"execution_count": null,
|
||||||
"id":"c1e38361-c1fe-4ac6-86e9-c90ebaf7ae87",
|
"id": "c1e38361-c1fe-4ac6-86e9-c90ebaf7ae87",
|
||||||
"metadata":{
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
},
|
"source": [
|
||||||
"outputs":[
|
|
||||||
|
|
||||||
],
|
|
||||||
"source":[
|
|
||||||
"import os\n",
|
"import os\n",
|
||||||
"import getpass\n",
|
"import getpass\n",
|
||||||
"\n",
|
"\n",
|
||||||
"MONGODB_ATLAS_CLUSTER_URI = getpass.getpass(\"MongoDB Atlas Cluster URI:\")\n"
|
"MONGODB_ATLAS_CLUSTER_URI = getpass.getpass(\"MongoDB Atlas Cluster URI:\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"attachments":{
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
},
|
"id": "457ace44-1d95-4001-9dd5-78811ab208ad",
|
||||||
"cell_type":"markdown",
|
"metadata": {},
|
||||||
"id":"457ace44-1d95-4001-9dd5-78811ab208ad",
|
"source": [
|
||||||
"metadata":{
|
|
||||||
|
|
||||||
},
|
|
||||||
"source":[
|
|
||||||
"We want to use `OpenAIEmbeddings` so we need to set up our OpenAI API Key. "
|
"We want to use `OpenAIEmbeddings` so we need to set up our OpenAI API Key. "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type":"code",
|
"cell_type": "code",
|
||||||
"execution_count":null,
|
"execution_count": null,
|
||||||
"id":"2d8f240d",
|
"id": "2d8f240d",
|
||||||
"metadata":{
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
},
|
"source": [
|
||||||
"outputs":[
|
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||||
|
|
||||||
],
|
|
||||||
"source":[
|
|
||||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"attachments":{
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
},
|
"id": "1f3ecc42",
|
||||||
"cell_type":"markdown",
|
"metadata": {},
|
||||||
"id":"1f3ecc42",
|
"source": [
|
||||||
"metadata":{
|
|
||||||
|
|
||||||
},
|
|
||||||
"source":[
|
|
||||||
"Now, let's create a vector search index on your cluster. In the below example, `embedding` is the name of the field that contains the embedding vector. Please refer to the [documentation](https://www.mongodb.com/docs/atlas/atlas-search/define-field-mappings-for-vector-search) to get more details on how to define an Atlas Vector Search index.\n",
|
"Now, let's create a vector search index on your cluster. In the below example, `embedding` is the name of the field that contains the embedding vector. Please refer to the [documentation](https://www.mongodb.com/docs/atlas/atlas-search/define-field-mappings-for-vector-search) to get more details on how to define an Atlas Vector Search index.\n",
|
||||||
"You can name the index `langchain_demo` and create the index on the namespace `lanchain_db.langchain_col`. Finally, write the following definition in the JSON editor on MongoDB Atlas:\n",
|
"You can name the index `langchain_demo` and create the index on the namespace `lanchain_db.langchain_col`. Finally, write the following definition in the JSON editor on MongoDB Atlas:\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -112,18 +88,14 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type":"code",
|
"cell_type": "code",
|
||||||
"execution_count":2,
|
"execution_count": 2,
|
||||||
"id":"aac9563e",
|
"id": "aac9563e",
|
||||||
"metadata":{
|
"metadata": {
|
||||||
"tags":[
|
"tags": []
|
||||||
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"outputs":[
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
],
|
|
||||||
"source":[
|
|
||||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||||
"from langchain.vectorstores import MongoDBAtlasVectorSearch\n",
|
"from langchain.vectorstores import MongoDBAtlasVectorSearch\n",
|
||||||
@ -138,16 +110,12 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type":"code",
|
"cell_type": "code",
|
||||||
"execution_count":null,
|
"execution_count": null,
|
||||||
"id":"6e104aee",
|
"id": "6e104aee",
|
||||||
"metadata":{
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
},
|
"source": [
|
||||||
"outputs":[
|
|
||||||
|
|
||||||
],
|
|
||||||
"source":[
|
|
||||||
"from pymongo import MongoClient\n",
|
"from pymongo import MongoClient\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# initialize MongoDB python client\n",
|
"# initialize MongoDB python client\n",
|
||||||
@ -169,43 +137,31 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type":"code",
|
"cell_type": "code",
|
||||||
"execution_count":null,
|
"execution_count": null,
|
||||||
"id":"9c608226",
|
"id": "9c608226",
|
||||||
"metadata":{
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
},
|
"source": [
|
||||||
"outputs":[
|
|
||||||
|
|
||||||
],
|
|
||||||
"source":[
|
|
||||||
"print(docs[0].page_content)"
|
"print(docs[0].page_content)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"attachments":{
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
},
|
"id": "851a2ec9-9390-49a4-8412-3e132c9f789d",
|
||||||
"cell_type":"markdown",
|
"metadata": {},
|
||||||
"id":"851a2ec9-9390-49a4-8412-3e132c9f789d",
|
"source": [
|
||||||
"metadata":{
|
|
||||||
|
|
||||||
},
|
|
||||||
"source":[
|
|
||||||
"You can also instantiate the vector store directly and execute a query as follows:"
|
"You can also instantiate the vector store directly and execute a query as follows:"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type":"code",
|
"cell_type": "code",
|
||||||
"execution_count":null,
|
"execution_count": null,
|
||||||
"id":"6336fe79-3e73-48be-b20a-0ff1bb6a4399",
|
"id": "6336fe79-3e73-48be-b20a-0ff1bb6a4399",
|
||||||
"metadata":{
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
},
|
"source": [
|
||||||
"outputs":[
|
|
||||||
|
|
||||||
],
|
|
||||||
"source":[
|
|
||||||
"# initialize vector store\n",
|
"# initialize vector store\n",
|
||||||
"vectorstore = MongoDBAtlasVectorSearch(\n",
|
"vectorstore = MongoDBAtlasVectorSearch(\n",
|
||||||
" collection, OpenAIEmbeddings(), index_name=index_name\n",
|
" collection, OpenAIEmbeddings(), index_name=index_name\n",
|
||||||
@ -219,25 +175,25 @@
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata":{
|
"metadata": {
|
||||||
"kernelspec":{
|
"kernelspec": {
|
||||||
"display_name":"Python 3 (ipykernel)",
|
"display_name": "Python 3 (ipykernel)",
|
||||||
"language":"python",
|
"language": "python",
|
||||||
"name":"python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
"language_info":{
|
"language_info": {
|
||||||
"codemirror_mode":{
|
"codemirror_mode": {
|
||||||
"name":"ipython",
|
"name": "ipython",
|
||||||
"version":3
|
"version": 3
|
||||||
},
|
},
|
||||||
"file_extension":".py",
|
"file_extension": ".py",
|
||||||
"mimetype":"text/x-python",
|
"mimetype": "text/x-python",
|
||||||
"name":"python",
|
"name": "python",
|
||||||
"nbconvert_exporter":"python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer":"ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version":"3.10.6"
|
"version": "3.10.6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat":4,
|
"nbformat": 4,
|
||||||
"nbformat_minor":5
|
"nbformat_minor": 5
|
||||||
}
|
}
|
||||||
|
@ -169,8 +169,7 @@
|
|||||||
"# database=os.environ.get(\"PGVECTOR_DATABASE\", \"postgres\"),\n",
|
"# database=os.environ.get(\"PGVECTOR_DATABASE\", \"postgres\"),\n",
|
||||||
"# user=os.environ.get(\"PGVECTOR_USER\", \"postgres\"),\n",
|
"# user=os.environ.get(\"PGVECTOR_USER\", \"postgres\"),\n",
|
||||||
"# password=os.environ.get(\"PGVECTOR_PASSWORD\", \"postgres\"),\n",
|
"# password=os.environ.get(\"PGVECTOR_PASSWORD\", \"postgres\"),\n",
|
||||||
"# )\n",
|
"# )"
|
||||||
"\n"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -186,7 +185,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# The PGVector Module will try to create a table with the name of the collection. \n",
|
"# The PGVector Module will try to create a table with the name of the collection.\n",
|
||||||
"# So, make sure that the collection name is unique and the user has the permission to create a table.\n",
|
"# So, make sure that the collection name is unique and the user has the permission to create a table.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"COLLECTION_NAME = \"state_of_the_union_test\"\n",
|
"COLLECTION_NAME = \"state_of_the_union_test\"\n",
|
||||||
@ -293,7 +292,7 @@
|
|||||||
" collection_name=COLLECTION_NAME,\n",
|
" collection_name=COLLECTION_NAME,\n",
|
||||||
" connection_string=CONNECTION_STRING,\n",
|
" connection_string=CONNECTION_STRING,\n",
|
||||||
" embedding_function=embeddings,\n",
|
" embedding_function=embeddings,\n",
|
||||||
")\n"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -118,10 +118,9 @@
|
|||||||
"texts = [d.page_content for d in docs]\n",
|
"texts = [d.page_content for d in docs]\n",
|
||||||
"metadatas = [d.metadata for d in docs]\n",
|
"metadatas = [d.metadata for d in docs]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"rds, keys = Redis.from_texts_return_keys(texts,\n",
|
"rds, keys = Redis.from_texts_return_keys(\n",
|
||||||
" embeddings,\n",
|
" texts, embeddings, redis_url=\"redis://localhost:6379\", index_name=\"link\"\n",
|
||||||
" redis_url=\"redis://localhost:6379\",\n",
|
")"
|
||||||
" index_name=\"link\")"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -94,12 +94,14 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"# Make sure env variable ROCKSET_API_KEY is set\n",
|
"# Make sure env variable ROCKSET_API_KEY is set\n",
|
||||||
"ROCKSET_API_KEY = os.environ.get(\"ROCKSET_API_KEY\")\n",
|
"ROCKSET_API_KEY = os.environ.get(\"ROCKSET_API_KEY\")\n",
|
||||||
"ROCKSET_API_SERVER = rockset.Regions.usw2a1 # Make sure this points to the correct Rockset region\n",
|
"ROCKSET_API_SERVER = (\n",
|
||||||
|
" rockset.Regions.usw2a1\n",
|
||||||
|
") # Make sure this points to the correct Rockset region\n",
|
||||||
"rockset_client = rockset.RocksetClient(ROCKSET_API_SERVER, ROCKSET_API_KEY)\n",
|
"rockset_client = rockset.RocksetClient(ROCKSET_API_SERVER, ROCKSET_API_KEY)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"COLLECTION_NAME='langchain_demo'\n",
|
"COLLECTION_NAME = \"langchain_demo\"\n",
|
||||||
"TEXT_KEY='description'\n",
|
"TEXT_KEY = \"description\"\n",
|
||||||
"EMBEDDING_KEY='description_embedding'"
|
"EMBEDDING_KEY = \"description_embedding\""
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -124,7 +126,7 @@
|
|||||||
"from langchain.document_loaders import TextLoader\n",
|
"from langchain.document_loaders import TextLoader\n",
|
||||||
"from langchain.vectorstores.rocksetdb import RocksetDB\n",
|
"from langchain.vectorstores.rocksetdb import RocksetDB\n",
|
||||||
"\n",
|
"\n",
|
||||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
||||||
"documents = loader.load()\n",
|
"documents = loader.load()\n",
|
||||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||||
"docs = text_splitter.split_documents(documents)"
|
"docs = text_splitter.split_documents(documents)"
|
||||||
@ -148,7 +150,7 @@
|
|||||||
"# Make sure the environment variable OPENAI_API_KEY is set up\n",
|
"# Make sure the environment variable OPENAI_API_KEY is set up\n",
|
||||||
"embeddings = OpenAIEmbeddings()\n",
|
"embeddings = OpenAIEmbeddings()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"docsearch=RocksetDB(\n",
|
"docsearch = RocksetDB(\n",
|
||||||
" client=rockset_client,\n",
|
" client=rockset_client,\n",
|
||||||
" embeddings=embeddings,\n",
|
" embeddings=embeddings,\n",
|
||||||
" collection_name=COLLECTION_NAME,\n",
|
" collection_name=COLLECTION_NAME,\n",
|
||||||
@ -156,7 +158,7 @@
|
|||||||
" embedding_key=EMBEDDING_KEY,\n",
|
" embedding_key=EMBEDDING_KEY,\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"ids=docsearch.add_texts(\n",
|
"ids = docsearch.add_texts(\n",
|
||||||
" texts=[d.page_content for d in docs],\n",
|
" texts=[d.page_content for d in docs],\n",
|
||||||
" metadatas=[d.metadata for d in docs],\n",
|
" metadatas=[d.metadata for d in docs],\n",
|
||||||
")\n",
|
")\n",
|
||||||
@ -182,11 +184,13 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||||
"output = docsearch.similarity_search_with_relevance_scores(query, 4, RocksetDB.DistanceFunction.COSINE_SIM)\n",
|
"output = docsearch.similarity_search_with_relevance_scores(\n",
|
||||||
|
" query, 4, RocksetDB.DistanceFunction.COSINE_SIM\n",
|
||||||
|
")\n",
|
||||||
"print(\"output length:\", len(output))\n",
|
"print(\"output length:\", len(output))\n",
|
||||||
"for d, dist in output:\n",
|
"for d, dist in output:\n",
|
||||||
" print(dist, d.metadata, d.page_content[:20] + '...')\n",
|
" print(dist, d.metadata, d.page_content[:20] + \"...\")\n",
|
||||||
" \n",
|
"\n",
|
||||||
"##\n",
|
"##\n",
|
||||||
"# output length: 4\n",
|
"# output length: 4\n",
|
||||||
"# 0.764990692109871 {'source': '../../../state_of_the_union.txt'} Madam Speaker, Madam...\n",
|
"# 0.764990692109871 {'source': '../../../state_of_the_union.txt'} Madam Speaker, Madam...\n",
|
||||||
@ -218,12 +222,12 @@
|
|||||||
" query,\n",
|
" query,\n",
|
||||||
" 4,\n",
|
" 4,\n",
|
||||||
" RocksetDB.DistanceFunction.COSINE_SIM,\n",
|
" RocksetDB.DistanceFunction.COSINE_SIM,\n",
|
||||||
" where_str=\"{} NOT LIKE '%citizens%'\".format(TEXT_KEY)\n",
|
" where_str=\"{} NOT LIKE '%citizens%'\".format(TEXT_KEY),\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"output length:\", len(output))\n",
|
"print(\"output length:\", len(output))\n",
|
||||||
"for d, dist in output:\n",
|
"for d, dist in output:\n",
|
||||||
" print(dist, d.metadata, d.page_content[:20] + '...')\n",
|
" print(dist, d.metadata, d.page_content[:20] + \"...\")\n",
|
||||||
" \n",
|
"\n",
|
||||||
"##\n",
|
"##\n",
|
||||||
"# output length: 4\n",
|
"# output length: 4\n",
|
||||||
"# 0.7651359650263554 {'source': '../../../state_of_the_union.txt'} Madam Speaker, Madam...\n",
|
"# 0.7651359650263554 {'source': '../../../state_of_the_union.txt'} Madam Speaker, Madam...\n",
|
||||||
|
@ -59,8 +59,8 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Load text samples \n",
|
"# Load text samples\n",
|
||||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
||||||
"documents = loader.load()\n",
|
"documents = loader.load()\n",
|
||||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||||
"docs = text_splitter.split_documents(documents)\n",
|
"docs = text_splitter.split_documents(documents)\n",
|
||||||
@ -90,7 +90,7 @@
|
|||||||
"docsearch = SingleStoreDB.from_documents(\n",
|
"docsearch = SingleStoreDB.from_documents(\n",
|
||||||
" docs,\n",
|
" docs,\n",
|
||||||
" embeddings,\n",
|
" embeddings,\n",
|
||||||
" table_name = \"notebook\", # use table with a custom name \n",
|
" table_name=\"notebook\", # use table with a custom name\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -62,7 +62,7 @@
|
|||||||
"from langchain.vectorstores.starrocks import StarRocksSettings\n",
|
"from langchain.vectorstores.starrocks import StarRocksSettings\n",
|
||||||
"from langchain.vectorstores import Chroma\n",
|
"from langchain.vectorstores import Chroma\n",
|
||||||
"from langchain.text_splitter import CharacterTextSplitter, TokenTextSplitter\n",
|
"from langchain.text_splitter import CharacterTextSplitter, TokenTextSplitter\n",
|
||||||
"from langchain import OpenAI,VectorDBQA\n",
|
"from langchain import OpenAI, VectorDBQA\n",
|
||||||
"from langchain.document_loaders import DirectoryLoader\n",
|
"from langchain.document_loaders import DirectoryLoader\n",
|
||||||
"from langchain.chains import RetrievalQA\n",
|
"from langchain.chains import RetrievalQA\n",
|
||||||
"from langchain.document_loaders import TextLoader, UnstructuredMarkdownLoader\n",
|
"from langchain.document_loaders import TextLoader, UnstructuredMarkdownLoader\n",
|
||||||
@ -95,7 +95,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"loader = DirectoryLoader('./docs', glob='**/*.md', loader_cls=UnstructuredMarkdownLoader)\n",
|
"loader = DirectoryLoader(\n",
|
||||||
|
" \"./docs\", glob=\"**/*.md\", loader_cls=UnstructuredMarkdownLoader\n",
|
||||||
|
")\n",
|
||||||
"documents = loader.load()"
|
"documents = loader.load()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -158,7 +160,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"print('# docs = %d, # splits = %d' % (len(documents), len(split_docs)))"
|
"print(\"# docs = %d, # splits = %d\" % (len(documents), len(split_docs)))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -186,10 +188,10 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"def gen_starrocks(update_vectordb, embeddings, settings):\n",
|
"def gen_starrocks(update_vectordb, embeddings, settings):\n",
|
||||||
" if update_vectordb:\n",
|
" if update_vectordb:\n",
|
||||||
" docsearch = StarRocks.from_documents(split_docs, embeddings, config = settings) \n",
|
" docsearch = StarRocks.from_documents(split_docs, embeddings, config=settings)\n",
|
||||||
" else:\n",
|
" else:\n",
|
||||||
" docsearch = StarRocks(embeddings, settings) \n",
|
" docsearch = StarRocks(embeddings, settings)\n",
|
||||||
" return docsearch\n"
|
" return docsearch"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -255,10 +257,10 @@
|
|||||||
"# configure starrocks settings(host/port/user/pw/db)\n",
|
"# configure starrocks settings(host/port/user/pw/db)\n",
|
||||||
"settings = StarRocksSettings()\n",
|
"settings = StarRocksSettings()\n",
|
||||||
"settings.port = 41003\n",
|
"settings.port = 41003\n",
|
||||||
"settings.host = '127.0.0.1'\n",
|
"settings.host = \"127.0.0.1\"\n",
|
||||||
"settings.username = 'root'\n",
|
"settings.username = \"root\"\n",
|
||||||
"settings.password = ''\n",
|
"settings.password = \"\"\n",
|
||||||
"settings.database = 'zya'\n",
|
"settings.database = \"zya\"\n",
|
||||||
"docsearch = gen_starrocks(update_vectordb, embeddings, settings)\n",
|
"docsearch = gen_starrocks(update_vectordb, embeddings, settings)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(docsearch)\n",
|
"print(docsearch)\n",
|
||||||
@ -290,7 +292,9 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"llm = OpenAI()\n",
|
"llm = OpenAI()\n",
|
||||||
"qa = RetrievalQA.from_chain_type(llm=llm, chain_type=\"stuff\", retriever=docsearch.as_retriever())\n",
|
"qa = RetrievalQA.from_chain_type(\n",
|
||||||
|
" llm=llm, chain_type=\"stuff\", retriever=docsearch.as_retriever()\n",
|
||||||
|
")\n",
|
||||||
"query = \"is profile enabled by default? if not, how to enable profile?\"\n",
|
"query = \"is profile enabled by default? if not, how to enable profile?\"\n",
|
||||||
"resp = qa.run(query)\n",
|
"resp = qa.run(query)\n",
|
||||||
"print(resp)"
|
"print(resp)"
|
||||||
|
@ -55,10 +55,10 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
||||||
"documents = loader.load()\n",
|
"documents = loader.load()\n",
|
||||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||||
"docs = text_splitter.split_documents(documents)\n"
|
"docs = text_splitter.split_documents(documents)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -74,9 +74,11 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"vectara = Vectara.from_documents(docs, \n",
|
"vectara = Vectara.from_documents(\n",
|
||||||
" embedding=FakeEmbeddings(size=768), \n",
|
" docs,\n",
|
||||||
" doc_metadata = {\"speech\": \"state-of-the-union\"})"
|
" embedding=FakeEmbeddings(size=768),\n",
|
||||||
|
" doc_metadata={\"speech\": \"state-of-the-union\"},\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -104,11 +106,17 @@
|
|||||||
"import urllib.request\n",
|
"import urllib.request\n",
|
||||||
"\n",
|
"\n",
|
||||||
"urls = [\n",
|
"urls = [\n",
|
||||||
" ['https://www.gilderlehrman.org/sites/default/files/inline-pdfs/king.dreamspeech.excerpts.pdf', 'I-have-a-dream'],\n",
|
" [\n",
|
||||||
" ['https://www.parkwayschools.net/cms/lib/MO01931486/Centricity/Domain/1578/Churchill_Beaches_Speech.pdf', 'we shall fight on the beaches'],\n",
|
" \"https://www.gilderlehrman.org/sites/default/files/inline-pdfs/king.dreamspeech.excerpts.pdf\",\n",
|
||||||
|
" \"I-have-a-dream\",\n",
|
||||||
|
" ],\n",
|
||||||
|
" [\n",
|
||||||
|
" \"https://www.parkwayschools.net/cms/lib/MO01931486/Centricity/Domain/1578/Churchill_Beaches_Speech.pdf\",\n",
|
||||||
|
" \"we shall fight on the beaches\",\n",
|
||||||
|
" ],\n",
|
||||||
"]\n",
|
"]\n",
|
||||||
"files_list = []\n",
|
"files_list = []\n",
|
||||||
"for url,_ in urls:\n",
|
"for url, _ in urls:\n",
|
||||||
" name = tempfile.NamedTemporaryFile().name\n",
|
" name = tempfile.NamedTemporaryFile().name\n",
|
||||||
" urllib.request.urlretrieve(url, name)\n",
|
" urllib.request.urlretrieve(url, name)\n",
|
||||||
" files_list.append(name)\n",
|
" files_list.append(name)\n",
|
||||||
@ -116,7 +124,7 @@
|
|||||||
"docsearch: Vectara = Vectara.from_files(\n",
|
"docsearch: Vectara = Vectara.from_files(\n",
|
||||||
" files=files_list,\n",
|
" files=files_list,\n",
|
||||||
" embedding=FakeEmbeddings(size=768),\n",
|
" embedding=FakeEmbeddings(size=768),\n",
|
||||||
" metadatas=[{\"url\": url, \"speech\": title} for url,title in urls],\n",
|
" metadatas=[{\"url\": url, \"speech\": title} for url, title in urls],\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -150,7 +158,9 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||||
"found_docs = vectara.similarity_search(query, n_sentence_context=0, filter=\"doc.speech = 'state-of-the-union'\")"
|
"found_docs = vectara.similarity_search(\n",
|
||||||
|
" query, n_sentence_context=0, filter=\"doc.speech = 'state-of-the-union'\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -207,7 +217,9 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||||
"found_docs = vectara.similarity_search_with_score(query, filter=\"doc.speech = 'state-of-the-union'\")"
|
"found_docs = vectara.similarity_search_with_score(\n",
|
||||||
|
" query, filter=\"doc.speech = 'state-of-the-union'\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -269,7 +281,9 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"query = \"We must forever conduct our struggle\"\n",
|
"query = \"We must forever conduct our struggle\"\n",
|
||||||
"found_docs = vectara.similarity_search_with_score(query, filter=\"doc.speech = 'I-have-a-dream'\")\n",
|
"found_docs = vectara.similarity_search_with_score(\n",
|
||||||
|
" query, filter=\"doc.speech = 'I-have-a-dream'\"\n",
|
||||||
|
")\n",
|
||||||
"print(found_docs[0])\n",
|
"print(found_docs[0])\n",
|
||||||
"print(found_docs[1])"
|
"print(found_docs[1])"
|
||||||
]
|
]
|
||||||
|
@ -44,16 +44,18 @@
|
|||||||
"import os\n",
|
"import os\n",
|
||||||
"import getpass\n",
|
"import getpass\n",
|
||||||
"\n",
|
"\n",
|
||||||
"database_mode = (input('\\n(C)assandra or (A)stra DB? ')).upper()\n",
|
"database_mode = (input(\"\\n(C)assandra or (A)stra DB? \")).upper()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"keyspace_name = input('\\nKeyspace name? ')\n",
|
"keyspace_name = input(\"\\nKeyspace name? \")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"if database_mode == 'A':\n",
|
"if database_mode == \"A\":\n",
|
||||||
" ASTRA_DB_APPLICATION_TOKEN = getpass.getpass('\\nAstra DB Token (\"AstraCS:...\") ')\n",
|
" ASTRA_DB_APPLICATION_TOKEN = getpass.getpass('\\nAstra DB Token (\"AstraCS:...\") ')\n",
|
||||||
" #\n",
|
" #\n",
|
||||||
" ASTRA_DB_SECURE_BUNDLE_PATH = input('Full path to your Secure Connect Bundle? ')\n",
|
" ASTRA_DB_SECURE_BUNDLE_PATH = input(\"Full path to your Secure Connect Bundle? \")\n",
|
||||||
"elif database_mode == 'C':\n",
|
"elif database_mode == \"C\":\n",
|
||||||
" CASSANDRA_CONTACT_POINTS = input('Contact points? (comma-separated, empty for localhost) ').strip()"
|
" CASSANDRA_CONTACT_POINTS = input(\n",
|
||||||
|
" \"Contact points? (comma-separated, empty for localhost) \"\n",
|
||||||
|
" ).strip()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -74,17 +76,15 @@
|
|||||||
"from cassandra.cluster import Cluster\n",
|
"from cassandra.cluster import Cluster\n",
|
||||||
"from cassandra.auth import PlainTextAuthProvider\n",
|
"from cassandra.auth import PlainTextAuthProvider\n",
|
||||||
"\n",
|
"\n",
|
||||||
"if database_mode == 'C':\n",
|
"if database_mode == \"C\":\n",
|
||||||
" if CASSANDRA_CONTACT_POINTS:\n",
|
" if CASSANDRA_CONTACT_POINTS:\n",
|
||||||
" cluster = Cluster([\n",
|
" cluster = Cluster(\n",
|
||||||
" cp.strip()\n",
|
" [cp.strip() for cp in CASSANDRA_CONTACT_POINTS.split(\",\") if cp.strip()]\n",
|
||||||
" for cp in CASSANDRA_CONTACT_POINTS.split(',')\n",
|
" )\n",
|
||||||
" if cp.strip()\n",
|
|
||||||
" ])\n",
|
|
||||||
" else:\n",
|
" else:\n",
|
||||||
" cluster = Cluster()\n",
|
" cluster = Cluster()\n",
|
||||||
" session = cluster.connect()\n",
|
" session = cluster.connect()\n",
|
||||||
"elif database_mode == 'A':\n",
|
"elif database_mode == \"A\":\n",
|
||||||
" ASTRA_DB_CLIENT_ID = \"token\"\n",
|
" ASTRA_DB_CLIENT_ID = \"token\"\n",
|
||||||
" cluster = Cluster(\n",
|
" cluster = Cluster(\n",
|
||||||
" cloud={\n",
|
" cloud={\n",
|
||||||
|
@ -203,7 +203,9 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"messages = [\n",
|
"messages = [\n",
|
||||||
" HumanMessage(content=\"How do I create a python function to identify all prime numbers?\")\n",
|
" HumanMessage(\n",
|
||||||
|
" content=\"How do I create a python function to identify all prime numbers?\"\n",
|
||||||
|
" )\n",
|
||||||
"]\n",
|
"]\n",
|
||||||
"chat(messages)"
|
"chat(messages)"
|
||||||
]
|
]
|
||||||
|
@ -93,19 +93,19 @@
|
|||||||
"text": [
|
"text": [
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\u001B[1m> Entering new chain...\u001B[0m\n",
|
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||||
"\u001B[32;1m\u001B[1;3m\n",
|
"\u001b[32;1m\u001b[1;3m\n",
|
||||||
"I need to use the print function to output the string \"Hello, world!\"\n",
|
"I need to use the print function to output the string \"Hello, world!\"\n",
|
||||||
"Action: Python_REPL\n",
|
"Action: Python_REPL\n",
|
||||||
"Action Input: `print(\"Hello, world!\")`\u001B[0m\n",
|
"Action Input: `print(\"Hello, world!\")`\u001b[0m\n",
|
||||||
"Observation: \u001B[36;1m\u001B[1;3mHello, world!\n",
|
"Observation: \u001b[36;1m\u001b[1;3mHello, world!\n",
|
||||||
"\u001B[0m\n",
|
"\u001b[0m\n",
|
||||||
"Thought:\u001B[32;1m\u001B[1;3m\n",
|
"Thought:\u001b[32;1m\u001b[1;3m\n",
|
||||||
"I now know how to print a string in Python\n",
|
"I now know how to print a string in Python\n",
|
||||||
"Final Answer:\n",
|
"Final Answer:\n",
|
||||||
"Hello, world!\u001B[0m\n",
|
"Hello, world!\u001b[0m\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\u001B[1m> Finished chain.\u001B[0m\n"
|
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -148,9 +148,11 @@
|
|||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Now let's test it out!\n",
|
"# Now let's test it out!\n",
|
||||||
"agent.run(\"\"\"\n",
|
"agent.run(\n",
|
||||||
|
" \"\"\"\n",
|
||||||
"Write a Python script that prints \"Hello, world!\"\n",
|
"Write a Python script that prints \"Hello, world!\"\n",
|
||||||
"\"\"\")"
|
"\"\"\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -164,21 +166,21 @@
|
|||||||
"text": [
|
"text": [
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\u001B[1m> Entering new chain...\u001B[0m\n",
|
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||||
"\u001B[32;1m\u001B[1;3m I need to use the calculator to find the answer\n",
|
"\u001b[32;1m\u001b[1;3m I need to use the calculator to find the answer\n",
|
||||||
"Action: Calculator\n",
|
"Action: Calculator\n",
|
||||||
"Action Input: 2.3 ^ 4.5\u001B[0m\n",
|
"Action Input: 2.3 ^ 4.5\u001b[0m\n",
|
||||||
"Observation: \u001B[33;1m\u001B[1;3mAnswer: 42.43998894277659\u001B[0m\n",
|
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 42.43998894277659\u001b[0m\n",
|
||||||
"Thought:\u001B[32;1m\u001B[1;3m I now know the final answer\n",
|
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||||
"Final Answer: 42.43998894277659\n",
|
"Final Answer: 42.43998894277659\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Question: \n",
|
"Question: \n",
|
||||||
"What is the square root of 144?\n",
|
"What is the square root of 144?\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Thought: I need to use the calculator to find the answer\n",
|
"Thought: I need to use the calculator to find the answer\n",
|
||||||
"Action:\u001B[0m\n",
|
"Action:\u001b[0m\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\u001B[1m> Finished chain.\u001B[0m\n"
|
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -65,7 +65,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# Please login and get your API key from https://clarifai.com/settings/security \n",
|
"# Please login and get your API key from https://clarifai.com/settings/security\n",
|
||||||
"from getpass import getpass\n",
|
"from getpass import getpass\n",
|
||||||
"\n",
|
"\n",
|
||||||
"CLARIFAI_PAT = getpass()"
|
"CLARIFAI_PAT = getpass()"
|
||||||
@ -130,9 +130,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"USER_ID = 'openai'\n",
|
"USER_ID = \"openai\"\n",
|
||||||
"APP_ID = 'chat-completion'\n",
|
"APP_ID = \"chat-completion\"\n",
|
||||||
"MODEL_ID = 'GPT-3_5-turbo'\n",
|
"MODEL_ID = \"GPT-3_5-turbo\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# You can provide a specific model version as the model_version_id arg.\n",
|
"# You can provide a specific model version as the model_version_id arg.\n",
|
||||||
"# MODEL_VERSION_ID = \"MODEL_VERSION_ID\""
|
"# MODEL_VERSION_ID = \"MODEL_VERSION_ID\""
|
||||||
@ -148,7 +148,9 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Initialize a Clarifai LLM\n",
|
"# Initialize a Clarifai LLM\n",
|
||||||
"clarifai_llm = Clarifai(pat=CLARIFAI_PAT, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID)"
|
"clarifai_llm = Clarifai(\n",
|
||||||
|
" pat=CLARIFAI_PAT, user_id=USER_ID, app_id=APP_ID, model_id=MODEL_ID\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -165,7 +165,9 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64})\n",
|
"llm = HuggingFaceHub(\n",
|
||||||
|
" repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64}\n",
|
||||||
|
")\n",
|
||||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(llm_chain.run(question))"
|
"print(llm_chain.run(question))"
|
||||||
@ -211,7 +213,9 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64})\n",
|
"llm = HuggingFaceHub(\n",
|
||||||
|
" repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64}\n",
|
||||||
|
")\n",
|
||||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||||
"print(llm_chain.run(question))"
|
"print(llm_chain.run(question))"
|
||||||
]
|
]
|
||||||
@ -245,7 +249,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64})\n",
|
"llm = HuggingFaceHub(\n",
|
||||||
|
" repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64}\n",
|
||||||
|
")\n",
|
||||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||||
"print(llm_chain.run(question))"
|
"print(llm_chain.run(question))"
|
||||||
]
|
]
|
||||||
@ -277,7 +283,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64})\n",
|
"llm = HuggingFaceHub(\n",
|
||||||
|
" repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64}\n",
|
||||||
|
")\n",
|
||||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||||
"print(llm_chain.run(question))"
|
"print(llm_chain.run(question))"
|
||||||
]
|
]
|
||||||
@ -309,7 +317,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64})\n",
|
"llm = HuggingFaceHub(\n",
|
||||||
|
" repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64}\n",
|
||||||
|
")\n",
|
||||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||||
"print(llm_chain.run(question))"
|
"print(llm_chain.run(question))"
|
||||||
]
|
]
|
||||||
|
@ -68,7 +68,7 @@
|
|||||||
" \"seed\": None,\n",
|
" \"seed\": None,\n",
|
||||||
" \"stop\": [],\n",
|
" \"stop\": [],\n",
|
||||||
" },\n",
|
" },\n",
|
||||||
" )"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -56,7 +56,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from langchain.llms import OpenLLM\n",
|
"from langchain.llms import OpenLLM\n",
|
||||||
"\n",
|
"\n",
|
||||||
"server_url = \"http://localhost:3000\" # Replace with remote host if you are running on a remote server \n",
|
"server_url = \"http://localhost:3000\" # Replace with remote host if you are running on a remote server\n",
|
||||||
"llm = OpenLLM(server_url=server_url)"
|
"llm = OpenLLM(server_url=server_url)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -200,7 +200,6 @@
|
|||||||
" dataset_path=\"hub://davitbun/twitter-algorithm\",\n",
|
" dataset_path=\"hub://davitbun/twitter-algorithm\",\n",
|
||||||
" read_only=True,\n",
|
" read_only=True,\n",
|
||||||
" embedding_function=embeddings,\n",
|
" embedding_function=embeddings,\n",
|
||||||
" \n",
|
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -37,10 +37,11 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# Load Notion page as a markdownfile file\n",
|
"# Load Notion page as a markdownfile file\n",
|
||||||
"from langchain.document_loaders import NotionDirectoryLoader\n",
|
"from langchain.document_loaders import NotionDirectoryLoader\n",
|
||||||
"path='../Notion_DB/'\n",
|
"\n",
|
||||||
|
"path = \"../Notion_DB/\"\n",
|
||||||
"loader = NotionDirectoryLoader(path)\n",
|
"loader = NotionDirectoryLoader(path)\n",
|
||||||
"docs = loader.load()\n",
|
"docs = loader.load()\n",
|
||||||
"md_file=docs[0].page_content"
|
"md_file = docs[0].page_content"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -52,6 +53,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# Let's create groups based on the section headers in our page\n",
|
"# Let's create groups based on the section headers in our page\n",
|
||||||
"from langchain.text_splitter import MarkdownHeaderTextSplitter\n",
|
"from langchain.text_splitter import MarkdownHeaderTextSplitter\n",
|
||||||
|
"\n",
|
||||||
"headers_to_split_on = [\n",
|
"headers_to_split_on = [\n",
|
||||||
" (\"###\", \"Section\"),\n",
|
" (\"###\", \"Section\"),\n",
|
||||||
"]\n",
|
"]\n",
|
||||||
@ -76,9 +78,12 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# Define our text splitter\n",
|
"# Define our text splitter\n",
|
||||||
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
||||||
|
"\n",
|
||||||
"chunk_size = 500\n",
|
"chunk_size = 500\n",
|
||||||
"chunk_overlap = 0\n",
|
"chunk_overlap = 0\n",
|
||||||
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)\n",
|
"text_splitter = RecursiveCharacterTextSplitter(\n",
|
||||||
|
" chunk_size=chunk_size, chunk_overlap=chunk_overlap\n",
|
||||||
|
")\n",
|
||||||
"all_splits = text_splitter.split_documents(md_header_splits)"
|
"all_splits = text_splitter.split_documents(md_header_splits)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -99,7 +104,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"! pip install chromadb "
|
"! pip install chromadb"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -112,8 +117,8 @@
|
|||||||
"# Build vectorstore and keep the metadata\n",
|
"# Build vectorstore and keep the metadata\n",
|
||||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||||
"from langchain.vectorstores import Chroma\n",
|
"from langchain.vectorstores import Chroma\n",
|
||||||
"vectorstore = Chroma.from_documents(documents=all_splits,\n",
|
"\n",
|
||||||
" embedding=OpenAIEmbeddings())"
|
"vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -131,7 +136,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Create retriever \n",
|
"# Create retriever\n",
|
||||||
"from langchain.llms import OpenAI\n",
|
"from langchain.llms import OpenAI\n",
|
||||||
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
|
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
|
||||||
"from langchain.chains.query_constructor.base import AttributeInfo\n",
|
"from langchain.chains.query_constructor.base import AttributeInfo\n",
|
||||||
@ -148,7 +153,9 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"# Define self query retriver\n",
|
"# Define self query retriver\n",
|
||||||
"llm = OpenAI(temperature=0)\n",
|
"llm = OpenAI(temperature=0)\n",
|
||||||
"retriever = SelfQueryRetriever.from_llm(llm, vectorstore, document_content_description, metadata_field_info, verbose=True)"
|
"retriever = SelfQueryRetriever.from_llm(\n",
|
||||||
|
" llm, vectorstore, document_content_description, metadata_field_info, verbose=True\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -297,8 +304,9 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from langchain.chains import RetrievalQA\n",
|
"from langchain.chains import RetrievalQA\n",
|
||||||
"from langchain.chat_models import ChatOpenAI\n",
|
"from langchain.chat_models import ChatOpenAI\n",
|
||||||
|
"\n",
|
||||||
"llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n",
|
"llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n",
|
||||||
"qa_chain = RetrievalQA.from_chain_type(llm,retriever=retriever)\n",
|
"qa_chain = RetrievalQA.from_chain_type(llm, retriever=retriever)\n",
|
||||||
"qa_chain.run(\"Summarize the Testing section of the document\")"
|
"qa_chain.run(\"Summarize the Testing section of the document\")"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
160
poetry.lock
generated
160
poetry.lock
generated
@ -1006,10 +1006,12 @@ files = [
|
|||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
click = ">=8.0.0"
|
click = ">=8.0.0"
|
||||||
|
ipython = {version = ">=7.8.0", optional = true, markers = "extra == \"jupyter\""}
|
||||||
mypy-extensions = ">=0.4.3"
|
mypy-extensions = ">=0.4.3"
|
||||||
packaging = ">=22.0"
|
packaging = ">=22.0"
|
||||||
pathspec = ">=0.9.0"
|
pathspec = ">=0.9.0"
|
||||||
platformdirs = ">=2"
|
platformdirs = ">=2"
|
||||||
|
tokenize-rt = {version = ">=3.2.0", optional = true, markers = "extra == \"jupyter\""}
|
||||||
tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
|
tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
|
||||||
typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""}
|
typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""}
|
||||||
|
|
||||||
@ -2659,6 +2661,25 @@ dev = ["pre-commit (>=2.17.0,<3.0.0)", "ruff (==0.0.138)", "uvicorn[standard] (>
|
|||||||
doc = ["mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-markdownextradata-plugin (>=0.1.7,<0.3.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pyyaml (>=5.3.1,<7.0.0)", "typer-cli (>=0.0.13,<0.0.14)", "typer[all] (>=0.6.1,<0.8.0)"]
|
doc = ["mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-markdownextradata-plugin (>=0.1.7,<0.3.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pyyaml (>=5.3.1,<7.0.0)", "typer-cli (>=0.0.13,<0.0.14)", "typer[all] (>=0.6.1,<0.8.0)"]
|
||||||
test = ["anyio[trio] (>=3.2.1,<4.0.0)", "black (==23.1.0)", "coverage[toml] (>=6.5.0,<8.0)", "databases[sqlite] (>=0.3.2,<0.7.0)", "email-validator (>=1.1.1,<2.0.0)", "flask (>=1.1.2,<3.0.0)", "httpx (>=0.23.0,<0.24.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.982)", "orjson (>=3.2.1,<4.0.0)", "passlib[bcrypt] (>=1.7.2,<2.0.0)", "peewee (>=3.13.3,<4.0.0)", "pytest (>=7.1.3,<8.0.0)", "python-jose[cryptography] (>=3.3.0,<4.0.0)", "python-multipart (>=0.0.5,<0.0.7)", "pyyaml (>=5.3.1,<7.0.0)", "ruff (==0.0.138)", "sqlalchemy (>=1.3.18,<1.4.43)", "types-orjson (==3.6.2)", "types-ujson (==5.7.0.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,<6.0.0)"]
|
test = ["anyio[trio] (>=3.2.1,<4.0.0)", "black (==23.1.0)", "coverage[toml] (>=6.5.0,<8.0)", "databases[sqlite] (>=0.3.2,<0.7.0)", "email-validator (>=1.1.1,<2.0.0)", "flask (>=1.1.2,<3.0.0)", "httpx (>=0.23.0,<0.24.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.982)", "orjson (>=3.2.1,<4.0.0)", "passlib[bcrypt] (>=1.7.2,<2.0.0)", "peewee (>=3.13.3,<4.0.0)", "pytest (>=7.1.3,<8.0.0)", "python-jose[cryptography] (>=3.3.0,<4.0.0)", "python-multipart (>=0.0.5,<0.0.7)", "pyyaml (>=5.3.1,<7.0.0)", "ruff (==0.0.138)", "sqlalchemy (>=1.3.18,<1.4.43)", "types-orjson (==3.6.2)", "types-ujson (==5.7.0.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,<6.0.0)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fastcore"
|
||||||
|
version = "1.4.2"
|
||||||
|
description = "Python supercharged for fastai development"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
files = [
|
||||||
|
{file = "fastcore-1.4.2-py3-none-any.whl", hash = "sha256:86c31395a87ea429c35f9291e826098f525684a3a6555238448a371274224ddb"},
|
||||||
|
{file = "fastcore-1.4.2.tar.gz", hash = "sha256:43bb895507788168317f28cf82031c5ce1796d95a229fc515c10bd90f2355a37"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
packaging = "*"
|
||||||
|
pip = "*"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
dev = ["matplotlib", "nbdev (>=0.2.39)", "numpy", "pandas", "pillow", "torch"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fastjsonschema"
|
name = "fastjsonschema"
|
||||||
version = "2.17.1"
|
version = "2.17.1"
|
||||||
@ -2674,6 +2695,25 @@ files = [
|
|||||||
[package.extras]
|
[package.extras]
|
||||||
devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benchmark", "pytest-cache", "validictory"]
|
devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benchmark", "pytest-cache", "validictory"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fastrelease"
|
||||||
|
version = "0.1.17"
|
||||||
|
description = "Simplified releases using GitHub Issues"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
files = [
|
||||||
|
{file = "fastrelease-0.1.17-py3-none-any.whl", hash = "sha256:8b5bbaa9a566fee9c08bbc7cc8fb052a04499a76b464a8b0669707843cb15e2f"},
|
||||||
|
{file = "fastrelease-0.1.17.tar.gz", hash = "sha256:c4c6717dcb8c5b37496511dd2a3f97f810b70e1350380b6ae023bb15757b6a63"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
fastcore = ">=1.3.13"
|
||||||
|
ghapi = "*"
|
||||||
|
packaging = "*"
|
||||||
|
pip = "*"
|
||||||
|
pyyaml = "*"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "feedparser"
|
name = "feedparser"
|
||||||
version = "6.0.10"
|
version = "6.0.10"
|
||||||
@ -2966,6 +3006,26 @@ files = [
|
|||||||
click = "*"
|
click = "*"
|
||||||
six = "*"
|
six = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ghapi"
|
||||||
|
version = "0.1.22"
|
||||||
|
description = "A python client for the GitHub API"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
files = [
|
||||||
|
{file = "ghapi-0.1.22-py3-none-any.whl", hash = "sha256:fba8d57dfc2992d97fe93eaa12e80b9becbed7f13ac6cdcd1c85283c68b04505"},
|
||||||
|
{file = "ghapi-0.1.22.tar.gz", hash = "sha256:fbe31684c141767a62899020fff8c6922a8ecaadd6d4614f0673242939cbb655"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
fastcore = "*"
|
||||||
|
packaging = "*"
|
||||||
|
pip = "*"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
dev = ["jsonref"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "gitdb"
|
name = "gitdb"
|
||||||
version = "4.0.10"
|
version = "4.0.10"
|
||||||
@ -4387,27 +4447,28 @@ testing = ["coverage", "ipykernel", "jupytext", "matplotlib", "nbdime", "nbforma
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "jupyter-client"
|
name = "jupyter-client"
|
||||||
version = "8.2.0"
|
version = "7.4.9"
|
||||||
description = "Jupyter protocol implementation and client libraries"
|
description = "Jupyter protocol implementation and client libraries"
|
||||||
category = "dev"
|
category = "dev"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.8"
|
python-versions = ">=3.7"
|
||||||
files = [
|
files = [
|
||||||
{file = "jupyter_client-8.2.0-py3-none-any.whl", hash = "sha256:b18219aa695d39e2ad570533e0d71fb7881d35a873051054a84ee2a17c4b7389"},
|
{file = "jupyter_client-7.4.9-py3-none-any.whl", hash = "sha256:214668aaea208195f4c13d28eb272ba79f945fc0cf3f11c7092c20b2ca1980e7"},
|
||||||
{file = "jupyter_client-8.2.0.tar.gz", hash = "sha256:9fe233834edd0e6c0aa5f05ca2ab4bdea1842bfd2d8a932878212fc5301ddaf0"},
|
{file = "jupyter_client-7.4.9.tar.gz", hash = "sha256:52be28e04171f07aed8f20e1616a5a552ab9fee9cbbe6c1896ae170c3880d392"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
importlib-metadata = {version = ">=4.8.3", markers = "python_version < \"3.10\""}
|
entrypoints = "*"
|
||||||
jupyter-core = ">=4.12,<5.0.0 || >=5.1.0"
|
jupyter-core = ">=4.9.2"
|
||||||
|
nest-asyncio = ">=1.5.4"
|
||||||
python-dateutil = ">=2.8.2"
|
python-dateutil = ">=2.8.2"
|
||||||
pyzmq = ">=23.0"
|
pyzmq = ">=23.0"
|
||||||
tornado = ">=6.2"
|
tornado = ">=6.2"
|
||||||
traitlets = ">=5.3"
|
traitlets = "*"
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
docs = ["ipykernel", "myst-parser", "pydata-sphinx-theme", "sphinx (>=4)", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling"]
|
doc = ["ipykernel", "myst-parser", "sphinx (>=1.3.6)", "sphinx-rtd-theme", "sphinxcontrib-github-alt"]
|
||||||
test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko", "pre-commit", "pytest", "pytest-cov", "pytest-jupyter[client] (>=0.4.1)", "pytest-timeout"]
|
test = ["codecov", "coverage", "ipykernel (>=6.12)", "ipython", "mypy", "pre-commit", "pytest", "pytest-asyncio (>=0.18)", "pytest-cov", "pytest-timeout"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "jupyter-console"
|
name = "jupyter-console"
|
||||||
@ -5808,6 +5869,52 @@ serve = ["tornado (>=6.1)"]
|
|||||||
test = ["ipykernel", "ipywidgets (>=7)", "pre-commit", "pytest", "pytest-dependency"]
|
test = ["ipykernel", "ipywidgets (>=7)", "pre-commit", "pytest", "pytest-dependency"]
|
||||||
webpdf = ["pyppeteer (>=1,<1.1)"]
|
webpdf = ["pyppeteer (>=1,<1.1)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nbdev"
|
||||||
|
version = "1.2.0"
|
||||||
|
description = "Writing a library entirely in notebooks"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
files = [
|
||||||
|
{file = "nbdev-1.2.0-py3-none-any.whl", hash = "sha256:236bacb23d241e1addfa683d6c3466d3ff960f0bad2639f0142afdfe06daa4f8"},
|
||||||
|
{file = "nbdev-1.2.0.tar.gz", hash = "sha256:474b708beef23e89665b695d304f62cd0458703e889cbe2696ae027cf299714e"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
fastcore = ">=1.3.21"
|
||||||
|
fastrelease = "*"
|
||||||
|
ghapi = "*"
|
||||||
|
ipykernel = "*"
|
||||||
|
jupyter = "*"
|
||||||
|
jupyter-client = "<8"
|
||||||
|
nbconvert = ">=6.4.1"
|
||||||
|
nbformat = ">=4.4.0"
|
||||||
|
packaging = "*"
|
||||||
|
pip = "*"
|
||||||
|
pyyaml = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nbdoc"
|
||||||
|
version = "0.0.82"
|
||||||
|
description = "Generate beautiful, testable documentation with Jupyter Notebooks"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "nbdoc-0.0.82-py3-none-any.whl", hash = "sha256:84f57e0c20b389eb748eb2055d55d5698b2f44fdef455943d396112d26103fb9"},
|
||||||
|
{file = "nbdoc-0.0.82.tar.gz", hash = "sha256:234b785e710025357d66c90be0bf7d620a7a507786380030d749b1c4c5146600"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
black = {version = ">=22.1.0", extras = ["jupyter"]}
|
||||||
|
fastcore = "1.4.2"
|
||||||
|
nbconvert = ">=6.4.1"
|
||||||
|
nbdev = "1.2.0"
|
||||||
|
numpydoc = "1.2"
|
||||||
|
packaging = "*"
|
||||||
|
pip = "*"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "nbformat"
|
name = "nbformat"
|
||||||
version = "5.9.0"
|
version = "5.9.0"
|
||||||
@ -6153,6 +6260,25 @@ files = [
|
|||||||
{file = "numpy-1.24.3.tar.gz", hash = "sha256:ab344f1bf21f140adab8e47fdbc7c35a477dc01408791f8ba00d018dd0bc5155"},
|
{file = "numpy-1.24.3.tar.gz", hash = "sha256:ab344f1bf21f140adab8e47fdbc7c35a477dc01408791f8ba00d018dd0bc5155"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "numpydoc"
|
||||||
|
version = "1.2"
|
||||||
|
description = "Sphinx extension to support docstrings in Numpy format"
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
files = [
|
||||||
|
{file = "numpydoc-1.2-py3-none-any.whl", hash = "sha256:3ecbb9feae080031714b63128912988ebdfd4c582a085d25b8d9f7ac23c2d9ef"},
|
||||||
|
{file = "numpydoc-1.2.tar.gz", hash = "sha256:0cec233740c6b125913005d16e8a9996e060528afcb8b7cad3f2706629dfd6f7"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
Jinja2 = ">=2.10"
|
||||||
|
sphinx = ">=1.8"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
testing = ["matplotlib", "pytest", "pytest-cov"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "nvidia-cublas-cu11"
|
name = "nvidia-cublas-cu11"
|
||||||
version = "11.10.3.66"
|
version = "11.10.3.66"
|
||||||
@ -7179,7 +7305,7 @@ name = "pip"
|
|||||||
version = "23.1.2"
|
version = "23.1.2"
|
||||||
description = "The PyPA recommended tool for installing Python packages."
|
description = "The PyPA recommended tool for installing Python packages."
|
||||||
category = "main"
|
category = "main"
|
||||||
optional = true
|
optional = false
|
||||||
python-versions = ">=3.7"
|
python-versions = ">=3.7"
|
||||||
files = [
|
files = [
|
||||||
{file = "pip-23.1.2-py3-none-any.whl", hash = "sha256:3ef6ac33239e4027d9a5598a381b9d30880a1477e50039db2eac6e8a8f6d1b18"},
|
{file = "pip-23.1.2-py3-none-any.whl", hash = "sha256:3ef6ac33239e4027d9a5598a381b9d30880a1477e50039db2eac6e8a8f6d1b18"},
|
||||||
@ -11047,6 +11173,18 @@ webencodings = ">=0.4"
|
|||||||
doc = ["sphinx", "sphinx_rtd_theme"]
|
doc = ["sphinx", "sphinx_rtd_theme"]
|
||||||
test = ["flake8", "isort", "pytest"]
|
test = ["flake8", "isort", "pytest"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tokenize-rt"
|
||||||
|
version = "5.1.0"
|
||||||
|
description = "A wrapper around the stdlib `tokenize` which roundtrips."
|
||||||
|
category = "dev"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "tokenize_rt-5.1.0-py2.py3-none-any.whl", hash = "sha256:9b7bb843e77dd6ed0be5564bfaaba200083911e0497841cd3e9235a6a9794d74"},
|
||||||
|
{file = "tokenize_rt-5.1.0.tar.gz", hash = "sha256:08f0c2daa94c4052e53c2fcaa8e32585e6ae9bdfc800974092d031401694e002"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tokenizers"
|
name = "tokenizers"
|
||||||
version = "0.13.3"
|
version = "0.13.3"
|
||||||
@ -12562,4 +12700,4 @@ text-helpers = ["chardet"]
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = ">=3.8.1,<4.0"
|
python-versions = ">=3.8.1,<4.0"
|
||||||
content-hash = "b2ccab03db87c42aeed22fdba0c6bae45db6e6968074572561ebddd58e5ce910"
|
content-hash = "8bb95a90cfff1af5cd7e485a3f271db72de49f4af95bcf0a907ae00384ac35ed"
|
||||||
|
@ -133,6 +133,7 @@ toml = "^0.10.2"
|
|||||||
myst-nb = "^0.17.1"
|
myst-nb = "^0.17.1"
|
||||||
linkchecker = "^10.2.1"
|
linkchecker = "^10.2.1"
|
||||||
sphinx-copybutton = "^0.5.1"
|
sphinx-copybutton = "^0.5.1"
|
||||||
|
nbdoc = "^0.0.82"
|
||||||
|
|
||||||
[tool.poetry.group.test.dependencies]
|
[tool.poetry.group.test.dependencies]
|
||||||
# The only dependencies that should be added are
|
# The only dependencies that should be added are
|
||||||
|
Loading…
Reference in New Issue
Block a user