mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-31 18:38:48 +00:00
DOCS: update rag use case (#13319)
This commit is contained in:
@@ -8,6 +8,7 @@ Notebook | Description
|
||||
[Semi_Structured_RAG.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/Semi_Structured_RAG.ipynb) | Perform retrieval-augmented generation (rag) on documents with semi-structured data, including text and tables, using unstructured for parsing, multi-vector retriever for storing, and lcel for implementing chains.
|
||||
[Semi_structured_and_multi_moda...](https://github.com/langchain-ai/langchain/tree/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb) | Perform retrieval-augmented generation (rag) on documents with semi-structured data and images, using unstructured for parsing, multi-vector retriever for storage and retrieval, and lcel for implementing chains.
|
||||
[Semi_structured_multi_modal_RA...](https://github.com/langchain-ai/langchain/tree/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb) | Perform retrieval-augmented generation (rag) on documents with semi-structured data and images, using various tools and methods such as unstructured for parsing, multi-vector retriever for storing, lcel for implementing chains, and open source language models like llama2, llava, and gpt4all.
|
||||
[analyze_document.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/analyze_document.ipynb) | Analyze a single long document.
|
||||
[autogpt/autogpt.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/autogpt/autogpt.ipynb) | Implement autogpt, a language model, with langchain primitives such as llms, prompttemplates, vectorstores, embeddings, and tools.
|
||||
[autogpt/marathon_times.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/autogpt/marathon_times.ipynb) | Implement autogpt for finding winning marathon times.
|
||||
[baby_agi.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/baby_agi.ipynb) | Implement babyagi, an ai agent that can generate and execute tasks based on a given objective, with the flexibility to swap out specific vectorstores/model providers.
|
||||
@@ -44,6 +45,7 @@ Notebook | Description
|
||||
[plan_and_execute_agent.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/plan_and_execute_agent.ipynb) | Create plan-and-execute agents that accomplish objectives by planning tasks with a language model (llm) and executing them with a separate agent.
|
||||
[press_releases.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/press_releases.ipynb) | Retrieve and query company press release data powered by [Kay.ai](https://kay.ai).
|
||||
[program_aided_language_model.i...](https://github.com/langchain-ai/langchain/tree/master/cookbook/program_aided_language_model.ipynb) | Implement program-aided language models as described in the provided research paper.
|
||||
[qa_citations.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/qa_citations.ipynb) | Different ways to get a model to cite its sources.
|
||||
[retrieval_in_sql.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/retrieval_in_sql.ipynb) | Perform retrieval-augmented-generation (rag) on a PostgreSQL database using pgvector.
|
||||
[sales_agent_with_context.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/sales_agent_with_context.ipynb) | Implement a context-aware ai sales agent, salesgpt, that can have natural sales conversations, interact with other systems, and use a product knowledge base to discuss a company's offerings.
|
||||
[self_query_hotel_search.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/self_query_hotel_search.ipynb) | Build a hotel room search feature with self-querying retrieval, using a specific hotel recommendation dataset.
|
||||
|
105
cookbook/analyze_document.ipynb
Normal file
105
cookbook/analyze_document.ipynb
Normal file
@@ -0,0 +1,105 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f69d4a4c-137d-47e9-bea1-786afce9c1c0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Analyze a single long document\n",
|
||||
"\n",
|
||||
"The AnalyzeDocumentChain takes in a single document, splits it up, and then runs it through a CombineDocumentsChain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "2a0707ce-6d2d-471b-bc33-64da32a7b3f0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open(\"../docs/docs/modules/state_of_the_union.txt\") as f:\n",
|
||||
" state_of_the_union = f.read()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "ca14d161-2d5b-4a6c-a296-77d8ce4b28cd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import AnalyzeDocumentChain\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "9f97406c-85a9-45fb-99ce-9138c0ba3731",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains.question_answering import load_qa_chain\n",
|
||||
"\n",
|
||||
"qa_chain = load_qa_chain(llm, chain_type=\"map_reduce\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "0871a753-f5bb-4b4f-a394-f87f2691f659",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"qa_document_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "e6f86428-3c2c-46a0-a57c-e22826fdbf91",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The President said, \"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service.\"'"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"qa_document_chain.run(\n",
|
||||
" input_document=state_of_the_union,\n",
|
||||
" question=\"what did the president say about justice breyer?\",\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
179
cookbook/qa_citations.ipynb
Normal file
179
cookbook/qa_citations.ipynb
Normal file
@@ -0,0 +1,179 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9b5c258f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Citing retrieval sources\n",
|
||||
"\n",
|
||||
"This notebook shows how to use OpenAI functions ability to extract citations from text."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "eae4ca3e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.6.4) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chains import create_citation_fuzzy_match_chain\n",
|
||||
"from langchain.chat_models import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "2c6e62ee",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"question = \"What did the author do during college?\"\n",
|
||||
"context = \"\"\"\n",
|
||||
"My name is Jason Liu, and I grew up in Toronto Canada but I was born in China.\n",
|
||||
"I went to an arts highschool but in university I studied Computational Mathematics and physics. \n",
|
||||
"As part of coop I worked at many companies including Stitchfix, Facebook.\n",
|
||||
"I also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years.\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "078e0300",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "02cad6d0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = create_citation_fuzzy_match_chain(llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "e3c6e7ba",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result = chain.run(question=question, context=context)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "6f7615f2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"question='What did the author do during college?' answer=[FactWithEvidence(fact='The author studied Computational Mathematics and physics in university.', substring_quote=['in university I studied Computational Mathematics and physics']), FactWithEvidence(fact='The author started the Data Science club at the University of Waterloo and was the president of the club for 2 years.', substring_quote=['started the Data Science club at the University of Waterloo', 'president of the club for 2 years'])]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "3be6f366",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def highlight(text, span):\n",
|
||||
" return (\n",
|
||||
" \"...\"\n",
|
||||
" + text[span[0] - 20 : span[0]]\n",
|
||||
" + \"*\"\n",
|
||||
" + \"\\033[91m\"\n",
|
||||
" + text[span[0] : span[1]]\n",
|
||||
" + \"\\033[0m\"\n",
|
||||
" + \"*\"\n",
|
||||
" + text[span[1] : span[1] + 20]\n",
|
||||
" + \"...\"\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "636c4528",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Statement: The author studied Computational Mathematics and physics in university.\n",
|
||||
"Citation: ...arts highschool but *\u001b[91min university I studied Computational Mathematics and physics\u001b[0m*. \n",
|
||||
"As part of coop I...\n",
|
||||
"\n",
|
||||
"Statement: The author started the Data Science club at the University of Waterloo and was the president of the club for 2 years.\n",
|
||||
"Citation: ...x, Facebook.\n",
|
||||
"I also *\u001b[91mstarted the Data Science club at the University of Waterloo\u001b[0m* and I was the presi...\n",
|
||||
"Citation: ...erloo and I was the *\u001b[91mpresident of the club for 2 years\u001b[0m*.\n",
|
||||
"...\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for fact in result.answer:\n",
|
||||
" print(\"Statement:\", fact.fact)\n",
|
||||
" for span in fact.get_spans(context):\n",
|
||||
" print(\"Citation:\", highlight(context, span))\n",
|
||||
" print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8409cab0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
Reference in New Issue
Block a user