mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-31 18:38:48 +00:00
IMPROVEMENT: Conditionally import core type hints (#13700)
This commit is contained in:
@@ -52,6 +52,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c6fb4903-f845-4907-ae14-df305891b0ff",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Data Loading\n",
|
||||
@@ -76,17 +77,18 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 45,
|
||||
"id": "fc0767d4-9155-4591-855c-ef2e14e0e10f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import tempfile\n",
|
||||
"from pathlib import Path\n",
|
||||
"from pprint import pprint\n",
|
||||
"import requests\n",
|
||||
"import tempfile\n",
|
||||
"from time import sleep\n",
|
||||
"from typing import Dict, List\n",
|
||||
"\n",
|
||||
"import requests\n",
|
||||
"from docugami import Docugami\n",
|
||||
"from docugami.types import Document as DocugamiDocument\n",
|
||||
"\n",
|
||||
@@ -166,6 +168,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"id": "ce0b2b21-7623-46e7-ae2c-3a9f67e8b9b9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -207,6 +210,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "01f035e5-c3f8-4d23-9d1b-8d2babdea8e9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you are on the free Docugami tier, your files should be done in ~15 minutes or less depending on the number of pages uploaded and available resources (please contact Docugami for paid plans for faster processing). You can re-run the code above without reprocessing your files to continue waiting if your notebook is not continuously running (it does not re-upload)."
|
||||
@@ -225,6 +229,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 47,
|
||||
"id": "05fcdd57-090f-44bf-a1fb-2c3609c80e34",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -268,6 +273,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bfc1f2c9-e6d4-4d98-a799-6bc30bc61661",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The file processed by Docugami in the example above was [this one](https://data.ntsb.gov/carol-repgen/api/Aviation/ReportMain/GenerateNewestReport/192541/pdf) from the NTSB and you can look at the PDF side by side to compare the XML chunks above. \n",
|
||||
@@ -278,6 +284,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"id": "8a4b49e0-de78-4790-a930-ad7cf324697a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -326,6 +333,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1cfc06bc-67d2-46dd-b04d-95efa3619d0a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Docugami XML Deep Dive: Jane Doe NDA Example\n",
|
||||
@@ -336,6 +344,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 109,
|
||||
"id": "7b697d30-1e94-47f0-87e8-f81d4b180da2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -361,6 +370,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 98,
|
||||
"id": "14714576-6e1d-499b-bcc8-39140bb2fd78",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -415,6 +425,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dc09ba64-4973-4471-9501-54294c1143fc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The Docugami XML contains extremely detailed semantics and visual bounding boxes for all elements. The `dgml-utils` library parses text and non-text elements into formats appropriate to pass into LLMs (chunked text with XML semantic labels)"
|
||||
@@ -423,6 +434,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 100,
|
||||
"id": "2b4ece00-2e43-4254-adc9-66dbb79139a6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -460,6 +472,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 101,
|
||||
"id": "08350119-aa22-4ec1-8f65-b1316a0d4123",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -476,6 +489,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dca87b46-c0c2-4973-94ec-689c18075653",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The XML markup contains structural as well as semantic tags, which provide additional semantics to the LLM for improved retrieval and generation.\n",
|
||||
@@ -486,6 +500,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 112,
|
||||
"id": "bcac8294-c54a-4b6e-af9d-3911a69620b2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -539,8 +554,8 @@
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.prompts import (\n",
|
||||
" ChatPromptTemplate,\n",
|
||||
" SystemMessagePromptTemplate,\n",
|
||||
" HumanMessagePromptTemplate,\n",
|
||||
" SystemMessagePromptTemplate,\n",
|
||||
")\n",
|
||||
"from langchain.schema.output_parser import StrOutputParser"
|
||||
]
|
||||
@@ -610,11 +625,12 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import uuid\n",
|
||||
"from langchain.vectorstores.chroma import Chroma\n",
|
||||
"from langchain.storage import InMemoryStore\n",
|
||||
"from langchain.schema.document import Document\n",
|
||||
"\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.retrievers.multi_vector import MultiVectorRetriever\n",
|
||||
"from langchain.schema.document import Document\n",
|
||||
"from langchain.storage import InMemoryStore\n",
|
||||
"from langchain.vectorstores.chroma import Chroma\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def build_retriever(text_elements, tables, table_summaries):\n",
|
||||
@@ -710,6 +726,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 120,
|
||||
"id": "636e992f-823b-496b-a082-8b4fcd479de5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -743,6 +760,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "86cad5db-81fe-4ae6-a20e-550b85fcbe96",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# RAG on Llama2 paper\n",
|
||||
@@ -753,6 +771,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 121,
|
||||
"id": "0e4a2f43-dd48-4ae3-8e27-7e87d169965f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -777,6 +796,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 124,
|
||||
"id": "56b78fb3-603d-4343-ae72-be54a3c5dd72",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -801,6 +821,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 125,
|
||||
"id": "d3cc5ba9-8553-4eda-a5d1-b799751186af",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -812,6 +833,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 126,
|
||||
"id": "d7c73faf-74cb-400d-8059-b69e2493de38",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -823,6 +845,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 127,
|
||||
"id": "4c553722-be42-42ce-83b8-76a17f323f1c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -832,6 +855,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 128,
|
||||
"id": "65dce40b-f1c3-494a-949e-69a9c9544ddb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -851,6 +875,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "59877edf-9a02-45db-95cb-b7f4234abfa3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can check the [trace](https://smith.langchain.com/public/5de100c3-bb40-4234-bf02-64bc708686a1/r) to see what chunks were retrieved.\n",
|
||||
@@ -939,6 +964,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0879349e-7298-4f2c-b246-f1142e97a8e5",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
}
|
||||
|
@@ -69,8 +69,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_experimental.llm_bash.prompt import BashOutputParser\n",
|
||||
"from langchain.prompts.prompt import PromptTemplate\n",
|
||||
"from langchain_experimental.llm_bash.prompt import BashOutputParser\n",
|
||||
"\n",
|
||||
"_PROMPT_TEMPLATE = \"\"\"If someone asks you to perform a task, your job is to come up with a series of bash commands that will perform the task. There is no need to put \"#!/bin/bash\" in your answer. Make sure to reason step by step, using this format:\n",
|
||||
"Question: \"copy the files in the directory named 'target' into a new directory at the same level as target called 'myNewDirectory'\"\n",
|
||||
|
Reference in New Issue
Block a user