mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-23 07:09:31 +00:00
docs[minor],community[patch]: Minor tutorial docs improvement, minor import error quick fix. (#22725)
minor changes to module import error handling and minor issues in tutorial documents. --------- Co-authored-by: Bagatur <baskaryan@gmail.com> Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com> Co-authored-by: Eugene Yurtsev <eugene@langchain.dev>
This commit is contained in:
parent
7545b1d29b
commit
a349fce880
@ -69,6 +69,17 @@
|
||||
"Once we have loaded PDFs into LangChain `Document` objects, we can index them (e.g., a RAG application) in the usual way:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c3b932bb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install faiss-cpu \n",
|
||||
"# use `pip install faiss-gpu` for CUDA GPU support"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
|
@ -12,6 +12,19 @@
|
||||
"This covers how to load `Microsoft PowerPoint` documents into a document format that we can use downstream."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "aef1500f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Install packages\n",
|
||||
"%pip install unstructured\n",
|
||||
"%pip install python-magic\n",
|
||||
"%pip install python-pptx"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
|
@ -322,7 +322,7 @@
|
||||
"\n",
|
||||
"Now we can build our full QA chain. This is as simple as updating the retriever to be our new `history_aware_retriever`.\n",
|
||||
"\n",
|
||||
"Again, we will use [create_stuff_documents_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html) to generate a `question_answer_chain`, with input keys `context`, `chat_history`, and `input`-- it accepts the retrieved context alongside the conversation history and query to generate an answer.\n",
|
||||
"Again, we will use [create_stuff_documents_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html) to generate a `question_answer_chain`, with input keys `context`, `chat_history`, and `input`-- it accepts the retrieved context alongside the conversation history and query to generate an answer. A more detailed explaination is over [here](/docs/tutorials/rag/#built-in-chains)\n",
|
||||
"\n",
|
||||
"We build our final `rag_chain` with [create_retrieval_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.retrieval.create_retrieval_chain.html). This chain applies the `history_aware_retriever` and `question_answer_chain` in sequence, retaining intermediate outputs such as the retrieved context for convenience. It has input keys `input` and `chat_history`, and includes `input`, `chat_history`, `context`, and `answer` in its output."
|
||||
]
|
||||
@ -760,13 +760,6 @@
|
||||
"id": "931c4fe3-c603-4efb-9b37-5f7cbbb1cbbd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error in LangChainTracer.on_tool_end callback: TracerException(\"Found chain run at ID 0ec120e2-b1fc-4593-9fee-2dd4f4cae256, but expected {'tool'} run.\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
@ -1030,6 +1023,7 @@
|
||||
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
|
||||
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
|
||||
"from langgraph.checkpoint.sqlite import SqliteSaver\n",
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"memory = SqliteSaver.from_conn_string(\":memory:\")\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
|
||||
|
@ -24,7 +24,7 @@ _IMPORT_RE = re.compile(
|
||||
|
||||
_CURRENT_PATH = Path(__file__).parent.absolute()
|
||||
# Directory where generated markdown files are stored
|
||||
_DOCS_DIR = _CURRENT_PATH / "docs"
|
||||
_DOCS_DIR = _CURRENT_PATH.parent.parent / "docs"
|
||||
|
||||
|
||||
def find_files(path):
|
||||
@ -75,6 +75,7 @@ def main():
|
||||
|
||||
for file in find_files(args.docs_dir):
|
||||
file_imports = replace_imports(file)
|
||||
print(file)
|
||||
|
||||
if file_imports:
|
||||
# Use relative file path as key
|
||||
|
@ -89,7 +89,13 @@ class PyPDFParser(BaseBlobParser):
|
||||
|
||||
def lazy_parse(self, blob: Blob) -> Iterator[Document]: # type: ignore[valid-type]
|
||||
"""Lazily parse the blob."""
|
||||
try:
|
||||
import pypdf
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"`pypdf` package not found, please install it with "
|
||||
"`pip install pypdf`"
|
||||
)
|
||||
|
||||
with blob.as_bytes_io() as pdf_file_obj: # type: ignore[attr-defined]
|
||||
pdf_reader = pypdf.PdfReader(pdf_file_obj, password=self.password)
|
||||
@ -144,7 +150,13 @@ class PDFMinerParser(BaseBlobParser):
|
||||
"""Lazily parse the blob."""
|
||||
|
||||
if not self.extract_images:
|
||||
try:
|
||||
from pdfminer.high_level import extract_text
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"`pdfminer` package not found, please install it with "
|
||||
"`pip install pdfminer.six`"
|
||||
)
|
||||
|
||||
with blob.as_bytes_io() as pdf_file_obj: # type: ignore[attr-defined]
|
||||
if self.concatenate_pages:
|
||||
|
Loading…
Reference in New Issue
Block a user