partner: Upstage quick documentation update (#20869)

* Updating the provider docs page. 
The RAG example was meant to be moved to cookbook, but was merged by
mistake.

* Fix bug in Groundedness Check

---------

Co-authored-by: JuHyung-Son <sonju0427@gmail.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
Sean
2024-04-25 09:36:54 -07:00
committed by William Fu-Hinthorn
parent 2d0c5c071d
commit 2e7a163520
4 changed files with 35 additions and 84 deletions

View File

@@ -53,7 +53,7 @@
"| Chat | Build assistants using Solar Mini Chat | `from langchain_upstage import ChatUpstage` | [Go](../../chat/upstage) |\n",
"| Text Embedding | Embed strings to vectors | `from langchain_upstage import UpstageEmbeddings` | [Go](../../text_embedding/upstage) |\n",
"| Groundedness Check | Verify groundedness of assistant's response | `from langchain_upstage import GroundednessCheck` | [Go](../../tools/upstage_groundedness_check) |\n",
"| Layout Analysis | Serialize documents with tables and figures | `from langchain_upstage import UpstageLayoutAnalysis` | [Go](../../document_loaders/upstage) |\n",
"| Layout Analysis | Serialize documents with tables and figures | `from langchain_upstage import UpstageLayoutAnalysisLoader` | [Go](../../document_loaders/upstage) |\n",
"\n",
"See [documentations](https://developers.upstage.ai/) for more details about the features."
]
@@ -172,10 +172,10 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain_upstage import UpstageLayoutAnalysis\n",
"from langchain_upstage import UpstageLayoutAnalysisLoader\n",
"\n",
"file_path = \"/PATH/TO/YOUR/FILE.pdf\"\n",
"layzer = UpstageLayoutAnalysis(file_path, split=\"page\")\n",
"layzer = UpstageLayoutAnalysisLoader(file_path, split=\"page\")\n",
"\n",
"# For improved memory efficiency, consider using the lazy_load method to load documents page by page.\n",
"docs = layzer.load() # or layzer.lazy_load()\n",
@@ -183,81 +183,6 @@
"for doc in docs[:3]:\n",
" print(doc)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Retrieval-Augmented Generation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from typing import List\n",
"\n",
"from langchain_community.vectorstores import DocArrayInMemorySearch\n",
"from langchain_core.documents.base import Document\n",
"from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.runnables import RunnablePassthrough\n",
"from langchain_core.runnables.base import RunnableSerializable\n",
"from langchain_upstage import (\n",
" ChatUpstage,\n",
" GroundednessCheck,\n",
" UpstageEmbeddings,\n",
" UpstageLayoutAnalysis,\n",
")\n",
"\n",
"model = ChatUpstage()\n",
"\n",
"file_path = (\n",
" \"/PATH/TO/YOUR/FILE.pdf\" # e.g. \"libs/partners/upstage/tests/examples/solar.pdf\"\n",
")\n",
"loader = UpstageLayoutAnalysis(file_path=file_path, split=\"element\")\n",
"docs = loader.load()\n",
"\n",
"vectorstore = DocArrayInMemorySearch.from_documents(docs, embedding=UpstageEmbeddings())\n",
"retriever = vectorstore.as_retriever()\n",
"\n",
"template = \"\"\"Answer the question based only on the following context:\n",
"{context}\n",
"\n",
"Question: {question}\n",
"\"\"\"\n",
"prompt = ChatPromptTemplate.from_template(template)\n",
"output_parser = StrOutputParser()\n",
"\n",
"question = \"ASK ANY QUESTION HERE.\" # e.g. \"How many parameters in SOLAR model?\"\n",
"\n",
"retrieved_docs = retriever.get_relevant_documents(question)\n",
"\n",
"groundedness_check = GroundednessCheck()\n",
"groundedness = \"\"\n",
"while groundedness != \"grounded\":\n",
" chain: RunnableSerializable = RunnablePassthrough() | prompt | model | output_parser\n",
"\n",
" result = chain.invoke(\n",
" {\n",
" \"context\": retrieved_docs,\n",
" \"question\": question,\n",
" }\n",
" )\n",
"\n",
" # convert all Documents to string\n",
" def formatDocumentsAsString(docs: List[Document]) -> str:\n",
" return \"\\n\".join([doc.page_content for doc in docs])\n",
"\n",
" groundedness = groundedness_check.run(\n",
" {\n",
" \"context\": formatDocumentsAsString(retrieved_docs),\n",
" \"assistant_message\": result,\n",
" }\n",
" ).content"
]
}
],
"metadata": {