diff --git a/cookbook/advanced_rag_eval.ipynb b/cookbook/advanced_rag_eval.ipynb
index 1d35b94fd60..a2de7261091 100644
--- a/cookbook/advanced_rag_eval.ipynb
+++ b/cookbook/advanced_rag_eval.ipynb
@@ -63,13 +63,11 @@
"\n",
"# Load\n",
"from langchain.document_loaders import PyPDFLoader\n",
- "\n",
"loader = PyPDFLoader(path + \"cpi.pdf\")\n",
"pdf_pages = loader.load()\n",
"\n",
"# Split\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
- "\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
"all_splits_pypdf = text_splitter.split_documents(pdf_pages)\n",
"all_splits_pypdf_texts = [d.page_content for d in all_splits_pypdf]"
@@ -134,13 +132,10 @@
"source": [
"from langchain.vectorstores import Chroma\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
- "\n",
- "baseline = Chroma.from_texts(\n",
- " texts=all_splits_pypdf_texts,\n",
- " collection_name=\"baseline\",\n",
- " embedding=OpenAIEmbeddings(),\n",
- ")\n",
- "retriever_baseline = baseline.as_retriever()"
+ "baseline = Chroma.from_texts(texts=all_splits_pypdf_texts,\n",
+ " collection_name=\"baseline\",\n",
+ " embedding=OpenAIEmbeddings())\n",
+ "retriever_baseline=baseline.as_retriever()"
]
},
{
@@ -174,7 +169,7 @@
"model = ChatOpenAI(temperature=0, model=\"gpt-4\")\n",
"summarize_chain = {\"element\": lambda x: x} | prompt | model | StrOutputParser()\n",
"\n",
- "# Apply to text\n",
+ "# Apply to text \n",
"text_summaries = summarize_chain.batch(texts, {\"max_concurrency\": 5})\n",
"\n",
"# Apply to tables\n",
@@ -202,25 +197,26 @@
"from PIL import Image\n",
"from langchain.schema.messages import HumanMessage\n",
"\n",
- "\n",
"def encode_image(image_path):\n",
- " \"\"\"Getting the base64 string\"\"\"\n",
+ " ''' Getting the base64 string '''\n",
" with open(image_path, \"rb\") as image_file:\n",
- " return base64.b64encode(image_file.read()).decode(\"utf-8\")\n",
- "\n",
- "\n",
- "def image_summarize(img_base64, prompt):\n",
- " \"\"\"Image summary\"\"\"\n",
- " chat = ChatOpenAI(model=\"gpt-4-vision-preview\", max_tokens=1024)\n",
+ " return base64.b64encode(image_file.read()).decode('utf-8') \n",
"\n",
+ "def image_summarize(img_base64,prompt):\n",
+ " ''' Image summary '''\n",
+ " chat = ChatOpenAI(model=\"gpt-4-vision-preview\",\n",
+ " max_tokens=1024)\n",
+ " \n",
" msg = chat.invoke(\n",
" [\n",
" HumanMessage(\n",
" content=[\n",
- " {\"type\": \"text\", \"text\": prompt},\n",
+ " {\"type\": \"text\", \"text\":prompt},\n",
" {\n",
" \"type\": \"image_url\",\n",
- " \"image_url\": {\"url\": f\"data:image/jpeg;base64,{img_base64}\"},\n",
+ " \"image_url\": {\n",
+ " \"url\": f\"data:image/jpeg;base64,{img_base64}\"\n",
+ " },\n",
" },\n",
" ]\n",
" )\n",
@@ -228,7 +224,6 @@
" )\n",
" return msg.content\n",
"\n",
- "\n",
"# Store base64 encoded images\n",
"img_base64_list = []\n",
"\n",
@@ -242,11 +237,11 @@
"\n",
"# Apply to images\n",
"for img_file in sorted(os.listdir(path)):\n",
- " if img_file.endswith(\".jpg\"):\n",
+ " if img_file.endswith('.jpg'):\n",
" img_path = os.path.join(path, img_file)\n",
" base64_image = encode_image(img_path)\n",
" img_base64_list.append(base64_image)\n",
- " image_summaries.append(image_summarize(base64_image, prompt))"
+ " image_summaries.append(image_summarize(base64_image,prompt))"
]
},
{
@@ -272,10 +267,14 @@
"from langchain.schema.document import Document\n",
"from langchain.retrievers.multi_vector import MultiVectorRetriever\n",
"\n",
- "\n",
- "def create_multi_vector_retriever(\n",
- " vectorstore, text_summaries, texts, table_summaries, tables, image_summaries, images\n",
- "):\n",
+ "def create_multi_vector_retriever(vectorstore, \n",
+ " text_summaries, \n",
+ " texts, \n",
+ " table_summaries, \n",
+ " tables, \n",
+ " image_summaries, \n",
+ " images):\n",
+ " \n",
" # Initialize the storage layer\n",
" store = InMemoryStore()\n",
" id_key = \"doc_id\"\n",
@@ -310,22 +309,18 @@
"\n",
" return retriever\n",
"\n",
- "\n",
"# The vectorstore to use to index the summaries\n",
- "multi_vector_img = Chroma(\n",
- " collection_name=\"multi_vector_img\", embedding_function=OpenAIEmbeddings()\n",
- ")\n",
+ "multi_vector_img = Chroma(collection_name=\"multi_vector_img\", \n",
+ " embedding_function=OpenAIEmbeddings())\n",
"\n",
"# Create retriever\n",
- "retriever_multi_vector_img = create_multi_vector_retriever(\n",
- " multi_vector_img,\n",
- " text_summaries,\n",
- " texts,\n",
- " table_summaries,\n",
- " tables,\n",
- " image_summaries,\n",
- " img_base64_list,\n",
- ")"
+ "retriever_multi_vector_img = create_multi_vector_retriever(multi_vector_img,\n",
+ " text_summaries,\n",
+ " texts,\n",
+ " table_summaries, \n",
+ " tables, \n",
+ " image_summaries, \n",
+ " img_base64_list)"
]
},
{
@@ -335,10 +330,10 @@
"metadata": {},
"outputs": [],
"source": [
- "# Testing on retrieval\n",
- "query = \"What percentage of CPI is dedicated to Housing, and how does it compare to the combined percentage of Medical Care, Apparel, and Other Goods and Services?\"\n",
- "suffix_for_images = \" Include any pie charts, graphs, or tables.\"\n",
- "docs = retriever_multi_vector_img.get_relevant_documents(query + suffix_for_images)"
+ "# Testing on retrieval \n",
+ "query=\"What percentage of CPI is dedicated to Housing, and how does it compare to the combined percentage of Medical Care, Apparel, and Other Goods and Services?\"\n",
+ "suffix_for_images=\" Include any pie charts, graphs, or tables.\"\n",
+ "docs = retriever_multi_vector_img.get_relevant_documents(query+suffix_for_images)"
]
},
{
@@ -362,16 +357,14 @@
],
"source": [
"from IPython.display import display, HTML\n",
- "\n",
- "\n",
"def plt_img_base64(img_base64):\n",
+ "\n",
" # Create an HTML img tag with the base64 string as the source\n",
" image_html = f'
'\n",
- "\n",
+ " \n",
" # Display the image by rendering the HTML\n",
" display(HTML(image_html))\n",
"\n",
- "\n",
"plt_img_base64(docs[1])"
]
},
@@ -393,20 +386,17 @@
"outputs": [],
"source": [
"# The vectorstore to use to index the summaries\n",
- "multi_vector_text = Chroma(\n",
- " collection_name=\"multi_vector_text\", embedding_function=OpenAIEmbeddings()\n",
- ")\n",
+ "multi_vector_text = Chroma(collection_name=\"multi_vector_text\", \n",
+ " embedding_function=OpenAIEmbeddings())\n",
"\n",
"# Create retriever\n",
- "retriever_multi_vector_img_summary = create_multi_vector_retriever(\n",
- " multi_vector_text,\n",
- " text_summaries,\n",
- " texts,\n",
- " table_summaries,\n",
- " tables,\n",
- " image_summaries,\n",
- " image_summaries,\n",
- ")"
+ "retriever_multi_vector_img_summary = create_multi_vector_retriever(multi_vector_text,\n",
+ " text_summaries,\n",
+ " texts,\n",
+ " table_summaries, \n",
+ " tables, \n",
+ " image_summaries, \n",
+ " image_summaries)"
]
},
{
@@ -428,17 +418,14 @@
"\n",
"# Create chroma w/ multi-modal embeddings\n",
"multimodal_embd = Chroma(\n",
- " collection_name=\"multimodal_embd\", embedding_function=OpenCLIPEmbeddings()\n",
+ " collection_name=\"multimodal_embd\",\n",
+ " embedding_function=OpenCLIPEmbeddings()\n",
")\n",
"\n",
"# Get image URIs\n",
- "image_uris = sorted(\n",
- " [\n",
- " os.path.join(path, image_name)\n",
- " for image_name in os.listdir(path)\n",
- " if image_name.endswith(\".jpg\")\n",
- " ]\n",
- ")\n",
+ "image_uris = sorted([os.path.join(path, image_name) \n",
+ " for image_name in os.listdir(path) \n",
+ " if image_name.endswith('.jpg')])\n",
"\n",
"# Add images and documents\n",
"if image_uris:\n",
@@ -448,7 +435,7 @@
"if tables:\n",
" multimodal_embd.add_texts(texts=tables)\n",
"\n",
- "# Make retriever\n",
+ "# Make retriever \n",
"retriever_multimodal_embd = multimodal_embd.as_retriever()"
]
},
@@ -479,14 +466,14 @@
"\"\"\"\n",
"rag_prompt_text = ChatPromptTemplate.from_template(template)\n",
"\n",
- "\n",
- "# Build\n",
+ "# Build \n",
"def text_rag_chain(retriever):\n",
- " \"\"\"RAG chain\"\"\"\n",
+ " \n",
+ " ''' RAG chain '''\n",
"\n",
" # LLM\n",
" model = ChatOpenAI(temperature=0, model=\"gpt-4\")\n",
- "\n",
+ " \n",
" # RAG pipeline\n",
" chain = (\n",
" {\"context\": retriever, \"question\": RunnablePassthrough()}\n",
@@ -513,15 +500,13 @@
"metadata": {},
"outputs": [],
"source": [
- "import re\n",
+ "import re \n",
"from langchain.schema import Document\n",
"from langchain.schema.runnable import RunnableLambda\n",
"\n",
- "\n",
"def looks_like_base64(sb):\n",
" \"\"\"Check if the string looks like base64.\"\"\"\n",
- " return re.match(\"^[A-Za-z0-9+/]+[=]{0,2}$\", sb) is not None\n",
- "\n",
+ " return re.match('^[A-Za-z0-9+/]+[=]{0,2}$', sb) is not None\n",
"\n",
"def is_image_data(b64data):\n",
" \"\"\"Check if the base64 data is an image by looking at the start of the data.\"\"\"\n",
@@ -529,7 +514,7 @@
" b\"\\xFF\\xD8\\xFF\": \"jpg\",\n",
" b\"\\x89\\x50\\x4E\\x47\\x0D\\x0A\\x1A\\x0A\": \"png\",\n",
" b\"\\x47\\x49\\x46\\x38\": \"gif\",\n",
- " b\"\\x52\\x49\\x46\\x46\": \"webp\",\n",
+ " b\"\\x52\\x49\\x46\\x46\": \"webp\"\n",
" }\n",
" try:\n",
" header = base64.b64decode(b64data)[:8] # Decode and get the first 8 bytes\n",
@@ -540,7 +525,6 @@
" except Exception:\n",
" return False\n",
"\n",
- "\n",
"def split_image_text_types(docs):\n",
" \"\"\"Split base64-encoded images and texts.\"\"\"\n",
" b64_images = []\n",
@@ -555,7 +539,6 @@
" texts.append(doc)\n",
" return {\"images\": b64_images, \"texts\": texts}\n",
"\n",
- "\n",
"def img_prompt_func(data_dict):\n",
" # Joining the context texts into a single string\n",
" formatted_texts = \"\\n\".join(data_dict[\"context\"][\"texts\"])\n",
@@ -567,7 +550,7 @@
" \"type\": \"image_url\",\n",
" \"image_url\": {\n",
" \"url\": f\"data:image/jpeg;base64,{data_dict['context']['images'][0]}\"\n",
- " },\n",
+ " }\n",
" }\n",
" messages.append(image_message)\n",
"\n",
@@ -580,24 +563,22 @@
" f\"User-provided question / keywords: {data_dict['question']}\\n\\n\"\n",
" \"Text and / or tables:\\n\"\n",
" f\"{formatted_texts}\"\n",
- " ),\n",
+ " )\n",
" }\n",
" messages.append(text_message)\n",
" return [HumanMessage(content=messages)]\n",
"\n",
- "\n",
"def multi_modal_rag_chain(retriever):\n",
- " \"\"\"Multi-modal RAG chain\"\"\"\n",
+ " ''' Multi-modal RAG chain '''\n",
"\n",
" # Multi-modal LLM\n",
- " model = ChatOpenAI(temperature=0, model=\"gpt-4-vision-preview\", max_tokens=1024)\n",
- "\n",
+ " model = ChatOpenAI(temperature=0, \n",
+ " model=\"gpt-4-vision-preview\", \n",
+ " max_tokens=1024)\n",
+ " \n",
" # RAG pipeline\n",
" chain = (\n",
- " {\n",
- " \"context\": retriever | RunnableLambda(split_image_text_types),\n",
- " \"question\": RunnablePassthrough(),\n",
- " }\n",
+ " {\"context\": retriever | RunnableLambda(split_image_text_types), \"question\": RunnablePassthrough()}\n",
" | RunnableLambda(img_prompt_func)\n",
" | model\n",
" | StrOutputParser()\n",
@@ -622,12 +603,12 @@
"outputs": [],
"source": [
"# RAG chains\n",
- "chain_baseline = text_rag_chain(retriever_baseline)\n",
- "chain_mv_text = text_rag_chain(retriever_multi_vector_img_summary)\n",
+ "chain_baseline=text_rag_chain(retriever_baseline)\n",
+ "chain_mv_text=text_rag_chain(retriever_multi_vector_img_summary)\n",
"\n",
"# Multi-modal RAG chains\n",
- "chain_multimodal_mv_img = multi_modal_rag_chain(retriever_multi_vector_img)\n",
- "chain_multimodal_embd = multi_modal_rag_chain(retriever_multimodal_embd)"
+ "chain_multimodal_mv_img=multi_modal_rag_chain(retriever_multi_vector_img)\n",
+ "chain_multimodal_embd=multi_modal_rag_chain(retriever_multimodal_embd)"
]
},
{
@@ -713,8 +694,7 @@
"source": [
"# Read\n",
"import pandas as pd\n",
- "\n",
- "eval_set = pd.read_csv(path + \"cpi_eval.csv\")\n",
+ "eval_set = pd.read_csv(path+'cpi_eval.csv')\n",
"eval_set.head(3)"
]
},
@@ -735,12 +715,12 @@
"# Populate dataset\n",
"for _, row in eval_set.iterrows():\n",
" # Get Q, A\n",
- " q = row[\"Question\"]\n",
- " a = row[\"Answer\"]\n",
+ " q = row['Question']\n",
+ " a = row['Answer']\n",
" # Use the values in your function\n",
- " client.create_example(\n",
- " inputs={\"question\": q}, outputs={\"answer\": a}, dataset_id=dataset.id\n",
- " )"
+ " client.create_example(inputs={\"question\": q}, \n",
+ " outputs={\"answer\": a}, \n",
+ " dataset_id=dataset.id)"
]
},
{
@@ -784,22 +764,17 @@
" evaluators=[\"qa\"],\n",
")\n",
"\n",
- "\n",
- "def run_eval(chain, run_name, dataset_name):\n",
+ "def run_eval(chain,run_name,dataset_name):\n",
" _ = client.run_on_dataset(\n",
" dataset_name=dataset_name,\n",
- " llm_or_chain_factory=lambda: (lambda x: x[\"question\"] + suffix_for_images)\n",
- " | chain,\n",
+ " llm_or_chain_factory=lambda: (lambda x: x[\"question\"]+suffix_for_images) | chain,\n",
" evaluation=eval_config,\n",
" project_name=run_name,\n",
" )\n",
"\n",
- "\n",
- "for chain, run in zip(\n",
- " [chain_baseline, chain_mv_text, chain_multimodal_mv_img, chain_multimodal_embd],\n",
- " [\"baseline\", \"mv_text\", \"mv_img\", \"mm_embd\"],\n",
- "):\n",
- " run_eval(chain, dataset_name + \"-\" + run, dataset_name)"
+ "for chain, run in zip([chain_baseline, chain_mv_text, chain_multimodal_mv_img, chain_multimodal_embd], \n",
+ " [\"baseline\", \"mv_text\", \"mv_img\", \"mm_embd\"]):\n",
+ " run_eval(chain, dataset_name+\"-\"+run, dataset_name)"
]
}
],
diff --git a/cookbook/multi_modal_RAG_chroma.ipynb b/cookbook/multi_modal_RAG_chroma.ipynb
index 372d2ad5b59..e0c7f81fbb9 100644
--- a/cookbook/multi_modal_RAG_chroma.ipynb
+++ b/cookbook/multi_modal_RAG_chroma.ipynb
@@ -115,7 +115,7 @@
"metadata": {},
"outputs": [],
"source": [
- "# Folder with pdf and extracted images\n",
+ "# Folder with pdf and extracted images \n",
"path = \"/Users/rlm/Desktop/photos/\""
]
},
@@ -128,10 +128,9 @@
"source": [
"# Extract images, tables, and chunk text\n",
"from unstructured.partition.pdf import partition_pdf\n",
- "\n",
"raw_pdf_elements = partition_pdf(\n",
" filename=path + \"photos.pdf\",\n",
- " extract_images_in_pdf=True,\n",
+ " extract_images_in_pdf=True, \n",
" infer_table_structure=True,\n",
" chunking_strategy=\"by_title\",\n",
" max_characters=4000,\n",
@@ -192,17 +191,14 @@
"\n",
"# Create chroma\n",
"vectorstore = Chroma(\n",
- " collection_name=\"mm_rag_clip_photos\", embedding_function=OpenCLIPEmbeddings()\n",
+ " collection_name=\"mm_rag_clip_photos\",\n",
+ " embedding_function=OpenCLIPEmbeddings()\n",
")\n",
"\n",
"# Get image URIs with .jpg extension only\n",
- "image_uris = sorted(\n",
- " [\n",
- " os.path.join(path, image_name)\n",
- " for image_name in os.listdir(path)\n",
- " if image_name.endswith(\".jpg\")\n",
- " ]\n",
- ")\n",
+ "image_uris = sorted([os.path.join(path, image_name) \n",
+ " for image_name in os.listdir(path) \n",
+ " if image_name.endswith('.jpg')])\n",
"\n",
"# Add images\n",
"vectorstore.add_images(uris=image_uris)\n",
@@ -210,7 +206,7 @@
"# Add documents\n",
"vectorstore.add_texts(texts=texts)\n",
"\n",
- "# Make retriever\n",
+ "# Make retriever \n",
"retriever = vectorstore.as_retriever()"
]
},
@@ -239,7 +235,6 @@
"from io import BytesIO\n",
"from PIL import Image\n",
"\n",
- "\n",
"def resize_base64_image(base64_string, size=(128, 128)):\n",
" \"\"\"\n",
" Resize an image encoded as a Base64 string.\n",
@@ -263,31 +258,30 @@
" resized_img.save(buffered, format=img.format)\n",
"\n",
" # Encode the resized image to Base64\n",
- " return base64.b64encode(buffered.getvalue()).decode(\"utf-8\")\n",
- "\n",
+ " return base64.b64encode(buffered.getvalue()).decode('utf-8')\n",
"\n",
"def is_base64(s):\n",
- " \"\"\"Check if a string is Base64 encoded\"\"\"\n",
+ " ''' Check if a string is Base64 encoded '''\n",
" try:\n",
" return base64.b64encode(base64.b64decode(s)) == s.encode()\n",
" except Exception:\n",
" return False\n",
- "\n",
- "\n",
+ " \n",
"def split_image_text_types(docs):\n",
- " \"\"\"Split numpy array images and texts\"\"\"\n",
+ " ''' Split numpy array images and texts '''\n",
" images = []\n",
" text = []\n",
" for doc in docs:\n",
- " doc = doc.page_content # Extract Document contents\n",
+ " doc = doc.page_content # Extract Document contents \n",
" if is_base64(doc):\n",
" # Resize image to avoid OAI server error\n",
- " images.append(\n",
- " resize_base64_image(doc, size=(250, 250))\n",
- " ) # base64 encoded str\n",
+ " images.append(resize_base64_image(doc, size=(250, 250))) # base64 encoded str \n",
" else:\n",
- " text.append(doc)\n",
- " return {\"images\": images, \"texts\": text}"
+ " text.append(doc) \n",
+ " return {\n",
+ " \"images\": images,\n",
+ " \"texts\": text\n",
+ " }"
]
},
{
@@ -317,7 +311,6 @@
"from langchain.schema.runnable import RunnablePassthrough, RunnableLambda\n",
"from langchain.schema.messages import HumanMessage, SystemMessage\n",
"\n",
- "\n",
"def prompt_func(data_dict):\n",
" # Joining the context texts into a single string\n",
" formatted_texts = \"\\n\".join(data_dict[\"context\"][\"texts\"])\n",
@@ -329,7 +322,7 @@
" \"type\": \"image_url\",\n",
" \"image_url\": {\n",
" \"url\": f\"data:image/jpeg;base64,{data_dict['context']['images'][0]}\"\n",
- " },\n",
+ " }\n",
" }\n",
" messages.append(image_message)\n",
"\n",
@@ -349,21 +342,17 @@
" f\"User-provided keywords: {data_dict['question']}\\n\\n\"\n",
" \"Text and / or tables:\\n\"\n",
" f\"{formatted_texts}\"\n",
- " ),\n",
+ " )\n",
" }\n",
" messages.append(text_message)\n",
"\n",
" return [HumanMessage(content=messages)]\n",
- "\n",
- "\n",
+ " \n",
"model = ChatOpenAI(temperature=0, model=\"gpt-4-vision-preview\", max_tokens=1024)\n",
"\n",
"# RAG pipeline\n",
"chain = (\n",
- " {\n",
- " \"context\": retriever | RunnableLambda(split_image_text_types),\n",
- " \"question\": RunnablePassthrough(),\n",
- " }\n",
+ " {\"context\": retriever | RunnableLambda(split_image_text_types), \"question\": RunnablePassthrough()}\n",
" | RunnableLambda(prompt_func)\n",
" | model\n",
" | StrOutputParser()\n",
@@ -423,16 +412,15 @@
"source": [
"from IPython.display import display, HTML\n",
"\n",
- "\n",
"def plt_img_base64(img_base64):\n",
+ "\n",
" # Create an HTML img tag with the base64 string as the source\n",
" image_html = f'
'\n",
- "\n",
+ " \n",
" # Display the image by rendering the HTML\n",
" display(HTML(image_html))\n",
"\n",
- "\n",
- "docs = retriever.get_relevant_documents(\"Woman with children\", k=10)\n",
+ "docs = retriever.get_relevant_documents(\"Woman with children\",k=10)\n",
"for doc in docs:\n",
" if is_base64(doc.page_content):\n",
" plt_img_base64(doc.page_content)\n",
@@ -458,7 +446,9 @@
}
],
"source": [
- "chain.invoke(\"Woman with children\")"
+ "chain.invoke(\n",
+ " \"Woman with children\"\n",
+ ")"
]
},
{
diff --git a/cookbook/qianfan_baidu_elasticesearch_RAG.ipynb b/cookbook/qianfan_baidu_elasticesearch_RAG.ipynb
index 59af499212a..b6dac21d2b6 100644
--- a/cookbook/qianfan_baidu_elasticesearch_RAG.ipynb
+++ b/cookbook/qianfan_baidu_elasticesearch_RAG.ipynb
@@ -82,9 +82,7 @@
"secret_access_key = \"your bos access sk\"\n",
"\n",
"# create BceClientConfiguration\n",
- "config = BceClientConfiguration(\n",
- " credentials=BceCredentials(access_key_id, secret_access_key), endpoint=bos_host\n",
- ")\n",
+ "config = BceClientConfiguration(credentials=BceCredentials(access_key_id, secret_access_key), endpoint = bos_host)\n",
"\n",
"loader = BaiduBOSDirectoryLoader(conf=config, bucket=\"llm-test\", prefix=\"llm/\")\n",
"documents = loader.load()\n",
@@ -111,14 +109,10 @@
"embeddings.client = sentence_transformers.SentenceTransformer(embeddings.model_name)\n",
"\n",
"db = BESVectorStore.from_documents(\n",
- " documents=split_docs,\n",
- " embedding=embeddings,\n",
- " bes_url=\"your bes url\",\n",
- " index_name=\"test-index\",\n",
- " vector_query_field=\"vector\",\n",
- ")\n",
+ " documents=split_docs, embedding=embeddings, bes_url=\"your bes url\", index_name='test-index', vector_query_field='vector'\n",
+ " )\n",
"\n",
- "db.client.indices.refresh(index=\"test-index\")\n",
+ "db.client.indices.refresh(index='test-index')\n",
"retriever = db.as_retriever()"
]
},
@@ -136,15 +130,8 @@
"metadata": {},
"outputs": [],
"source": [
- "llm = QianfanLLMEndpoint(\n",
- " model=\"ERNIE-Bot\",\n",
- " qianfan_ak=\"your qianfan ak\",\n",
- " qianfan_sk=\"your qianfan sk\",\n",
- " streaming=True,\n",
- ")\n",
- "qa = RetrievalQA.from_chain_type(\n",
- " llm=llm, chain_type=\"refine\", retriever=retriever, return_source_documents=True\n",
- ")\n",
+ "llm = QianfanLLMEndpoint(model=\"ERNIE-Bot\", qianfan_ak='your qianfan ak', qianfan_sk='your qianfan sk', streaming=True)\n",
+ "qa = RetrievalQA.from_chain_type(llm=llm, chain_type=\"refine\", retriever=retriever, return_source_documents=True)\n",
"\n",
"query = \"什么是张量?\"\n",
"print(qa.run(query))"
diff --git a/docs/docs/integrations/document_loaders/docusaurus.ipynb b/docs/docs/integrations/document_loaders/docusaurus.ipynb
index 0ffa9a0b1d3..ca953cb6684 100644
--- a/docs/docs/integrations/document_loaders/docusaurus.ipynb
+++ b/docs/docs/integrations/document_loaders/docusaurus.ipynb
@@ -118,9 +118,7 @@
"source": [
"loader = DocusaurusLoader(\n",
" \"https://python.langchain.com\",\n",
- " filter_urls=[\n",
- " \"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"\n",
- " ],\n",
+ " filter_urls=[\"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"],\n",
")\n",
"documents = loader.load()"
]
@@ -164,11 +162,9 @@
"source": [
"loader = DocusaurusLoader(\n",
" \"https://python.langchain.com\",\n",
- " filter_urls=[\n",
- " \"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"\n",
- " ],\n",
+ " filter_urls=[\"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"],\n",
" # This will only include the content that matches these tags, otherwise they will be removed\n",
- " custom_html_tags=[\"#content\", \".main\"],\n",
+ " custom_html_tags=[\"#content\", \".main\"]\n",
")"
]
},
@@ -217,9 +213,7 @@
"source": [
"loader = DocusaurusLoader(\n",
" \"https://python.langchain.com\",\n",
- " filter_urls=[\n",
- " \"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"\n",
- " ],\n",
+ " filter_urls=[\"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"],\n",
" parsing_function=remove_nav_and_header_elements,\n",
")"
]
diff --git a/docs/docs/integrations/memory/neo4j_chat_message_history.ipynb b/docs/docs/integrations/memory/neo4j_chat_message_history.ipynb
index f31915bb748..238beb09982 100644
--- a/docs/docs/integrations/memory/neo4j_chat_message_history.ipynb
+++ b/docs/docs/integrations/memory/neo4j_chat_message_history.ipynb
@@ -25,7 +25,7 @@
" url=\"bolt://localhost:7687\",\n",
" username=\"neo4j\",\n",
" password=\"password\",\n",
- " session_id=\"session_id_1\",\n",
+ " session_id=\"session_id_1\"\n",
")\n",
"\n",
"history.add_user_message(\"hi!\")\n",
diff --git a/docs/docs/integrations/text_embedding/fastembed.ipynb b/docs/docs/integrations/text_embedding/fastembed.ipynb
index edac78ecb5c..9d6826f92d2 100644
--- a/docs/docs/integrations/text_embedding/fastembed.ipynb
+++ b/docs/docs/integrations/text_embedding/fastembed.ipynb
@@ -110,9 +110,7 @@
"metadata": {},
"outputs": [],
"source": [
- "document_embeddings = embeddings.embed_documents(\n",
- " [\"This is a document\", \"This is some other document\"]\n",
- ")"
+ "document_embeddings = embeddings.embed_documents([\"This is a document\", \"This is some other document\"])"
]
},
{
diff --git a/docs/docs/integrations/text_embedding/open_clip.ipynb b/docs/docs/integrations/text_embedding/open_clip.ipynb
index c1e1ba9ed81..d3c3fd3766a 100644
--- a/docs/docs/integrations/text_embedding/open_clip.ipynb
+++ b/docs/docs/integrations/text_embedding/open_clip.ipynb
@@ -48,7 +48,6 @@
"outputs": [],
"source": [
"import open_clip\n",
- "\n",
"open_clip.list_pretrained()"
]
},
@@ -148,8 +147,8 @@
" \"rocket\": \"a rocket standing on a launchpad\",\n",
" \"motorcycle_right\": \"a red motorcycle standing in a garage\",\n",
" \"camera\": \"a person looking at a camera on a tripod\",\n",
- " \"horse\": \"a black-and-white silhouette of a horse\",\n",
- " \"coffee\": \"a cup of coffee on a saucer\",\n",
+ " \"horse\": \"a black-and-white silhouette of a horse\", \n",
+ " \"coffee\": \"a cup of coffee on a saucer\"\n",
"}\n",
"\n",
"original_images = []\n",
@@ -159,18 +158,14 @@
"plt.figure(figsize=(16, 5))\n",
"\n",
"# Loop to display and prepare images and assemble URIs\n",
- "for filename in [\n",
- " filename\n",
- " for filename in os.listdir(skimage.data_dir)\n",
- " if filename.endswith(\".png\") or filename.endswith(\".jpg\")\n",
- "]:\n",
+ "for filename in [filename for filename in os.listdir(skimage.data_dir) if filename.endswith(\".png\") or filename.endswith(\".jpg\")]:\n",
" name = os.path.splitext(filename)[0]\n",
" if name not in descriptions:\n",
" continue\n",
"\n",
" image_path = os.path.join(skimage.data_dir, filename)\n",
" image = Image.open(image_path).convert(\"RGB\")\n",
- "\n",
+ " \n",
" plt.subplot(2, 4, len(images) + 1)\n",
" plt.imshow(image)\n",
" plt.title(f\"{filename}\\n{descriptions[name]}\")\n",
@@ -178,7 +173,7 @@
" plt.yticks([])\n",
"\n",
" original_images.append(image)\n",
- " images.append(image) # Origional code does preprocessing here\n",
+ " images.append(image) # Origional code does preprocessing here\n",
" texts.append(descriptions[name])\n",
" image_uris.append(image_path) # Add the image URI to the list\n",
"\n",
@@ -221,7 +216,7 @@
"# Instantiate your model\n",
"clip_embd = OpenCLIPEmbeddings()\n",
"\n",
- "# Embed images and text\n",
+ "# Embed images and text \n",
"img_features = clip_embd.embed_image(image_uris)\n",
"text_features = clip_embd.embed_documents([\"This is \" + desc for desc in texts])\n",
"\n",
@@ -246,7 +241,7 @@
" plt.text(x, y, f\"{similarity[y, x]:.2f}\", ha=\"center\", va=\"center\", size=12)\n",
"\n",
"for side in [\"left\", \"top\", \"right\", \"bottom\"]:\n",
- " plt.gca().spines[side].set_visible(False)\n",
+ " plt.gca().spines[side].set_visible(False)\n",
"\n",
"plt.xlim([-0.5, count - 0.5])\n",
"plt.ylim([count + 0.5, -2])\n",
diff --git a/docs/docs/integrations/vectorstores/elasticsearch.ipynb b/docs/docs/integrations/vectorstores/elasticsearch.ipynb
index ff17a2fc8e0..93343a499cc 100644
--- a/docs/docs/integrations/vectorstores/elasticsearch.ipynb
+++ b/docs/docs/integrations/vectorstores/elasticsearch.ipynb
@@ -794,18 +794,13 @@
"from typing import Dict\n",
"from langchain.docstore.document import Document\n",
"\n",
- "\n",
"def custom_document_builder(hit: Dict) -> Document:\n",
" src = hit.get(\"_source\", {})\n",
" return Document(\n",
" page_content=src.get(\"content\", \"Missing content!\"),\n",
- " metadata={\n",
- " \"page_number\": src.get(\"page_number\", -1),\n",
- " \"original_filename\": src.get(\"original_filename\", \"Missing filename!\"),\n",
- " },\n",
+ " metadata={\"page_number\": src.get(\"page_number\", -1), \"original_filename\": src.get(\"original_filename\", \"Missing filename!\")},\n",
" )\n",
"\n",
- "\n",
"results = db.similarity_search(\n",
" \"What did the president say about Ketanji Brown Jackson\",\n",
" k=4,\n",
diff --git a/docs/docs/integrations/vectorstores/weaviate.ipynb b/docs/docs/integrations/vectorstores/weaviate.ipynb
index 03b9c933c6c..dd3d6a2646c 100644
--- a/docs/docs/integrations/vectorstores/weaviate.ipynb
+++ b/docs/docs/integrations/vectorstores/weaviate.ipynb
@@ -149,7 +149,12 @@
"metadata": {},
"outputs": [],
"source": [
- "db = Weaviate.from_documents(docs, embeddings, weaviate_url=WEAVIATE_URL, by_text=False)"
+ "db = Weaviate.from_documents(\n",
+ " docs, \n",
+ " embeddings, \n",
+ " weaviate_url=WEAVIATE_URL, \n",
+ " by_text=False\n",
+ ")"
]
},
{
@@ -222,7 +227,8 @@
"import weaviate\n",
"\n",
"client = weaviate.Client(\n",
- " url=WEAVIATE_URL, auth_client_secret=weaviate.AuthApiKey(WEAVIATE_API_KEY)\n",
+ " url=WEAVIATE_URL, \n",
+ " auth_client_secret=weaviate.AuthApiKey(WEAVIATE_API_KEY)\n",
")\n",
"\n",
"# client = weaviate.Client(\n",
@@ -234,7 +240,10 @@
"# )\n",
"\n",
"vectorstore = Weaviate.from_documents(\n",
- " documents, embeddings, client=client, by_text=False\n",
+ " documents, \n",
+ " embeddings, \n",
+ " client=client, \n",
+ " by_text=False\n",
")"
]
},
@@ -369,7 +378,6 @@
],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
- "\n",
"llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n",
"llm.predict(\"What did the president say about Justice Breyer\")"
]
@@ -567,10 +575,10 @@
"from langchain.schema.output_parser import StrOutputParser\n",
"\n",
"rag_chain = (\n",
- " {\"context\": retriever, \"question\": RunnablePassthrough()}\n",
- " | prompt\n",
+ " {\"context\": retriever, \"question\": RunnablePassthrough()} \n",
+ " | prompt \n",
" | llm\n",
- " | StrOutputParser()\n",
+ " | StrOutputParser() \n",
")\n",
"\n",
"rag_chain.invoke(\"What did the president say about Justice Breyer\")"
diff --git a/docs/docs/modules/agents/index.ipynb b/docs/docs/modules/agents/index.ipynb
index ec321e6a883..0fe978bd0a0 100644
--- a/docs/docs/modules/agents/index.ipynb
+++ b/docs/docs/modules/agents/index.ipynb
@@ -198,7 +198,6 @@
"source": [
"from langchain.agents import tool\n",
"\n",
- "\n",
"@tool\n",
"def get_word_length(word: str) -> int:\n",
" \"\"\"Returns the length of a word.\"\"\"\n",
@@ -607,12 +606,10 @@
"source": [
"input1 = \"how many letters in the word educa?\"\n",
"result = agent_executor.invoke({\"input\": input1, \"chat_history\": chat_history})\n",
- "chat_history.extend(\n",
- " [\n",
- " HumanMessage(content=input1),\n",
- " AIMessage(content=result[\"output\"]),\n",
- " ]\n",
- ")\n",
+ "chat_history.extend([\n",
+ " HumanMessage(content=input1),\n",
+ " AIMessage(content=result[\"output\"]),\n",
+ "])\n",
"agent_executor.invoke({\"input\": \"is that a real word?\", \"chat_history\": chat_history})"
]
},
diff --git a/docs/scripts/copy_templates.py b/docs/scripts/copy_templates.py
index 21b0c7a4f37..11120d4d0fa 100644
--- a/docs/scripts/copy_templates.py
+++ b/docs/scripts/copy_templates.py
@@ -1,15 +1,16 @@
import glob
import os
+from pathlib import Path
import re
import shutil
-from pathlib import Path
+
TEMPLATES_DIR = Path(os.path.abspath(__file__)).parents[2] / "templates"
DOCS_TEMPLATES_DIR = Path(os.path.abspath(__file__)).parents[1] / "docs" / "templates"
readmes = list(glob.glob(str(TEMPLATES_DIR) + "/*/README.md"))
-destinations = [readme[len(str(TEMPLATES_DIR)) + 1 : -10] + ".md" for readme in readmes]
+destinations = [readme[len(str(TEMPLATES_DIR)) + 1:-10] + ".md" for readme in readmes]
for source, destination in zip(readmes, destinations):
full_destination = DOCS_TEMPLATES_DIR / destination
shutil.copyfile(source, full_destination)
@@ -32,3 +33,4 @@ with open(TEMPLATES_INDEX_DESTINATION, "r") as f:
content = re.sub("\]\(\.\.\/", "](/docs/templates/", content)
with open(TEMPLATES_INDEX_DESTINATION, "w") as f:
f.write(sidebar_hidden + content)
+
diff --git a/templates/rag-timescale-conversation/rag_conversation.ipynb b/templates/rag-timescale-conversation/rag_conversation.ipynb
index 82842bdae9c..4203689a153 100644
--- a/templates/rag-timescale-conversation/rag_conversation.ipynb
+++ b/templates/rag-timescale-conversation/rag_conversation.ipynb
@@ -79,6 +79,7 @@
}
],
"source": [
+ "\n",
"answer = rag_app.invoke(\n",
" {\n",
" \"question\": \"What commits did the person with my name make?\",\n",
@@ -124,7 +125,7 @@
" \"end_date\": \"2016-01-01 00:00:00\",\n",
" }\n",
")\n",
- "answer"
+ "answer\n"
]
},
{