mirror of https://github.com/hwchase17/langchain.git

Commit: rm

@@ -63,11 +63,13 @@
"\n",
"# Load\n",
"from langchain.document_loaders import PyPDFLoader\n",
"\n",
"loader = PyPDFLoader(path + \"cpi.pdf\")\n",
"pdf_pages = loader.load()\n",
"\n",
"# Split\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
"all_splits_pypdf = text_splitter.split_documents(pdf_pages)\n",
"all_splits_pypdf_texts = [d.page_content for d in all_splits_pypdf]"

@@ -132,10 +134,13 @@
"source": [
"from langchain.vectorstores import Chroma\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"baseline = Chroma.from_texts(texts=all_splits_pypdf_texts,\n",
" collection_name=\"baseline\",\n",
" embedding=OpenAIEmbeddings())\n",
"retriever_baseline=baseline.as_retriever()"
"\n",
"baseline = Chroma.from_texts(\n",
" texts=all_splits_pypdf_texts,\n",
" collection_name=\"baseline\",\n",
" embedding=OpenAIEmbeddings(),\n",
")\n",
"retriever_baseline = baseline.as_retriever()"
]
},
{

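Note: indentation inside the notebook JSON strings was collapsed when this diff was extracted. De-escaped and re-indented, the two reformatted cells above amount to this baseline pipeline (a sketch; assumes `path` points at a folder containing `cpi.pdf` and that `OPENAI_API_KEY` is set):

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings

# Load the PDF and split it into 500-character chunks with no overlap
loader = PyPDFLoader(path + "cpi.pdf")
pdf_pages = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits_pypdf = text_splitter.split_documents(pdf_pages)
all_splits_pypdf_texts = [d.page_content for d in all_splits_pypdf]

# Index the raw chunks in Chroma and expose a retriever
baseline = Chroma.from_texts(
    texts=all_splits_pypdf_texts,
    collection_name="baseline",
    embedding=OpenAIEmbeddings(),
)
retriever_baseline = baseline.as_retriever()
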
@@ -169,7 +174,7 @@
"model = ChatOpenAI(temperature=0, model=\"gpt-4\")\n",
"summarize_chain = {\"element\": lambda x: x} | prompt | model | StrOutputParser()\n",
"\n",
"# Apply to text \n",
"# Apply to text\n",
"text_summaries = summarize_chain.batch(texts, {\"max_concurrency\": 5})\n",
"\n",
"# Apply to tables\n",

@@ -197,26 +202,25 @@
"from PIL import Image\n",
"from langchain.schema.messages import HumanMessage\n",
"\n",
"def encode_image(image_path):\n",
" ''' Getting the base64 string '''\n",
" with open(image_path, \"rb\") as image_file:\n",
" return base64.b64encode(image_file.read()).decode('utf-8') \n",
"\n",
"def image_summarize(img_base64,prompt):\n",
" ''' Image summary '''\n",
" chat = ChatOpenAI(model=\"gpt-4-vision-preview\",\n",
" max_tokens=1024)\n",
" \n",
"def encode_image(image_path):\n",
" \"\"\"Getting the base64 string\"\"\"\n",
" with open(image_path, \"rb\") as image_file:\n",
" return base64.b64encode(image_file.read()).decode(\"utf-8\")\n",
"\n",
"\n",
"def image_summarize(img_base64, prompt):\n",
" \"\"\"Image summary\"\"\"\n",
" chat = ChatOpenAI(model=\"gpt-4-vision-preview\", max_tokens=1024)\n",
"\n",
" msg = chat.invoke(\n",
" [\n",
" HumanMessage(\n",
" content=[\n",
" {\"type\": \"text\", \"text\":prompt},\n",
" {\"type\": \"text\", \"text\": prompt},\n",
" {\n",
" \"type\": \"image_url\",\n",
" \"image_url\": {\n",
" \"url\": f\"data:image/jpeg;base64,{img_base64}\"\n",
" },\n",
" \"image_url\": {\"url\": f\"data:image/jpeg;base64,{img_base64}\"},\n",
" },\n",
" ]\n",
" )\n",

@@ -224,6 +228,7 @@
" )\n",
" return msg.content\n",
"\n",
"\n",
"# Store base64 encoded images\n",
"img_base64_list = []\n",
"\n",

@@ -237,11 +242,11 @@
"\n",
"# Apply to images\n",
"for img_file in sorted(os.listdir(path)):\n",
" if img_file.endswith('.jpg'):\n",
" if img_file.endswith(\".jpg\"):\n",
" img_path = os.path.join(path, img_file)\n",
" base64_image = encode_image(img_path)\n",
" img_base64_list.append(base64_image)\n",
" image_summaries.append(image_summarize(base64_image,prompt))"
" image_summaries.append(image_summarize(base64_image, prompt))"
]
},
{

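De-escaped and re-indented, the reformatted GPT-4V helpers read as follows (a sketch; the `base64`, `os`, and `ChatOpenAI` imports live in earlier cells that these hunks do not show, and the model call needs an OpenAI key with `gpt-4-vision-preview` access):

import base64

from langchain.chat_models import ChatOpenAI
from langchain.schema.messages import HumanMessage


def encode_image(image_path):
    """Getting the base64 string"""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


def image_summarize(img_base64, prompt):
    """Image summary"""
    chat = ChatOpenAI(model="gpt-4-vision-preview", max_tokens=1024)

    # Text and image travel together in a single multi-part HumanMessage
    msg = chat.invoke(
        [
            HumanMessage(
                content=[
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"},
                    },
                ]
            )
        ]
    )
    return msg.content
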
@@ -267,14 +272,10 @@
"from langchain.schema.document import Document\n",
"from langchain.retrievers.multi_vector import MultiVectorRetriever\n",
"\n",
"def create_multi_vector_retriever(vectorstore, \n",
" text_summaries, \n",
" texts, \n",
" table_summaries, \n",
" tables, \n",
" image_summaries, \n",
" images):\n",
" \n",
"\n",
"def create_multi_vector_retriever(\n",
" vectorstore, text_summaries, texts, table_summaries, tables, image_summaries, images\n",
"):\n",
" # Initialize the storage layer\n",
" store = InMemoryStore()\n",
" id_key = \"doc_id\"\n",

@@ -309,18 +310,22 @@
"\n",
" return retriever\n",
"\n",
"\n",
"# The vectorstore to use to index the summaries\n",
"multi_vector_img = Chroma(collection_name=\"multi_vector_img\", \n",
" embedding_function=OpenAIEmbeddings())\n",
"multi_vector_img = Chroma(\n",
" collection_name=\"multi_vector_img\", embedding_function=OpenAIEmbeddings()\n",
")\n",
"\n",
"# Create retriever\n",
"retriever_multi_vector_img = create_multi_vector_retriever(multi_vector_img,\n",
" text_summaries,\n",
" texts,\n",
" table_summaries, \n",
" tables, \n",
" image_summaries, \n",
" img_base64_list)"
"retriever_multi_vector_img = create_multi_vector_retriever(\n",
" multi_vector_img,\n",
" text_summaries,\n",
" texts,\n",
" table_summaries,\n",
" tables,\n",
" image_summaries,\n",
" img_base64_list,\n",
")"
]
},
{

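Only the signature and the call site of `create_multi_vector_retriever` are reflowed; the body between the two hunks is unchanged and elided by the diff. The reformatted call, re-indented (the summary and raw-content lists are built in earlier cells):

multi_vector_img = Chroma(
    collection_name="multi_vector_img", embedding_function=OpenAIEmbeddings()
)

retriever_multi_vector_img = create_multi_vector_retriever(
    multi_vector_img,
    text_summaries,
    texts,
    table_summaries,
    tables,
    image_summaries,
    img_base64_list,
)
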
@@ -330,10 +335,10 @@
"metadata": {},
"outputs": [],
"source": [
"# Testing on retrieval \n",
"query=\"What percentage of CPI is dedicated to Housing, and how does it compare to the combined percentage of Medical Care, Apparel, and Other Goods and Services?\"\n",
"suffix_for_images=\" Include any pie charts, graphs, or tables.\"\n",
"docs = retriever_multi_vector_img.get_relevant_documents(query+suffix_for_images)"
"# Testing on retrieval\n",
"query = \"What percentage of CPI is dedicated to Housing, and how does it compare to the combined percentage of Medical Care, Apparel, and Other Goods and Services?\"\n",
"suffix_for_images = \" Include any pie charts, graphs, or tables.\"\n",
"docs = retriever_multi_vector_img.get_relevant_documents(query + suffix_for_images)"
]
},
{

@@ -357,14 +362,16 @@
],
"source": [
"from IPython.display import display, HTML\n",
"def plt_img_base64(img_base64):\n",
"\n",
"\n",
"def plt_img_base64(img_base64):\n",
" # Create an HTML img tag with the base64 string as the source\n",
" image_html = f'<img src=\"data:image/jpeg;base64,{img_base64}\" />'\n",
" \n",
"\n",
" # Display the image by rendering the HTML\n",
" display(HTML(image_html))\n",
"\n",
"\n",
"plt_img_base64(docs[1])"
]
},

@@ -386,17 +393,20 @@
"outputs": [],
"source": [
"# The vectorstore to use to index the summaries\n",
"multi_vector_text = Chroma(collection_name=\"multi_vector_text\", \n",
" embedding_function=OpenAIEmbeddings())\n",
"multi_vector_text = Chroma(\n",
" collection_name=\"multi_vector_text\", embedding_function=OpenAIEmbeddings()\n",
")\n",
"\n",
"# Create retriever\n",
"retriever_multi_vector_img_summary = create_multi_vector_retriever(multi_vector_text,\n",
" text_summaries,\n",
" texts,\n",
" table_summaries, \n",
" tables, \n",
" image_summaries, \n",
" image_summaries)"
"retriever_multi_vector_img_summary = create_multi_vector_retriever(\n",
" multi_vector_text,\n",
" text_summaries,\n",
" texts,\n",
" table_summaries,\n",
" tables,\n",
" image_summaries,\n",
" image_summaries,\n",
")"
]
},
{

@@ -418,14 +428,17 @@
"\n",
"# Create chroma w/ multi-modal embeddings\n",
"multimodal_embd = Chroma(\n",
" collection_name=\"multimodal_embd\",\n",
" embedding_function=OpenCLIPEmbeddings()\n",
" collection_name=\"multimodal_embd\", embedding_function=OpenCLIPEmbeddings()\n",
")\n",
"\n",
"# Get image URIs\n",
"image_uris = sorted([os.path.join(path, image_name) \n",
" for image_name in os.listdir(path) \n",
" if image_name.endswith('.jpg')])\n",
"image_uris = sorted(\n",
" [\n",
" os.path.join(path, image_name)\n",
" for image_name in os.listdir(path)\n",
" if image_name.endswith(\".jpg\")\n",
" ]\n",
")\n",
"\n",
"# Add images and documents\n",
"if image_uris:\n",

@@ -435,7 +448,7 @@
"if tables:\n",
" multimodal_embd.add_texts(texts=tables)\n",
"\n",
"# Make retriever \n",
"# Make retriever\n",
"retriever_multimodal_embd = multimodal_embd.as_retriever()"
]
},

@@ -466,14 +479,14 @@
"\"\"\"\n",
"rag_prompt_text = ChatPromptTemplate.from_template(template)\n",
"\n",
"# Build \n",
"\n",
"# Build\n",
"def text_rag_chain(retriever):\n",
" \n",
" ''' RAG chain '''\n",
" \"\"\"RAG chain\"\"\"\n",
"\n",
" # LLM\n",
" model = ChatOpenAI(temperature=0, model=\"gpt-4\")\n",
" \n",
"\n",
" # RAG pipeline\n",
" chain = (\n",
" {\"context\": retriever, \"question\": RunnablePassthrough()}\n",

@@ -500,13 +513,15 @@
"metadata": {},
"outputs": [],
"source": [
"import re \n",
"import re\n",
"from langchain.schema import Document\n",
"from langchain.schema.runnable import RunnableLambda\n",
"\n",
"\n",
"def looks_like_base64(sb):\n",
" \"\"\"Check if the string looks like base64.\"\"\"\n",
" return re.match('^[A-Za-z0-9+/]+[=]{0,2}$', sb) is not None\n",
" return re.match(\"^[A-Za-z0-9+/]+[=]{0,2}$\", sb) is not None\n",
"\n",
"\n",
"def is_image_data(b64data):\n",
" \"\"\"Check if the base64 data is an image by looking at the start of the data.\"\"\"\n",

@@ -514,7 +529,7 @@
" b\"\\xFF\\xD8\\xFF\": \"jpg\",\n",
" b\"\\x89\\x50\\x4E\\x47\\x0D\\x0A\\x1A\\x0A\": \"png\",\n",
" b\"\\x47\\x49\\x46\\x38\": \"gif\",\n",
" b\"\\x52\\x49\\x46\\x46\": \"webp\"\n",
" b\"\\x52\\x49\\x46\\x46\": \"webp\",\n",
" }\n",
" try:\n",
" header = base64.b64decode(b64data)[:8] # Decode and get the first 8 bytes\n",

@@ -525,6 +540,7 @@
" except Exception:\n",
" return False\n",
"\n",
"\n",
"def split_image_text_types(docs):\n",
" \"\"\"Split base64-encoded images and texts.\"\"\"\n",
" b64_images = []\n",

@@ -539,6 +555,7 @@
" texts.append(doc)\n",
" return {\"images\": b64_images, \"texts\": texts}\n",
"\n",
"\n",
"def img_prompt_func(data_dict):\n",
" # Joining the context texts into a single string\n",
" formatted_texts = \"\\n\".join(data_dict[\"context\"][\"texts\"])\n",

@@ -550,7 +567,7 @@
" \"type\": \"image_url\",\n",
" \"image_url\": {\n",
" \"url\": f\"data:image/jpeg;base64,{data_dict['context']['images'][0]}\"\n",
" }\n",
" },\n",
" }\n",
" messages.append(image_message)\n",
"\n",

@@ -563,22 +580,24 @@
" f\"User-provided question / keywords: {data_dict['question']}\\n\\n\"\n",
" \"Text and / or tables:\\n\"\n",
" f\"{formatted_texts}\"\n",
" )\n",
" ),\n",
" }\n",
" messages.append(text_message)\n",
" return [HumanMessage(content=messages)]\n",
"\n",
"\n",
"def multi_modal_rag_chain(retriever):\n",
" ''' Multi-modal RAG chain '''\n",
" \"\"\"Multi-modal RAG chain\"\"\"\n",
"\n",
" # Multi-modal LLM\n",
" model = ChatOpenAI(temperature=0, \n",
" model=\"gpt-4-vision-preview\", \n",
" max_tokens=1024)\n",
" \n",
" model = ChatOpenAI(temperature=0, model=\"gpt-4-vision-preview\", max_tokens=1024)\n",
"\n",
" # RAG pipeline\n",
" chain = (\n",
" {\"context\": retriever | RunnableLambda(split_image_text_types), \"question\": RunnablePassthrough()}\n",
" {\n",
" \"context\": retriever | RunnableLambda(split_image_text_types),\n",
" \"question\": RunnablePassthrough(),\n",
" }\n",
" | RunnableLambda(img_prompt_func)\n",
" | model\n",
" | StrOutputParser()\n",

@@ -603,12 +622,12 @@
"outputs": [],
"source": [
"# RAG chains\n",
"chain_baseline=text_rag_chain(retriever_baseline)\n",
"chain_mv_text=text_rag_chain(retriever_multi_vector_img_summary)\n",
"chain_baseline = text_rag_chain(retriever_baseline)\n",
"chain_mv_text = text_rag_chain(retriever_multi_vector_img_summary)\n",
"\n",
"# Multi-modal RAG chains\n",
"chain_multimodal_mv_img=multi_modal_rag_chain(retriever_multi_vector_img)\n",
"chain_multimodal_embd=multi_modal_rag_chain(retriever_multimodal_embd)"
"chain_multimodal_mv_img = multi_modal_rag_chain(retriever_multi_vector_img)\n",
"chain_multimodal_embd = multi_modal_rag_chain(retriever_multimodal_embd)"
]
},
{

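Re-indented, the reformatted multi-modal chain builder; the dict feeding the pipe is now exploded one key per line. The trailing `return chain` is an assumption, since the hunk ends before it:

def multi_modal_rag_chain(retriever):
    """Multi-modal RAG chain"""

    # Multi-modal LLM
    model = ChatOpenAI(temperature=0, model="gpt-4-vision-preview", max_tokens=1024)

    # RAG pipeline: retrieve, split images from text, build the prompt, call the LLM
    chain = (
        {
            "context": retriever | RunnableLambda(split_image_text_types),
            "question": RunnablePassthrough(),
        }
        | RunnableLambda(img_prompt_func)
        | model
        | StrOutputParser()
    )
    return chain  # assumed; not shown in the hunk
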
@@ -694,7 +713,8 @@
"source": [
"# Read\n",
"import pandas as pd\n",
"eval_set = pd.read_csv(path+'cpi_eval.csv')\n",
"\n",
"eval_set = pd.read_csv(path + \"cpi_eval.csv\")\n",
"eval_set.head(3)"
]
},

@@ -715,12 +735,12 @@
"# Populate dataset\n",
"for _, row in eval_set.iterrows():\n",
" # Get Q, A\n",
" q = row['Question']\n",
" a = row['Answer']\n",
" q = row[\"Question\"]\n",
" a = row[\"Answer\"]\n",
" # Use the values in your function\n",
" client.create_example(inputs={\"question\": q}, \n",
" outputs={\"answer\": a}, \n",
" dataset_id=dataset.id)"
" client.create_example(\n",
" inputs={\"question\": q}, outputs={\"answer\": a}, dataset_id=dataset.id\n",
" )"
]
},
{

@@ -764,17 +784,22 @@
" evaluators=[\"qa\"],\n",
")\n",
"\n",
"def run_eval(chain,run_name,dataset_name):\n",
"\n",
"def run_eval(chain, run_name, dataset_name):\n",
" _ = client.run_on_dataset(\n",
" dataset_name=dataset_name,\n",
" llm_or_chain_factory=lambda: (lambda x: x[\"question\"]+suffix_for_images) | chain,\n",
" llm_or_chain_factory=lambda: (lambda x: x[\"question\"] + suffix_for_images)\n",
" | chain,\n",
" evaluation=eval_config,\n",
" project_name=run_name,\n",
" )\n",
"\n",
"for chain, run in zip([chain_baseline, chain_mv_text, chain_multimodal_mv_img, chain_multimodal_embd], \n",
" [\"baseline\", \"mv_text\", \"mv_img\", \"mm_embd\"]):\n",
" run_eval(chain, dataset_name+\"-\"+run, dataset_name)"
"\n",
"for chain, run in zip(\n",
" [chain_baseline, chain_mv_text, chain_multimodal_mv_img, chain_multimodal_embd],\n",
" [\"baseline\", \"mv_text\", \"mv_img\", \"mm_embd\"],\n",
"):\n",
" run_eval(chain, dataset_name + \"-\" + run, dataset_name)"
]
}
],

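Re-indented, the reformatted LangSmith evaluation loop (`client`, `eval_config`, `dataset_name`, and `suffix_for_images` come from earlier cells not shown here):

def run_eval(chain, run_name, dataset_name):
    _ = client.run_on_dataset(
        dataset_name=dataset_name,
        # Prepend the image-aware suffix to each question before it hits the chain
        llm_or_chain_factory=lambda: (lambda x: x["question"] + suffix_for_images)
        | chain,
        evaluation=eval_config,
        project_name=run_name,
    )


for chain, run in zip(
    [chain_baseline, chain_mv_text, chain_multimodal_mv_img, chain_multimodal_embd],
    ["baseline", "mv_text", "mv_img", "mm_embd"],
):
    run_eval(chain, dataset_name + "-" + run, dataset_name)
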
@@ -115,7 +115,7 @@
"metadata": {},
"outputs": [],
"source": [
"# Folder with pdf and extracted images \n",
"# Folder with pdf and extracted images\n",
"path = \"/Users/rlm/Desktop/photos/\""
]
},

@@ -128,9 +128,10 @@
"source": [
"# Extract images, tables, and chunk text\n",
"from unstructured.partition.pdf import partition_pdf\n",
"\n",
"raw_pdf_elements = partition_pdf(\n",
" filename=path + \"photos.pdf\",\n",
" extract_images_in_pdf=True, \n",
" extract_images_in_pdf=True,\n",
" infer_table_structure=True,\n",
" chunking_strategy=\"by_title\",\n",
" max_characters=4000,\n",

@@ -191,14 +192,17 @@
"\n",
"# Create chroma\n",
"vectorstore = Chroma(\n",
" collection_name=\"mm_rag_clip_photos\",\n",
" embedding_function=OpenCLIPEmbeddings()\n",
" collection_name=\"mm_rag_clip_photos\", embedding_function=OpenCLIPEmbeddings()\n",
")\n",
"\n",
"# Get image URIs with .jpg extension only\n",
"image_uris = sorted([os.path.join(path, image_name) \n",
" for image_name in os.listdir(path) \n",
" if image_name.endswith('.jpg')])\n",
"image_uris = sorted(\n",
" [\n",
" os.path.join(path, image_name)\n",
" for image_name in os.listdir(path)\n",
" if image_name.endswith(\".jpg\")\n",
" ]\n",
")\n",
"\n",
"# Add images\n",
"vectorstore.add_images(uris=image_uris)\n",

@@ -206,7 +210,7 @@
"# Add documents\n",
"vectorstore.add_texts(texts=texts)\n",
"\n",
"# Make retriever \n",
"# Make retriever\n",
"retriever = vectorstore.as_retriever()"
]
},

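Re-indented, the reformatted CLIP-backed vectorstore setup from this notebook (a sketch; `path` and the `texts` chunks from the `partition_pdf` step are defined in earlier cells):

import os

vectorstore = Chroma(
    collection_name="mm_rag_clip_photos", embedding_function=OpenCLIPEmbeddings()
)

# Index .jpg files and text chunks directly with OpenCLIP embeddings
image_uris = sorted(
    [
        os.path.join(path, image_name)
        for image_name in os.listdir(path)
        if image_name.endswith(".jpg")
    ]
)
vectorstore.add_images(uris=image_uris)
vectorstore.add_texts(texts=texts)
retriever = vectorstore.as_retriever()
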
@@ -235,6 +239,7 @@
"from io import BytesIO\n",
"from PIL import Image\n",
"\n",
"\n",
"def resize_base64_image(base64_string, size=(128, 128)):\n",
" \"\"\"\n",
" Resize an image encoded as a Base64 string.\n",

@@ -258,30 +263,31 @@
" resized_img.save(buffered, format=img.format)\n",
"\n",
" # Encode the resized image to Base64\n",
" return base64.b64encode(buffered.getvalue()).decode('utf-8')\n",
" return base64.b64encode(buffered.getvalue()).decode(\"utf-8\")\n",
"\n",
"\n",
"def is_base64(s):\n",
" ''' Check if a string is Base64 encoded '''\n",
" \"\"\"Check if a string is Base64 encoded\"\"\"\n",
" try:\n",
" return base64.b64encode(base64.b64decode(s)) == s.encode()\n",
" except Exception:\n",
" return False\n",
" \n",
"\n",
"\n",
"def split_image_text_types(docs):\n",
" ''' Split numpy array images and texts '''\n",
" \"\"\"Split numpy array images and texts\"\"\"\n",
" images = []\n",
" text = []\n",
" for doc in docs:\n",
" doc = doc.page_content # Extract Document contents \n",
" doc = doc.page_content # Extract Document contents\n",
" if is_base64(doc):\n",
" # Resize image to avoid OAI server error\n",
" images.append(resize_base64_image(doc, size=(250, 250))) # base64 encoded str \n",
" images.append(\n",
" resize_base64_image(doc, size=(250, 250))\n",
" ) # base64 encoded str\n",
" else:\n",
" text.append(doc) \n",
" return {\n",
" \"images\": images,\n",
" \"texts\": text\n",
" }"
" text.append(doc)\n",
" return {\"images\": images, \"texts\": text}"
]
},
{

@@ -311,6 +317,7 @@
"from langchain.schema.runnable import RunnablePassthrough, RunnableLambda\n",
"from langchain.schema.messages import HumanMessage, SystemMessage\n",
"\n",
"\n",
"def prompt_func(data_dict):\n",
" # Joining the context texts into a single string\n",
" formatted_texts = \"\\n\".join(data_dict[\"context\"][\"texts\"])\n",

@@ -322,7 +329,7 @@
" \"type\": \"image_url\",\n",
" \"image_url\": {\n",
" \"url\": f\"data:image/jpeg;base64,{data_dict['context']['images'][0]}\"\n",
" }\n",
" },\n",
" }\n",
" messages.append(image_message)\n",
"\n",

@@ -342,17 +349,21 @@
" f\"User-provided keywords: {data_dict['question']}\\n\\n\"\n",
" \"Text and / or tables:\\n\"\n",
" f\"{formatted_texts}\"\n",
" )\n",
" ),\n",
" }\n",
" messages.append(text_message)\n",
"\n",
" return [HumanMessage(content=messages)]\n",
" \n",
"\n",
"\n",
"model = ChatOpenAI(temperature=0, model=\"gpt-4-vision-preview\", max_tokens=1024)\n",
"\n",
"# RAG pipeline\n",
"chain = (\n",
" {\"context\": retriever | RunnableLambda(split_image_text_types), \"question\": RunnablePassthrough()}\n",
" {\n",
" \"context\": retriever | RunnableLambda(split_image_text_types),\n",
" \"question\": RunnablePassthrough(),\n",
" }\n",
" | RunnableLambda(prompt_func)\n",
" | model\n",
" | StrOutputParser()\n",

@@ -412,15 +423,16 @@
"source": [
"from IPython.display import display, HTML\n",
"\n",
"def plt_img_base64(img_base64):\n",
"\n",
"def plt_img_base64(img_base64):\n",
" # Create an HTML img tag with the base64 string as the source\n",
" image_html = f'<img src=\"data:image/jpeg;base64,{img_base64}\" />'\n",
" \n",
"\n",
" # Display the image by rendering the HTML\n",
" display(HTML(image_html))\n",
"\n",
"docs = retriever.get_relevant_documents(\"Woman with children\",k=10)\n",
"\n",
"docs = retriever.get_relevant_documents(\"Woman with children\", k=10)\n",
"for doc in docs:\n",
" if is_base64(doc.page_content):\n",
" plt_img_base64(doc.page_content)\n",

@@ -446,9 +458,7 @@
}
],
"source": [
"chain.invoke(\n",
" \"Woman with children\"\n",
")"
"chain.invoke(\"Woman with children\")"
]
},
{

@@ -82,7 +82,9 @@
"secret_access_key = \"your bos access sk\"\n",
"\n",
"# create BceClientConfiguration\n",
"config = BceClientConfiguration(credentials=BceCredentials(access_key_id, secret_access_key), endpoint = bos_host)\n",
"config = BceClientConfiguration(\n",
" credentials=BceCredentials(access_key_id, secret_access_key), endpoint=bos_host\n",
")\n",
"\n",
"loader = BaiduBOSDirectoryLoader(conf=config, bucket=\"llm-test\", prefix=\"llm/\")\n",
"documents = loader.load()\n",

@@ -109,10 +111,14 @@
"embeddings.client = sentence_transformers.SentenceTransformer(embeddings.model_name)\n",
"\n",
"db = BESVectorStore.from_documents(\n",
" documents=split_docs, embedding=embeddings, bes_url=\"your bes url\", index_name='test-index', vector_query_field='vector'\n",
" )\n",
" documents=split_docs,\n",
" embedding=embeddings,\n",
" bes_url=\"your bes url\",\n",
" index_name=\"test-index\",\n",
" vector_query_field=\"vector\",\n",
")\n",
"\n",
"db.client.indices.refresh(index='test-index')\n",
"db.client.indices.refresh(index=\"test-index\")\n",
"retriever = db.as_retriever()"
]
},

@@ -130,8 +136,15 @@
"metadata": {},
"outputs": [],
"source": [
"llm = QianfanLLMEndpoint(model=\"ERNIE-Bot\", qianfan_ak='your qianfan ak', qianfan_sk='your qianfan sk', streaming=True)\n",
"qa = RetrievalQA.from_chain_type(llm=llm, chain_type=\"refine\", retriever=retriever, return_source_documents=True)\n",
"llm = QianfanLLMEndpoint(\n",
" model=\"ERNIE-Bot\",\n",
" qianfan_ak=\"your qianfan ak\",\n",
" qianfan_sk=\"your qianfan sk\",\n",
" streaming=True,\n",
")\n",
"qa = RetrievalQA.from_chain_type(\n",
" llm=llm, chain_type=\"refine\", retriever=retriever, return_source_documents=True\n",
")\n",
"\n",
"query = \"什么是张量?\"\n",
"print(qa.run(query))"

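Re-indented, the reformatted Qianfan cell (a sketch; the placeholder credentials are kept verbatim from the source, and `retriever` is the BES retriever built above):

llm = QianfanLLMEndpoint(
    model="ERNIE-Bot",
    qianfan_ak="your qianfan ak",
    qianfan_sk="your qianfan sk",
    streaming=True,
)
qa = RetrievalQA.from_chain_type(
    llm=llm, chain_type="refine", retriever=retriever, return_source_documents=True
)

query = "什么是张量?"  # "What is a tensor?"
print(qa.run(query))
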
@@ -118,7 +118,9 @@
"source": [
"loader = DocusaurusLoader(\n",
" \"https://python.langchain.com\",\n",
" filter_urls=[\"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"],\n",
" filter_urls=[\n",
" \"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"\n",
" ],\n",
")\n",
"documents = loader.load()"
]

@@ -162,9 +164,11 @@
"source": [
"loader = DocusaurusLoader(\n",
" \"https://python.langchain.com\",\n",
" filter_urls=[\"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"],\n",
" filter_urls=[\n",
" \"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"\n",
" ],\n",
" # This will only include the content that matches these tags, otherwise they will be removed\n",
" custom_html_tags=[\"#content\", \".main\"]\n",
" custom_html_tags=[\"#content\", \".main\"],\n",
")"
]
},

@@ -213,7 +217,9 @@
"source": [
"loader = DocusaurusLoader(\n",
" \"https://python.langchain.com\",\n",
" filter_urls=[\"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"],\n",
" filter_urls=[\n",
" \"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"\n",
" ],\n",
" parsing_function=remove_nav_and_header_elements,\n",
")"
]

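Re-indented, the reformatted loader call with the content filter; the three hunks differ only in whether `custom_html_tags` or `parsing_function` is passed:

loader = DocusaurusLoader(
    "https://python.langchain.com",
    filter_urls=[
        "https://python.langchain.com/docs/integrations/document_loaders/sitemap"
    ],
    # This will only include the content that matches these tags, otherwise they will be removed
    custom_html_tags=["#content", ".main"],
)
documents = loader.load()
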
@@ -25,7 +25,7 @@
" url=\"bolt://localhost:7687\",\n",
" username=\"neo4j\",\n",
" password=\"password\",\n",
" session_id=\"session_id_1\"\n",
" session_id=\"session_id_1\",\n",
")\n",
"\n",
"history.add_user_message(\"hi!\")\n",

@@ -110,7 +110,9 @@
"metadata": {},
"outputs": [],
"source": [
"document_embeddings = embeddings.embed_documents([\"This is a document\", \"This is some other document\"])"
"document_embeddings = embeddings.embed_documents(\n",
" [\"This is a document\", \"This is some other document\"]\n",
")"
]
},
{

@@ -48,6 +48,7 @@
"outputs": [],
"source": [
"import open_clip\n",
"\n",
"open_clip.list_pretrained()"
]
},

@@ -147,8 +148,8 @@
" \"rocket\": \"a rocket standing on a launchpad\",\n",
" \"motorcycle_right\": \"a red motorcycle standing in a garage\",\n",
" \"camera\": \"a person looking at a camera on a tripod\",\n",
" \"horse\": \"a black-and-white silhouette of a horse\", \n",
" \"coffee\": \"a cup of coffee on a saucer\"\n",
" \"horse\": \"a black-and-white silhouette of a horse\",\n",
" \"coffee\": \"a cup of coffee on a saucer\",\n",
"}\n",
"\n",
"original_images = []\n",

@@ -158,14 +159,18 @@
"plt.figure(figsize=(16, 5))\n",
"\n",
"# Loop to display and prepare images and assemble URIs\n",
"for filename in [filename for filename in os.listdir(skimage.data_dir) if filename.endswith(\".png\") or filename.endswith(\".jpg\")]:\n",
"for filename in [\n",
" filename\n",
" for filename in os.listdir(skimage.data_dir)\n",
" if filename.endswith(\".png\") or filename.endswith(\".jpg\")\n",
"]:\n",
" name = os.path.splitext(filename)[0]\n",
" if name not in descriptions:\n",
" continue\n",
"\n",
" image_path = os.path.join(skimage.data_dir, filename)\n",
" image = Image.open(image_path).convert(\"RGB\")\n",
" \n",
"\n",
" plt.subplot(2, 4, len(images) + 1)\n",
" plt.imshow(image)\n",
" plt.title(f\"{filename}\\n{descriptions[name]}\")\n",

@@ -173,7 +178,7 @@
" plt.yticks([])\n",
"\n",
" original_images.append(image)\n",
" images.append(image) # Origional code does preprocessing here\n",
" images.append(image)  # Origional code does preprocessing here\n",
" texts.append(descriptions[name])\n",
" image_uris.append(image_path) # Add the image URI to the list\n",
"\n",

@@ -216,7 +221,7 @@
"# Instantiate your model\n",
"clip_embd = OpenCLIPEmbeddings()\n",
"\n",
"# Embed images and text \n",
"# Embed images and text\n",
"img_features = clip_embd.embed_image(image_uris)\n",
"text_features = clip_embd.embed_documents([\"This is \" + desc for desc in texts])\n",
"\n",

@@ -241,7 +246,7 @@
" plt.text(x, y, f\"{similarity[y, x]:.2f}\", ha=\"center\", va=\"center\", size=12)\n",
"\n",
"for side in [\"left\", \"top\", \"right\", \"bottom\"]:\n",
" plt.gca().spines[side].set_visible(False)\n",
" plt.gca().spines[side].set_visible(False)\n",
"\n",
"plt.xlim([-0.5, count - 0.5])\n",
"plt.ylim([count + 0.5, -2])\n",

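Re-indented, the reformatted embedding step; the `similarity` matrix plotted in the last hunk is computed in a cell this diff does not show, so the commented line is only a guess at it:

clip_embd = OpenCLIPEmbeddings()

# Embed images and text
img_features = clip_embd.embed_image(image_uris)
text_features = clip_embd.embed_documents(["This is " + desc for desc in texts])

# Assumption: the plotted heatmap is the text-image dot-product matrix, e.g.
# similarity = np.matmul(np.array(text_features), np.array(img_features).T)
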
@@ -794,13 +794,18 @@
"from typing import Dict\n",
"from langchain.docstore.document import Document\n",
"\n",
"\n",
"def custom_document_builder(hit: Dict) -> Document:\n",
" src = hit.get(\"_source\", {})\n",
" return Document(\n",
" page_content=src.get(\"content\", \"Missing content!\"),\n",
" metadata={\"page_number\": src.get(\"page_number\", -1), \"original_filename\": src.get(\"original_filename\", \"Missing filename!\")},\n",
" metadata={\n",
" \"page_number\": src.get(\"page_number\", -1),\n",
" \"original_filename\": src.get(\"original_filename\", \"Missing filename!\"),\n",
" },\n",
" )\n",
"\n",
"\n",
"results = db.similarity_search(\n",
" \"What did the president say about Ketanji Brown Jackson\",\n",
" k=4,\n",

@@ -149,12 +149,7 @@
"metadata": {},
"outputs": [],
"source": [
"db = Weaviate.from_documents(\n",
" docs, \n",
" embeddings, \n",
" weaviate_url=WEAVIATE_URL, \n",
" by_text=False\n",
")"
"db = Weaviate.from_documents(docs, embeddings, weaviate_url=WEAVIATE_URL, by_text=False)"
]
},
{

@@ -227,8 +222,7 @@
"import weaviate\n",
"\n",
"client = weaviate.Client(\n",
" url=WEAVIATE_URL, \n",
" auth_client_secret=weaviate.AuthApiKey(WEAVIATE_API_KEY)\n",
" url=WEAVIATE_URL, auth_client_secret=weaviate.AuthApiKey(WEAVIATE_API_KEY)\n",
")\n",
"\n",
"# client = weaviate.Client(\n",

@@ -240,10 +234,7 @@
"# )\n",
"\n",
"vectorstore = Weaviate.from_documents(\n",
" documents, \n",
" embeddings, \n",
" client=client, \n",
" by_text=False\n",
" documents, embeddings, client=client, by_text=False\n",
")"
]
},

@@ -378,6 +369,7 @@
],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n",
"llm.predict(\"What did the president say about Justice Breyer\")"
]

@@ -575,10 +567,10 @@
"from langchain.schema.output_parser import StrOutputParser\n",
"\n",
"rag_chain = (\n",
" {\"context\": retriever, \"question\": RunnablePassthrough()} \n",
" | prompt \n",
" {\"context\": retriever, \"question\": RunnablePassthrough()}\n",
" | prompt\n",
" | llm\n",
" | StrOutputParser() \n",
" | StrOutputParser()\n",
")\n",
"\n",
"rag_chain.invoke(\"What did the president say about Justice Breyer\")"

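Re-indented, the reformatted LCEL chain from the Weaviate notebook; only trailing whitespace changed (`retriever`, `prompt`, and `llm` are defined in the cells above):

rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke("What did the president say about Justice Breyer")
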
@@ -198,6 +198,7 @@
"source": [
"from langchain.agents import tool\n",
"\n",
"\n",
"@tool\n",
"def get_word_length(word: str) -> int:\n",
" \"\"\"Returns the length of a word.\"\"\"\n",

@@ -606,10 +607,12 @@
"source": [
"input1 = \"how many letters in the word educa?\"\n",
"result = agent_executor.invoke({\"input\": input1, \"chat_history\": chat_history})\n",
"chat_history.extend([\n",
" HumanMessage(content=input1),\n",
" AIMessage(content=result[\"output\"]),\n",
"])\n",
"chat_history.extend(\n",
" [\n",
" HumanMessage(content=input1),\n",
" AIMessage(content=result[\"output\"]),\n",
" ]\n",
")\n",
"agent_executor.invoke({\"input\": \"is that a real word?\", \"chat_history\": chat_history})"
]
},

@@ -1,16 +1,15 @@
import glob
import os
from pathlib import Path
import re
import shutil

from pathlib import Path

TEMPLATES_DIR = Path(os.path.abspath(__file__)).parents[2] / "templates"
DOCS_TEMPLATES_DIR = Path(os.path.abspath(__file__)).parents[1] / "docs" / "templates"


readmes = list(glob.glob(str(TEMPLATES_DIR) + "/*/README.md"))
destinations = [readme[len(str(TEMPLATES_DIR)) + 1:-10] + ".md" for readme in readmes]
destinations = [readme[len(str(TEMPLATES_DIR)) + 1 : -10] + ".md" for readme in readmes]
for source, destination in zip(readmes, destinations):
    full_destination = DOCS_TEMPLATES_DIR / destination
    shutil.copyfile(source, full_destination)

@@ -33,4 +32,3 @@ with open(TEMPLATES_INDEX_DESTINATION, "r") as f:
content = re.sub("\]\(\.\.\/", "](/docs/templates/", content)
with open(TEMPLATES_INDEX_DESTINATION, "w") as f:
    f.write(sidebar_hidden + content)

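The slice only gains PEP 8 spacing around the colon; behavior is unchanged. For a template at `<TEMPLATES_DIR>/foo/README.md`, dropping `len(str(TEMPLATES_DIR)) + 1` leading characters strips the directory plus the separator, and `-10` strips the trailing `/README.md` (10 characters):

readme = str(TEMPLATES_DIR) + "/foo/README.md"
readme[len(str(TEMPLATES_DIR)) + 1 : -10]  # -> "foo"
destination = readme[len(str(TEMPLATES_DIR)) + 1 : -10] + ".md"  # -> "foo.md"
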
@@ -821,20 +821,6 @@ class AgentExecutor(Chain):
            )
        return values

    @root_validator()
    def validate_return_direct_tool(cls, values: Dict) -> Dict:
        """Validate that tools are compatible with agent."""
        agent = values["agent"]
        tools = values["tools"]
        if isinstance(agent, BaseMultiActionAgent):
            for tool in tools:
                if tool.return_direct:
                    raise ValueError(
                        "Tools that have `return_direct=True` are not allowed "
                        "in multi-action agents"
                    )
        return values

    @root_validator(pre=True)
    def validate_runnable_agent(cls, values: Dict) -> Dict:
        """Convert runnable to agent if passed in."""

@@ -79,7 +79,6 @@
}
],
"source": [
"\n",
"answer = rag_app.invoke(\n",
" {\n",
" \"question\": \"What commits did the person with my name make?\",\n",

@@ -125,7 +124,7 @@
" \"end_date\": \"2016-01-01 00:00:00\",\n",
" }\n",
")\n",
"answer\n"
"answer"
]
},
{