From f6e4c062f6586bd10b9c7878c73c777597ba2eb3 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Tue, 14 Nov 2023 09:29:43 -0800 Subject: [PATCH] rm --- cookbook/advanced_rag_eval.ipynb | 201 ++++++++++-------- cookbook/multi_modal_RAG_chroma.ipynb | 68 +++--- .../qianfan_baidu_elasticesearch_RAG.ipynb | 25 ++- .../document_loaders/docusaurus.ipynb | 14 +- .../memory/neo4j_chat_message_history.ipynb | 2 +- .../text_embedding/fastembed.ipynb | 4 +- .../text_embedding/open_clip.ipynb | 19 +- .../vectorstores/elasticsearch.ipynb | 7 +- .../integrations/vectorstores/weaviate.ipynb | 22 +- docs/docs/modules/agents/index.ipynb | 11 +- docs/scripts/copy_templates.py | 6 +- libs/langchain/langchain/agents/agent.py | 14 -- .../rag_conversation.ipynb | 3 +- 13 files changed, 220 insertions(+), 176 deletions(-) diff --git a/cookbook/advanced_rag_eval.ipynb b/cookbook/advanced_rag_eval.ipynb index a2de7261091..1d35b94fd60 100644 --- a/cookbook/advanced_rag_eval.ipynb +++ b/cookbook/advanced_rag_eval.ipynb @@ -63,11 +63,13 @@ "\n", "# Load\n", "from langchain.document_loaders import PyPDFLoader\n", + "\n", "loader = PyPDFLoader(path + \"cpi.pdf\")\n", "pdf_pages = loader.load()\n", "\n", "# Split\n", "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "\n", "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n", "all_splits_pypdf = text_splitter.split_documents(pdf_pages)\n", "all_splits_pypdf_texts = [d.page_content for d in all_splits_pypdf]" @@ -132,10 +134,13 @@ "source": [ "from langchain.vectorstores import Chroma\n", "from langchain.embeddings import OpenAIEmbeddings\n", - "baseline = Chroma.from_texts(texts=all_splits_pypdf_texts,\n", - " collection_name=\"baseline\",\n", - " embedding=OpenAIEmbeddings())\n", - "retriever_baseline=baseline.as_retriever()" + "\n", + "baseline = Chroma.from_texts(\n", + " texts=all_splits_pypdf_texts,\n", + " collection_name=\"baseline\",\n", + " embedding=OpenAIEmbeddings(),\n", + ")\n", + "retriever_baseline = baseline.as_retriever()" ] }, { @@ -169,7 +174,7 @@ "model = ChatOpenAI(temperature=0, model=\"gpt-4\")\n", "summarize_chain = {\"element\": lambda x: x} | prompt | model | StrOutputParser()\n", "\n", - "# Apply to text \n", + "# Apply to text\n", "text_summaries = summarize_chain.batch(texts, {\"max_concurrency\": 5})\n", "\n", "# Apply to tables\n", @@ -197,26 +202,25 @@ "from PIL import Image\n", "from langchain.schema.messages import HumanMessage\n", "\n", - "def encode_image(image_path):\n", - " ''' Getting the base64 string '''\n", - " with open(image_path, \"rb\") as image_file:\n", - " return base64.b64encode(image_file.read()).decode('utf-8') \n", "\n", - "def image_summarize(img_base64,prompt):\n", - " ''' Image summary '''\n", - " chat = ChatOpenAI(model=\"gpt-4-vision-preview\",\n", - " max_tokens=1024)\n", - " \n", + "def encode_image(image_path):\n", + " \"\"\"Getting the base64 string\"\"\"\n", + " with open(image_path, \"rb\") as image_file:\n", + " return base64.b64encode(image_file.read()).decode(\"utf-8\")\n", + "\n", + "\n", + "def image_summarize(img_base64, prompt):\n", + " \"\"\"Image summary\"\"\"\n", + " chat = ChatOpenAI(model=\"gpt-4-vision-preview\", max_tokens=1024)\n", + "\n", " msg = chat.invoke(\n", " [\n", " HumanMessage(\n", " content=[\n", - " {\"type\": \"text\", \"text\":prompt},\n", + " {\"type\": \"text\", \"text\": prompt},\n", " {\n", " \"type\": \"image_url\",\n", - " \"image_url\": {\n", - " \"url\": f\"data:image/jpeg;base64,{img_base64}\"\n", - " },\n", + " \"image_url\": {\"url\": f\"data:image/jpeg;base64,{img_base64}\"},\n", " },\n", " ]\n", " )\n", @@ -224,6 +228,7 @@ " )\n", " return msg.content\n", "\n", + "\n", "# Store base64 encoded images\n", "img_base64_list = []\n", "\n", @@ -237,11 +242,11 @@ "\n", "# Apply to images\n", "for img_file in sorted(os.listdir(path)):\n", - " if img_file.endswith('.jpg'):\n", + " if img_file.endswith(\".jpg\"):\n", " img_path = os.path.join(path, img_file)\n", " base64_image = encode_image(img_path)\n", " img_base64_list.append(base64_image)\n", - " image_summaries.append(image_summarize(base64_image,prompt))" + " image_summaries.append(image_summarize(base64_image, prompt))" ] }, { @@ -267,14 +272,10 @@ "from langchain.schema.document import Document\n", "from langchain.retrievers.multi_vector import MultiVectorRetriever\n", "\n", - "def create_multi_vector_retriever(vectorstore, \n", - " text_summaries, \n", - " texts, \n", - " table_summaries, \n", - " tables, \n", - " image_summaries, \n", - " images):\n", - " \n", + "\n", + "def create_multi_vector_retriever(\n", + " vectorstore, text_summaries, texts, table_summaries, tables, image_summaries, images\n", + "):\n", " # Initialize the storage layer\n", " store = InMemoryStore()\n", " id_key = \"doc_id\"\n", @@ -309,18 +310,22 @@ "\n", " return retriever\n", "\n", + "\n", "# The vectorstore to use to index the summaries\n", - "multi_vector_img = Chroma(collection_name=\"multi_vector_img\", \n", - " embedding_function=OpenAIEmbeddings())\n", + "multi_vector_img = Chroma(\n", + " collection_name=\"multi_vector_img\", embedding_function=OpenAIEmbeddings()\n", + ")\n", "\n", "# Create retriever\n", - "retriever_multi_vector_img = create_multi_vector_retriever(multi_vector_img,\n", - " text_summaries,\n", - " texts,\n", - " table_summaries, \n", - " tables, \n", - " image_summaries, \n", - " img_base64_list)" + "retriever_multi_vector_img = create_multi_vector_retriever(\n", + " multi_vector_img,\n", + " text_summaries,\n", + " texts,\n", + " table_summaries,\n", + " tables,\n", + " image_summaries,\n", + " img_base64_list,\n", + ")" ] }, { @@ -330,10 +335,10 @@ "metadata": {}, "outputs": [], "source": [ - "# Testing on retrieval \n", - "query=\"What percentage of CPI is dedicated to Housing, and how does it compare to the combined percentage of Medical Care, Apparel, and Other Goods and Services?\"\n", - "suffix_for_images=\" Include any pie charts, graphs, or tables.\"\n", - "docs = retriever_multi_vector_img.get_relevant_documents(query+suffix_for_images)" + "# Testing on retrieval\n", + "query = \"What percentage of CPI is dedicated to Housing, and how does it compare to the combined percentage of Medical Care, Apparel, and Other Goods and Services?\"\n", + "suffix_for_images = \" Include any pie charts, graphs, or tables.\"\n", + "docs = retriever_multi_vector_img.get_relevant_documents(query + suffix_for_images)" ] }, { @@ -357,14 +362,16 @@ ], "source": [ "from IPython.display import display, HTML\n", - "def plt_img_base64(img_base64):\n", "\n", + "\n", + "def plt_img_base64(img_base64):\n", " # Create an HTML img tag with the base64 string as the source\n", " image_html = f''\n", - " \n", + "\n", " # Display the image by rendering the HTML\n", " display(HTML(image_html))\n", "\n", + "\n", "plt_img_base64(docs[1])" ] }, @@ -386,17 +393,20 @@ "outputs": [], "source": [ "# The vectorstore to use to index the summaries\n", - "multi_vector_text = Chroma(collection_name=\"multi_vector_text\", \n", - " embedding_function=OpenAIEmbeddings())\n", + "multi_vector_text = Chroma(\n", + " collection_name=\"multi_vector_text\", embedding_function=OpenAIEmbeddings()\n", + ")\n", "\n", "# Create retriever\n", - "retriever_multi_vector_img_summary = create_multi_vector_retriever(multi_vector_text,\n", - " text_summaries,\n", - " texts,\n", - " table_summaries, \n", - " tables, \n", - " image_summaries, \n", - " image_summaries)" + "retriever_multi_vector_img_summary = create_multi_vector_retriever(\n", + " multi_vector_text,\n", + " text_summaries,\n", + " texts,\n", + " table_summaries,\n", + " tables,\n", + " image_summaries,\n", + " image_summaries,\n", + ")" ] }, { @@ -418,14 +428,17 @@ "\n", "# Create chroma w/ multi-modal embeddings\n", "multimodal_embd = Chroma(\n", - " collection_name=\"multimodal_embd\",\n", - " embedding_function=OpenCLIPEmbeddings()\n", + " collection_name=\"multimodal_embd\", embedding_function=OpenCLIPEmbeddings()\n", ")\n", "\n", "# Get image URIs\n", - "image_uris = sorted([os.path.join(path, image_name) \n", - " for image_name in os.listdir(path) \n", - " if image_name.endswith('.jpg')])\n", + "image_uris = sorted(\n", + " [\n", + " os.path.join(path, image_name)\n", + " for image_name in os.listdir(path)\n", + " if image_name.endswith(\".jpg\")\n", + " ]\n", + ")\n", "\n", "# Add images and documents\n", "if image_uris:\n", @@ -435,7 +448,7 @@ "if tables:\n", " multimodal_embd.add_texts(texts=tables)\n", "\n", - "# Make retriever \n", + "# Make retriever\n", "retriever_multimodal_embd = multimodal_embd.as_retriever()" ] }, @@ -466,14 +479,14 @@ "\"\"\"\n", "rag_prompt_text = ChatPromptTemplate.from_template(template)\n", "\n", - "# Build \n", + "\n", + "# Build\n", "def text_rag_chain(retriever):\n", - " \n", - " ''' RAG chain '''\n", + " \"\"\"RAG chain\"\"\"\n", "\n", " # LLM\n", " model = ChatOpenAI(temperature=0, model=\"gpt-4\")\n", - " \n", + "\n", " # RAG pipeline\n", " chain = (\n", " {\"context\": retriever, \"question\": RunnablePassthrough()}\n", @@ -500,13 +513,15 @@ "metadata": {}, "outputs": [], "source": [ - "import re \n", + "import re\n", "from langchain.schema import Document\n", "from langchain.schema.runnable import RunnableLambda\n", "\n", + "\n", "def looks_like_base64(sb):\n", " \"\"\"Check if the string looks like base64.\"\"\"\n", - " return re.match('^[A-Za-z0-9+/]+[=]{0,2}$', sb) is not None\n", + " return re.match(\"^[A-Za-z0-9+/]+[=]{0,2}$\", sb) is not None\n", + "\n", "\n", "def is_image_data(b64data):\n", " \"\"\"Check if the base64 data is an image by looking at the start of the data.\"\"\"\n", @@ -514,7 +529,7 @@ " b\"\\xFF\\xD8\\xFF\": \"jpg\",\n", " b\"\\x89\\x50\\x4E\\x47\\x0D\\x0A\\x1A\\x0A\": \"png\",\n", " b\"\\x47\\x49\\x46\\x38\": \"gif\",\n", - " b\"\\x52\\x49\\x46\\x46\": \"webp\"\n", + " b\"\\x52\\x49\\x46\\x46\": \"webp\",\n", " }\n", " try:\n", " header = base64.b64decode(b64data)[:8] # Decode and get the first 8 bytes\n", @@ -525,6 +540,7 @@ " except Exception:\n", " return False\n", "\n", + "\n", "def split_image_text_types(docs):\n", " \"\"\"Split base64-encoded images and texts.\"\"\"\n", " b64_images = []\n", @@ -539,6 +555,7 @@ " texts.append(doc)\n", " return {\"images\": b64_images, \"texts\": texts}\n", "\n", + "\n", "def img_prompt_func(data_dict):\n", " # Joining the context texts into a single string\n", " formatted_texts = \"\\n\".join(data_dict[\"context\"][\"texts\"])\n", @@ -550,7 +567,7 @@ " \"type\": \"image_url\",\n", " \"image_url\": {\n", " \"url\": f\"data:image/jpeg;base64,{data_dict['context']['images'][0]}\"\n", - " }\n", + " },\n", " }\n", " messages.append(image_message)\n", "\n", @@ -563,22 +580,24 @@ " f\"User-provided question / keywords: {data_dict['question']}\\n\\n\"\n", " \"Text and / or tables:\\n\"\n", " f\"{formatted_texts}\"\n", - " )\n", + " ),\n", " }\n", " messages.append(text_message)\n", " return [HumanMessage(content=messages)]\n", "\n", + "\n", "def multi_modal_rag_chain(retriever):\n", - " ''' Multi-modal RAG chain '''\n", + " \"\"\"Multi-modal RAG chain\"\"\"\n", "\n", " # Multi-modal LLM\n", - " model = ChatOpenAI(temperature=0, \n", - " model=\"gpt-4-vision-preview\", \n", - " max_tokens=1024)\n", - " \n", + " model = ChatOpenAI(temperature=0, model=\"gpt-4-vision-preview\", max_tokens=1024)\n", + "\n", " # RAG pipeline\n", " chain = (\n", - " {\"context\": retriever | RunnableLambda(split_image_text_types), \"question\": RunnablePassthrough()}\n", + " {\n", + " \"context\": retriever | RunnableLambda(split_image_text_types),\n", + " \"question\": RunnablePassthrough(),\n", + " }\n", " | RunnableLambda(img_prompt_func)\n", " | model\n", " | StrOutputParser()\n", @@ -603,12 +622,12 @@ "outputs": [], "source": [ "# RAG chains\n", - "chain_baseline=text_rag_chain(retriever_baseline)\n", - "chain_mv_text=text_rag_chain(retriever_multi_vector_img_summary)\n", + "chain_baseline = text_rag_chain(retriever_baseline)\n", + "chain_mv_text = text_rag_chain(retriever_multi_vector_img_summary)\n", "\n", "# Multi-modal RAG chains\n", - "chain_multimodal_mv_img=multi_modal_rag_chain(retriever_multi_vector_img)\n", - "chain_multimodal_embd=multi_modal_rag_chain(retriever_multimodal_embd)" + "chain_multimodal_mv_img = multi_modal_rag_chain(retriever_multi_vector_img)\n", + "chain_multimodal_embd = multi_modal_rag_chain(retriever_multimodal_embd)" ] }, { @@ -694,7 +713,8 @@ "source": [ "# Read\n", "import pandas as pd\n", - "eval_set = pd.read_csv(path+'cpi_eval.csv')\n", + "\n", + "eval_set = pd.read_csv(path + \"cpi_eval.csv\")\n", "eval_set.head(3)" ] }, @@ -715,12 +735,12 @@ "# Populate dataset\n", "for _, row in eval_set.iterrows():\n", " # Get Q, A\n", - " q = row['Question']\n", - " a = row['Answer']\n", + " q = row[\"Question\"]\n", + " a = row[\"Answer\"]\n", " # Use the values in your function\n", - " client.create_example(inputs={\"question\": q}, \n", - " outputs={\"answer\": a}, \n", - " dataset_id=dataset.id)" + " client.create_example(\n", + " inputs={\"question\": q}, outputs={\"answer\": a}, dataset_id=dataset.id\n", + " )" ] }, { @@ -764,17 +784,22 @@ " evaluators=[\"qa\"],\n", ")\n", "\n", - "def run_eval(chain,run_name,dataset_name):\n", + "\n", + "def run_eval(chain, run_name, dataset_name):\n", " _ = client.run_on_dataset(\n", " dataset_name=dataset_name,\n", - " llm_or_chain_factory=lambda: (lambda x: x[\"question\"]+suffix_for_images) | chain,\n", + " llm_or_chain_factory=lambda: (lambda x: x[\"question\"] + suffix_for_images)\n", + " | chain,\n", " evaluation=eval_config,\n", " project_name=run_name,\n", " )\n", "\n", - "for chain, run in zip([chain_baseline, chain_mv_text, chain_multimodal_mv_img, chain_multimodal_embd], \n", - " [\"baseline\", \"mv_text\", \"mv_img\", \"mm_embd\"]):\n", - " run_eval(chain, dataset_name+\"-\"+run, dataset_name)" + "\n", + "for chain, run in zip(\n", + " [chain_baseline, chain_mv_text, chain_multimodal_mv_img, chain_multimodal_embd],\n", + " [\"baseline\", \"mv_text\", \"mv_img\", \"mm_embd\"],\n", + "):\n", + " run_eval(chain, dataset_name + \"-\" + run, dataset_name)" ] } ], diff --git a/cookbook/multi_modal_RAG_chroma.ipynb b/cookbook/multi_modal_RAG_chroma.ipynb index e0c7f81fbb9..372d2ad5b59 100644 --- a/cookbook/multi_modal_RAG_chroma.ipynb +++ b/cookbook/multi_modal_RAG_chroma.ipynb @@ -115,7 +115,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Folder with pdf and extracted images \n", + "# Folder with pdf and extracted images\n", "path = \"/Users/rlm/Desktop/photos/\"" ] }, @@ -128,9 +128,10 @@ "source": [ "# Extract images, tables, and chunk text\n", "from unstructured.partition.pdf import partition_pdf\n", + "\n", "raw_pdf_elements = partition_pdf(\n", " filename=path + \"photos.pdf\",\n", - " extract_images_in_pdf=True, \n", + " extract_images_in_pdf=True,\n", " infer_table_structure=True,\n", " chunking_strategy=\"by_title\",\n", " max_characters=4000,\n", @@ -191,14 +192,17 @@ "\n", "# Create chroma\n", "vectorstore = Chroma(\n", - " collection_name=\"mm_rag_clip_photos\",\n", - " embedding_function=OpenCLIPEmbeddings()\n", + " collection_name=\"mm_rag_clip_photos\", embedding_function=OpenCLIPEmbeddings()\n", ")\n", "\n", "# Get image URIs with .jpg extension only\n", - "image_uris = sorted([os.path.join(path, image_name) \n", - " for image_name in os.listdir(path) \n", - " if image_name.endswith('.jpg')])\n", + "image_uris = sorted(\n", + " [\n", + " os.path.join(path, image_name)\n", + " for image_name in os.listdir(path)\n", + " if image_name.endswith(\".jpg\")\n", + " ]\n", + ")\n", "\n", "# Add images\n", "vectorstore.add_images(uris=image_uris)\n", @@ -206,7 +210,7 @@ "# Add documents\n", "vectorstore.add_texts(texts=texts)\n", "\n", - "# Make retriever \n", + "# Make retriever\n", "retriever = vectorstore.as_retriever()" ] }, @@ -235,6 +239,7 @@ "from io import BytesIO\n", "from PIL import Image\n", "\n", + "\n", "def resize_base64_image(base64_string, size=(128, 128)):\n", " \"\"\"\n", " Resize an image encoded as a Base64 string.\n", @@ -258,30 +263,31 @@ " resized_img.save(buffered, format=img.format)\n", "\n", " # Encode the resized image to Base64\n", - " return base64.b64encode(buffered.getvalue()).decode('utf-8')\n", + " return base64.b64encode(buffered.getvalue()).decode(\"utf-8\")\n", + "\n", "\n", "def is_base64(s):\n", - " ''' Check if a string is Base64 encoded '''\n", + " \"\"\"Check if a string is Base64 encoded\"\"\"\n", " try:\n", " return base64.b64encode(base64.b64decode(s)) == s.encode()\n", " except Exception:\n", " return False\n", - " \n", + "\n", + "\n", "def split_image_text_types(docs):\n", - " ''' Split numpy array images and texts '''\n", + " \"\"\"Split numpy array images and texts\"\"\"\n", " images = []\n", " text = []\n", " for doc in docs:\n", - " doc = doc.page_content # Extract Document contents \n", + " doc = doc.page_content # Extract Document contents\n", " if is_base64(doc):\n", " # Resize image to avoid OAI server error\n", - " images.append(resize_base64_image(doc, size=(250, 250))) # base64 encoded str \n", + " images.append(\n", + " resize_base64_image(doc, size=(250, 250))\n", + " ) # base64 encoded str\n", " else:\n", - " text.append(doc) \n", - " return {\n", - " \"images\": images,\n", - " \"texts\": text\n", - " }" + " text.append(doc)\n", + " return {\"images\": images, \"texts\": text}" ] }, { @@ -311,6 +317,7 @@ "from langchain.schema.runnable import RunnablePassthrough, RunnableLambda\n", "from langchain.schema.messages import HumanMessage, SystemMessage\n", "\n", + "\n", "def prompt_func(data_dict):\n", " # Joining the context texts into a single string\n", " formatted_texts = \"\\n\".join(data_dict[\"context\"][\"texts\"])\n", @@ -322,7 +329,7 @@ " \"type\": \"image_url\",\n", " \"image_url\": {\n", " \"url\": f\"data:image/jpeg;base64,{data_dict['context']['images'][0]}\"\n", - " }\n", + " },\n", " }\n", " messages.append(image_message)\n", "\n", @@ -342,17 +349,21 @@ " f\"User-provided keywords: {data_dict['question']}\\n\\n\"\n", " \"Text and / or tables:\\n\"\n", " f\"{formatted_texts}\"\n", - " )\n", + " ),\n", " }\n", " messages.append(text_message)\n", "\n", " return [HumanMessage(content=messages)]\n", - " \n", + "\n", + "\n", "model = ChatOpenAI(temperature=0, model=\"gpt-4-vision-preview\", max_tokens=1024)\n", "\n", "# RAG pipeline\n", "chain = (\n", - " {\"context\": retriever | RunnableLambda(split_image_text_types), \"question\": RunnablePassthrough()}\n", + " {\n", + " \"context\": retriever | RunnableLambda(split_image_text_types),\n", + " \"question\": RunnablePassthrough(),\n", + " }\n", " | RunnableLambda(prompt_func)\n", " | model\n", " | StrOutputParser()\n", @@ -412,15 +423,16 @@ "source": [ "from IPython.display import display, HTML\n", "\n", - "def plt_img_base64(img_base64):\n", "\n", + "def plt_img_base64(img_base64):\n", " # Create an HTML img tag with the base64 string as the source\n", " image_html = f''\n", - " \n", + "\n", " # Display the image by rendering the HTML\n", " display(HTML(image_html))\n", "\n", - "docs = retriever.get_relevant_documents(\"Woman with children\",k=10)\n", + "\n", + "docs = retriever.get_relevant_documents(\"Woman with children\", k=10)\n", "for doc in docs:\n", " if is_base64(doc.page_content):\n", " plt_img_base64(doc.page_content)\n", @@ -446,9 +458,7 @@ } ], "source": [ - "chain.invoke(\n", - " \"Woman with children\"\n", - ")" + "chain.invoke(\"Woman with children\")" ] }, { diff --git a/cookbook/qianfan_baidu_elasticesearch_RAG.ipynb b/cookbook/qianfan_baidu_elasticesearch_RAG.ipynb index b6dac21d2b6..59af499212a 100644 --- a/cookbook/qianfan_baidu_elasticesearch_RAG.ipynb +++ b/cookbook/qianfan_baidu_elasticesearch_RAG.ipynb @@ -82,7 +82,9 @@ "secret_access_key = \"your bos access sk\"\n", "\n", "# create BceClientConfiguration\n", - "config = BceClientConfiguration(credentials=BceCredentials(access_key_id, secret_access_key), endpoint = bos_host)\n", + "config = BceClientConfiguration(\n", + " credentials=BceCredentials(access_key_id, secret_access_key), endpoint=bos_host\n", + ")\n", "\n", "loader = BaiduBOSDirectoryLoader(conf=config, bucket=\"llm-test\", prefix=\"llm/\")\n", "documents = loader.load()\n", @@ -109,10 +111,14 @@ "embeddings.client = sentence_transformers.SentenceTransformer(embeddings.model_name)\n", "\n", "db = BESVectorStore.from_documents(\n", - " documents=split_docs, embedding=embeddings, bes_url=\"your bes url\", index_name='test-index', vector_query_field='vector'\n", - " )\n", + " documents=split_docs,\n", + " embedding=embeddings,\n", + " bes_url=\"your bes url\",\n", + " index_name=\"test-index\",\n", + " vector_query_field=\"vector\",\n", + ")\n", "\n", - "db.client.indices.refresh(index='test-index')\n", + "db.client.indices.refresh(index=\"test-index\")\n", "retriever = db.as_retriever()" ] }, @@ -130,8 +136,15 @@ "metadata": {}, "outputs": [], "source": [ - "llm = QianfanLLMEndpoint(model=\"ERNIE-Bot\", qianfan_ak='your qianfan ak', qianfan_sk='your qianfan sk', streaming=True)\n", - "qa = RetrievalQA.from_chain_type(llm=llm, chain_type=\"refine\", retriever=retriever, return_source_documents=True)\n", + "llm = QianfanLLMEndpoint(\n", + " model=\"ERNIE-Bot\",\n", + " qianfan_ak=\"your qianfan ak\",\n", + " qianfan_sk=\"your qianfan sk\",\n", + " streaming=True,\n", + ")\n", + "qa = RetrievalQA.from_chain_type(\n", + " llm=llm, chain_type=\"refine\", retriever=retriever, return_source_documents=True\n", + ")\n", "\n", "query = \"什么是张量?\"\n", "print(qa.run(query))" diff --git a/docs/docs/integrations/document_loaders/docusaurus.ipynb b/docs/docs/integrations/document_loaders/docusaurus.ipynb index ca953cb6684..0ffa9a0b1d3 100644 --- a/docs/docs/integrations/document_loaders/docusaurus.ipynb +++ b/docs/docs/integrations/document_loaders/docusaurus.ipynb @@ -118,7 +118,9 @@ "source": [ "loader = DocusaurusLoader(\n", " \"https://python.langchain.com\",\n", - " filter_urls=[\"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"],\n", + " filter_urls=[\n", + " \"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"\n", + " ],\n", ")\n", "documents = loader.load()" ] @@ -162,9 +164,11 @@ "source": [ "loader = DocusaurusLoader(\n", " \"https://python.langchain.com\",\n", - " filter_urls=[\"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"],\n", + " filter_urls=[\n", + " \"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"\n", + " ],\n", " # This will only include the content that matches these tags, otherwise they will be removed\n", - " custom_html_tags=[\"#content\", \".main\"]\n", + " custom_html_tags=[\"#content\", \".main\"],\n", ")" ] }, @@ -213,7 +217,9 @@ "source": [ "loader = DocusaurusLoader(\n", " \"https://python.langchain.com\",\n", - " filter_urls=[\"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"],\n", + " filter_urls=[\n", + " \"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"\n", + " ],\n", " parsing_function=remove_nav_and_header_elements,\n", ")" ] diff --git a/docs/docs/integrations/memory/neo4j_chat_message_history.ipynb b/docs/docs/integrations/memory/neo4j_chat_message_history.ipynb index 238beb09982..f31915bb748 100644 --- a/docs/docs/integrations/memory/neo4j_chat_message_history.ipynb +++ b/docs/docs/integrations/memory/neo4j_chat_message_history.ipynb @@ -25,7 +25,7 @@ " url=\"bolt://localhost:7687\",\n", " username=\"neo4j\",\n", " password=\"password\",\n", - " session_id=\"session_id_1\"\n", + " session_id=\"session_id_1\",\n", ")\n", "\n", "history.add_user_message(\"hi!\")\n", diff --git a/docs/docs/integrations/text_embedding/fastembed.ipynb b/docs/docs/integrations/text_embedding/fastembed.ipynb index 9d6826f92d2..edac78ecb5c 100644 --- a/docs/docs/integrations/text_embedding/fastembed.ipynb +++ b/docs/docs/integrations/text_embedding/fastembed.ipynb @@ -110,7 +110,9 @@ "metadata": {}, "outputs": [], "source": [ - "document_embeddings = embeddings.embed_documents([\"This is a document\", \"This is some other document\"])" + "document_embeddings = embeddings.embed_documents(\n", + " [\"This is a document\", \"This is some other document\"]\n", + ")" ] }, { diff --git a/docs/docs/integrations/text_embedding/open_clip.ipynb b/docs/docs/integrations/text_embedding/open_clip.ipynb index d3c3fd3766a..c1e1ba9ed81 100644 --- a/docs/docs/integrations/text_embedding/open_clip.ipynb +++ b/docs/docs/integrations/text_embedding/open_clip.ipynb @@ -48,6 +48,7 @@ "outputs": [], "source": [ "import open_clip\n", + "\n", "open_clip.list_pretrained()" ] }, @@ -147,8 +148,8 @@ " \"rocket\": \"a rocket standing on a launchpad\",\n", " \"motorcycle_right\": \"a red motorcycle standing in a garage\",\n", " \"camera\": \"a person looking at a camera on a tripod\",\n", - " \"horse\": \"a black-and-white silhouette of a horse\", \n", - " \"coffee\": \"a cup of coffee on a saucer\"\n", + " \"horse\": \"a black-and-white silhouette of a horse\",\n", + " \"coffee\": \"a cup of coffee on a saucer\",\n", "}\n", "\n", "original_images = []\n", @@ -158,14 +159,18 @@ "plt.figure(figsize=(16, 5))\n", "\n", "# Loop to display and prepare images and assemble URIs\n", - "for filename in [filename for filename in os.listdir(skimage.data_dir) if filename.endswith(\".png\") or filename.endswith(\".jpg\")]:\n", + "for filename in [\n", + " filename\n", + " for filename in os.listdir(skimage.data_dir)\n", + " if filename.endswith(\".png\") or filename.endswith(\".jpg\")\n", + "]:\n", " name = os.path.splitext(filename)[0]\n", " if name not in descriptions:\n", " continue\n", "\n", " image_path = os.path.join(skimage.data_dir, filename)\n", " image = Image.open(image_path).convert(\"RGB\")\n", - " \n", + "\n", " plt.subplot(2, 4, len(images) + 1)\n", " plt.imshow(image)\n", " plt.title(f\"{filename}\\n{descriptions[name]}\")\n", @@ -173,7 +178,7 @@ " plt.yticks([])\n", "\n", " original_images.append(image)\n", - " images.append(image) # Origional code does preprocessing here\n", + " images.append(image) # Origional code does preprocessing here\n", " texts.append(descriptions[name])\n", " image_uris.append(image_path) # Add the image URI to the list\n", "\n", @@ -216,7 +221,7 @@ "# Instantiate your model\n", "clip_embd = OpenCLIPEmbeddings()\n", "\n", - "# Embed images and text \n", + "# Embed images and text\n", "img_features = clip_embd.embed_image(image_uris)\n", "text_features = clip_embd.embed_documents([\"This is \" + desc for desc in texts])\n", "\n", @@ -241,7 +246,7 @@ " plt.text(x, y, f\"{similarity[y, x]:.2f}\", ha=\"center\", va=\"center\", size=12)\n", "\n", "for side in [\"left\", \"top\", \"right\", \"bottom\"]:\n", - " plt.gca().spines[side].set_visible(False)\n", + " plt.gca().spines[side].set_visible(False)\n", "\n", "plt.xlim([-0.5, count - 0.5])\n", "plt.ylim([count + 0.5, -2])\n", diff --git a/docs/docs/integrations/vectorstores/elasticsearch.ipynb b/docs/docs/integrations/vectorstores/elasticsearch.ipynb index 93343a499cc..ff17a2fc8e0 100644 --- a/docs/docs/integrations/vectorstores/elasticsearch.ipynb +++ b/docs/docs/integrations/vectorstores/elasticsearch.ipynb @@ -794,13 +794,18 @@ "from typing import Dict\n", "from langchain.docstore.document import Document\n", "\n", + "\n", "def custom_document_builder(hit: Dict) -> Document:\n", " src = hit.get(\"_source\", {})\n", " return Document(\n", " page_content=src.get(\"content\", \"Missing content!\"),\n", - " metadata={\"page_number\": src.get(\"page_number\", -1), \"original_filename\": src.get(\"original_filename\", \"Missing filename!\")},\n", + " metadata={\n", + " \"page_number\": src.get(\"page_number\", -1),\n", + " \"original_filename\": src.get(\"original_filename\", \"Missing filename!\"),\n", + " },\n", " )\n", "\n", + "\n", "results = db.similarity_search(\n", " \"What did the president say about Ketanji Brown Jackson\",\n", " k=4,\n", diff --git a/docs/docs/integrations/vectorstores/weaviate.ipynb b/docs/docs/integrations/vectorstores/weaviate.ipynb index dd3d6a2646c..03b9c933c6c 100644 --- a/docs/docs/integrations/vectorstores/weaviate.ipynb +++ b/docs/docs/integrations/vectorstores/weaviate.ipynb @@ -149,12 +149,7 @@ "metadata": {}, "outputs": [], "source": [ - "db = Weaviate.from_documents(\n", - " docs, \n", - " embeddings, \n", - " weaviate_url=WEAVIATE_URL, \n", - " by_text=False\n", - ")" + "db = Weaviate.from_documents(docs, embeddings, weaviate_url=WEAVIATE_URL, by_text=False)" ] }, { @@ -227,8 +222,7 @@ "import weaviate\n", "\n", "client = weaviate.Client(\n", - " url=WEAVIATE_URL, \n", - " auth_client_secret=weaviate.AuthApiKey(WEAVIATE_API_KEY)\n", + " url=WEAVIATE_URL, auth_client_secret=weaviate.AuthApiKey(WEAVIATE_API_KEY)\n", ")\n", "\n", "# client = weaviate.Client(\n", @@ -240,10 +234,7 @@ "# )\n", "\n", "vectorstore = Weaviate.from_documents(\n", - " documents, \n", - " embeddings, \n", - " client=client, \n", - " by_text=False\n", + " documents, embeddings, client=client, by_text=False\n", ")" ] }, @@ -378,6 +369,7 @@ ], "source": [ "from langchain.chat_models import ChatOpenAI\n", + "\n", "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", "llm.predict(\"What did the president say about Justice Breyer\")" ] @@ -575,10 +567,10 @@ "from langchain.schema.output_parser import StrOutputParser\n", "\n", "rag_chain = (\n", - " {\"context\": retriever, \"question\": RunnablePassthrough()} \n", - " | prompt \n", + " {\"context\": retriever, \"question\": RunnablePassthrough()}\n", + " | prompt\n", " | llm\n", - " | StrOutputParser() \n", + " | StrOutputParser()\n", ")\n", "\n", "rag_chain.invoke(\"What did the president say about Justice Breyer\")" diff --git a/docs/docs/modules/agents/index.ipynb b/docs/docs/modules/agents/index.ipynb index 0fe978bd0a0..ec321e6a883 100644 --- a/docs/docs/modules/agents/index.ipynb +++ b/docs/docs/modules/agents/index.ipynb @@ -198,6 +198,7 @@ "source": [ "from langchain.agents import tool\n", "\n", + "\n", "@tool\n", "def get_word_length(word: str) -> int:\n", " \"\"\"Returns the length of a word.\"\"\"\n", @@ -606,10 +607,12 @@ "source": [ "input1 = \"how many letters in the word educa?\"\n", "result = agent_executor.invoke({\"input\": input1, \"chat_history\": chat_history})\n", - "chat_history.extend([\n", - " HumanMessage(content=input1),\n", - " AIMessage(content=result[\"output\"]),\n", - "])\n", + "chat_history.extend(\n", + " [\n", + " HumanMessage(content=input1),\n", + " AIMessage(content=result[\"output\"]),\n", + " ]\n", + ")\n", "agent_executor.invoke({\"input\": \"is that a real word?\", \"chat_history\": chat_history})" ] }, diff --git a/docs/scripts/copy_templates.py b/docs/scripts/copy_templates.py index 11120d4d0fa..21b0c7a4f37 100644 --- a/docs/scripts/copy_templates.py +++ b/docs/scripts/copy_templates.py @@ -1,16 +1,15 @@ import glob import os -from pathlib import Path import re import shutil - +from pathlib import Path TEMPLATES_DIR = Path(os.path.abspath(__file__)).parents[2] / "templates" DOCS_TEMPLATES_DIR = Path(os.path.abspath(__file__)).parents[1] / "docs" / "templates" readmes = list(glob.glob(str(TEMPLATES_DIR) + "/*/README.md")) -destinations = [readme[len(str(TEMPLATES_DIR)) + 1:-10] + ".md" for readme in readmes] +destinations = [readme[len(str(TEMPLATES_DIR)) + 1 : -10] + ".md" for readme in readmes] for source, destination in zip(readmes, destinations): full_destination = DOCS_TEMPLATES_DIR / destination shutil.copyfile(source, full_destination) @@ -33,4 +32,3 @@ with open(TEMPLATES_INDEX_DESTINATION, "r") as f: content = re.sub("\]\(\.\.\/", "](/docs/templates/", content) with open(TEMPLATES_INDEX_DESTINATION, "w") as f: f.write(sidebar_hidden + content) - diff --git a/libs/langchain/langchain/agents/agent.py b/libs/langchain/langchain/agents/agent.py index da9db9f566d..cd4a939f251 100644 --- a/libs/langchain/langchain/agents/agent.py +++ b/libs/langchain/langchain/agents/agent.py @@ -821,20 +821,6 @@ class AgentExecutor(Chain): ) return values - @root_validator() - def validate_return_direct_tool(cls, values: Dict) -> Dict: - """Validate that tools are compatible with agent.""" - agent = values["agent"] - tools = values["tools"] - if isinstance(agent, BaseMultiActionAgent): - for tool in tools: - if tool.return_direct: - raise ValueError( - "Tools that have `return_direct=True` are not allowed " - "in multi-action agents" - ) - return values - @root_validator(pre=True) def validate_runnable_agent(cls, values: Dict) -> Dict: """Convert runnable to agent if passed in.""" diff --git a/templates/rag-timescale-conversation/rag_conversation.ipynb b/templates/rag-timescale-conversation/rag_conversation.ipynb index 4203689a153..82842bdae9c 100644 --- a/templates/rag-timescale-conversation/rag_conversation.ipynb +++ b/templates/rag-timescale-conversation/rag_conversation.ipynb @@ -79,7 +79,6 @@ } ], "source": [ - "\n", "answer = rag_app.invoke(\n", " {\n", " \"question\": \"What commits did the person with my name make?\",\n", @@ -125,7 +124,7 @@ " \"end_date\": \"2016-01-01 00:00:00\",\n", " }\n", ")\n", - "answer\n" + "answer" ] }, {