From 2ebd167dbae709c9efcf9c0408550c34debd33e5 Mon Sep 17 00:00:00 2001 From: Predrag Gruevski <2348618+obi1kenobi@users.noreply.github.com> Date: Tue, 14 Nov 2023 15:58:22 -0500 Subject: [PATCH] Lint Python notebooks with ruff. (#12677) The new ruff version fixed the blocking bugs, and I was able to fairly easily us to a passing state: ruff fixed some issues on its own, I fixed a handful by hand, and I added a list of narrowly-targeted exclusions for files that are currently failing ruff rules that we probably should look into eventually. I went pretty lenient on the docs / cookbooks rules, allowing dead code and such things. Perhaps in the future we may want to tighten the rules further, but this is already a good set of checks that found real issues and will prevent them going forward. --- .../workflows/extract_ignored_words_list.py | 6 +- Makefile | 4 +- cookbook/LLaMA2_sql_chat.ipynb | 1 - cookbook/Semi_Structured_RAG.ipynb | 4 +- .../Semi_structured_and_multi_modal_RAG.ipynb | 11 +- ...mi_structured_multi_modal_RAG_LLaMA2.ipynb | 12 +- cookbook/advanced_rag_eval.ipynb | 209 +-- cookbook/autogpt/marathon_times.ipynb | 1 - cookbook/baby_agi.ipynb | 10 +- cookbook/baby_agi_with_agent.ipynb | 16 +- cookbook/code-analysis-deeplake.ipynb | 2 +- .../custom_agent_with_plugin_retrieval.ipynb | 6 +- ...ith_plugin_retrieval_using_plugnplai.ipynb | 6 +- .../deeplake_semantic_search_over_chat.ipynb | 4 +- cookbook/elasticsearch_db_qa.ipynb | 1 - ...oking_retrieval_augmented_generation.ipynb | 8 +- cookbook/gymnasium_agent_simulation.ipynb | 7 +- cookbook/meta_prompt.ipynb | 10 +- cookbook/multi_modal_RAG_chroma.ipynb | 68 +- cookbook/multi_player_dnd.ipynb | 4 +- cookbook/multiagent_authoritarian.ipynb | 8 +- cookbook/multiagent_bidding.ipynb | 5 +- cookbook/myscale_vector_sql.ipynb | 3 +- cookbook/petting_zoo.ipynb | 2 +- .../qianfan_baidu_elasticesearch_RAG.ipynb | 25 +- cookbook/rag_fusion.ipynb | 1 - cookbook/rewrite.ipynb | 4 +- 
...lecting_llms_based_on_context_length.ipynb | 4 +- cookbook/self_query_hotel_search.ipynb | 1 - ...tter-the-algorithm-analysis-deeplake.ipynb | 2 +- cookbook/two_agent_debate_tools.ipynb | 7 +- cookbook/two_player_dnd.ipynb | 2 +- cookbook/wikibase_agent.ipynb | 2 +- .../cookbook/code_writing.ipynb | 2 - .../cookbook/multiple_chains.ipynb | 2 +- docs/docs/guides/evaluation/string/json.ipynb | 2 +- .../trajectory/trajectory_eval.ipynb | 605 ++++---- docs/docs/guides/local_llms.ipynb | 12 +- docs/docs/guides/model_laboratory.ipynb | 6 +- .../presidio_data_anonymization/index.ipynb | 2 +- .../reversible.ipynb | 1264 ++++++++--------- .../safety/amazon_comprehend_chain.ipynb | 3 +- .../docs/integrations/callbacks/argilla.ipynb | 2 +- .../integrations/callbacks/confident.ipynb | 1 - .../docs/integrations/callbacks/context.ipynb | 8 +- docs/docs/integrations/callbacks/infino.ipynb | 2 - .../callbacks/sagemaker_tracking.ipynb | 1 - .../integrations/callbacks/trubrics.ipynb | 1 - docs/docs/integrations/chat/anthropic.ipynb | 8 +- .../integrations/chat/azure_chat_openai.ipynb | 10 +- .../chat/baidu_qianfan_endpoint.ipynb | 2 +- docs/docs/integrations/chat/cohere.ipynb | 2 +- docs/docs/integrations/chat/jinachat.ipynb | 13 +- docs/docs/integrations/chat/konko.ipynb | 8 +- docs/docs/integrations/chat/litellm.ipynb | 8 +- docs/docs/integrations/chat/openai.ipynb | 3 +- docs/docs/integrations/chat/tongyi.ipynb | 2 +- docs/docs/integrations/chat/vllm.ipynb | 3 +- .../integrations/chat_loaders/gmail.ipynb | 7 - .../document_loaders/docugami.ipynb | 1 - .../document_loaders/docusaurus.ipynb | 14 +- .../integrations/document_loaders/figma.ipynb | 4 - .../document_loaders/google_drive.ipynb | 2 +- .../integrations/document_loaders/iugu.ipynb | 3 - .../document_loaders/modern_treasury.ipynb | 3 - .../integrations/document_loaders/quip.ipynb | 166 ++- .../document_loaders/rockset.ipynb | 8 +- .../document_loaders/stripe.ipynb | 3 - .../document_loaders/web_base.ipynb | 22 +- 
.../openai_metadata_tagger.ipynb | 6 +- docs/docs/integrations/llms/azure_ml.ipynb | 1 - .../llms/baidu_qianfan_endpoint.ipynb | 2 +- docs/docs/integrations/llms/banana.ipynb | 4 +- docs/docs/integrations/llms/beam.ipynb | 37 +- docs/docs/integrations/llms/bittensor.ipynb | 5 - docs/docs/integrations/llms/chatglm.ipynb | 7 +- docs/docs/integrations/llms/edenai.ipynb | 15 +- docs/docs/integrations/llms/fireworks.ipynb | 5 - docs/docs/integrations/llms/gooseai.ipynb | 7 +- docs/docs/integrations/llms/javelin.ipynb | 1 - .../llms/jsonformer_experimental.ipynb | 5 +- docs/docs/integrations/llms/llm_caching.ipynb | 1 - docs/docs/integrations/llms/openlm.ipynb | 8 +- .../integrations/llms/predictionguard.ipynb | 68 +- .../integrations/llms/titan_takeoff.ipynb | 40 +- .../integrations/memory/aws_dynamodb.ipynb | 3 +- .../cassandra_chat_message_history.ipynb | 1 - .../memory/neo4j_chat_message_history.ipynb | 2 +- .../memory/rockset_chat_message_history.ipynb | 3 +- .../providers/arthur_tracking.ipynb | 12 +- .../providers/clearml_tracking.ipynb | 1 - .../providers/comet_tracking.ipynb | 3 - docs/docs/integrations/providers/rebuff.ipynb | 1 - .../providers/vectara/vectara_chat.ipynb | 1 - .../vectara/vectara_text_generation.ipynb | 1 - .../Activeloop DeepMemory+LangChain.ipynb | 3 +- .../retrievers/amazon_kendra_retriever.ipynb | 1 - .../retrievers/docarray_retriever.ipynb | 1 - docs/docs/integrations/retrievers/kay.ipynb | 1 - docs/docs/integrations/retrievers/metal.ipynb | 2 +- .../activeloop_deeplake_self_query.ipynb | 5 +- .../self_query/chroma_self_query.ipynb | 5 +- .../self_query/elasticsearch_self_query.ipynb | 3 +- .../self_query/myscale_self_query.ipynb | 1 - .../retrievers/self_query/pinecone.ipynb | 5 +- .../self_query/supabase_self_query.ipynb | 1 - .../timescalevector_self_query.ipynb | 5 +- .../self_query/vectara_self_query.ipynb | 3 - .../self_query/weaviate_self_query.ipynb | 4 +- .../baidu_qianfan_endpoint.ipynb | 2 - .../text_embedding/clarifai.ipynb | 2 
+- .../integrations/text_embedding/edenai.ipynb | 14 +- .../text_embedding/elasticsearch.ipynb | 69 +- .../text_embedding/fastembed.ipynb | 10 +- .../text_embedding/open_clip.ipynb | 19 +- .../text_embedding/sagemaker-endpoint.ipynb | 1 - .../sentence_transformers.ipynb | 2 +- docs/docs/integrations/toolkits/github.ipynb | 1 - docs/docs/integrations/toolkits/json.ipynb | 5 +- docs/docs/integrations/toolkits/multion.ipynb | 1 - docs/docs/integrations/toolkits/openapi.ipynb | 4 +- .../integrations/toolkits/openapi_nla.ipynb | 6 +- .../integrations/toolkits/playwright.ipynb | 3 +- docs/docs/integrations/toolkits/powerbi.ipynb | 1 - docs/docs/integrations/toolkits/python.ipynb | 1 - .../integrations/toolkits/sql_database.ipynb | 22 +- .../integrations/toolkits/vectorstore.ipynb | 1 - .../tools/dalle_image_generator.ipynb | 2 - docs/docs/integrations/tools/filesystem.ipynb | 8 - docs/docs/integrations/tools/graphql.ipynb | 1 - .../vectorstores/activeloop_deeplake.ipynb | 2 +- .../integrations/vectorstores/atlas.ipynb | 1 - .../integrations/vectorstores/awadb.ipynb | 14 +- .../vectorstores/azure_cosmos_db.ipynb | 207 ++- .../vectorstores/azuresearch.ipynb | 2 - .../integrations/vectorstores/dingo.ipynb | 2 +- .../vectorstores/elasticsearch.ipynb | 9 +- .../integrations/vectorstores/hippo.ipynb | 314 ++-- .../vectorstores/matchingengine.ipynb | 6 +- .../vectorstores/mongodb_atlas.ipynb | 2 - .../integrations/vectorstores/myscale.ipynb | 2 +- .../vectorstores/pgvecto_rs.ipynb | 2 +- .../integrations/vectorstores/pgvector.ipynb | 55 +- .../integrations/vectorstores/scann.ipynb | 1 - .../integrations/vectorstores/starrocks.ipynb | 7 +- .../vectorstores/timescalevector.ipynb | 11 +- .../integrations/vectorstores/vectara.ipynb | 1 - .../integrations/vectorstores/vespa.ipynb | 576 ++++---- .../integrations/vectorstores/weaviate.ipynb | 22 +- .../agent_types/openai_functions_agent.ipynb | 1 - .../agents/agent_types/structured_chat.ipynb | 3 +- 
.../how_to/add_memory_openai_functions.ipynb | 1 - .../modules/agents/how_to/agent_iter.ipynb | 6 +- .../agents/how_to/agent_vectorstore.ipynb | 4 +- .../modules/agents/how_to/async_agent.ipynb | 3 - .../modules/agents/how_to/chatgpt_clone.ipynb | 4 +- .../modules/agents/how_to/custom_agent.ipynb | 2 +- .../custom_agent_with_tool_retrieval.ipynb | 6 +- .../how_to/custom_multi_action_agent.ipynb | 2 +- .../agents/how_to/handle_parsing_errors.ipynb | 7 +- .../agents/how_to/max_iterations.ipynb | 1 - .../agents/how_to/max_time_limit.ipynb | 1 - .../use_toolkits_with_openai_functions.ipynb | 6 +- docs/docs/modules/agents/index.ipynb | 11 +- .../modules/agents/tools/custom_tools.ipynb | 4 +- .../callbacks/multiple_callbacks.ipynb | 1 - .../modules/chains/document/map_reduce.ipynb | 1 - .../post_retrieval/long_context_reorder.ipynb | 2 - .../retrievers/self_query.ipynb | 3 +- .../text_embedding/caching_embeddings.ipynb | 3 +- .../adding_memory_chain_multiple_inputs.ipynb | 5 +- .../memory/agent_with_memory_in_db.ipynb | 4 +- docs/docs/modules/model_io/chat/index.ipynb | 2 +- .../model_io/output_parsers/index.ipynb | 2 - .../model_io/output_parsers/retry.ipynb | 6 +- .../connecting_to_a_feature_store.ipynb | 2 +- .../prompt_templates/prompts_pipelining.ipynb | 1 - docs/docs/use_cases/chatbots.ipynb | 2 +- docs/docs/use_cases/data_generation.ipynb | 3 +- docs/docs/use_cases/extraction.ipynb | 4 - .../use_cases/graph/graph_networkx_qa.ipynb | 3 +- docs/docs/use_cases/qa_structured/sql.ipynb | 30 +- .../code_understanding.ipynb | 1 - .../vector_db_text_generation.ipynb | 1 - docs/docs/use_cases/summarization.ipynb | 1 - docs/docs/use_cases/tagging.ipynb | 2 - docs/scripts/copy_templates.py | 3 +- poetry.lock | 52 +- pyproject.toml | 63 +- 189 files changed, 2249 insertions(+), 2362 deletions(-) diff --git a/.github/workflows/extract_ignored_words_list.py b/.github/workflows/extract_ignored_words_list.py index ea1fc410bdd..7c800e0df0b 100644 --- 
a/.github/workflows/extract_ignored_words_list.py +++ b/.github/workflows/extract_ignored_words_list.py @@ -3,6 +3,8 @@ import toml pyproject_toml = toml.load("pyproject.toml") # Extract the ignore words list (adjust the key as per your TOML structure) -ignore_words_list = pyproject_toml.get("tool", {}).get("codespell", {}).get("ignore-words-list") +ignore_words_list = ( + pyproject_toml.get("tool", {}).get("codespell", {}).get("ignore-words-list") +) -print(f"::set-output name=ignore_words_list::{ignore_words_list}") \ No newline at end of file +print(f"::set-output name=ignore_words_list::{ignore_words_list}") diff --git a/Makefile b/Makefile index 3319cd720fe..6fa779b07fa 100644 --- a/Makefile +++ b/Makefile @@ -43,10 +43,10 @@ spell_fix: lint: poetry run ruff docs templates cookbook - poetry run black docs templates cookbook --diff + poetry run ruff format docs templates cookbook --diff format format_diff: - poetry run black docs templates cookbook + poetry run ruff format docs templates cookbook poetry run ruff --select I --fix docs templates cookbook ###################### diff --git a/cookbook/LLaMA2_sql_chat.ipynb b/cookbook/LLaMA2_sql_chat.ipynb index cc9a21c5f37..61e3d5a7654 100644 --- a/cookbook/LLaMA2_sql_chat.ipynb +++ b/cookbook/LLaMA2_sql_chat.ipynb @@ -67,7 +67,6 @@ "llama2_code = ChatOllama(model=\"codellama:7b-instruct\")\n", "\n", "# API\n", - "from getpass import getpass\n", "from langchain.llms import Replicate\n", "\n", "# REPLICATE_API_TOKEN = getpass()\n", diff --git a/cookbook/Semi_Structured_RAG.ipynb b/cookbook/Semi_Structured_RAG.ipynb index c6cc363368d..46b1935ac8c 100644 --- a/cookbook/Semi_Structured_RAG.ipynb +++ b/cookbook/Semi_Structured_RAG.ipynb @@ -102,9 +102,8 @@ "metadata": {}, "outputs": [], "source": [ - "from lxml import html\n", "from pydantic import BaseModel\n", - "from typing import Any, Optional\n", + "from typing import Any\n", "from unstructured.partition.pdf import partition_pdf\n", "\n", "# Get elements\n", @@ 
-373,7 +372,6 @@ "metadata": {}, "outputs": [], "source": [ - "from operator import itemgetter\n", "from langchain.schema.runnable import RunnablePassthrough\n", "\n", "# Prompt template\n", diff --git a/cookbook/Semi_structured_and_multi_modal_RAG.ipynb b/cookbook/Semi_structured_and_multi_modal_RAG.ipynb index fc011aa52e1..4a1e3838952 100644 --- a/cookbook/Semi_structured_and_multi_modal_RAG.ipynb +++ b/cookbook/Semi_structured_and_multi_modal_RAG.ipynb @@ -92,9 +92,8 @@ "metadata": {}, "outputs": [], "source": [ - "from lxml import html\n", "from pydantic import BaseModel\n", - "from typing import Any, Optional\n", + "from typing import Any\n", "from unstructured.partition.pdf import partition_pdf\n", "\n", "# Get elements\n", @@ -224,7 +223,7 @@ "outputs": [], "source": [ "# Prompt\n", - "prompt_text = \"\"\"You are an assistant tasked with summarizing tables and text. \\ \n", + "prompt_text = \"\"\"You are an assistant tasked with summarizing tables and text. \\\n", "Give a concise summary of the table or text. Table or text chunk: {element} \"\"\"\n", "prompt = ChatPromptTemplate.from_template(prompt_text)\n", "\n", @@ -313,7 +312,7 @@ " # Execute the command and save the output to the defined output file\n", " /Users/rlm/Desktop/Code/llama.cpp/bin/llava -m ../models/llava-7b/ggml-model-q5_k.gguf --mmproj ../models/llava-7b/mmproj-model-f16.gguf --temp 0.1 -p \"Describe the image in detail. 
Be specific about graphs, such as bar plots.\" --image \"$img\" > \"$output_file\"\n", "\n", - "done" + "done\n" ] }, { @@ -337,7 +336,8 @@ "metadata": {}, "outputs": [], "source": [ - "import os, glob\n", + "import os\n", + "import glob\n", "\n", "# Get all .txt file summaries\n", "file_paths = glob.glob(os.path.expanduser(os.path.join(path, \"*.txt\")))\n", @@ -644,7 +644,6 @@ "metadata": {}, "outputs": [], "source": [ - "from operator import itemgetter\n", "from langchain.schema.runnable import RunnablePassthrough\n", "\n", "# Prompt template\n", diff --git a/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb b/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb index c5eeacb8259..5d01fa2578c 100644 --- a/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb +++ b/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb @@ -82,10 +82,8 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "from lxml import html\n", "from pydantic import BaseModel\n", - "from typing import Any, Optional\n", + "from typing import Any\n", "from unstructured.partition.pdf import partition_pdf\n", "\n", "# Path to save images\n", @@ -223,7 +221,7 @@ "outputs": [], "source": [ "# Prompt\n", - "prompt_text = \"\"\"You are an assistant tasked with summarizing tables and text. \\ \n", + "prompt_text = \"\"\"You are an assistant tasked with summarizing tables and text. \\\n", "Give a concise summary of the table or text. Table or text chunk: {element} \"\"\"\n", "prompt = ChatPromptTemplate.from_template(prompt_text)\n", "\n", @@ -312,7 +310,7 @@ " # Execute the command and save the output to the defined output file\n", " /Users/rlm/Desktop/Code/llama.cpp/bin/llava -m ../models/llava-7b/ggml-model-q5_k.gguf --mmproj ../models/llava-7b/mmproj-model-f16.gguf --temp 0.1 -p \"Describe the image in detail. 
Be specific about graphs, such as bar plots.\" --image \"$img\" > \"$output_file\"\n", "\n", - "done" + "done\n" ] }, { @@ -322,7 +320,8 @@ "metadata": {}, "outputs": [], "source": [ - "import os, glob\n", + "import os\n", + "import glob\n", "\n", "# Get all .txt files in the directory\n", "file_paths = glob.glob(os.path.expanduser(os.path.join(path, \"*.txt\")))\n", @@ -531,7 +530,6 @@ "metadata": {}, "outputs": [], "source": [ - "from operator import itemgetter\n", "from langchain.schema.runnable import RunnablePassthrough\n", "\n", "# Prompt template\n", diff --git a/cookbook/advanced_rag_eval.ipynb b/cookbook/advanced_rag_eval.ipynb index a2de7261091..640c06c9f2e 100644 --- a/cookbook/advanced_rag_eval.ipynb +++ b/cookbook/advanced_rag_eval.ipynb @@ -63,11 +63,13 @@ "\n", "# Load\n", "from langchain.document_loaders import PyPDFLoader\n", + "\n", "loader = PyPDFLoader(path + \"cpi.pdf\")\n", "pdf_pages = loader.load()\n", "\n", "# Split\n", "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "\n", "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n", "all_splits_pypdf = text_splitter.split_documents(pdf_pages)\n", "all_splits_pypdf_texts = [d.page_content for d in all_splits_pypdf]" @@ -132,10 +134,13 @@ "source": [ "from langchain.vectorstores import Chroma\n", "from langchain.embeddings import OpenAIEmbeddings\n", - "baseline = Chroma.from_texts(texts=all_splits_pypdf_texts,\n", - " collection_name=\"baseline\",\n", - " embedding=OpenAIEmbeddings())\n", - "retriever_baseline=baseline.as_retriever()" + "\n", + "baseline = Chroma.from_texts(\n", + " texts=all_splits_pypdf_texts,\n", + " collection_name=\"baseline\",\n", + " embedding=OpenAIEmbeddings(),\n", + ")\n", + "retriever_baseline = baseline.as_retriever()" ] }, { @@ -160,7 +165,7 @@ "from langchain.schema.output_parser import StrOutputParser\n", "\n", "# Prompt\n", - "prompt_text = \"\"\"You are an assistant tasked with summarizing tables and text for 
retrieval. \\ \n", + "prompt_text = \"\"\"You are an assistant tasked with summarizing tables and text for retrieval. \\\n", "These summaries will be embedded and used to retrieve the raw text or table elements. \\\n", "Give a concise summary of the table or text that is well optimized for retrieval. Table or text: {element} \"\"\"\n", "prompt = ChatPromptTemplate.from_template(prompt_text)\n", @@ -169,7 +174,7 @@ "model = ChatOpenAI(temperature=0, model=\"gpt-4\")\n", "summarize_chain = {\"element\": lambda x: x} | prompt | model | StrOutputParser()\n", "\n", - "# Apply to text \n", + "# Apply to text\n", "text_summaries = summarize_chain.batch(texts, {\"max_concurrency\": 5})\n", "\n", "# Apply to tables\n", @@ -192,31 +197,32 @@ "outputs": [], "source": [ "# Image summary chain\n", - "import os, base64, io\n", + "import os\n", + "import base64\n", + "import io\n", "from io import BytesIO\n", "from PIL import Image\n", "from langchain.schema.messages import HumanMessage\n", "\n", - "def encode_image(image_path):\n", - " ''' Getting the base64 string '''\n", - " with open(image_path, \"rb\") as image_file:\n", - " return base64.b64encode(image_file.read()).decode('utf-8') \n", "\n", - "def image_summarize(img_base64,prompt):\n", - " ''' Image summary '''\n", - " chat = ChatOpenAI(model=\"gpt-4-vision-preview\",\n", - " max_tokens=1024)\n", - " \n", + "def encode_image(image_path):\n", + " \"\"\"Getting the base64 string\"\"\"\n", + " with open(image_path, \"rb\") as image_file:\n", + " return base64.b64encode(image_file.read()).decode(\"utf-8\")\n", + "\n", + "\n", + "def image_summarize(img_base64, prompt):\n", + " \"\"\"Image summary\"\"\"\n", + " chat = ChatOpenAI(model=\"gpt-4-vision-preview\", max_tokens=1024)\n", + "\n", " msg = chat.invoke(\n", " [\n", " HumanMessage(\n", " content=[\n", - " {\"type\": \"text\", \"text\":prompt},\n", + " {\"type\": \"text\", \"text\": prompt},\n", " {\n", " \"type\": \"image_url\",\n", - " \"image_url\": {\n", - " \"url\": 
f\"data:image/jpeg;base64,{img_base64}\"\n", - " },\n", + " \"image_url\": {\"url\": f\"data:image/jpeg;base64,{img_base64}\"},\n", " },\n", " ]\n", " )\n", @@ -224,6 +230,7 @@ " )\n", " return msg.content\n", "\n", + "\n", "# Store base64 encoded images\n", "img_base64_list = []\n", "\n", @@ -231,17 +238,17 @@ "image_summaries = []\n", "\n", "# Prompt\n", - "prompt = \"\"\"You are an assistant tasked with summarizing images for retrieval. \\ \n", + "prompt = \"\"\"You are an assistant tasked with summarizing images for retrieval. \\\n", "These summaries will be embedded and used to retrieve the raw image. \\\n", "Give a concise summary of the image that is well optimized for retrieval.\"\"\"\n", "\n", "# Apply to images\n", "for img_file in sorted(os.listdir(path)):\n", - " if img_file.endswith('.jpg'):\n", + " if img_file.endswith(\".jpg\"):\n", " img_path = os.path.join(path, img_file)\n", " base64_image = encode_image(img_path)\n", " img_base64_list.append(base64_image)\n", - " image_summaries.append(image_summarize(base64_image,prompt))" + " image_summaries.append(image_summarize(base64_image, prompt))" ] }, { @@ -267,14 +274,10 @@ "from langchain.schema.document import Document\n", "from langchain.retrievers.multi_vector import MultiVectorRetriever\n", "\n", - "def create_multi_vector_retriever(vectorstore, \n", - " text_summaries, \n", - " texts, \n", - " table_summaries, \n", - " tables, \n", - " image_summaries, \n", - " images):\n", - " \n", + "\n", + "def create_multi_vector_retriever(\n", + " vectorstore, text_summaries, texts, table_summaries, tables, image_summaries, images\n", + "):\n", " # Initialize the storage layer\n", " store = InMemoryStore()\n", " id_key = \"doc_id\"\n", @@ -309,18 +312,22 @@ "\n", " return retriever\n", "\n", + "\n", "# The vectorstore to use to index the summaries\n", - "multi_vector_img = Chroma(collection_name=\"multi_vector_img\", \n", - " embedding_function=OpenAIEmbeddings())\n", + "multi_vector_img = Chroma(\n", + " 
collection_name=\"multi_vector_img\", embedding_function=OpenAIEmbeddings()\n", + ")\n", "\n", "# Create retriever\n", - "retriever_multi_vector_img = create_multi_vector_retriever(multi_vector_img,\n", - " text_summaries,\n", - " texts,\n", - " table_summaries, \n", - " tables, \n", - " image_summaries, \n", - " img_base64_list)" + "retriever_multi_vector_img = create_multi_vector_retriever(\n", + " multi_vector_img,\n", + " text_summaries,\n", + " texts,\n", + " table_summaries,\n", + " tables,\n", + " image_summaries,\n", + " img_base64_list,\n", + ")" ] }, { @@ -330,10 +337,10 @@ "metadata": {}, "outputs": [], "source": [ - "# Testing on retrieval \n", - "query=\"What percentage of CPI is dedicated to Housing, and how does it compare to the combined percentage of Medical Care, Apparel, and Other Goods and Services?\"\n", - "suffix_for_images=\" Include any pie charts, graphs, or tables.\"\n", - "docs = retriever_multi_vector_img.get_relevant_documents(query+suffix_for_images)" + "# Testing on retrieval\n", + "query = \"What percentage of CPI is dedicated to Housing, and how does it compare to the combined percentage of Medical Care, Apparel, and Other Goods and Services?\"\n", + "suffix_for_images = \" Include any pie charts, graphs, or tables.\"\n", + "docs = retriever_multi_vector_img.get_relevant_documents(query + suffix_for_images)" ] }, { @@ -357,14 +364,16 @@ ], "source": [ "from IPython.display import display, HTML\n", - "def plt_img_base64(img_base64):\n", "\n", + "\n", + "def plt_img_base64(img_base64):\n", " # Create an HTML img tag with the base64 string as the source\n", " image_html = f''\n", - " \n", + "\n", " # Display the image by rendering the HTML\n", " display(HTML(image_html))\n", "\n", + "\n", "plt_img_base64(docs[1])" ] }, @@ -386,17 +395,20 @@ "outputs": [], "source": [ "# The vectorstore to use to index the summaries\n", - "multi_vector_text = Chroma(collection_name=\"multi_vector_text\", \n", - " 
embedding_function=OpenAIEmbeddings())\n", + "multi_vector_text = Chroma(\n", + " collection_name=\"multi_vector_text\", embedding_function=OpenAIEmbeddings()\n", + ")\n", "\n", "# Create retriever\n", - "retriever_multi_vector_img_summary = create_multi_vector_retriever(multi_vector_text,\n", - " text_summaries,\n", - " texts,\n", - " table_summaries, \n", - " tables, \n", - " image_summaries, \n", - " image_summaries)" + "retriever_multi_vector_img_summary = create_multi_vector_retriever(\n", + " multi_vector_text,\n", + " text_summaries,\n", + " texts,\n", + " table_summaries,\n", + " tables,\n", + " image_summaries,\n", + " image_summaries,\n", + ")" ] }, { @@ -418,14 +430,17 @@ "\n", "# Create chroma w/ multi-modal embeddings\n", "multimodal_embd = Chroma(\n", - " collection_name=\"multimodal_embd\",\n", - " embedding_function=OpenCLIPEmbeddings()\n", + " collection_name=\"multimodal_embd\", embedding_function=OpenCLIPEmbeddings()\n", ")\n", "\n", "# Get image URIs\n", - "image_uris = sorted([os.path.join(path, image_name) \n", - " for image_name in os.listdir(path) \n", - " if image_name.endswith('.jpg')])\n", + "image_uris = sorted(\n", + " [\n", + " os.path.join(path, image_name)\n", + " for image_name in os.listdir(path)\n", + " if image_name.endswith(\".jpg\")\n", + " ]\n", + ")\n", "\n", "# Add images and documents\n", "if image_uris:\n", @@ -435,7 +450,7 @@ "if tables:\n", " multimodal_embd.add_texts(texts=tables)\n", "\n", - "# Make retriever \n", + "# Make retriever\n", "retriever_multimodal_embd = multimodal_embd.as_retriever()" ] }, @@ -466,14 +481,14 @@ "\"\"\"\n", "rag_prompt_text = ChatPromptTemplate.from_template(template)\n", "\n", - "# Build \n", + "\n", + "# Build\n", "def text_rag_chain(retriever):\n", - " \n", - " ''' RAG chain '''\n", + " \"\"\"RAG chain\"\"\"\n", "\n", " # LLM\n", " model = ChatOpenAI(temperature=0, model=\"gpt-4\")\n", - " \n", + "\n", " # RAG pipeline\n", " chain = (\n", " {\"context\": retriever, \"question\": 
RunnablePassthrough()}\n", @@ -500,13 +515,15 @@ "metadata": {}, "outputs": [], "source": [ - "import re \n", + "import re\n", "from langchain.schema import Document\n", "from langchain.schema.runnable import RunnableLambda\n", "\n", + "\n", "def looks_like_base64(sb):\n", " \"\"\"Check if the string looks like base64.\"\"\"\n", - " return re.match('^[A-Za-z0-9+/]+[=]{0,2}$', sb) is not None\n", + " return re.match(\"^[A-Za-z0-9+/]+[=]{0,2}$\", sb) is not None\n", + "\n", "\n", "def is_image_data(b64data):\n", " \"\"\"Check if the base64 data is an image by looking at the start of the data.\"\"\"\n", @@ -514,7 +531,7 @@ " b\"\\xFF\\xD8\\xFF\": \"jpg\",\n", " b\"\\x89\\x50\\x4E\\x47\\x0D\\x0A\\x1A\\x0A\": \"png\",\n", " b\"\\x47\\x49\\x46\\x38\": \"gif\",\n", - " b\"\\x52\\x49\\x46\\x46\": \"webp\"\n", + " b\"\\x52\\x49\\x46\\x46\": \"webp\",\n", " }\n", " try:\n", " header = base64.b64decode(b64data)[:8] # Decode and get the first 8 bytes\n", @@ -525,6 +542,7 @@ " except Exception:\n", " return False\n", "\n", + "\n", "def split_image_text_types(docs):\n", " \"\"\"Split base64-encoded images and texts.\"\"\"\n", " b64_images = []\n", @@ -539,6 +557,7 @@ " texts.append(doc)\n", " return {\"images\": b64_images, \"texts\": texts}\n", "\n", + "\n", "def img_prompt_func(data_dict):\n", " # Joining the context texts into a single string\n", " formatted_texts = \"\\n\".join(data_dict[\"context\"][\"texts\"])\n", @@ -550,7 +569,7 @@ " \"type\": \"image_url\",\n", " \"image_url\": {\n", " \"url\": f\"data:image/jpeg;base64,{data_dict['context']['images'][0]}\"\n", - " }\n", + " },\n", " }\n", " messages.append(image_message)\n", "\n", @@ -563,22 +582,24 @@ " f\"User-provided question / keywords: {data_dict['question']}\\n\\n\"\n", " \"Text and / or tables:\\n\"\n", " f\"{formatted_texts}\"\n", - " )\n", + " ),\n", " }\n", " messages.append(text_message)\n", " return [HumanMessage(content=messages)]\n", "\n", + "\n", "def multi_modal_rag_chain(retriever):\n", - " ''' 
Multi-modal RAG chain '''\n", + " \"\"\"Multi-modal RAG chain\"\"\"\n", "\n", " # Multi-modal LLM\n", - " model = ChatOpenAI(temperature=0, \n", - " model=\"gpt-4-vision-preview\", \n", - " max_tokens=1024)\n", - " \n", + " model = ChatOpenAI(temperature=0, model=\"gpt-4-vision-preview\", max_tokens=1024)\n", + "\n", " # RAG pipeline\n", " chain = (\n", - " {\"context\": retriever | RunnableLambda(split_image_text_types), \"question\": RunnablePassthrough()}\n", + " {\n", + " \"context\": retriever | RunnableLambda(split_image_text_types),\n", + " \"question\": RunnablePassthrough(),\n", + " }\n", " | RunnableLambda(img_prompt_func)\n", " | model\n", " | StrOutputParser()\n", @@ -603,12 +624,12 @@ "outputs": [], "source": [ "# RAG chains\n", - "chain_baseline=text_rag_chain(retriever_baseline)\n", - "chain_mv_text=text_rag_chain(retriever_multi_vector_img_summary)\n", + "chain_baseline = text_rag_chain(retriever_baseline)\n", + "chain_mv_text = text_rag_chain(retriever_multi_vector_img_summary)\n", "\n", "# Multi-modal RAG chains\n", - "chain_multimodal_mv_img=multi_modal_rag_chain(retriever_multi_vector_img)\n", - "chain_multimodal_embd=multi_modal_rag_chain(retriever_multimodal_embd)" + "chain_multimodal_mv_img = multi_modal_rag_chain(retriever_multi_vector_img)\n", + "chain_multimodal_embd = multi_modal_rag_chain(retriever_multimodal_embd)" ] }, { @@ -694,7 +715,8 @@ "source": [ "# Read\n", "import pandas as pd\n", - "eval_set = pd.read_csv(path+'cpi_eval.csv')\n", + "\n", + "eval_set = pd.read_csv(path + \"cpi_eval.csv\")\n", "eval_set.head(3)" ] }, @@ -715,12 +737,12 @@ "# Populate dataset\n", "for _, row in eval_set.iterrows():\n", " # Get Q, A\n", - " q = row['Question']\n", - " a = row['Answer']\n", + " q = row[\"Question\"]\n", + " a = row[\"Answer\"]\n", " # Use the values in your function\n", - " client.create_example(inputs={\"question\": q}, \n", - " outputs={\"answer\": a}, \n", - " dataset_id=dataset.id)" + " client.create_example(\n", + " 
inputs={\"question\": q}, outputs={\"answer\": a}, dataset_id=dataset.id\n", + " )" ] }, { @@ -764,17 +786,22 @@ " evaluators=[\"qa\"],\n", ")\n", "\n", - "def run_eval(chain,run_name,dataset_name):\n", + "\n", + "def run_eval(chain, run_name, dataset_name):\n", " _ = client.run_on_dataset(\n", " dataset_name=dataset_name,\n", - " llm_or_chain_factory=lambda: (lambda x: x[\"question\"]+suffix_for_images) | chain,\n", + " llm_or_chain_factory=lambda: (lambda x: x[\"question\"] + suffix_for_images)\n", + " | chain,\n", " evaluation=eval_config,\n", " project_name=run_name,\n", " )\n", "\n", - "for chain, run in zip([chain_baseline, chain_mv_text, chain_multimodal_mv_img, chain_multimodal_embd], \n", - " [\"baseline\", \"mv_text\", \"mv_img\", \"mm_embd\"]):\n", - " run_eval(chain, dataset_name+\"-\"+run, dataset_name)" + "\n", + "for chain, run in zip(\n", + " [chain_baseline, chain_mv_text, chain_multimodal_mv_img, chain_multimodal_embd],\n", + " [\"baseline\", \"mv_text\", \"mv_img\", \"mm_embd\"],\n", + "):\n", + " run_eval(chain, dataset_name + \"-\" + run, dataset_name)" ] } ], diff --git a/cookbook/autogpt/marathon_times.ipynb b/cookbook/autogpt/marathon_times.ipynb index aaaa3a64517..04d1219b4fc 100644 --- a/cookbook/autogpt/marathon_times.ipynb +++ b/cookbook/autogpt/marathon_times.ipynb @@ -314,7 +314,6 @@ "from langchain.vectorstores import FAISS\n", "from langchain.docstore import InMemoryDocstore\n", "from langchain.embeddings import OpenAIEmbeddings\n", - "from langchain.tools.human.tool import HumanInputRun\n", "\n", "embeddings_model = OpenAIEmbeddings()\n", "embedding_size = 1536\n", diff --git a/cookbook/baby_agi.ipynb b/cookbook/baby_agi.ipynb index c30d2fceebf..84309be2baa 100644 --- a/cookbook/baby_agi.ipynb +++ b/cookbook/baby_agi.ipynb @@ -29,16 +29,10 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", - "from collections import deque\n", - "from typing import Dict, List, Optional, Any\n", + "from typing import Optional\n", "\n", - 
"from langchain.chains import LLMChain\nfrom langchain.llms import OpenAI\nfrom langchain.prompts import PromptTemplate\n", + "from langchain.llms import OpenAI\n", "from langchain.embeddings import OpenAIEmbeddings\n", - "from langchain.llms import BaseLLM\n", - "from langchain.schema.vectorstore import VectorStore\n", - "from pydantic import BaseModel, Field\n", - "from langchain.chains.base import Chain\n", "from langchain_experimental.autonomous_agents import BabyAGI" ] }, diff --git a/cookbook/baby_agi_with_agent.ipynb b/cookbook/baby_agi_with_agent.ipynb index b3cc5db10a6..8636cb9b906 100644 --- a/cookbook/baby_agi_with_agent.ipynb +++ b/cookbook/baby_agi_with_agent.ipynb @@ -25,16 +25,12 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", - "from collections import deque\n", - "from typing import Dict, List, Optional, Any\n", + "from typing import Optional\n", "\n", - "from langchain.chains import LLMChain\nfrom langchain.llms import OpenAI\nfrom langchain.prompts import PromptTemplate\n", + "from langchain.chains import LLMChain\n", + "from langchain.llms import OpenAI\n", + "from langchain.prompts import PromptTemplate\n", "from langchain.embeddings import OpenAIEmbeddings\n", - "from langchain.llms import BaseLLM\n", - "from langchain.schema.vectorstore import VectorStore\n", - "from pydantic import BaseModel, Field\n", - "from langchain.chains.base import Chain\n", "from langchain_experimental.autonomous_agents import BabyAGI" ] }, @@ -111,7 +107,9 @@ "outputs": [], "source": [ "from langchain.agents import ZeroShotAgent, Tool, AgentExecutor\n", - "from langchain.llms import OpenAI\nfrom langchain.utilities import SerpAPIWrapper\nfrom langchain.chains import LLMChain\n", + "from langchain.llms import OpenAI\n", + "from langchain.utilities import SerpAPIWrapper\n", + "from langchain.chains import LLMChain\n", "\n", "todo_prompt = PromptTemplate.from_template(\n", " \"You are a planner who is an expert at coming up with a todo list for a given 
objective. Come up with a todo list for this objective: {objective}\"\n", diff --git a/cookbook/code-analysis-deeplake.ipynb b/cookbook/code-analysis-deeplake.ipynb index 41f22db2f6b..913a286bd98 100644 --- a/cookbook/code-analysis-deeplake.ipynb +++ b/cookbook/code-analysis-deeplake.ipynb @@ -177,7 +177,7 @@ " try:\n", " loader = TextLoader(os.path.join(dirpath, file), encoding=\"utf-8\")\n", " docs.extend(loader.load_and_split())\n", - " except Exception as e:\n", + " except Exception:\n", " pass\n", "print(f\"{len(docs)}\")" ] diff --git a/cookbook/custom_agent_with_plugin_retrieval.ipynb b/cookbook/custom_agent_with_plugin_retrieval.ipynb index ba1d12fb849..5b04e21bf3a 100644 --- a/cookbook/custom_agent_with_plugin_retrieval.ipynb +++ b/cookbook/custom_agent_with_plugin_retrieval.ipynb @@ -33,14 +33,14 @@ "outputs": [], "source": [ "from langchain.agents import (\n", - " Tool,\n", " AgentExecutor,\n", " LLMSingleActionAgent,\n", " AgentOutputParser,\n", ")\n", "from langchain.prompts import StringPromptTemplate\n", - "from langchain.llms import OpenAI\nfrom langchain.utilities import SerpAPIWrapper\nfrom langchain.chains import LLMChain\n", - "from typing import List, Union\n", + "from langchain.llms import OpenAI\n", + "from langchain.chains import LLMChain\n", + "from typing import Union\n", "from langchain.schema import AgentAction, AgentFinish\n", "from langchain.agents.agent_toolkits import NLAToolkit\n", "from langchain.tools.plugin import AIPlugin\n", diff --git a/cookbook/custom_agent_with_plugin_retrieval_using_plugnplai.ipynb b/cookbook/custom_agent_with_plugin_retrieval_using_plugnplai.ipynb index 4284fe459e8..6bc5effcbeb 100644 --- a/cookbook/custom_agent_with_plugin_retrieval_using_plugnplai.ipynb +++ b/cookbook/custom_agent_with_plugin_retrieval_using_plugnplai.ipynb @@ -57,14 +57,14 @@ "outputs": [], "source": [ "from langchain.agents import (\n", - " Tool,\n", " AgentExecutor,\n", " LLMSingleActionAgent,\n", " AgentOutputParser,\n", ")\n", "from 
langchain.prompts import StringPromptTemplate\n", - "from langchain.llms import OpenAI\nfrom langchain.utilities import SerpAPIWrapper\nfrom langchain.chains import LLMChain\n", - "from typing import List, Union\n", + "from langchain.llms import OpenAI\n", + "from langchain.chains import LLMChain\n", + "from typing import Union\n", "from langchain.schema import AgentAction, AgentFinish\n", "from langchain.agents.agent_toolkits import NLAToolkit\n", "from langchain.tools.plugin import AIPlugin\n", diff --git a/cookbook/deeplake_semantic_search_over_chat.ipynb b/cookbook/deeplake_semantic_search_over_chat.ipynb index 73e09dba7c4..89f771f9e7e 100644 --- a/cookbook/deeplake_semantic_search_over_chat.ipynb +++ b/cookbook/deeplake_semantic_search_over_chat.ipynb @@ -50,15 +50,13 @@ "source": [ "import os\n", "import getpass\n", - "from langchain.document_loaders import PyPDFLoader, TextLoader\n", "from langchain.embeddings.openai import OpenAIEmbeddings\n", "from langchain.text_splitter import (\n", " RecursiveCharacterTextSplitter,\n", " CharacterTextSplitter,\n", ")\n", "from langchain.vectorstores import DeepLake\n", - "from langchain.chains import ConversationalRetrievalChain, RetrievalQA\n", - "from langchain.chat_models import ChatOpenAI\n", + "from langchain.chains import RetrievalQA\n", "from langchain.llms import OpenAI\n", "\n", "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n", diff --git a/cookbook/elasticsearch_db_qa.ipynb b/cookbook/elasticsearch_db_qa.ipynb index 33c6455d79a..cee48b36937 100644 --- a/cookbook/elasticsearch_db_qa.ipynb +++ b/cookbook/elasticsearch_db_qa.ipynb @@ -112,7 +112,6 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.chains.elasticsearch_database.prompts import DEFAULT_DSL_TEMPLATE\n", "from langchain.prompts.prompt import PromptTemplate\n", "\n", "PROMPT_TEMPLATE = \"\"\"Given an input question, create a syntactically correct Elasticsearch query to run. 
Unless the user specifies in their question a specific number of examples they wish to obtain, always limit your query to at most {top_k} results. You can order the results by a relevant column to return the most interesting examples in the database.\n", diff --git a/cookbook/forward_looking_retrieval_augmented_generation.ipynb b/cookbook/forward_looking_retrieval_augmented_generation.ipynb index bb269f6a738..347fb5e1fec 100644 --- a/cookbook/forward_looking_retrieval_augmented_generation.ipynb +++ b/cookbook/forward_looking_retrieval_augmented_generation.ipynb @@ -56,7 +56,8 @@ "source": [ "import os\n", "\n", - "os.environ[\"SERPER_API_KEY\"] = \"\"os.environ[\"OPENAI_API_KEY\"] = \"\"" + "os.environ[\"SERPER_API_KEY\"] = \"\"\n", + "os.environ[\"OPENAI_API_KEY\"] = \"\"" ] }, { @@ -66,17 +67,12 @@ "metadata": {}, "outputs": [], "source": [ - "import re\n", - "\n", - "import numpy as np\n", - "\n", "from langchain.schema import BaseRetriever\n", "from langchain.callbacks.manager import (\n", " AsyncCallbackManagerForRetrieverRun,\n", " CallbackManagerForRetrieverRun,\n", ")\n", "from langchain.utilities import GoogleSerperAPIWrapper\n", - "from langchain.embeddings import OpenAIEmbeddings\n", "from langchain.chat_models import ChatOpenAI\n", "from langchain.llms import OpenAI\n", "from langchain.schema import Document\n", diff --git a/cookbook/gymnasium_agent_simulation.ipynb b/cookbook/gymnasium_agent_simulation.ipynb index 1feefae5b0e..9f8236b8a32 100644 --- a/cookbook/gymnasium_agent_simulation.ipynb +++ b/cookbook/gymnasium_agent_simulation.ipynb @@ -27,16 +27,11 @@ "metadata": {}, "outputs": [], "source": [ - "import gymnasium as gym\n", - "import inspect\n", "import tenacity\n", "\n", - "from langchain.chat_models import ChatOpenAI\n", "from langchain.schema import (\n", - " AIMessage,\n", " HumanMessage,\n", " SystemMessage,\n", - " BaseMessage,\n", ")\n", "from langchain.output_parsers import RegexParser" ] @@ -131,7 +126,7 @@ " ):\n", " with attempt:\n", 
" action = self._act()\n", - " except tenacity.RetryError as e:\n", + " except tenacity.RetryError:\n", " action = self.random_action()\n", " return action" ] diff --git a/cookbook/meta_prompt.ipynb b/cookbook/meta_prompt.ipynb index c5a532900c6..45da5f957ea 100644 --- a/cookbook/meta_prompt.ipynb +++ b/cookbook/meta_prompt.ipynb @@ -56,7 +56,9 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.llms import OpenAI\nfrom langchain.chains import LLMChain\nfrom langchain.prompts import PromptTemplate\n", + "from langchain.llms import OpenAI\n", + "from langchain.chains import LLMChain\n", + "from langchain.prompts import PromptTemplate\n", "from langchain.memory import ConversationBufferWindowMemory" ] }, @@ -152,13 +154,13 @@ " for j in range(max_iters):\n", " print(f\"(Step {j+1}/{max_iters})\")\n", " print(f\"Assistant: {output}\")\n", - " print(f\"Human: \")\n", + " print(\"Human: \")\n", " human_input = input()\n", " if any(phrase in human_input.lower() for phrase in key_phrases):\n", " break\n", " output = chain.predict(human_input=human_input)\n", " if success_phrase in human_input.lower():\n", - " print(f\"You succeeded! Thanks for playing!\")\n", + " print(\"You succeeded! Thanks for playing!\")\n", " return\n", " meta_chain = initialize_meta_chain()\n", " meta_output = meta_chain.predict(chat_history=get_chat_history(chain.memory))\n", @@ -166,7 +168,7 @@ " instructions = get_new_instructions(meta_output)\n", " print(f\"New Instructions: {instructions}\")\n", " print(\"\\n\" + \"#\" * 80 + \"\\n\")\n", - " print(f\"You failed! Thanks for playing!\")" + " print(\"You failed! 
Thanks for playing!\")" ] }, { diff --git a/cookbook/multi_modal_RAG_chroma.ipynb b/cookbook/multi_modal_RAG_chroma.ipynb index e0c7f81fbb9..372d2ad5b59 100644 --- a/cookbook/multi_modal_RAG_chroma.ipynb +++ b/cookbook/multi_modal_RAG_chroma.ipynb @@ -115,7 +115,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Folder with pdf and extracted images \n", + "# Folder with pdf and extracted images\n", "path = \"/Users/rlm/Desktop/photos/\"" ] }, @@ -128,9 +128,10 @@ "source": [ "# Extract images, tables, and chunk text\n", "from unstructured.partition.pdf import partition_pdf\n", + "\n", "raw_pdf_elements = partition_pdf(\n", " filename=path + \"photos.pdf\",\n", - " extract_images_in_pdf=True, \n", + " extract_images_in_pdf=True,\n", " infer_table_structure=True,\n", " chunking_strategy=\"by_title\",\n", " max_characters=4000,\n", @@ -191,14 +192,17 @@ "\n", "# Create chroma\n", "vectorstore = Chroma(\n", - " collection_name=\"mm_rag_clip_photos\",\n", - " embedding_function=OpenCLIPEmbeddings()\n", + " collection_name=\"mm_rag_clip_photos\", embedding_function=OpenCLIPEmbeddings()\n", ")\n", "\n", "# Get image URIs with .jpg extension only\n", - "image_uris = sorted([os.path.join(path, image_name) \n", - " for image_name in os.listdir(path) \n", - " if image_name.endswith('.jpg')])\n", + "image_uris = sorted(\n", + " [\n", + " os.path.join(path, image_name)\n", + " for image_name in os.listdir(path)\n", + " if image_name.endswith(\".jpg\")\n", + " ]\n", + ")\n", "\n", "# Add images\n", "vectorstore.add_images(uris=image_uris)\n", @@ -206,7 +210,7 @@ "# Add documents\n", "vectorstore.add_texts(texts=texts)\n", "\n", - "# Make retriever \n", + "# Make retriever\n", "retriever = vectorstore.as_retriever()" ] }, @@ -235,6 +239,7 @@ "from io import BytesIO\n", "from PIL import Image\n", "\n", + "\n", "def resize_base64_image(base64_string, size=(128, 128)):\n", " \"\"\"\n", " Resize an image encoded as a Base64 string.\n", @@ -258,30 +263,31 @@ " 
resized_img.save(buffered, format=img.format)\n", "\n", " # Encode the resized image to Base64\n", - " return base64.b64encode(buffered.getvalue()).decode('utf-8')\n", + " return base64.b64encode(buffered.getvalue()).decode(\"utf-8\")\n", + "\n", "\n", "def is_base64(s):\n", - " ''' Check if a string is Base64 encoded '''\n", + " \"\"\"Check if a string is Base64 encoded\"\"\"\n", " try:\n", " return base64.b64encode(base64.b64decode(s)) == s.encode()\n", " except Exception:\n", " return False\n", - " \n", + "\n", + "\n", "def split_image_text_types(docs):\n", - " ''' Split numpy array images and texts '''\n", + " \"\"\"Split numpy array images and texts\"\"\"\n", " images = []\n", " text = []\n", " for doc in docs:\n", - " doc = doc.page_content # Extract Document contents \n", + " doc = doc.page_content # Extract Document contents\n", " if is_base64(doc):\n", " # Resize image to avoid OAI server error\n", - " images.append(resize_base64_image(doc, size=(250, 250))) # base64 encoded str \n", + " images.append(\n", + " resize_base64_image(doc, size=(250, 250))\n", + " ) # base64 encoded str\n", " else:\n", - " text.append(doc) \n", - " return {\n", - " \"images\": images,\n", - " \"texts\": text\n", - " }" + " text.append(doc)\n", + " return {\"images\": images, \"texts\": text}" ] }, { @@ -311,6 +317,7 @@ "from langchain.schema.runnable import RunnablePassthrough, RunnableLambda\n", "from langchain.schema.messages import HumanMessage, SystemMessage\n", "\n", + "\n", "def prompt_func(data_dict):\n", " # Joining the context texts into a single string\n", " formatted_texts = \"\\n\".join(data_dict[\"context\"][\"texts\"])\n", @@ -322,7 +329,7 @@ " \"type\": \"image_url\",\n", " \"image_url\": {\n", " \"url\": f\"data:image/jpeg;base64,{data_dict['context']['images'][0]}\"\n", - " }\n", + " },\n", " }\n", " messages.append(image_message)\n", "\n", @@ -342,17 +349,21 @@ " f\"User-provided keywords: {data_dict['question']}\\n\\n\"\n", " \"Text and / or tables:\\n\"\n", 
" f\"{formatted_texts}\"\n", - " )\n", + " ),\n", " }\n", " messages.append(text_message)\n", "\n", " return [HumanMessage(content=messages)]\n", - " \n", + "\n", + "\n", "model = ChatOpenAI(temperature=0, model=\"gpt-4-vision-preview\", max_tokens=1024)\n", "\n", "# RAG pipeline\n", "chain = (\n", - " {\"context\": retriever | RunnableLambda(split_image_text_types), \"question\": RunnablePassthrough()}\n", + " {\n", + " \"context\": retriever | RunnableLambda(split_image_text_types),\n", + " \"question\": RunnablePassthrough(),\n", + " }\n", " | RunnableLambda(prompt_func)\n", " | model\n", " | StrOutputParser()\n", @@ -412,15 +423,16 @@ "source": [ "from IPython.display import display, HTML\n", "\n", - "def plt_img_base64(img_base64):\n", "\n", + "def plt_img_base64(img_base64):\n", " # Create an HTML img tag with the base64 string as the source\n", " image_html = f''\n", - " \n", + "\n", " # Display the image by rendering the HTML\n", " display(HTML(image_html))\n", "\n", - "docs = retriever.get_relevant_documents(\"Woman with children\",k=10)\n", + "\n", + "docs = retriever.get_relevant_documents(\"Woman with children\", k=10)\n", "for doc in docs:\n", " if is_base64(doc.page_content):\n", " plt_img_base64(doc.page_content)\n", @@ -446,9 +458,7 @@ } ], "source": [ - "chain.invoke(\n", - " \"Woman with children\"\n", - ")" + "chain.invoke(\"Woman with children\")" ] }, { diff --git a/cookbook/multi_player_dnd.ipynb b/cookbook/multi_player_dnd.ipynb index d7119139af4..7921cde9826 100644 --- a/cookbook/multi_player_dnd.ipynb +++ b/cookbook/multi_player_dnd.ipynb @@ -26,13 +26,11 @@ "metadata": {}, "outputs": [], "source": [ - "from typing import List, Dict, Callable\n", + "from typing import List, Callable\n", "from langchain.chat_models import ChatOpenAI\n", "from langchain.schema import (\n", - " AIMessage,\n", " HumanMessage,\n", " SystemMessage,\n", - " BaseMessage,\n", ")" ] }, diff --git a/cookbook/multiagent_authoritarian.ipynb 
b/cookbook/multiagent_authoritarian.ipynb index 65e7e948e84..790cc1cfd57 100644 --- a/cookbook/multiagent_authoritarian.ipynb +++ b/cookbook/multiagent_authoritarian.ipynb @@ -30,23 +30,17 @@ "from collections import OrderedDict\n", "import functools\n", "import random\n", - "import re\n", "import tenacity\n", - "from typing import List, Dict, Callable\n", + "from typing import List, Callable\n", "\n", "from langchain.prompts import (\n", - " ChatPromptTemplate,\n", - " HumanMessagePromptTemplate,\n", " PromptTemplate,\n", ")\n", - "from langchain.chains import LLMChain\n", "from langchain.chat_models import ChatOpenAI\n", "from langchain.output_parsers import RegexParser\n", "from langchain.schema import (\n", - " AIMessage,\n", " HumanMessage,\n", " SystemMessage,\n", - " BaseMessage,\n", ")" ] }, diff --git a/cookbook/multiagent_bidding.ipynb b/cookbook/multiagent_bidding.ipynb index 200a056f83a..49a2ab5c262 100644 --- a/cookbook/multiagent_bidding.ipynb +++ b/cookbook/multiagent_bidding.ipynb @@ -25,16 +25,13 @@ "outputs": [], "source": [ "from langchain.prompts import PromptTemplate\n", - "import re\n", "import tenacity\n", - "from typing import List, Dict, Callable\n", + "from typing import List, Callable\n", "from langchain.chat_models import ChatOpenAI\n", "from langchain.output_parsers import RegexParser\n", "from langchain.schema import (\n", - " AIMessage,\n", " HumanMessage,\n", " SystemMessage,\n", - " BaseMessage,\n", ")" ] }, diff --git a/cookbook/myscale_vector_sql.ipynb b/cookbook/myscale_vector_sql.ipynb index 282ed0a87e3..6d958afae46 100644 --- a/cookbook/myscale_vector_sql.ipynb +++ b/cookbook/myscale_vector_sql.ipynb @@ -29,12 +29,11 @@ "source": [ "from os import environ\n", "import getpass\n", - "from typing import Dict, Any\n", "from langchain.llms import OpenAI\n", "from langchain.utilities import SQLDatabase\n", "from langchain.chains import LLMChain\n", "from langchain_experimental.sql.vector_sql import VectorSQLDatabaseChain\n", - "from 
sqlalchemy import create_engine, Column, MetaData\n", + "from sqlalchemy import create_engine, MetaData\n", "from langchain.prompts import PromptTemplate\n", "\n", "\n", diff --git a/cookbook/petting_zoo.ipynb b/cookbook/petting_zoo.ipynb index d706815d2c0..5091fe1980f 100644 --- a/cookbook/petting_zoo.ipynb +++ b/cookbook/petting_zoo.ipynb @@ -146,7 +146,7 @@ " ):\n", " with attempt:\n", " action = self._act()\n", - " except tenacity.RetryError as e:\n", + " except tenacity.RetryError:\n", " action = self.random_action()\n", " return action" ] diff --git a/cookbook/qianfan_baidu_elasticesearch_RAG.ipynb b/cookbook/qianfan_baidu_elasticesearch_RAG.ipynb index b6dac21d2b6..59af499212a 100644 --- a/cookbook/qianfan_baidu_elasticesearch_RAG.ipynb +++ b/cookbook/qianfan_baidu_elasticesearch_RAG.ipynb @@ -82,7 +82,9 @@ "secret_access_key = \"your bos access sk\"\n", "\n", "# create BceClientConfiguration\n", - "config = BceClientConfiguration(credentials=BceCredentials(access_key_id, secret_access_key), endpoint = bos_host)\n", + "config = BceClientConfiguration(\n", + " credentials=BceCredentials(access_key_id, secret_access_key), endpoint=bos_host\n", + ")\n", "\n", "loader = BaiduBOSDirectoryLoader(conf=config, bucket=\"llm-test\", prefix=\"llm/\")\n", "documents = loader.load()\n", @@ -109,10 +111,14 @@ "embeddings.client = sentence_transformers.SentenceTransformer(embeddings.model_name)\n", "\n", "db = BESVectorStore.from_documents(\n", - " documents=split_docs, embedding=embeddings, bes_url=\"your bes url\", index_name='test-index', vector_query_field='vector'\n", - " )\n", + " documents=split_docs,\n", + " embedding=embeddings,\n", + " bes_url=\"your bes url\",\n", + " index_name=\"test-index\",\n", + " vector_query_field=\"vector\",\n", + ")\n", "\n", - "db.client.indices.refresh(index='test-index')\n", + "db.client.indices.refresh(index=\"test-index\")\n", "retriever = db.as_retriever()" ] }, @@ -130,8 +136,15 @@ "metadata": {}, "outputs": [], "source": [ - 
"llm = QianfanLLMEndpoint(model=\"ERNIE-Bot\", qianfan_ak='your qianfan ak', qianfan_sk='your qianfan sk', streaming=True)\n", - "qa = RetrievalQA.from_chain_type(llm=llm, chain_type=\"refine\", retriever=retriever, return_source_documents=True)\n", + "llm = QianfanLLMEndpoint(\n", + " model=\"ERNIE-Bot\",\n", + " qianfan_ak=\"your qianfan ak\",\n", + " qianfan_sk=\"your qianfan sk\",\n", + " streaming=True,\n", + ")\n", + "qa = RetrievalQA.from_chain_type(\n", + " llm=llm, chain_type=\"refine\", retriever=retriever, return_source_documents=True\n", + ")\n", "\n", "query = \"什么是张量?\"\n", "print(qa.run(query))" diff --git a/cookbook/rag_fusion.ipynb b/cookbook/rag_fusion.ipynb index 388e37026be..c47b95add6c 100644 --- a/cookbook/rag_fusion.ipynb +++ b/cookbook/rag_fusion.ipynb @@ -87,7 +87,6 @@ "outputs": [], "source": [ "from langchain.chat_models import ChatOpenAI\n", - "from langchain.prompts import ChatPromptTemplate\n", "from langchain.schema.output_parser import StrOutputParser" ] }, diff --git a/cookbook/rewrite.ipynb b/cookbook/rewrite.ipynb index 3bd2968a28b..3099ef28269 100644 --- a/cookbook/rewrite.ipynb +++ b/cookbook/rewrite.ipynb @@ -31,12 +31,10 @@ "metadata": {}, "outputs": [], "source": [ - "from operator import itemgetter\n", - "\n", "from langchain.prompts import ChatPromptTemplate\n", "from langchain.chat_models import ChatOpenAI\n", "from langchain.schema.output_parser import StrOutputParser\n", - "from langchain.schema.runnable import RunnablePassthrough, RunnableLambda\n", + "from langchain.schema.runnable import RunnablePassthrough\n", "from langchain.utilities import DuckDuckGoSearchAPIWrapper" ] }, diff --git a/cookbook/selecting_llms_based_on_context_length.ipynb b/cookbook/selecting_llms_based_on_context_length.ipynb index 7637983c9bd..7a58a6518f1 100644 --- a/cookbook/selecting_llms_based_on_context_length.ipynb +++ b/cookbook/selecting_llms_based_on_context_length.ipynb @@ -19,10 +19,8 @@ "source": [ "from langchain.prompts import 
PromptTemplate\n", "from langchain.schema.prompt import PromptValue\n", - "from langchain.schema.messages import BaseMessage\n", "from langchain.chat_models import ChatOpenAI\n", - "from langchain.schema.output_parser import StrOutputParser\n", - "from typing import Union, Sequence" + "from langchain.schema.output_parser import StrOutputParser" ] }, { diff --git a/cookbook/self_query_hotel_search.ipynb b/cookbook/self_query_hotel_search.ipynb index 58c15430eaf..5a84a02f999 100644 --- a/cookbook/self_query_hotel_search.ipynb +++ b/cookbook/self_query_hotel_search.ipynb @@ -1084,7 +1084,6 @@ "outputs": [], "source": [ "from langchain.embeddings import OpenAIEmbeddings\n", - "from langchain.schema import Document\n", "from langchain.vectorstores import ElasticsearchStore\n", "\n", "embeddings = OpenAIEmbeddings()" diff --git a/cookbook/twitter-the-algorithm-analysis-deeplake.ipynb b/cookbook/twitter-the-algorithm-analysis-deeplake.ipynb index c02c0646a77..1f120fa3087 100644 --- a/cookbook/twitter-the-algorithm-analysis-deeplake.ipynb +++ b/cookbook/twitter-the-algorithm-analysis-deeplake.ipynb @@ -118,7 +118,7 @@ " try:\n", " loader = TextLoader(os.path.join(dirpath, file), encoding=\"utf-8\")\n", " docs.extend(loader.load_and_split())\n", - " except Exception as e:\n", + " except Exception:\n", " pass" ] }, diff --git a/cookbook/two_agent_debate_tools.ipynb b/cookbook/two_agent_debate_tools.ipynb index c78b7406a55..ae7f1c11116 100644 --- a/cookbook/two_agent_debate_tools.ipynb +++ b/cookbook/two_agent_debate_tools.ipynb @@ -22,17 +22,13 @@ "metadata": {}, "outputs": [], "source": [ - "from typing import List, Dict, Callable\n", - "from langchain.chains import ConversationChain\n", + "from typing import List, Callable\n", "from langchain.chat_models import ChatOpenAI\n", - "from langchain.llms import OpenAI\n", "from langchain.memory import ConversationBufferMemory\n", - "from langchain.prompts.prompt import PromptTemplate\n", "from langchain.schema import (\n", " 
AIMessage,\n", " HumanMessage,\n", " SystemMessage,\n", - " BaseMessage,\n", ")" ] }, @@ -49,7 +45,6 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.agents import Tool\n", "from langchain.agents import initialize_agent\n", "from langchain.agents import AgentType\n", "from langchain.agents import load_tools" diff --git a/cookbook/two_player_dnd.ipynb b/cookbook/two_player_dnd.ipynb index d109f63fe8d..ab44b519859 100644 --- a/cookbook/two_player_dnd.ipynb +++ b/cookbook/two_player_dnd.ipynb @@ -22,7 +22,7 @@ "metadata": {}, "outputs": [], "source": [ - "from typing import List, Dict, Callable\n", + "from typing import List, Callable\n", "from langchain.chat_models import ChatOpenAI\n", "from langchain.schema import (\n", " HumanMessage,\n", diff --git a/cookbook/wikibase_agent.ipynb b/cookbook/wikibase_agent.ipynb index 5d7d39338d6..c2f3b196104 100644 --- a/cookbook/wikibase_agent.ipynb +++ b/cookbook/wikibase_agent.ipynb @@ -396,7 +396,7 @@ " AgentOutputParser,\n", ")\n", "from langchain.prompts import StringPromptTemplate\n", - "from langchain.llms import OpenAI\nfrom langchain.chains import LLMChain\n", + "from langchain.chains import LLMChain\n", "from typing import List, Union\n", "from langchain.schema import AgentAction, AgentFinish\n", "import re" diff --git a/docs/docs/expression_language/cookbook/code_writing.ipynb b/docs/docs/expression_language/cookbook/code_writing.ipynb index 1b1f2d2fa29..005043629fe 100644 --- a/docs/docs/expression_language/cookbook/code_writing.ipynb +++ b/docs/docs/expression_language/cookbook/code_writing.ipynb @@ -20,8 +20,6 @@ "from langchain.chat_models import ChatOpenAI\n", "from langchain.prompts import (\n", " ChatPromptTemplate,\n", - " SystemMessagePromptTemplate,\n", - " HumanMessagePromptTemplate,\n", ")\n", "from langchain.schema.output_parser import StrOutputParser\n", "from langchain_experimental.utilities import PythonREPL" diff --git a/docs/docs/expression_language/cookbook/multiple_chains.ipynb 
b/docs/docs/expression_language/cookbook/multiple_chains.ipynb index d92d37588a8..c9713ac11ab 100644 --- a/docs/docs/expression_language/cookbook/multiple_chains.ipynb +++ b/docs/docs/expression_language/cookbook/multiple_chains.ipynb @@ -69,7 +69,7 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.schema.runnable import RunnableMap, RunnablePassthrough\n", + "from langchain.schema.runnable import RunnablePassthrough\n", "\n", "prompt1 = ChatPromptTemplate.from_template(\n", " \"generate a {attribute} color. Return the name of the color and nothing else:\"\n", diff --git a/docs/docs/guides/evaluation/string/json.ipynb b/docs/docs/guides/evaluation/string/json.ipynb index a669e6302b0..4db1284482e 100644 --- a/docs/docs/guides/evaluation/string/json.ipynb +++ b/docs/docs/guides/evaluation/string/json.ipynb @@ -33,7 +33,7 @@ } ], "source": [ - "from langchain.evaluation import JsonValidityEvaluator, load_evaluator\n", + "from langchain.evaluation import JsonValidityEvaluator\n", "\n", "evaluator = JsonValidityEvaluator()\n", "# Equivalently\n", diff --git a/docs/docs/guides/evaluation/trajectory/trajectory_eval.ipynb b/docs/docs/guides/evaluation/trajectory/trajectory_eval.ipynb index cba5cebb905..7519f8dfec7 100644 --- a/docs/docs/guides/evaluation/trajectory/trajectory_eval.ipynb +++ b/docs/docs/guides/evaluation/trajectory/trajectory_eval.ipynb @@ -1,305 +1,304 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "6e5ea1a1-7e74-459b-bf14-688f87d09124", - "metadata": { - "tags": [] - }, - "source": [ - "# Agent Trajectory\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/guides/evaluation/trajectory/trajectory_eval.ipynb)\n", - "\n", - "Agents can be difficult to holistically evaluate due to the breadth of actions and generation they can make. We recommend using multiple evaluation techniques appropriate to your use case. 
One way to evaluate an agent is to look at the whole trajectory of actions taken along with their responses.\n", - "\n", - "Evaluators that do this can implement the `AgentTrajectoryEvaluator` interface. This walkthrough will show how to use the `trajectory` evaluator to grade an OpenAI functions agent.\n", - "\n", - "For more information, check out the reference docs for the [TrajectoryEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain) for more info." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "149402da-5212-43e2-b7c0-a701727f5293", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from langchain.evaluation import load_evaluator\n", - "\n", - "evaluator = load_evaluator(\"trajectory\")" - ] - }, - { - "cell_type": "markdown", - "id": "b1c64c1a", - "metadata": {}, - "source": [ - "## Methods\n", - "\n", - "\n", - "The Agent Trajectory Evaluators are used with the [evaluate_agent_trajectory](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.evaluate_agent_trajectory) (and async [aevaluate_agent_trajectory](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.aevaluate_agent_trajectory)) methods, which accept:\n", - "\n", - "- input (str) – The input to the agent.\n", - "- prediction (str) – The final predicted response.\n", - "- agent_trajectory (List[Tuple[AgentAction, str]]) – The intermediate steps forming the agent trajectory\n", - "\n", - "They return a dictionary with the following values:\n", - "- score: Float from 0 to 1, where 1 would mean \"most effective\" and 0 
would mean \"least effective\"\n", - "- reasoning: String \"chain of thought reasoning\" from the LLM generated prior to creating the score" - ] - }, - { - "cell_type": "markdown", - "id": "e733562c-4c17-4942-9647-acfc5ebfaca2", - "metadata": {}, - "source": [ - "## Capturing Trajectory\n", - "\n", - "The easiest way to return an agent's trajectory (without using tracing callbacks like those in LangSmith) for evaluation is to initialize the agent with `return_intermediate_steps=True`.\n", - "\n", - "Below, create an example agent we will call to evaluate." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "451cb0cb-6f42-4abd-aa6d-fb871fce034d", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import os\n", - "import subprocess\n", - "\n", - "from langchain.chat_models import ChatOpenAI\n", - "from langchain.tools import tool\n", - "from langchain.agents import AgentType, initialize_agent\n", - "\n", - "from pydantic import HttpUrl\n", - "from urllib.parse import urlparse\n", - "\n", - "\n", - "@tool\n", - "def ping(url: HttpUrl, return_error: bool) -> str:\n", - " \"\"\"Ping the fully specified url. Must include https:// in the url.\"\"\"\n", - " hostname = urlparse(str(url)).netloc\n", - " completed_process = subprocess.run(\n", - " [\"ping\", \"-c\", \"1\", hostname], capture_output=True, text=True\n", - " )\n", - " output = completed_process.stdout\n", - " if return_error and completed_process.returncode != 0:\n", - " return completed_process.stderr\n", - " return output\n", - "\n", - "\n", - "@tool\n", - "def trace_route(url: HttpUrl, return_error: bool) -> str:\n", - " \"\"\"Trace the route to the specified url. 
Must include https:// in the url.\"\"\"\n", - " hostname = urlparse(str(url)).netloc\n", - " completed_process = subprocess.run(\n", - " [\"traceroute\", hostname], capture_output=True, text=True\n", - " )\n", - " output = completed_process.stdout\n", - " if return_error and completed_process.returncode != 0:\n", - " return completed_process.stderr\n", - " return output\n", - "\n", - "\n", - "llm = ChatOpenAI(model=\"gpt-3.5-turbo-0613\", temperature=0)\n", - "agent = initialize_agent(\n", - " llm=llm,\n", - " tools=[ping, trace_route],\n", - " agent=AgentType.OPENAI_MULTI_FUNCTIONS,\n", - " return_intermediate_steps=True, # IMPORTANT!\n", - ")\n", - "\n", - "result = agent(\"What's the latency like for https://langchain.com?\")" - ] - }, - { - "cell_type": "markdown", - "id": "2df34eed-45a5-4f91-88d3-9aa55f28391a", - "metadata": { - "tags": [] - }, - "source": [ - "## Evaluate Trajectory\n", - "\n", - "Pass the input, trajectory, and pass to the [evaluate_agent_trajectory](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.schema.AgentTrajectoryEvaluator.html#langchain.evaluation.schema.AgentTrajectoryEvaluator.evaluate_agent_trajectory) method." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "8d2c8703-98ed-4068-8a8b-393f0f1f64ea", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'score': 1.0,\n", - " 'reasoning': \"i. The final answer is helpful. It directly answers the user's question about the latency for the website https://langchain.com.\\n\\nii. The AI language model uses a logical sequence of tools to answer the question. It uses the 'ping' tool to measure the latency of the website, which is the correct tool for this task.\\n\\niii. The AI language model uses the tool in a helpful way. It inputs the URL into the 'ping' tool and correctly interprets the output to provide the latency in milliseconds.\\n\\niv. 
The AI language model does not use too many steps to answer the question. It only uses one step, which is appropriate for this type of question.\\n\\nv. The appropriate tool is used to answer the question. The 'ping' tool is the correct tool to measure website latency.\\n\\nGiven these considerations, the AI language model's performance is excellent. It uses the correct tool, interprets the output correctly, and provides a helpful and direct answer to the user's question.\"}" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "evaluation_result = evaluator.evaluate_agent_trajectory(\n", - " prediction=result[\"output\"],\n", - " input=result[\"input\"],\n", - " agent_trajectory=result[\"intermediate_steps\"],\n", - ")\n", - "evaluation_result" - ] - }, - { - "cell_type": "markdown", - "id": "fc5467c1-ea92-405f-949a-3011388fa9ee", - "metadata": {}, - "source": [ - "## Configuring the Evaluation LLM\n", - "\n", - "If you don't select an LLM to use for evaluation, the [load_evaluator](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.loading.load_evaluator.html#langchain.evaluation.loading.load_evaluator) function will use `gpt-4` to power the evaluation chain. You can select any chat model for the agent trajectory evaluator as below." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "1f6318f3-642a-4766-bc7a-f91239795ee7", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# %pip install anthropic\n", - "# ANTHROPIC_API_KEY=" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "b2852289-5df9-402e-95b5-7efebf0fc943", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from langchain.chat_models import ChatAnthropic\n", - "\n", - "eval_llm = ChatAnthropic(temperature=0)\n", - "evaluator = load_evaluator(\"trajectory\", llm=eval_llm)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "ff72d21a-93b9-4c2f-8613-733d9c9330d7", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'score': 1.0,\n", - " 'reasoning': \"Here is my detailed evaluation of the AI's response:\\n\\ni. The final answer is helpful, as it directly provides the latency measurement for the requested website.\\n\\nii. The sequence of using the ping tool to measure latency is logical for this question.\\n\\niii. The ping tool is used in a helpful way, with the website URL provided as input and the output latency measurement extracted.\\n\\niv. Only one step is used, which is appropriate for simply measuring latency. More steps are not needed.\\n\\nv. The ping tool is an appropriate choice to measure latency. \\n\\nIn summary, the AI uses an optimal single step approach with the right tool and extracts the needed output. 
The final answer directly answers the question in a helpful way.\\n\\nOverall\"}" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "evaluation_result = evaluator.evaluate_agent_trajectory(\n", - " prediction=result[\"output\"],\n", - " input=result[\"input\"],\n", - " agent_trajectory=result[\"intermediate_steps\"],\n", - ")\n", - "evaluation_result" - ] - }, - { - "cell_type": "markdown", - "id": "95ce4240-f5a0-4810-8d09-b2f4c9e18b7f", - "metadata": {}, - "source": [ - "## Providing List of Valid Tools\n", - "\n", - "By default, the evaluator doesn't take into account the tools the agent is permitted to call. You can provide these to the evaluator via the `agent_tools` argument.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "24c10566-2ef5-45c5-9213-a8fb28e2ca1f", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from langchain.evaluation import load_evaluator\n", - "\n", - "evaluator = load_evaluator(\"trajectory\", agent_tools=[ping, trace_route])" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "7b995786-5b78-4d9e-8e8a-1f2a203113e2", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'score': 1.0,\n", - " 'reasoning': \"i. The final answer is helpful. It directly answers the user's question about the latency for the specified website.\\n\\nii. The AI language model uses a logical sequence of tools to answer the question. In this case, only one tool was needed to answer the question, and the model chose the correct one.\\n\\niii. The AI language model uses the tool in a helpful way. The 'ping' tool was used to determine the latency of the website, which was the information the user was seeking.\\n\\niv. The AI language model does not use too many steps to answer the question. Only one step was needed and used.\\n\\nv. The appropriate tool was used to answer the question. 
The 'ping' tool is designed to measure latency, which was the information the user was seeking.\\n\\nGiven these considerations, the AI language model's performance in answering this question is excellent.\"}" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "evaluation_result = evaluator.evaluate_agent_trajectory(\n", - " prediction=result[\"output\"],\n", - " input=result[\"input\"],\n", - " agent_trajectory=result[\"intermediate_steps\"],\n", - ")\n", - "evaluation_result" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.2" - } - }, - "nbformat": 4, - "nbformat_minor": 5 + "cells": [ + { + "cell_type": "markdown", + "id": "6e5ea1a1-7e74-459b-bf14-688f87d09124", + "metadata": { + "tags": [] + }, + "source": [ + "# Agent Trajectory\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/guides/evaluation/trajectory/trajectory_eval.ipynb)\n", + "\n", + "Agents can be difficult to holistically evaluate due to the breadth of actions and generation they can make. We recommend using multiple evaluation techniques appropriate to your use case. One way to evaluate an agent is to look at the whole trajectory of actions taken along with their responses.\n", + "\n", + "Evaluators that do this can implement the `AgentTrajectoryEvaluator` interface. 
This walkthrough will show how to use the `trajectory` evaluator to grade an OpenAI functions agent.\n", + "\n", + "For more information, check out the reference docs for the [TrajectoryEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "149402da-5212-43e2-b7c0-a701727f5293", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.evaluation import load_evaluator\n", + "\n", + "evaluator = load_evaluator(\"trajectory\")" + ] + }, + { + "cell_type": "markdown", + "id": "b1c64c1a", + "metadata": {}, + "source": [ + "## Methods\n", + "\n", + "\n", + "The Agent Trajectory Evaluators are used with the [evaluate_agent_trajectory](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.evaluate_agent_trajectory) (and async [aevaluate_agent_trajectory](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.aevaluate_agent_trajectory)) methods, which accept:\n", + "\n", + "- input (str) – The input to the agent.\n", + "- prediction (str) – The final predicted response.\n", + "- agent_trajectory (List[Tuple[AgentAction, str]]) – The intermediate steps forming the agent trajectory\n", + "\n", + "They return a dictionary with the following values:\n", + "- score: Float from 0 to 1, where 1 would mean \"most effective\" and 0 would mean \"least effective\"\n", + "- reasoning: String \"chain of thought reasoning\" from the LLM generated prior to creating the score" + ] + }, + { + "cell_type": "markdown", + "id": 
"e733562c-4c17-4942-9647-acfc5ebfaca2", + "metadata": {}, + "source": [ + "## Capturing Trajectory\n", + "\n", + "The easiest way to return an agent's trajectory (without using tracing callbacks like those in LangSmith) for evaluation is to initialize the agent with `return_intermediate_steps=True`.\n", + "\n", + "Below, create an example agent we will call to evaluate." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "451cb0cb-6f42-4abd-aa6d-fb871fce034d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import subprocess\n", + "\n", + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.tools import tool\n", + "from langchain.agents import AgentType, initialize_agent\n", + "\n", + "from pydantic import HttpUrl\n", + "from urllib.parse import urlparse\n", + "\n", + "\n", + "@tool\n", + "def ping(url: HttpUrl, return_error: bool) -> str:\n", + " \"\"\"Ping the fully specified url. Must include https:// in the url.\"\"\"\n", + " hostname = urlparse(str(url)).netloc\n", + " completed_process = subprocess.run(\n", + " [\"ping\", \"-c\", \"1\", hostname], capture_output=True, text=True\n", + " )\n", + " output = completed_process.stdout\n", + " if return_error and completed_process.returncode != 0:\n", + " return completed_process.stderr\n", + " return output\n", + "\n", + "\n", + "@tool\n", + "def trace_route(url: HttpUrl, return_error: bool) -> str:\n", + " \"\"\"Trace the route to the specified url. 
Must include https:// in the url.\"\"\"\n", + " hostname = urlparse(str(url)).netloc\n", + " completed_process = subprocess.run(\n", + " [\"traceroute\", hostname], capture_output=True, text=True\n", + " )\n", + " output = completed_process.stdout\n", + " if return_error and completed_process.returncode != 0:\n", + " return completed_process.stderr\n", + " return output\n", + "\n", + "\n", + "llm = ChatOpenAI(model=\"gpt-3.5-turbo-0613\", temperature=0)\n", + "agent = initialize_agent(\n", + " llm=llm,\n", + " tools=[ping, trace_route],\n", + " agent=AgentType.OPENAI_MULTI_FUNCTIONS,\n", + " return_intermediate_steps=True, # IMPORTANT!\n", + ")\n", + "\n", + "result = agent(\"What's the latency like for https://langchain.com?\")" + ] + }, + { + "cell_type": "markdown", + "id": "2df34eed-45a5-4f91-88d3-9aa55f28391a", + "metadata": { + "tags": [] + }, + "source": [ + "## Evaluate Trajectory\n", + "\n", + "Pass the input, trajectory, and prediction to the [evaluate_agent_trajectory](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.schema.AgentTrajectoryEvaluator.html#langchain.evaluation.schema.AgentTrajectoryEvaluator.evaluate_agent_trajectory) method." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8d2c8703-98ed-4068-8a8b-393f0f1f64ea", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'score': 1.0,\n", + " 'reasoning': \"i. The final answer is helpful. It directly answers the user's question about the latency for the website https://langchain.com.\\n\\nii. The AI language model uses a logical sequence of tools to answer the question. It uses the 'ping' tool to measure the latency of the website, which is the correct tool for this task.\\n\\niii. The AI language model uses the tool in a helpful way. It inputs the URL into the 'ping' tool and correctly interprets the output to provide the latency in milliseconds.\\n\\niv. 
The AI language model does not use too many steps to answer the question. It only uses one step, which is appropriate for this type of question.\\n\\nv. The appropriate tool is used to answer the question. The 'ping' tool is the correct tool to measure website latency.\\n\\nGiven these considerations, the AI language model's performance is excellent. It uses the correct tool, interprets the output correctly, and provides a helpful and direct answer to the user's question.\"}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "evaluation_result = evaluator.evaluate_agent_trajectory(\n", + " prediction=result[\"output\"],\n", + " input=result[\"input\"],\n", + " agent_trajectory=result[\"intermediate_steps\"],\n", + ")\n", + "evaluation_result" + ] + }, + { + "cell_type": "markdown", + "id": "fc5467c1-ea92-405f-949a-3011388fa9ee", + "metadata": {}, + "source": [ + "## Configuring the Evaluation LLM\n", + "\n", + "If you don't select an LLM to use for evaluation, the [load_evaluator](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.loading.load_evaluator.html#langchain.evaluation.loading.load_evaluator) function will use `gpt-4` to power the evaluation chain. You can select any chat model for the agent trajectory evaluator as below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "1f6318f3-642a-4766-bc7a-f91239795ee7", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# %pip install anthropic\n", + "# ANTHROPIC_API_KEY=" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b2852289-5df9-402e-95b5-7efebf0fc943", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.chat_models import ChatAnthropic\n", + "\n", + "eval_llm = ChatAnthropic(temperature=0)\n", + "evaluator = load_evaluator(\"trajectory\", llm=eval_llm)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ff72d21a-93b9-4c2f-8613-733d9c9330d7", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'score': 1.0,\n", + " 'reasoning': \"Here is my detailed evaluation of the AI's response:\\n\\ni. The final answer is helpful, as it directly provides the latency measurement for the requested website.\\n\\nii. The sequence of using the ping tool to measure latency is logical for this question.\\n\\niii. The ping tool is used in a helpful way, with the website URL provided as input and the output latency measurement extracted.\\n\\niv. Only one step is used, which is appropriate for simply measuring latency. More steps are not needed.\\n\\nv. The ping tool is an appropriate choice to measure latency. \\n\\nIn summary, the AI uses an optimal single step approach with the right tool and extracts the needed output. 
The final answer directly answers the question in a helpful way.\\n\\nOverall\"}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "evaluation_result = evaluator.evaluate_agent_trajectory(\n", + " prediction=result[\"output\"],\n", + " input=result[\"input\"],\n", + " agent_trajectory=result[\"intermediate_steps\"],\n", + ")\n", + "evaluation_result" + ] + }, + { + "cell_type": "markdown", + "id": "95ce4240-f5a0-4810-8d09-b2f4c9e18b7f", + "metadata": {}, + "source": [ + "## Providing List of Valid Tools\n", + "\n", + "By default, the evaluator doesn't take into account the tools the agent is permitted to call. You can provide these to the evaluator via the `agent_tools` argument.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "24c10566-2ef5-45c5-9213-a8fb28e2ca1f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.evaluation import load_evaluator\n", + "\n", + "evaluator = load_evaluator(\"trajectory\", agent_tools=[ping, trace_route])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7b995786-5b78-4d9e-8e8a-1f2a203113e2", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'score': 1.0,\n", + " 'reasoning': \"i. The final answer is helpful. It directly answers the user's question about the latency for the specified website.\\n\\nii. The AI language model uses a logical sequence of tools to answer the question. In this case, only one tool was needed to answer the question, and the model chose the correct one.\\n\\niii. The AI language model uses the tool in a helpful way. The 'ping' tool was used to determine the latency of the website, which was the information the user was seeking.\\n\\niv. The AI language model does not use too many steps to answer the question. Only one step was needed and used.\\n\\nv. The appropriate tool was used to answer the question. 
The 'ping' tool is designed to measure latency, which was the information the user was seeking.\\n\\nGiven these considerations, the AI language model's performance in answering this question is excellent.\"}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "evaluation_result = evaluator.evaluate_agent_trajectory(\n", + " prediction=result[\"output\"],\n", + " input=result[\"input\"],\n", + " agent_trajectory=result[\"intermediate_steps\"],\n", + ")\n", + "evaluation_result" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } \ No newline at end of file diff --git a/docs/docs/guides/local_llms.ipynb b/docs/docs/guides/local_llms.ipynb index e6aeba52b1e..dc26204223e 100644 --- a/docs/docs/guides/local_llms.ipynb +++ b/docs/docs/guides/local_llms.ipynb @@ -265,10 +265,16 @@ "cell_type": "code", "execution_count": null, "id": "5eba38dc", - "metadata": {}, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, "outputs": [], "source": [ - "CMAKE_ARGS=\"-DLLAMA_METAL=on\" FORCE_CMAKE=1 pip install -U llama-cpp-python --no-cache-dirclear" + "%env CMAKE_ARGS=\"-DLLAMA_METAL=on\"\n", + "%env FORCE_CMAKE=1\n", + "%pip install -U llama-cpp-python --no-cache-dir" ] }, { @@ -379,7 +385,7 @@ "metadata": {}, "outputs": [], "source": [ - "pip install gpt4all" + "pip install gpt4all\n" ] }, { diff --git a/docs/docs/guides/model_laboratory.ipynb b/docs/docs/guides/model_laboratory.ipynb index 3f4614ca9c2..3eb8c46ddfb 100644 --- a/docs/docs/guides/model_laboratory.ipynb +++ b/docs/docs/guides/model_laboratory.ipynb @@ -19,7 
+19,8 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.chains import LLMChain\nfrom langchain.llms import OpenAI, Cohere, HuggingFaceHub\nfrom langchain.prompts import PromptTemplate\n", + "from langchain.llms import OpenAI, Cohere, HuggingFaceHub\n", + "from langchain.prompts import PromptTemplate\n", "from langchain.model_laboratory import ModelLaboratory" ] }, @@ -139,7 +140,8 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.chains import SelfAskWithSearchChain\nfrom langchain.utilities import SerpAPIWrapper\n", + "from langchain.chains import SelfAskWithSearchChain\n", + "from langchain.utilities import SerpAPIWrapper\n", "\n", "open_ai_llm = OpenAI(temperature=0)\n", "search = SerpAPIWrapper()\n", diff --git a/docs/docs/guides/privacy/presidio_data_anonymization/index.ipynb b/docs/docs/guides/privacy/presidio_data_anonymization/index.ipynb index 2310892a11a..b03f5d3ba46 100644 --- a/docs/docs/guides/privacy/presidio_data_anonymization/index.ipynb +++ b/docs/docs/guides/privacy/presidio_data_anonymization/index.ipynb @@ -98,7 +98,7 @@ "metadata": {}, "outputs": [], "source": [ - "text = f\"\"\"Slim Shady recently lost his wallet. \n", + "text = \"\"\"Slim Shady recently lost his wallet. \n", "Inside is some cash and his credit card with the number 4916 0387 9536 0861. 
\n", "If you would find it, please call at 313-666-7440 or write an email here: real.slim.shady@gmail.com.\"\"\"" ] diff --git a/docs/docs/guides/privacy/presidio_data_anonymization/reversible.ipynb b/docs/docs/guides/privacy/presidio_data_anonymization/reversible.ipynb index 41ba4036088..6b49f5aa026 100644 --- a/docs/docs/guides/privacy/presidio_data_anonymization/reversible.ipynb +++ b/docs/docs/guides/privacy/presidio_data_anonymization/reversible.ipynb @@ -1,636 +1,636 @@ { - "cells": [ - { - "cell_type": "raw", - "metadata": {}, - "source": [ - "---\n", - "sidebar_position: 1\n", - "title: Reversible anonymization \n", - "---" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Reversible data anonymization with Microsoft Presidio\n", - "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/guides/privacy/presidio_data_anonymization/reversible.ipynb)\n", - "\n", - "\n", - "## Use case\n", - "\n", - "We have already written about the importance of anonymizing sensitive data in the previous section. **Reversible Anonymization** is an equally essential technology while sharing information with language models, as it balances data protection with data usability. This technique involves masking sensitive personally identifiable information (PII), yet it can be reversed and original data can be restored when authorized users need it. Its main advantage lies in the fact that while it conceals individual identities to prevent misuse, it also allows the concealed data to be accurately unmasked should it be necessary for legal or compliance purposes. \n", - "\n", - "## Overview\n", - "\n", - "We implemented the `PresidioReversibleAnonymizer`, which consists of two parts:\n", - "\n", - "1. 
anonymization - it works the same way as `PresidioAnonymizer`, plus the object itself stores a mapping of made-up values to original ones, for example:\n", - "```\n", - " {\n", - " \"PERSON\": {\n", - " \"\": \"\",\n", - " \"John Doe\": \"Slim Shady\"\n", - " },\n", - " \"PHONE_NUMBER\": {\n", - " \"111-111-1111\": \"555-555-5555\"\n", - " }\n", - " ...\n", - " }\n", - "```\n", - "\n", - "2. deanonymization - using the mapping described above, it matches fake data with original data and then substitutes it.\n", - "\n", - "Between anonymization and deanonymization user can perform different operations, for example, passing the output to LLM.\n", - "\n", - "## Quickstart\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# Install necessary packages\n", - "# ! pip install langchain langchain-experimental openai presidio-analyzer presidio-anonymizer spacy Faker\n", - "# ! python -m spacy download en_core_web_lg" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`PresidioReversibleAnonymizer` is not significantly different from its predecessor (`PresidioAnonymizer`) in terms of anonymization:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'My name is Maria Lynch, call me at 7344131647 or email me at jamesmichael@example.com. 
By the way, my card number is: 4838637940262'" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer\n", - "\n", - "anonymizer = PresidioReversibleAnonymizer(\n", - " analyzed_fields=[\"PERSON\", \"PHONE_NUMBER\", \"EMAIL_ADDRESS\", \"CREDIT_CARD\"],\n", - " # Faker seed is used here to make sure the same fake data is generated for the test purposes\n", - " # In production, it is recommended to remove the faker_seed parameter (it will default to None)\n", - " faker_seed=42,\n", - ")\n", - "\n", - "anonymizer.anonymize(\n", - " \"My name is Slim Shady, call me at 313-666-7440 or email me at real.slim.shady@gmail.com. \"\n", - " \"By the way, my card number is: 4916 0387 9536 0861\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This is what the full string we want to deanonymize looks like:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Maria Lynch recently lost his wallet. \n", - "Inside is some cash and his credit card with the number 4838637940262. \n", - "If you would find it, please call at 7344131647 or write an email here: jamesmichael@example.com.\n", - "Maria Lynch would be very grateful!\n" - ] - } - ], - "source": [ - "# We know this data, as we set the faker_seed parameter\n", - "fake_name = \"Maria Lynch\"\n", - "fake_phone = \"7344131647\"\n", - "fake_email = \"jamesmichael@example.com\"\n", - "fake_credit_card = \"4838637940262\"\n", - "\n", - "anonymized_text = f\"\"\"{fake_name} recently lost his wallet. \n", - "Inside is some cash and his credit card with the number {fake_credit_card}. 
\n", - "If you would find it, please call at {fake_phone} or write an email here: {fake_email}.\n", - "{fake_name} would be very grateful!\"\"\"\n", - "\n", - "print(anonymized_text)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "And now, using the `deanonymize` method, we can reverse the process:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Slim Shady recently lost his wallet. \n", - "Inside is some cash and his credit card with the number 4916 0387 9536 0861. \n", - "If you would find it, please call at 313-666-7440 or write an email here: real.slim.shady@gmail.com.\n", - "Slim Shady would be very grateful!\n" - ] - } - ], - "source": [ - "print(anonymizer.deanonymize(anonymized_text))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Using with LangChain Expression Language\n", - "\n", - "With LCEL we can easily chain together anonymization and deanonymization with the rest of our application. This is an example of using the anonymization mechanism with a query to LLM (without deanonymization for now):" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "text = f\"\"\"Slim Shady recently lost his wallet. \n", - "Inside is some cash and his credit card with the number 4916 0387 9536 0861. \n", - "If you would find it, please call at 313-666-7440 or write an email here: real.slim.shady@gmail.com.\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dear Sir/Madam,\n", - "\n", - "We regret to inform you that Monique Turner has recently misplaced his wallet, which contains a sum of cash and his credit card with the number 213152056829866. 
\n", - "\n", - "If you happen to come across this wallet, kindly contact us at (770)908-7734x2835 or send an email to barbara25@example.net.\n", - "\n", - "Thank you for your cooperation.\n", - "\n", - "Sincerely,\n", - "[Your Name]\n" - ] - } - ], - "source": [ - "from langchain.prompts.prompt import PromptTemplate\n", - "from langchain.chat_models import ChatOpenAI\n", - "\n", - "anonymizer = PresidioReversibleAnonymizer()\n", - "\n", - "template = \"\"\"Rewrite this text into an official, short email:\n", - "\n", - "{anonymized_text}\"\"\"\n", - "prompt = PromptTemplate.from_template(template)\n", - "llm = ChatOpenAI(temperature=0)\n", - "\n", - "chain = {\"anonymized_text\": anonymizer.anonymize} | prompt | llm\n", - "response = chain.invoke(text)\n", - "print(response.content)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, let's add **deanonymization step** to our sequence:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dear Sir/Madam,\n", - "\n", - "We regret to inform you that Slim Shady has recently misplaced his wallet, which contains a sum of cash and his credit card with the number 4916 0387 9536 0861. \n", - "\n", - "If you happen to come across this wallet, kindly contact us at 313-666-7440 or send an email to real.slim.shady@gmail.com.\n", - "\n", - "Thank you for your cooperation.\n", - "\n", - "Sincerely,\n", - "[Your Name]\n" - ] - } - ], - "source": [ - "chain = chain | (lambda ai_message: anonymizer.deanonymize(ai_message.content))\n", - "response = chain.invoke(text)\n", - "print(response)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Anonymized data was given to the model itself, and therefore it was protected from being leaked to the outside world. Then, the model's response was processed, and the factual value was replaced with the real one." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Extra knowledge" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`PresidioReversibleAnonymizer` stores the mapping of the fake values to the original values in the `deanonymizer_mapping` parameter, where key is fake PII and value is the original one: " - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'PERSON': {'Maria Lynch': 'Slim Shady'},\n", - " 'PHONE_NUMBER': {'7344131647': '313-666-7440'},\n", - " 'EMAIL_ADDRESS': {'jamesmichael@example.com': 'real.slim.shady@gmail.com'},\n", - " 'CREDIT_CARD': {'4838637940262': '4916 0387 9536 0861'}}" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer\n", - "\n", - "anonymizer = PresidioReversibleAnonymizer(\n", - " analyzed_fields=[\"PERSON\", \"PHONE_NUMBER\", \"EMAIL_ADDRESS\", \"CREDIT_CARD\"],\n", - " # Faker seed is used here to make sure the same fake data is generated for the test purposes\n", - " # In production, it is recommended to remove the faker_seed parameter (it will default to None)\n", - " faker_seed=42,\n", - ")\n", - "\n", - "anonymizer.anonymize(\n", - " \"My name is Slim Shady, call me at 313-666-7440 or email me at real.slim.shady@gmail.com. \"\n", - " \"By the way, my card number is: 4916 0387 9536 0861\"\n", - ")\n", - "\n", - "anonymizer.deanonymizer_mapping" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Anonymizing more texts will result in new mapping entries:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Do you have his VISA card number? Yep, it's 3537672423884966. 
I'm William Bowman by the way.\n" - ] - }, - { - "data": { - "text/plain": [ - "{'PERSON': {'Maria Lynch': 'Slim Shady', 'William Bowman': 'John Doe'},\n", - " 'PHONE_NUMBER': {'7344131647': '313-666-7440'},\n", - " 'EMAIL_ADDRESS': {'jamesmichael@example.com': 'real.slim.shady@gmail.com'},\n", - " 'CREDIT_CARD': {'4838637940262': '4916 0387 9536 0861',\n", - " '3537672423884966': '4001 9192 5753 7193'}}" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "print(\n", - " anonymizer.anonymize(\n", - " \"Do you have his VISA card number? Yep, it's 4001 9192 5753 7193. I'm John Doe by the way.\"\n", - " )\n", - ")\n", - "\n", - "anonymizer.deanonymizer_mapping" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Thanks to the built-in memory, entities that have already been detected and anonymised will take the same form in subsequent processed texts, so no duplicates will exist in the mapping:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "My VISA card number is 3537672423884966 and my name is William Bowman.\n" - ] - }, - { - "data": { - "text/plain": [ - "{'PERSON': {'Maria Lynch': 'Slim Shady', 'William Bowman': 'John Doe'},\n", - " 'PHONE_NUMBER': {'7344131647': '313-666-7440'},\n", - " 'EMAIL_ADDRESS': {'jamesmichael@example.com': 'real.slim.shady@gmail.com'},\n", - " 'CREDIT_CARD': {'4838637940262': '4916 0387 9536 0861',\n", - " '3537672423884966': '4001 9192 5753 7193'}}" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "print(\n", - " anonymizer.anonymize(\n", - " \"My VISA card number is 4001 9192 5753 7193 and my name is John Doe.\"\n", - " )\n", - ")\n", - "\n", - "anonymizer.deanonymizer_mapping" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can save the mapping itself 
to a file for future use: " - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "# We can save the deanonymizer mapping as a JSON or YAML file\n", - "\n", - "anonymizer.save_deanonymizer_mapping(\"deanonymizer_mapping.json\")\n", - "# anonymizer.save_deanonymizer_mapping(\"deanonymizer_mapping.yaml\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "And then, load it in another `PresidioReversibleAnonymizer` instance:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{}" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "anonymizer = PresidioReversibleAnonymizer()\n", - "\n", - "anonymizer.deanonymizer_mapping" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'PERSON': {'Maria Lynch': 'Slim Shady', 'William Bowman': 'John Doe'},\n", - " 'PHONE_NUMBER': {'7344131647': '313-666-7440'},\n", - " 'EMAIL_ADDRESS': {'jamesmichael@example.com': 'real.slim.shady@gmail.com'},\n", - " 'CREDIT_CARD': {'4838637940262': '4916 0387 9536 0861',\n", - " '3537672423884966': '4001 9192 5753 7193'}}" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "anonymizer.load_deanonymizer_mapping(\"deanonymizer_mapping.json\")\n", - "\n", - "anonymizer.deanonymizer_mapping" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Custom deanonymization strategy\n", - "\n", - "The default deanonymization strategy is to exactly match the substring in the text with the mapping entry. Due to the indeterminism of LLMs, it may be that the model will change the format of the private data slightly or make a typo, for example:\n", - "- *Keanu Reeves* -> *Kaenu Reeves*\n", - "- *John F. 
Kennedy* -> *John Kennedy*\n", - "- *Main St, New York* -> *New York*\n", - "\n", - "It is therefore worth considering appropriate prompt engineering (have the model return PII in unchanged format) or trying to implement your replacing strategy. For example, you can use fuzzy matching - this will solve problems with typos and minor changes in the text. Some implementations of the swapping strategy can be found in the file `deanonymizer_matching_strategies.py`." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "maria lynch\n", - "Slim Shady\n" - ] - } - ], - "source": [ - "from langchain_experimental.data_anonymizer.deanonymizer_matching_strategies import (\n", - " case_insensitive_matching_strategy,\n", - ")\n", - "\n", - "# Original name: Maria Lynch\n", - "print(anonymizer.deanonymize(\"maria lynch\"))\n", - "print(\n", - " anonymizer.deanonymize(\n", - " \"maria lynch\", deanonymizer_matching_strategy=case_insensitive_matching_strategy\n", - " )\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Call Maria K. Lynch at 734-413-1647\n", - "Call Slim Shady at 313-666-7440\n" - ] - } - ], - "source": [ - "from langchain_experimental.data_anonymizer.deanonymizer_matching_strategies import (\n", - " fuzzy_matching_strategy,\n", - ")\n", - "\n", - "# Original name: Maria Lynch\n", - "# Original phone number: 7344131647 (without dashes)\n", - "print(anonymizer.deanonymize(\"Call Maria K. Lynch at 734-413-1647\"))\n", - "print(\n", - " anonymizer.deanonymize(\n", - " \"Call Maria K. 
Lynch at 734-413-1647\",\n", - " deanonymizer_matching_strategy=fuzzy_matching_strategy,\n", - " )\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It seems that the combined method works best:\n", - "- first apply the exact match strategy\n", - "- then match the rest using the fuzzy strategy" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Are you Slim Shady? I found your card with number 4916 0387 9536 0861.\n", - "Is this your phone number: 313-666-7440?\n", - "Is this your email address: wdavis@example.net\n" - ] - } - ], - "source": [ - "from langchain_experimental.data_anonymizer.deanonymizer_matching_strategies import (\n", - " combined_exact_fuzzy_matching_strategy,\n", - ")\n", - "\n", - "# Changed some values for fuzzy match showcase:\n", - "# - \"Maria Lynch\" -> \"Maria K. Lynch\"\n", - "# - \"7344131647\" -> \"734-413-1647\"\n", - "# - \"213186379402654\" -> \"2131 8637 9402 654\"\n", - "print(\n", - " anonymizer.deanonymize(\n", - " (\n", - " \"Are you Maria F. Lynch? I found your card with number 4838 6379 40262.\\n\"\n", - " \"Is this your phone number: 734-413-1647?\\n\"\n", - " \"Is this your email address: wdavis@example.net\"\n", - " ),\n", - " deanonymizer_matching_strategy=combined_exact_fuzzy_matching_strategy,\n", - " )\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Of course, there is no perfect method and it is worth experimenting and finding the one best suited to your use case." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Future works\n", - "\n", - "- **better matching and substitution of fake values for real ones** - currently the strategy is based on matching full strings and then substituting them. Due to the indeterminism of language models, it may happen that the value in the answer is slightly changed (e.g. 
*John Doe* -> *John* or *Main St, New York* -> *New York*) and such a substitution is then no longer possible. Therefore, it is worth adjusting the matching for your needs." - ] - } + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_position: 1\n", + "title: Reversible anonymization \n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Reversible data anonymization with Microsoft Presidio\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/guides/privacy/presidio_data_anonymization/reversible.ipynb)\n", + "\n", + "\n", + "## Use case\n", + "\n", + "We have already written about the importance of anonymizing sensitive data in the previous section. **Reversible Anonymization** is an equally essential technology while sharing information with language models, as it balances data protection with data usability. This technique involves masking sensitive personally identifiable information (PII), yet it can be reversed and original data can be restored when authorized users need it. Its main advantage lies in the fact that while it conceals individual identities to prevent misuse, it also allows the concealed data to be accurately unmasked should it be necessary for legal or compliance purposes. \n", + "\n", + "## Overview\n", + "\n", + "We implemented the `PresidioReversibleAnonymizer`, which consists of two parts:\n", + "\n", + "1. anonymization - it works the same way as `PresidioAnonymizer`, plus the object itself stores a mapping of made-up values to original ones, for example:\n", + "```\n", + " {\n", + " \"PERSON\": {\n", + " \"\": \"\",\n", + " \"John Doe\": \"Slim Shady\"\n", + " },\n", + " \"PHONE_NUMBER\": {\n", + " \"111-111-1111\": \"555-555-5555\"\n", + " }\n", + " ...\n", + " }\n", + "```\n", + "\n", + "2. 
deanonymization - using the mapping described above, it matches fake data with original data and then substitutes it.\n", + "\n", + "Between anonymization and deanonymization user can perform different operations, for example, passing the output to LLM.\n", + "\n", + "## Quickstart\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Install necessary packages\n", + "# ! pip install langchain langchain-experimental openai presidio-analyzer presidio-anonymizer spacy Faker\n", + "# ! python -m spacy download en_core_web_lg" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`PresidioReversibleAnonymizer` is not significantly different from its predecessor (`PresidioAnonymizer`) in terms of anonymization:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'My name is Maria Lynch, call me at 7344131647 or email me at jamesmichael@example.com. 
By the way, my card number is: 4838637940262'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.4" - } + "source": [ + "from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer\n", + "\n", + "anonymizer = PresidioReversibleAnonymizer(\n", + " analyzed_fields=[\"PERSON\", \"PHONE_NUMBER\", \"EMAIL_ADDRESS\", \"CREDIT_CARD\"],\n", + " # Faker seed is used here to make sure the same fake data is generated for the test purposes\n", + " # In production, it is recommended to remove the faker_seed parameter (it will default to None)\n", + " faker_seed=42,\n", + ")\n", + "\n", + "anonymizer.anonymize(\n", + " \"My name is Slim Shady, call me at 313-666-7440 or email me at real.slim.shady@gmail.com. \"\n", + " \"By the way, my card number is: 4916 0387 9536 0861\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is what the full string we want to deanonymize looks like:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Maria Lynch recently lost his wallet. \n", + "Inside is some cash and his credit card with the number 4838637940262. 
\n", + "If you would find it, please call at 7344131647 or write an email here: jamesmichael@example.com.\n", + "Maria Lynch would be very grateful!\n" + ] + } + ], + "source": [ + "# We know this data, as we set the faker_seed parameter\n", + "fake_name = \"Maria Lynch\"\n", + "fake_phone = \"7344131647\"\n", + "fake_email = \"jamesmichael@example.com\"\n", + "fake_credit_card = \"4838637940262\"\n", + "\n", + "anonymized_text = f\"\"\"{fake_name} recently lost his wallet. \n", + "Inside is some cash and his credit card with the number {fake_credit_card}. \n", + "If you would find it, please call at {fake_phone} or write an email here: {fake_email}.\n", + "{fake_name} would be very grateful!\"\"\"\n", + "\n", + "print(anonymized_text)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And now, using the `deanonymize` method, we can reverse the process:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Slim Shady recently lost his wallet. \n", + "Inside is some cash and his credit card with the number 4916 0387 9536 0861. \n", + "If you would find it, please call at 313-666-7440 or write an email here: real.slim.shady@gmail.com.\n", + "Slim Shady would be very grateful!\n" + ] + } + ], + "source": [ + "print(anonymizer.deanonymize(anonymized_text))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using with LangChain Expression Language\n", + "\n", + "With LCEL we can easily chain together anonymization and deanonymization with the rest of our application. This is an example of using the anonymization mechanism with a query to LLM (without deanonymization for now):" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "text = \"\"\"Slim Shady recently lost his wallet. 
\n", + "Inside is some cash and his credit card with the number 4916 0387 9536 0861. \n", + "If you would find it, please call at 313-666-7440 or write an email here: real.slim.shady@gmail.com.\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dear Sir/Madam,\n", + "\n", + "We regret to inform you that Monique Turner has recently misplaced his wallet, which contains a sum of cash and his credit card with the number 213152056829866. \n", + "\n", + "If you happen to come across this wallet, kindly contact us at (770)908-7734x2835 or send an email to barbara25@example.net.\n", + "\n", + "Thank you for your cooperation.\n", + "\n", + "Sincerely,\n", + "[Your Name]\n" + ] + } + ], + "source": [ + "from langchain.prompts.prompt import PromptTemplate\n", + "from langchain.chat_models import ChatOpenAI\n", + "\n", + "anonymizer = PresidioReversibleAnonymizer()\n", + "\n", + "template = \"\"\"Rewrite this text into an official, short email:\n", + "\n", + "{anonymized_text}\"\"\"\n", + "prompt = PromptTemplate.from_template(template)\n", + "llm = ChatOpenAI(temperature=0)\n", + "\n", + "chain = {\"anonymized_text\": anonymizer.anonymize} | prompt | llm\n", + "response = chain.invoke(text)\n", + "print(response.content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, let's add **deanonymization step** to our sequence:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dear Sir/Madam,\n", + "\n", + "We regret to inform you that Slim Shady has recently misplaced his wallet, which contains a sum of cash and his credit card with the number 4916 0387 9536 0861. 
\n", + "\n", + "If you happen to come across this wallet, kindly contact us at 313-666-7440 or send an email to real.slim.shady@gmail.com.\n", + "\n", + "Thank you for your cooperation.\n", + "\n", + "Sincerely,\n", + "[Your Name]\n" + ] + } + ], + "source": [ + "chain = chain | (lambda ai_message: anonymizer.deanonymize(ai_message.content))\n", + "response = chain.invoke(text)\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Anonymized data was given to the model itself, and therefore it was protected from being leaked to the outside world. Then, the model's response was processed, and the factual value was replaced with the real one." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extra knowledge" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`PresidioReversibleAnonymizer` stores the mapping of the fake values to the original values in the `deanonymizer_mapping` parameter, where key is fake PII and value is the original one: " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'PERSON': {'Maria Lynch': 'Slim Shady'},\n", + " 'PHONE_NUMBER': {'7344131647': '313-666-7440'},\n", + " 'EMAIL_ADDRESS': {'jamesmichael@example.com': 'real.slim.shady@gmail.com'},\n", + " 'CREDIT_CARD': {'4838637940262': '4916 0387 9536 0861'}}" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer\n", + "\n", + "anonymizer = PresidioReversibleAnonymizer(\n", + " analyzed_fields=[\"PERSON\", \"PHONE_NUMBER\", \"EMAIL_ADDRESS\", \"CREDIT_CARD\"],\n", + " # Faker seed is used here to make sure the same fake data is generated for the test purposes\n", + " # In production, it is recommended to remove the faker_seed parameter (it will default to None)\n", + " 
faker_seed=42,\n", + ")\n", + "\n", + "anonymizer.anonymize(\n", + " \"My name is Slim Shady, call me at 313-666-7440 or email me at real.slim.shady@gmail.com. \"\n", + " \"By the way, my card number is: 4916 0387 9536 0861\"\n", + ")\n", + "\n", + "anonymizer.deanonymizer_mapping" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Anonymizing more texts will result in new mapping entries:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Do you have his VISA card number? Yep, it's 3537672423884966. I'm William Bowman by the way.\n" + ] + }, + { + "data": { + "text/plain": [ + "{'PERSON': {'Maria Lynch': 'Slim Shady', 'William Bowman': 'John Doe'},\n", + " 'PHONE_NUMBER': {'7344131647': '313-666-7440'},\n", + " 'EMAIL_ADDRESS': {'jamesmichael@example.com': 'real.slim.shady@gmail.com'},\n", + " 'CREDIT_CARD': {'4838637940262': '4916 0387 9536 0861',\n", + " '3537672423884966': '4001 9192 5753 7193'}}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(\n", + " anonymizer.anonymize(\n", + " \"Do you have his VISA card number? Yep, it's 4001 9192 5753 7193. 
I'm John Doe by the way.\"\n", + " )\n", + ")\n", + "\n", + "anonymizer.deanonymizer_mapping" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Thanks to the built-in memory, entities that have already been detected and anonymised will take the same form in subsequent processed texts, so no duplicates will exist in the mapping:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "My VISA card number is 3537672423884966 and my name is William Bowman.\n" + ] + }, + { + "data": { + "text/plain": [ + "{'PERSON': {'Maria Lynch': 'Slim Shady', 'William Bowman': 'John Doe'},\n", + " 'PHONE_NUMBER': {'7344131647': '313-666-7440'},\n", + " 'EMAIL_ADDRESS': {'jamesmichael@example.com': 'real.slim.shady@gmail.com'},\n", + " 'CREDIT_CARD': {'4838637940262': '4916 0387 9536 0861',\n", + " '3537672423884966': '4001 9192 5753 7193'}}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(\n", + " anonymizer.anonymize(\n", + " \"My VISA card number is 4001 9192 5753 7193 and my name is John Doe.\"\n", + " )\n", + ")\n", + "\n", + "anonymizer.deanonymizer_mapping" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can save the mapping itself to a file for future use: " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# We can save the deanonymizer mapping as a JSON or YAML file\n", + "\n", + "anonymizer.save_deanonymizer_mapping(\"deanonymizer_mapping.json\")\n", + "# anonymizer.save_deanonymizer_mapping(\"deanonymizer_mapping.yaml\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And then, load it in another `PresidioReversibleAnonymizer` instance:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "{}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "anonymizer = PresidioReversibleAnonymizer()\n", + "\n", + "anonymizer.deanonymizer_mapping" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'PERSON': {'Maria Lynch': 'Slim Shady', 'William Bowman': 'John Doe'},\n", + " 'PHONE_NUMBER': {'7344131647': '313-666-7440'},\n", + " 'EMAIL_ADDRESS': {'jamesmichael@example.com': 'real.slim.shady@gmail.com'},\n", + " 'CREDIT_CARD': {'4838637940262': '4916 0387 9536 0861',\n", + " '3537672423884966': '4001 9192 5753 7193'}}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "anonymizer.load_deanonymizer_mapping(\"deanonymizer_mapping.json\")\n", + "\n", + "anonymizer.deanonymizer_mapping" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom deanonymization strategy\n", + "\n", + "The default deanonymization strategy is to exactly match the substring in the text with the mapping entry. Due to the indeterminism of LLMs, it may be that the model will change the format of the private data slightly or make a typo, for example:\n", + "- *Keanu Reeves* -> *Kaenu Reeves*\n", + "- *John F. Kennedy* -> *John Kennedy*\n", + "- *Main St, New York* -> *New York*\n", + "\n", + "It is therefore worth considering appropriate prompt engineering (have the model return PII in unchanged format) or trying to implement your replacing strategy. For example, you can use fuzzy matching - this will solve problems with typos and minor changes in the text. Some implementations of the swapping strategy can be found in the file `deanonymizer_matching_strategies.py`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "maria lynch\n", + "Slim Shady\n" + ] + } + ], + "source": [ + "from langchain_experimental.data_anonymizer.deanonymizer_matching_strategies import (\n", + " case_insensitive_matching_strategy,\n", + ")\n", + "\n", + "# Original name: Maria Lynch\n", + "print(anonymizer.deanonymize(\"maria lynch\"))\n", + "print(\n", + " anonymizer.deanonymize(\n", + " \"maria lynch\", deanonymizer_matching_strategy=case_insensitive_matching_strategy\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Call Maria K. Lynch at 734-413-1647\n", + "Call Slim Shady at 313-666-7440\n" + ] + } + ], + "source": [ + "from langchain_experimental.data_anonymizer.deanonymizer_matching_strategies import (\n", + " fuzzy_matching_strategy,\n", + ")\n", + "\n", + "# Original name: Maria Lynch\n", + "# Original phone number: 7344131647 (without dashes)\n", + "print(anonymizer.deanonymize(\"Call Maria K. Lynch at 734-413-1647\"))\n", + "print(\n", + " anonymizer.deanonymize(\n", + " \"Call Maria K. Lynch at 734-413-1647\",\n", + " deanonymizer_matching_strategy=fuzzy_matching_strategy,\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It seems that the combined method works best:\n", + "- first apply the exact match strategy\n", + "- then match the rest using the fuzzy strategy" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Are you Slim Shady? 
I found your card with number 4916 0387 9536 0861.\n", + "Is this your phone number: 313-666-7440?\n", + "Is this your email address: wdavis@example.net\n" + ] + } + ], + "source": [ + "from langchain_experimental.data_anonymizer.deanonymizer_matching_strategies import (\n", + " combined_exact_fuzzy_matching_strategy,\n", + ")\n", + "\n", + "# Changed some values for fuzzy match showcase:\n", + "# - \"Maria Lynch\" -> \"Maria K. Lynch\"\n", + "# - \"7344131647\" -> \"734-413-1647\"\n", + "# - \"213186379402654\" -> \"2131 8637 9402 654\"\n", + "print(\n", + " anonymizer.deanonymize(\n", + " (\n", + " \"Are you Maria F. Lynch? I found your card with number 4838 6379 40262.\\n\"\n", + " \"Is this your phone number: 734-413-1647?\\n\"\n", + " \"Is this your email address: wdavis@example.net\"\n", + " ),\n", + " deanonymizer_matching_strategy=combined_exact_fuzzy_matching_strategy,\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Of course, there is no perfect method and it is worth experimenting and finding the one best suited to your use case." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Future works\n", + "\n", + "- **better matching and substitution of fake values for real ones** - currently the strategy is based on matching full strings and then substituting them. Due to the indeterminism of language models, it may happen that the value in the answer is slightly changed (e.g. *John Doe* -> *John* or *Main St, New York* -> *New York*) and such a substitution is then no longer possible. Therefore, it is worth adjusting the matching for your needs." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 }, - "nbformat": 4, - "nbformat_minor": 4 + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 } diff --git a/docs/docs/guides/safety/amazon_comprehend_chain.ipynb b/docs/docs/guides/safety/amazon_comprehend_chain.ipynb index 8843b370b41..7858c276756 100644 --- a/docs/docs/guides/safety/amazon_comprehend_chain.ipynb +++ b/docs/docs/guides/safety/amazon_comprehend_chain.ipynb @@ -78,7 +78,8 @@ "from langchain_experimental.comprehend_moderation import AmazonComprehendModerationChain\n", "\n", "comprehend_moderation = AmazonComprehendModerationChain(\n", - " client=comprehend_client, verbose=True # optional\n", + " client=comprehend_client,\n", + " verbose=True, # optional\n", ")" ] }, diff --git a/docs/docs/integrations/callbacks/argilla.ipynb b/docs/docs/integrations/callbacks/argilla.ipynb index a31b096945d..e19e0ade02a 100644 --- a/docs/docs/integrations/callbacks/argilla.ipynb +++ b/docs/docs/integrations/callbacks/argilla.ipynb @@ -147,7 +147,7 @@ " api_key=os.environ[\"ARGILLA_API_KEY\"],\n", ")\n", "\n", - "dataset.push_to_argilla(\"langchain-dataset\");" + "dataset.push_to_argilla(\"langchain-dataset\")" ] }, { diff --git a/docs/docs/integrations/callbacks/confident.ipynb b/docs/docs/integrations/callbacks/confident.ipynb index 36b92b3a6b6..b0d56763391 100644 --- a/docs/docs/integrations/callbacks/confident.ipynb +++ b/docs/docs/integrations/callbacks/confident.ipynb @@ -118,7 +118,6 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", "from langchain.callbacks.confident_callback import DeepEvalCallbackHandler\n", "\n", "deepeval_callback = DeepEvalCallbackHandler(\n", diff 
--git a/docs/docs/integrations/callbacks/context.ipynb b/docs/docs/integrations/callbacks/context.ipynb index 9da1e73f290..9ae9986df35 100644 --- a/docs/docs/integrations/callbacks/context.ipynb +++ b/docs/docs/integrations/callbacks/context.ipynb @@ -36,14 +36,10 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "vscode": { - "languageId": "shellscript" - } - }, + "metadata": {}, "outputs": [], "source": [ - "$ pip install context-python --upgrade" + "!pip install context-python --upgrade" ] }, { diff --git a/docs/docs/integrations/callbacks/infino.ipynb b/docs/docs/integrations/callbacks/infino.ipynb index 7d387a6eb34..6470ac58e6b 100644 --- a/docs/docs/integrations/callbacks/infino.ipynb +++ b/docs/docs/integrations/callbacks/infino.ipynb @@ -49,9 +49,7 @@ "from langchain.llms import OpenAI\n", "import matplotlib.pyplot as plt\n", "import matplotlib.dates as md\n", - "import os\n", "import time\n", - "import sys\n", "\n", "from infinopy import InfinoClient\n", "from langchain.callbacks import InfinoCallbackHandler" diff --git a/docs/docs/integrations/callbacks/sagemaker_tracking.ipynb b/docs/docs/integrations/callbacks/sagemaker_tracking.ipynb index 01e69385f1b..0532c0a237e 100644 --- a/docs/docs/integrations/callbacks/sagemaker_tracking.ipynb +++ b/docs/docs/integrations/callbacks/sagemaker_tracking.ipynb @@ -80,7 +80,6 @@ "from langchain.prompts import PromptTemplate\n", "from langchain.chains import LLMChain, SimpleSequentialChain\n", "from langchain.agents import initialize_agent, load_tools\n", - "from langchain.agents import Tool\n", "from langchain.callbacks import SageMakerCallbackHandler\n", "\n", "from sagemaker.analytics import ExperimentAnalytics\n", diff --git a/docs/docs/integrations/callbacks/trubrics.ipynb b/docs/docs/integrations/callbacks/trubrics.ipynb index 73babdc52c0..6e2b71ca320 100644 --- a/docs/docs/integrations/callbacks/trubrics.ipynb +++ b/docs/docs/integrations/callbacks/trubrics.ipynb @@ -266,7 +266,6 @@ 
"outputs": [], "source": [ "from langchain.chat_models import ChatOpenAI\n", - "from langchain.prompts import PromptTemplate\n", "from langchain.schema import HumanMessage, SystemMessage\n", "from langchain.callbacks import TrubricsCallbackHandler" ] diff --git a/docs/docs/integrations/chat/anthropic.ipynb b/docs/docs/integrations/chat/anthropic.ipynb index 3d575889b6f..00ad314f45d 100644 --- a/docs/docs/integrations/chat/anthropic.ipynb +++ b/docs/docs/integrations/chat/anthropic.ipynb @@ -20,13 +20,7 @@ "outputs": [], "source": [ "from langchain.chat_models import ChatAnthropic\n", - "from langchain.prompts.chat import (\n", - " ChatPromptTemplate,\n", - " SystemMessagePromptTemplate,\n", - " AIMessagePromptTemplate,\n", - " HumanMessagePromptTemplate,\n", - ")\n", - "from langchain.schema import AIMessage, HumanMessage, SystemMessage" + "from langchain.schema import HumanMessage" ] }, { diff --git a/docs/docs/integrations/chat/azure_chat_openai.ipynb b/docs/docs/integrations/chat/azure_chat_openai.ipynb index caf70196263..b759951e3b9 100644 --- a/docs/docs/integrations/chat/azure_chat_openai.ipynb +++ b/docs/docs/integrations/chat/azure_chat_openai.ipynb @@ -137,21 +137,13 @@ "source": [ "model0613 = AzureChatOpenAI(\n", " openai_api_version=\"2023-05-15\",\n", - " deployment_name=\"gpt-35-turbo,\n", + " deployment_name=\"gpt-35-turbo\",\n", " model_version=\"0613\",\n", ")\n", "with get_openai_callback() as cb:\n", " model0613([message])\n", " print(f\"Total Cost (USD): ${format(cb.total_cost, '.6f')}\")" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "99682534", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/docs/docs/integrations/chat/baidu_qianfan_endpoint.ipynb b/docs/docs/integrations/chat/baidu_qianfan_endpoint.ipynb index 8bc1e68fc5d..b330d00e116 100644 --- a/docs/docs/integrations/chat/baidu_qianfan_endpoint.ipynb +++ b/docs/docs/integrations/chat/baidu_qianfan_endpoint.ipynb @@ -226,7 +226,7 @@ 
"source": [ "res = chat.stream(\n", " [HumanMessage(content=\"hi\")],\n", - " **{\"top_p\": 0.4, \"temperature\": 0.1, \"penalty_score\": 1}\n", + " **{\"top_p\": 0.4, \"temperature\": 0.1, \"penalty_score\": 1},\n", ")\n", "\n", "for r in res:\n", diff --git a/docs/docs/integrations/chat/cohere.ipynb b/docs/docs/integrations/chat/cohere.ipynb index ec4ba64f9b7..e9b90af3d21 100644 --- a/docs/docs/integrations/chat/cohere.ipynb +++ b/docs/docs/integrations/chat/cohere.ipynb @@ -20,7 +20,7 @@ "outputs": [], "source": [ "from langchain.chat_models import ChatCohere\n", - "from langchain.schema import AIMessage, HumanMessage" + "from langchain.schema import HumanMessage" ] }, { diff --git a/docs/docs/integrations/chat/jinachat.ipynb b/docs/docs/integrations/chat/jinachat.ipynb index 18fac8b41af..29cc50c4d59 100644 --- a/docs/docs/integrations/chat/jinachat.ipynb +++ b/docs/docs/integrations/chat/jinachat.ipynb @@ -23,10 +23,9 @@ "from langchain.prompts.chat import (\n", " ChatPromptTemplate,\n", " SystemMessagePromptTemplate,\n", - " AIMessagePromptTemplate,\n", " HumanMessagePromptTemplate,\n", ")\n", - "from langchain.schema import AIMessage, HumanMessage, SystemMessage" + "from langchain.schema import HumanMessage, SystemMessage" ] }, { @@ -139,11 +138,11 @@ } ], "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, "language_info": { "codemirror_mode": { "name": "ipython", diff --git a/docs/docs/integrations/chat/konko.ipynb b/docs/docs/integrations/chat/konko.ipynb index cd2ceae0e8e..2250a242e55 100644 --- a/docs/docs/integrations/chat/konko.ipynb +++ b/docs/docs/integrations/chat/konko.ipynb @@ -34,13 +34,7 @@ "outputs": [], "source": [ "from langchain.chat_models import ChatKonko\n", - "from langchain.prompts.chat import (\n", - " ChatPromptTemplate,\n", - " 
SystemMessagePromptTemplate,\n", - " AIMessagePromptTemplate,\n", - " HumanMessagePromptTemplate,\n", - ")\n", - "from langchain.schema import AIMessage, HumanMessage, SystemMessage" + "from langchain.schema import HumanMessage, SystemMessage" ] }, { diff --git a/docs/docs/integrations/chat/litellm.ipynb b/docs/docs/integrations/chat/litellm.ipynb index 977f5f15546..bd3c8ef2823 100644 --- a/docs/docs/integrations/chat/litellm.ipynb +++ b/docs/docs/integrations/chat/litellm.ipynb @@ -23,13 +23,7 @@ "outputs": [], "source": [ "from langchain.chat_models import ChatLiteLLM\n", - "from langchain.prompts.chat import (\n", - " ChatPromptTemplate,\n", - " SystemMessagePromptTemplate,\n", - " AIMessagePromptTemplate,\n", - " HumanMessagePromptTemplate,\n", - ")\n", - "from langchain.schema import AIMessage, HumanMessage, SystemMessage" + "from langchain.schema import HumanMessage" ] }, { diff --git a/docs/docs/integrations/chat/openai.ipynb b/docs/docs/integrations/chat/openai.ipynb index 2b318bc2dce..33323f0e62e 100644 --- a/docs/docs/integrations/chat/openai.ipynb +++ b/docs/docs/integrations/chat/openai.ipynb @@ -23,10 +23,9 @@ "from langchain.prompts.chat import (\n", " ChatPromptTemplate,\n", " SystemMessagePromptTemplate,\n", - " AIMessagePromptTemplate,\n", " HumanMessagePromptTemplate,\n", ")\n", - "from langchain.schema import AIMessage, HumanMessage, SystemMessage" + "from langchain.schema import HumanMessage, SystemMessage" ] }, { diff --git a/docs/docs/integrations/chat/tongyi.ipynb b/docs/docs/integrations/chat/tongyi.ipynb index f0856d33e6e..f3c64f28306 100644 --- a/docs/docs/integrations/chat/tongyi.ipynb +++ b/docs/docs/integrations/chat/tongyi.ipynb @@ -119,7 +119,7 @@ } ], "source": [ - "from langchain.schema import AIMessage, HumanMessage, SystemMessage\n", + "from langchain.schema import HumanMessage, SystemMessage\n", "\n", "messages = [\n", " SystemMessage(\n", diff --git a/docs/docs/integrations/chat/vllm.ipynb 
b/docs/docs/integrations/chat/vllm.ipynb index 45c5094304e..6a8c3e9a7a5 100644 --- a/docs/docs/integrations/chat/vllm.ipynb +++ b/docs/docs/integrations/chat/vllm.ipynb @@ -25,10 +25,9 @@ "from langchain.prompts.chat import (\n", " ChatPromptTemplate,\n", " SystemMessagePromptTemplate,\n", - " AIMessagePromptTemplate,\n", " HumanMessagePromptTemplate,\n", ")\n", - "from langchain.schema import AIMessage, HumanMessage, SystemMessage" + "from langchain.schema import HumanMessage, SystemMessage" ] }, { diff --git a/docs/docs/integrations/chat_loaders/gmail.ipynb b/docs/docs/integrations/chat_loaders/gmail.ipynb index 4735634341e..ef089f0260c 100644 --- a/docs/docs/integrations/chat_loaders/gmail.ipynb +++ b/docs/docs/integrations/chat_loaders/gmail.ipynb @@ -36,16 +36,9 @@ "outputs": [], "source": [ "import os.path\n", - "import base64\n", - "import json\n", - "import re\n", - "import time\n", "from google.auth.transport.requests import Request\n", "from google.oauth2.credentials import Credentials\n", "from google_auth_oauthlib.flow import InstalledAppFlow\n", - "from googleapiclient.discovery import build\n", - "import logging\n", - "import requests\n", "\n", "SCOPES = [\"https://www.googleapis.com/auth/gmail.readonly\"]\n", "\n", diff --git a/docs/docs/integrations/document_loaders/docugami.ipynb b/docs/docs/integrations/document_loaders/docugami.ipynb index 43079e51da7..3213d5d112f 100644 --- a/docs/docs/integrations/document_loaders/docugami.ipynb +++ b/docs/docs/integrations/document_loaders/docugami.ipynb @@ -148,7 +148,6 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.schema import Document\n", "from langchain.vectorstores import Chroma\n", "from langchain.embeddings import OpenAIEmbeddings\n", "from langchain.llms import OpenAI\n", diff --git a/docs/docs/integrations/document_loaders/docusaurus.ipynb b/docs/docs/integrations/document_loaders/docusaurus.ipynb index ca953cb6684..0ffa9a0b1d3 100644 --- 
a/docs/docs/integrations/document_loaders/docusaurus.ipynb +++ b/docs/docs/integrations/document_loaders/docusaurus.ipynb @@ -118,7 +118,9 @@ "source": [ "loader = DocusaurusLoader(\n", " \"https://python.langchain.com\",\n", - " filter_urls=[\"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"],\n", + " filter_urls=[\n", + " \"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"\n", + " ],\n", ")\n", "documents = loader.load()" ] @@ -162,9 +164,11 @@ "source": [ "loader = DocusaurusLoader(\n", " \"https://python.langchain.com\",\n", - " filter_urls=[\"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"],\n", + " filter_urls=[\n", + " \"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"\n", + " ],\n", " # This will only include the content that matches these tags, otherwise they will be removed\n", - " custom_html_tags=[\"#content\", \".main\"]\n", + " custom_html_tags=[\"#content\", \".main\"],\n", ")" ] }, @@ -213,7 +217,9 @@ "source": [ "loader = DocusaurusLoader(\n", " \"https://python.langchain.com\",\n", - " filter_urls=[\"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"],\n", + " filter_urls=[\n", + " \"https://python.langchain.com/docs/integrations/document_loaders/sitemap\"\n", + " ],\n", " parsing_function=remove_nav_and_header_elements,\n", ")" ] diff --git a/docs/docs/integrations/document_loaders/figma.ipynb b/docs/docs/integrations/document_loaders/figma.ipynb index c8615e71215..bf521ae4d01 100644 --- a/docs/docs/integrations/document_loaders/figma.ipynb +++ b/docs/docs/integrations/document_loaders/figma.ipynb @@ -26,15 +26,11 @@ "\n", "from langchain.document_loaders.figma import FigmaFileLoader\n", "\n", - "from langchain.text_splitter import CharacterTextSplitter\n", "from langchain.chat_models import ChatOpenAI\n", "from langchain.indexes import VectorstoreIndexCreator\n", - "from langchain.chains import ConversationChain, 
LLMChain\n", - "from langchain.memory import ConversationBufferWindowMemory\n", "from langchain.prompts.chat import (\n", " ChatPromptTemplate,\n", " SystemMessagePromptTemplate,\n", - " AIMessagePromptTemplate,\n", " HumanMessagePromptTemplate,\n", ")" ] diff --git a/docs/docs/integrations/document_loaders/google_drive.ipynb b/docs/docs/integrations/document_loaders/google_drive.ipynb index 2a984f59220..c132e38c445 100644 --- a/docs/docs/integrations/document_loaders/google_drive.ipynb +++ b/docs/docs/integrations/document_loaders/google_drive.ipynb @@ -61,7 +61,7 @@ "source": [ "loader = GoogleDriveLoader(\n", " folder_id=\"1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5\",\n", - " token_path='/path/where/you/want/token/to/be/created/google_token.json'\n", + " token_path=\"/path/where/you/want/token/to/be/created/google_token.json\",\n", " # Optional: configure whether to recursively fetch files from subfolders. Defaults to False.\n", " recursive=False,\n", ")" diff --git a/docs/docs/integrations/document_loaders/iugu.ipynb b/docs/docs/integrations/document_loaders/iugu.ipynb index 8c7ece338f4..7bd4b30165f 100644 --- a/docs/docs/integrations/document_loaders/iugu.ipynb +++ b/docs/docs/integrations/document_loaders/iugu.ipynb @@ -18,9 +18,6 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", - "\n", - "\n", "from langchain.document_loaders import IuguLoader\n", "from langchain.indexes import VectorstoreIndexCreator" ] diff --git a/docs/docs/integrations/document_loaders/modern_treasury.ipynb b/docs/docs/integrations/document_loaders/modern_treasury.ipynb index a10ded52f59..7d9cbcc8847 100644 --- a/docs/docs/integrations/document_loaders/modern_treasury.ipynb +++ b/docs/docs/integrations/document_loaders/modern_treasury.ipynb @@ -21,9 +21,6 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", - "\n", - "\n", "from langchain.document_loaders import ModernTreasuryLoader\n", "from langchain.indexes import VectorstoreIndexCreator" ] diff --git 
a/docs/docs/integrations/document_loaders/quip.ipynb b/docs/docs/integrations/document_loaders/quip.ipynb index 5eec3d72e3e..16358041a62 100644 --- a/docs/docs/integrations/document_loaders/quip.ipynb +++ b/docs/docs/integrations/document_loaders/quip.ipynb @@ -1,66 +1,104 @@ { - "metadata": { - "vscode": { - "interpreter": { - "hash": "cc99336516f23363341912c6723b01ace86f02e26b4290be1efc0677e2e2ec24" - } - }, - "kernelspec": { - "name": "python", - "display_name": "Python (Pyodide)", - "language": "python" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.1" - } + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Quip\n", + "\n", + ">[Quip](https://quip.com) is a collaborative productivity software suite for mobile and Web. It allows groups of people to create and edit documents and spreadsheets as a group, typically for business purposes.\n", + "\n", + "A loader for `Quip` docs.\n", + "\n", + "Please refer [here](https://quip.com/dev/automation/documentation/current#section/Authentication/Get-Access-to-Quip's-APIs) to know how to get personal access token. \n", + "\n", + "Specify a list `folder_ids` and/or `thread_ids` to load in the corresponding docs into Document objects, if both are specified, loader will get all `thread_ids` belong to this folder based on `folder_ids`, combine with passed `thread_ids`, the union of both sets will be returned.\n", + "\n", + "* How to know folder_id ? \n", + " go to quip folder, right click folder and copy link, extract suffix from link as folder_id. Hint: `https://example.quip.com/`\n", + "* How to know thread_id ? \n", + " thread_id is the document id. Go to quip doc, right click doc and copy link, extract suffix from link as thread_id. 
Hint: `https://exmaple.quip.com/`\n", + " \n", + "You can also set `include_all_folders` as `True` will fetch group_folder_ids and \n", + "You can also specify a boolean `include_attachments` to include attachments, this is set to False by default, if set to True all attachments will be downloaded and QuipLoader will extract the text from the attachments and add it to the Document object. Currently supported attachment types are: `PDF`, `PNG`, `JPEG/JPG`, `SVG`, `Word` and `Excel`. Also you can sepcify a boolean `include_comments` to include comments in document, this is set to False by default, if set to True all comments in document will be fetched and QuipLoader will add them to Document objec.\n" + ] }, - "nbformat_minor": 4, - "nbformat": 4, - "cells": [ - { - "cell_type": "markdown", - "source": "# Quip\n\n>[Quip](https://quip.com) is a collaborative productivity software suite for mobile and Web. It allows groups of people to create and edit documents and spreadsheets as a group, typically for business purposes.\n\nA loader for `Quip` docs.\n\nPlease refer [here](https://quip.com/dev/automation/documentation/current#section/Authentication/Get-Access-to-Quip's-APIs) to know how to get personal access token. \n\nSpecify a list `folder_ids` and/or `thread_ids` to load in the corresponding docs into Document objects, if both are specified, loader will get all `thread_ids` belong to this folder based on `folder_ids`, combine with passed `thread_ids`, the union of both sets will be returned.\n\n* How to know folder_id ? \n go to quip folder, right click folder and copy link, extract suffix from link as folder_id. Hint: `https://example.quip.com/`\n* How to know thread_id ? \n thread_id is the document id. Go to quip doc, right click doc and copy link, extract suffix from link as thread_id. 
Hint: `https://exmaple.quip.com/`\n \nYou can also set `include_all_folders` as `True` will fetch group_folder_ids and \nYou can also specify a boolean `include_attachments` to include attachments, this is set to False by default, if set to True all attachments will be downloaded and QuipLoader will extract the text from the attachments and add it to the Document object. Currently supported attachment types are: `PDF`, `PNG`, `JPEG/JPG`, `SVG`, `Word` and `Excel`. Also you can sepcify a boolean `include_comments` to include comments in document, this is set to False by default, if set to True all comments in document will be fetched and QuipLoader will add them to Document objec.\n", - "metadata": {} - }, - { - "cell_type": "markdown", - "source": "Before using QuipLoader make sure you have the latest version of the quip-api package installed:", - "metadata": {} - }, - { - "cell_type": "code", - "source": "#!pip install quip-api", - "metadata": { - "tags": [] - }, - "execution_count": 1, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": "## Examples", - "metadata": {} - }, - { - "cell_type": "markdown", - "source": "### Personal Access Token", - "metadata": {} - }, - { - "cell_type": "code", - "source": "from langchain.document_loaders import QuipLoader\n\nloader = QuipLoader(api_url=\"https://platform.quip.com\", access_token=\"change_me\", request_timeout=60)\ndocuments = loader.load(\n folder_ids={'123','456'}, thread_ids={'abc', 'efg'} include_attachments=False, include_comments=False\n)", - "metadata": {}, - "execution_count": null, - "outputs": [] - } - ] -} \ No newline at end of file + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before using QuipLoader make sure you have the latest version of the quip-api package installed:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "#!pip install quip-api\n" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "## Examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Personal Access Token" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.document_loaders import QuipLoader\n", + "\n", + "loader = QuipLoader(\n", + " api_url=\"https://platform.quip.com\", access_token=\"change_me\", request_timeout=60\n", + ")\n", + "documents = loader.load(\n", + " folder_ids={\"123\", \"456\"},\n", + " thread_ids={\"abc\", \"efg\"},\n", + " include_attachments=False,\n", + " include_comments=False,\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (Pyodide)", + "language": "python", + "name": "python" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + }, + "vscode": { + "interpreter": { + "hash": "cc99336516f23363341912c6723b01ace86f02e26b4290be1efc0677e2e2ec24" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/docs/integrations/document_loaders/rockset.ipynb b/docs/docs/integrations/document_loaders/rockset.ipynb index c0941552052..3bdad1cdd68 100644 --- a/docs/docs/integrations/document_loaders/rockset.ipynb +++ b/docs/docs/integrations/document_loaders/rockset.ipynb @@ -29,14 +29,10 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "vscode": { - "languageId": "shellscript" - } - }, + "metadata": {}, "outputs": [], "source": [ - "$ pip3 install rockset" + "!pip install rockset" ] }, { diff --git a/docs/docs/integrations/document_loaders/stripe.ipynb b/docs/docs/integrations/document_loaders/stripe.ipynb index 0188dd90a95..3fa5cca186b 100644 --- a/docs/docs/integrations/document_loaders/stripe.ipynb +++ b/docs/docs/integrations/document_loaders/stripe.ipynb @@ -18,9 
+18,6 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", - "\n", - "\n", "from langchain.document_loaders import StripeLoader\n", "from langchain.indexes import VectorstoreIndexCreator" ] diff --git a/docs/docs/integrations/document_loaders/web_base.ipynb b/docs/docs/integrations/document_loaders/web_base.ipynb index cdf39ef8de2..89da41ca72d 100644 --- a/docs/docs/integrations/document_loaders/web_base.ipynb +++ b/docs/docs/integrations/document_loaders/web_base.ipynb @@ -91,7 +91,7 @@ "# Example: transcript = soup.select_one(\"td[class='scrtext']\").text\n", "# BS4 documentation can be found here: https://www.crummy.com/software/BeautifulSoup/bs4/doc/\n", "\n", - "\"\"\";" + "\"\"\"" ] }, { @@ -226,19 +226,23 @@ }, { "cell_type": "markdown", + "id": "672264ad", + "metadata": { + "collapsed": false + }, "source": [ "## Using proxies\n", "\n", "Sometimes you might need to use proxies to get around IP blocks. You can pass in a dictionary of proxies to the loader (and `requests` underneath) to use them." 
- ], - "metadata": { - "collapsed": false - }, - "id": "672264ad" + ] }, { "cell_type": "code", "execution_count": null, + "id": "9caf0310", + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "loader = WebBaseLoader(\n", @@ -249,11 +253,7 @@ " },\n", ")\n", "docs = loader.load()" - ], - "metadata": { - "collapsed": false - }, - "id": "9caf0310" + ] } ], "metadata": { diff --git a/docs/docs/integrations/document_transformers/openai_metadata_tagger.ipynb b/docs/docs/integrations/document_transformers/openai_metadata_tagger.ipynb index e8a60712139..595028ab6cc 100644 --- a/docs/docs/integrations/document_transformers/openai_metadata_tagger.ipynb +++ b/docs/docs/integrations/document_transformers/openai_metadata_tagger.ipynb @@ -109,7 +109,7 @@ "\n", "print(\n", " *[d.page_content + \"\\n\\n\" + json.dumps(d.metadata) for d in enhanced_documents],\n", - " sep=\"\\n\\n---------------\\n\\n\"\n", + " sep=\"\\n\\n---------------\\n\\n\",\n", ")" ] }, @@ -167,7 +167,7 @@ "\n", "print(\n", " *[d.page_content + \"\\n\\n\" + json.dumps(d.metadata) for d in enhanced_documents],\n", - " sep=\"\\n\\n---------------\\n\\n\"\n", + " sep=\"\\n\\n---------------\\n\\n\",\n", ")" ] }, @@ -225,7 +225,7 @@ "\n", "print(\n", " *[d.page_content + \"\\n\\n\" + json.dumps(d.metadata) for d in enhanced_documents],\n", - " sep=\"\\n\\n---------------\\n\\n\"\n", + " sep=\"\\n\\n---------------\\n\\n\",\n", ")" ] }, diff --git a/docs/docs/integrations/llms/azure_ml.ipynb b/docs/docs/integrations/llms/azure_ml.ipynb index 4443a53d797..32750700322 100644 --- a/docs/docs/integrations/llms/azure_ml.ipynb +++ b/docs/docs/integrations/llms/azure_ml.ipynb @@ -201,7 +201,6 @@ ], "source": [ "from langchain.llms.loading import load_llm\n", - "from langchain.llms.azureml_endpoint import AzureMLEndpointClient\n", "\n", "save_llm = AzureMLOnlineEndpoint(\n", " deployment_name=\"databricks-dolly-v2-12b-4\",\n", diff --git a/docs/docs/integrations/llms/baidu_qianfan_endpoint.ipynb 
b/docs/docs/integrations/llms/baidu_qianfan_endpoint.ipynb index 924e1de6991..f6854c4d7b8 100644 --- a/docs/docs/integrations/llms/baidu_qianfan_endpoint.ipynb +++ b/docs/docs/integrations/llms/baidu_qianfan_endpoint.ipynb @@ -227,7 +227,7 @@ "res = llm.generate(\n", " prompts=[\"hi\"],\n", " streaming=True,\n", - " **{\"top_p\": 0.4, \"temperature\": 0.1, \"penalty_score\": 1}\n", + " **{\"top_p\": 0.4, \"temperature\": 0.1, \"penalty_score\": 1},\n", ")\n", "\n", "for r in res:\n", diff --git a/docs/docs/integrations/llms/banana.ipynb b/docs/docs/integrations/llms/banana.ipynb index 76ccd6bd3a8..130b30d625e 100644 --- a/docs/docs/integrations/llms/banana.ipynb +++ b/docs/docs/integrations/llms/banana.ipynb @@ -37,7 +37,6 @@ "# * the model's url slug\n", "\n", "import os\n", - "from getpass import getpass\n", "\n", "# You can get this from the main dashboard\n", "# at https://app.banana.dev\n", @@ -53,7 +52,8 @@ "outputs": [], "source": [ "from langchain.llms import Banana\n", - "from langchain.prompts import PromptTemplate\nfrom langchain.chains import LLMChain" + "from langchain.prompts import PromptTemplate\n", + "from langchain.chains import LLMChain" ] }, { diff --git a/docs/docs/integrations/llms/beam.ipynb b/docs/docs/integrations/llms/beam.ipynb index 29fe1f5100c..091ae1da98e 100644 --- a/docs/docs/integrations/llms/beam.ipynb +++ b/docs/docs/integrations/llms/beam.ipynb @@ -2,6 +2,7 @@ "cells": [ { "cell_type": "markdown", + "id": "34803e5e", "metadata": { "id": "J-yvaDTmTTza" }, @@ -11,51 +12,50 @@ "Calls the Beam API wrapper to deploy and make subsequent calls to an instance of the gpt2 LLM in a cloud deployment. Requires installation of the Beam library and registration of Beam Client ID and Client Secret. By calling the wrapper an instance of the model is created and run, with returned text relating to the prompt. 
Additional calls can then be made by directly calling the Beam API.\n", "\n", "[Create an account](https://www.beam.cloud/), if you don't have one already. Grab your API keys from the [dashboard](https://www.beam.cloud/dashboard/settings/api-keys)." - ], - "id": "34803e5e" + ] }, { "cell_type": "markdown", + "id": "76af7763", "metadata": { "id": "CfTmesWtTfTS" }, "source": [ "Install the Beam CLI" - ], - "id": "76af7763" + ] }, { "cell_type": "code", "execution_count": null, + "id": "ef012b8d", "metadata": { "id": "G_tCCurqR7Ik" }, "outputs": [], "source": [ "!curl https://raw.githubusercontent.com/slai-labs/get-beam/main/get-beam.sh -sSfL | sh" - ], - "id": "ef012b8d" + ] }, { "cell_type": "markdown", + "id": "74be8c2e", "metadata": { "id": "jJkcNqOdThQ7" }, "source": [ "Register API Keys and set your beam client id and secret environment variables:" - ], - "id": "74be8c2e" + ] }, { "cell_type": "code", "execution_count": null, + "id": "2a176107", "metadata": { "id": "7gQd6fszSEaH" }, "outputs": [], "source": [ "import os\n", - "import subprocess\n", "\n", "beam_client_id = \"\"\n", "beam_client_secret = \"\"\n", @@ -66,33 +66,33 @@ "\n", "# Run the beam configure command\n", "!beam configure --clientId={beam_client_id} --clientSecret={beam_client_secret}" - ], - "id": "2a176107" + ] }, { "cell_type": "markdown", + "id": "64cc18b3", "metadata": { "id": "c20rkK18TrK2" }, "source": [ "Install the Beam SDK:" - ], - "id": "64cc18b3" + ] }, { "cell_type": "code", "execution_count": null, + "id": "a0014676", "metadata": { "id": "CH2Vop6ISNIf" }, "outputs": [], "source": [ "!pip install beam-sdk" - ], - "id": "a0014676" + ] }, { "cell_type": "markdown", + "id": "a48d515c", "metadata": { "id": "XflOsp3bTwl1" }, @@ -100,12 +100,12 @@ "**Deploy and call Beam directly from langchain!**\n", "\n", "Note that a cold start might take a couple of minutes to return the response, but subsequent calls will be faster!" 
- ], - "id": "a48d515c" + ] }, { "cell_type": "code", "execution_count": null, + "id": "c79e740b", "metadata": { "id": "KmaHxUqbSVnh" }, @@ -138,8 +138,7 @@ "response = llm._call(\"Running machine learning on a remote GPU\")\n", "\n", "print(response)" - ], - "id": "c79e740b" + ] } ], "metadata": { diff --git a/docs/docs/integrations/llms/bittensor.ipynb b/docs/docs/integrations/llms/bittensor.ipynb index a1e47b637f4..2d2603eb1dd 100644 --- a/docs/docs/integrations/llms/bittensor.ipynb +++ b/docs/docs/integrations/llms/bittensor.ipynb @@ -120,17 +120,12 @@ "outputs": [], "source": [ "from langchain.agents import (\n", - " AgentType,\n", - " initialize_agent,\n", - " load_tools,\n", " ZeroShotAgent,\n", - " Tool,\n", " AgentExecutor,\n", ")\n", "from langchain.memory import ConversationBufferMemory\n", "from langchain.chains import LLMChain\n", "from langchain.prompts import PromptTemplate\n", - "from langchain.utilities import GoogleSearchAPIWrapper, SerpAPIWrapper\n", "from langchain.llms import NIBittensorLLM\n", "\n", "memory = ConversationBufferMemory(memory_key=\"chat_history\")\n", diff --git a/docs/docs/integrations/llms/chatglm.ipynb b/docs/docs/integrations/llms/chatglm.ipynb index a1fbc3f95d8..d128fa0d6cf 100644 --- a/docs/docs/integrations/llms/chatglm.ipynb +++ b/docs/docs/integrations/llms/chatglm.ipynb @@ -22,7 +22,8 @@ "outputs": [], "source": [ "from langchain.llms import ChatGLM\n", - "from langchain.prompts import PromptTemplate\nfrom langchain.chains import LLMChain\n", + "from langchain.prompts import PromptTemplate\n", + "from langchain.chains import LLMChain\n", "\n", "# import os" ] @@ -52,7 +53,9 @@ "llm = ChatGLM(\n", " endpoint_url=endpoint_url,\n", " max_token=80000,\n", - " history=[[\"我将从美国到中国来旅游,出行前希望了解中国的城市\", \"欢迎问我任何问题。\"]],\n", + " history=[\n", + " [\"我将从美国到中国来旅游,出行前希望了解中国的城市\", \"欢迎问我任何问题。\"]\n", + " ],\n", " top_p=0.9,\n", " model_kwargs={\"sample_model_args\": False},\n", ")\n", diff --git 
a/docs/docs/integrations/llms/edenai.ipynb b/docs/docs/integrations/llms/edenai.ipynb index 2e57fd9ded9..46f18191739 100644 --- a/docs/docs/integrations/llms/edenai.ipynb +++ b/docs/docs/integrations/llms/edenai.ipynb @@ -32,16 +32,10 @@ "which you can get by creating an account https://app.edenai.run/user/register and heading here https://app.edenai.run/admin/account/settings\n", "\n", "Once we have a key we'll want to set it as an environment variable by running:\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "export EDENAI_API_KEY=\"...\"" + "\n", + "```bash\n", + "export EDENAI_API_KEY=\"...\"\n", + "```" ] }, { @@ -138,7 +132,6 @@ "import base64\n", "from io import BytesIO\n", "from PIL import Image\n", - "import json\n", "\n", "\n", "def print_base64_image(base64_string):\n", diff --git a/docs/docs/integrations/llms/fireworks.ipynb b/docs/docs/integrations/llms/fireworks.ipynb index bd29bfcdaa3..e0e955f0ed1 100644 --- a/docs/docs/integrations/llms/fireworks.ipynb +++ b/docs/docs/integrations/llms/fireworks.ipynb @@ -21,11 +21,6 @@ "source": [ "from langchain.llms.fireworks import Fireworks\n", "from langchain.prompts import PromptTemplate\n", - "from langchain.chains import LLMChain\n", - "from langchain.prompts.chat import (\n", - " ChatPromptTemplate,\n", - " HumanMessagePromptTemplate,\n", - ")\n", "import os" ] }, diff --git a/docs/docs/integrations/llms/gooseai.ipynb b/docs/docs/integrations/llms/gooseai.ipynb index fc0fad0bddf..52a9ddcad56 100644 --- a/docs/docs/integrations/llms/gooseai.ipynb +++ b/docs/docs/integrations/llms/gooseai.ipynb @@ -16,7 +16,7 @@ "metadata": {}, "source": [ "## Install openai\n", - "The `openai` package is required to use the GooseAI API. Install `openai` using `pip3 install openai`." + "The `openai` package is required to use the GooseAI API. Install `openai` using `pip install openai`." 
] }, { @@ -25,7 +25,7 @@ "metadata": {}, "outputs": [], "source": [ - "$ pip3 install openai" + "!pip install openai" ] }, { @@ -43,7 +43,8 @@ "source": [ "import os\n", "from langchain.llms import GooseAI\n", - "from langchain.prompts import PromptTemplate\nfrom langchain.chains import LLMChain" + "from langchain.prompts import PromptTemplate\n", + "from langchain.chains import LLMChain" ] }, { diff --git a/docs/docs/integrations/llms/javelin.ipynb b/docs/docs/integrations/llms/javelin.ipynb index 5f016616dd4..6942ddf2ad9 100644 --- a/docs/docs/integrations/llms/javelin.ipynb +++ b/docs/docs/integrations/llms/javelin.ipynb @@ -141,7 +141,6 @@ ], "source": [ "from langchain.embeddings import JavelinAIGatewayEmbeddings\n", - "from langchain.embeddings.openai import OpenAIEmbeddings\n", "\n", "embeddings = JavelinAIGatewayEmbeddings(\n", " gateway_uri=\"http://localhost:8000\", # replace with service URL or host/port of Javelin\n", diff --git a/docs/docs/integrations/llms/jsonformer_experimental.ipynb b/docs/docs/integrations/llms/jsonformer_experimental.ipynb index abab5f125e3..0290108510f 100644 --- a/docs/docs/integrations/llms/jsonformer_experimental.ipynb +++ b/docs/docs/integrations/llms/jsonformer_experimental.ipynb @@ -59,7 +59,6 @@ }, "outputs": [], "source": [ - "from typing import Optional\n", "from langchain.tools import tool\n", "import os\n", "import json\n", @@ -124,9 +123,7 @@ "BEGIN! 
Answer the Human's question as best as you are able.\n", "------\n", "Human: 'What's the difference between an iterator and an iterable?'\n", - "AI Assistant:\"\"\".format(\n", - " arg_schema=ask_star_coder.args\n", - ")" + "AI Assistant:\"\"\".format(arg_schema=ask_star_coder.args)" ] }, { diff --git a/docs/docs/integrations/llms/llm_caching.ipynb b/docs/docs/integrations/llms/llm_caching.ipynb index 806b046ce0f..000ea53a6f1 100644 --- a/docs/docs/integrations/llms/llm_caching.ipynb +++ b/docs/docs/integrations/llms/llm_caching.ipynb @@ -1248,7 +1248,6 @@ "outputs": [], "source": [ "from langchain.text_splitter import CharacterTextSplitter\n", - "from langchain.chains.mapreduce import MapReduceChain\n", "\n", "text_splitter = CharacterTextSplitter()" ] diff --git a/docs/docs/integrations/llms/openlm.ipynb b/docs/docs/integrations/llms/openlm.ipynb index ecb4f0ee0c5..e22b8b31473 100644 --- a/docs/docs/integrations/llms/openlm.ipynb +++ b/docs/docs/integrations/llms/openlm.ipynb @@ -41,7 +41,6 @@ "source": [ "from getpass import getpass\n", "import os\n", - "import subprocess\n", "\n", "\n", "# Check if OPENAI_API_KEY environment variable is set\n", @@ -71,7 +70,8 @@ "outputs": [], "source": [ "from langchain.llms import OpenLM\n", - "from langchain.prompts import PromptTemplate\nfrom langchain.chains import LLMChain" + "from langchain.prompts import PromptTemplate\n", + "from langchain.chains import LLMChain" ] }, { @@ -106,9 +106,7 @@ " result = llm_chain.run(question)\n", " print(\n", " \"\"\"Model: {}\n", - "Result: {}\"\"\".format(\n", - " model, result\n", - " )\n", + "Result: {}\"\"\".format(model, result)\n", " )" ] } diff --git a/docs/docs/integrations/llms/predictionguard.ipynb b/docs/docs/integrations/llms/predictionguard.ipynb index 83950aefead..ac7518e9348 100644 --- a/docs/docs/integrations/llms/predictionguard.ipynb +++ b/docs/docs/integrations/llms/predictionguard.ipynb @@ -2,27 +2,28 @@ "cells": [ { "cell_type": "markdown", + "id": "3f0a201c", 
"metadata": {}, "source": [ "# Prediction Guard" - ], - "id": "3f0a201c" + ] }, { "cell_type": "code", "execution_count": null, + "id": "4f810331", "metadata": { "id": "3RqWPav7AtKL" }, "outputs": [], "source": [ "! pip install predictionguard langchain" - ], - "id": "4f810331" + ] }, { "cell_type": "code", "execution_count": null, + "id": "7191a5ce", "metadata": { "id": "2xe8JEUwA7_y" }, @@ -30,26 +31,26 @@ "source": [ "import os\n", "\n", - "import predictionguard as pg\n", "from langchain.llms import PredictionGuard\n", - "from langchain.prompts import PromptTemplate\nfrom langchain.chains import LLMChain" - ], - "id": "7191a5ce" + "from langchain.prompts import PromptTemplate\n", + "from langchain.chains import LLMChain" + ] }, { "cell_type": "markdown", + "id": "a8d356d3", "metadata": { "id": "mesCTyhnJkNS" }, "source": [ "## Basic LLM usage\n", "\n" - ], - "id": "a8d356d3" + ] }, { "cell_type": "code", "execution_count": null, + "id": "158b109a", "metadata": { "id": "kp_Ymnx1SnDG" }, @@ -61,46 +62,46 @@ "\n", "# Your Prediction Guard API key. 
Get one at predictionguard.com\n", "os.environ[\"PREDICTIONGUARD_TOKEN\"] = \"\"" - ], - "id": "158b109a" + ] }, { "cell_type": "code", "execution_count": null, + "id": "140717c9", "metadata": { "id": "Ua7Mw1N4HcER" }, "outputs": [], "source": [ "pgllm = PredictionGuard(model=\"OpenAI-text-davinci-003\")" - ], - "id": "140717c9" + ] }, { "cell_type": "code", "execution_count": null, + "id": "605f7ab6", "metadata": { "id": "Qo2p5flLHxrB" }, "outputs": [], "source": [ "pgllm(\"Tell me a joke\")" - ], - "id": "605f7ab6" + ] }, { "cell_type": "markdown", + "id": "99de09f9", "metadata": { "id": "EyBYaP_xTMXH" }, "source": [ "## Control the output structure/ type of LLMs" - ], - "id": "99de09f9" + ] }, { "cell_type": "code", "execution_count": null, + "id": "ae6bd8a1", "metadata": { "id": "55uxzhQSTPqF" }, @@ -118,12 +119,12 @@ "\n", "Result: \"\"\"\n", "prompt = PromptTemplate(template=template, input_variables=[\"query\"])" - ], - "id": "ae6bd8a1" + ] }, { "cell_type": "code", "execution_count": null, + "id": "f81be0fb", "metadata": { "id": "yersskWbTaxU" }, @@ -131,12 +132,12 @@ "source": [ "# Without \"guarding\" or controlling the output of the LLM.\n", "pgllm(prompt.format(query=\"What kind of post is this?\"))" - ], - "id": "f81be0fb" + ] }, { "cell_type": "code", "execution_count": null, + "id": "0cb3b91f", "metadata": { "id": "PzxSbYwqTm2w" }, @@ -154,34 +155,34 @@ " },\n", ")\n", "pgllm(prompt.format(query=\"What kind of post is this?\"))" - ], - "id": "0cb3b91f" + ] }, { "cell_type": "markdown", + "id": "c3b6211f", "metadata": { "id": "v3MzIUItJ8kV" }, "source": [ "## Chaining" - ], - "id": "c3b6211f" + ] }, { "cell_type": "code", "execution_count": null, + "id": "8d57d1b5", "metadata": { "id": "pPegEZExILrT" }, "outputs": [], "source": [ "pgllm = PredictionGuard(model=\"OpenAI-text-davinci-003\")" - ], - "id": "8d57d1b5" + ] }, { "cell_type": "code", "execution_count": null, + "id": "7915b7fa", "metadata": { "id": "suxw62y-J-bg" }, @@ -196,12 +197,12 @@ 
"question = \"What NFL team won the Super Bowl in the year Justin Beiber was born?\"\n", "\n", "llm_chain.predict(question=question)" - ], - "id": "7915b7fa" + ] }, { "cell_type": "code", "execution_count": null, + "id": "32ffd783", "metadata": { "id": "l2bc26KHKr7n" }, @@ -212,18 +213,17 @@ "llm_chain = LLMChain(prompt=prompt, llm=pgllm, verbose=True)\n", "\n", "llm_chain.predict(adjective=\"sad\", subject=\"ducks\")" - ], - "id": "32ffd783" + ] }, { "cell_type": "code", "execution_count": null, + "id": "408ad1e1", "metadata": { "id": "I--eSa2PLGqq" }, "outputs": [], - "source": [], - "id": "408ad1e1" + "source": [] } ], "metadata": { diff --git a/docs/docs/integrations/llms/titan_takeoff.ipynb b/docs/docs/integrations/llms/titan_takeoff.ipynb index 64b78f55290..435157981ce 100644 --- a/docs/docs/integrations/llms/titan_takeoff.ipynb +++ b/docs/docs/integrations/llms/titan_takeoff.ipynb @@ -34,7 +34,7 @@ }, "outputs": [], "source": [ - "pip install titan-iris" + "!pip install titan-iris" ] }, { @@ -49,22 +49,13 @@ "## Taking off\n", "Models are referred to by their model id on HuggingFace. Takeoff uses port 8000 by default, but can be configured to use another port. 
There is also support to use a Nvidia GPU by specifying cuda for the device flag.\n", "\n", - "To start the takeoff server, run:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "shellscript" - } - }, - "outputs": [], - "source": [ + "To start the takeoff server, run:\n", + "\n", + "```shell\n", "iris takeoff --model tiiuae/falcon-7b-instruct --device cpu\n", "iris takeoff --model tiiuae/falcon-7b-instruct --device cuda # Nvidia GPU required\n", - "iris takeoff --model tiiuae/falcon-7b-instruct --device cpu --port 5000 # run on port 5000 (default: 8000)" + "iris takeoff --model tiiuae/falcon-7b-instruct --device cpu --port 5000 # run on port 5000 (default: 8000)\n", + "```" ] }, { @@ -74,20 +65,11 @@ "You will then be directed to a login page, where you will need to create an account to proceed.\n", "After logging in, run the command onscreen to check whether the server is ready. When it is ready, you can start using the Takeoff integration.\n", "\n", - "To shutdown the server, run the following command. You will be presented with options on which Takeoff server to shut down, in case you have multiple running servers.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "shellscript" - } - }, - "outputs": [], - "source": [ - "iris takeoff --shutdown # shutdown the server" + "To shutdown the server, run the following command. 
You will be presented with options on which Takeoff server to shut down, in case you have multiple running servers.\n", + "\n", + "```shell\n", + "iris takeoff --shutdown # shutdown the server\n", + "```" ] }, { diff --git a/docs/docs/integrations/memory/aws_dynamodb.ipynb b/docs/docs/integrations/memory/aws_dynamodb.ipynb index 2890dbb8593..f1c502c8e01 100644 --- a/docs/docs/integrations/memory/aws_dynamodb.ipynb +++ b/docs/docs/integrations/memory/aws_dynamodb.ipynb @@ -252,8 +252,7 @@ "from langchain.chat_models import ChatOpenAI\n", "from langchain.agents import initialize_agent\n", "from langchain.agents import AgentType\n", - "from langchain_experimental.utilities import PythonREPL\n", - "from getpass import getpass" + "from langchain_experimental.utilities import PythonREPL" ] }, { diff --git a/docs/docs/integrations/memory/cassandra_chat_message_history.ipynb b/docs/docs/integrations/memory/cassandra_chat_message_history.ipynb index 53e9e7bb4d6..698e0f618a4 100644 --- a/docs/docs/integrations/memory/cassandra_chat_message_history.ipynb +++ b/docs/docs/integrations/memory/cassandra_chat_message_history.ipynb @@ -50,7 +50,6 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", "import getpass\n", "\n", "database_mode = (input(\"\\n(C)assandra or (A)stra DB? 
\")).upper()\n", diff --git a/docs/docs/integrations/memory/neo4j_chat_message_history.ipynb b/docs/docs/integrations/memory/neo4j_chat_message_history.ipynb index 238beb09982..f31915bb748 100644 --- a/docs/docs/integrations/memory/neo4j_chat_message_history.ipynb +++ b/docs/docs/integrations/memory/neo4j_chat_message_history.ipynb @@ -25,7 +25,7 @@ " url=\"bolt://localhost:7687\",\n", " username=\"neo4j\",\n", " password=\"password\",\n", - " session_id=\"session_id_1\"\n", + " session_id=\"session_id_1\",\n", ")\n", "\n", "history.add_user_message(\"hi!\")\n", diff --git a/docs/docs/integrations/memory/rockset_chat_message_history.ipynb b/docs/docs/integrations/memory/rockset_chat_message_history.ipynb index c6a2adeef17..d1b5673b0b8 100644 --- a/docs/docs/integrations/memory/rockset_chat_message_history.ipynb +++ b/docs/docs/integrations/memory/rockset_chat_message_history.ipynb @@ -58,7 +58,8 @@ "history = RocksetChatMessageHistory(\n", " session_id=\"MySession\",\n", " client=RocksetClient(\n", - " api_key=\"YOUR API KEY\", host=Regions.usw2a1 # us-west-2 Oregon\n", + " api_key=\"YOUR API KEY\",\n", + " host=Regions.usw2a1, # us-west-2 Oregon\n", " ),\n", " collection=\"langchain_demo\",\n", " sync=True,\n", diff --git a/docs/docs/integrations/providers/arthur_tracking.ipynb b/docs/docs/integrations/providers/arthur_tracking.ipynb index 203d717923e..10deb5dd8b5 100644 --- a/docs/docs/integrations/providers/arthur_tracking.ipynb +++ b/docs/docs/integrations/providers/arthur_tracking.ipynb @@ -61,23 +61,23 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "id": "9Hq9snQasynA" }, "outputs": [], "source": [ - "def make_langchain_chat_llm(chat_model=):\n", + "def make_langchain_chat_llm():\n", " return ChatOpenAI(\n", " streaming=True,\n", " temperature=0.1,\n", " callbacks=[\n", " StreamingStdOutCallbackHandler(),\n", " ArthurCallbackHandler.from_credentials(\n", - " arthur_model_id, \n", - " arthur_url=arthur_url, \n", - 
" arthur_login=arthur_login)\n", - " ])" + " arthur_model_id, arthur_url=arthur_url, arthur_login=arthur_login\n", + " ),\n", + " ],\n", + " )" ] }, { diff --git a/docs/docs/integrations/providers/clearml_tracking.ipynb b/docs/docs/integrations/providers/clearml_tracking.ipynb index e6bd0e953db..12c50ff43aa 100644 --- a/docs/docs/integrations/providers/clearml_tracking.ipynb +++ b/docs/docs/integrations/providers/clearml_tracking.ipynb @@ -105,7 +105,6 @@ } ], "source": [ - "from datetime import datetime\n", "from langchain.callbacks import StdOutCallbackHandler\n", "from langchain.llms import OpenAI\n", "\n", diff --git a/docs/docs/integrations/providers/comet_tracking.ipynb b/docs/docs/integrations/providers/comet_tracking.ipynb index f1f88aa7aec..6f6bcf02b7d 100644 --- a/docs/docs/integrations/providers/comet_tracking.ipynb +++ b/docs/docs/integrations/providers/comet_tracking.ipynb @@ -49,7 +49,6 @@ "source": [ "%pip install comet_ml langchain openai google-search-results spacy textstat pandas\n", "\n", - "import sys\n", "\n", "!{sys.executable} -m spacy download en_core_web_sm" ] @@ -119,8 +118,6 @@ "metadata": {}, "outputs": [], "source": [ - "from datetime import datetime\n", - "\n", "from langchain.callbacks import CometCallbackHandler, StdOutCallbackHandler\n", "from langchain.llms import OpenAI\n", "\n", diff --git a/docs/docs/integrations/providers/rebuff.ipynb b/docs/docs/integrations/providers/rebuff.ipynb index 991985886fe..16f52a8a73e 100644 --- a/docs/docs/integrations/providers/rebuff.ipynb +++ b/docs/docs/integrations/providers/rebuff.ipynb @@ -106,7 +106,6 @@ "from langchain.chains import LLMChain\n", "from langchain.llms import OpenAI\n", "from langchain.prompts import PromptTemplate\n", - "import os\n", "\n", "# Set up the LangChain SDK with the environment variable\n", "llm = OpenAI(temperature=0)" diff --git a/docs/docs/integrations/providers/vectara/vectara_chat.ipynb b/docs/docs/integrations/providers/vectara/vectara_chat.ipynb index 
1f38eb9782b..cc33ad1fd50 100644 --- a/docs/docs/integrations/providers/vectara/vectara_chat.ipynb +++ b/docs/docs/integrations/providers/vectara/vectara_chat.ipynb @@ -21,7 +21,6 @@ "source": [ "import os\n", "from langchain.vectorstores import Vectara\n", - "from langchain.vectorstores.vectara import VectaraRetriever\n", "from langchain.llms import OpenAI\n", "from langchain.chains import ConversationalRetrievalChain" ] diff --git a/docs/docs/integrations/providers/vectara/vectara_text_generation.ipynb b/docs/docs/integrations/providers/vectara/vectara_text_generation.ipynb index 55b6ddfcb70..542d8bd9ae7 100644 --- a/docs/docs/integrations/providers/vectara/vectara_text_generation.ipynb +++ b/docs/docs/integrations/providers/vectara/vectara_text_generation.ipynb @@ -27,7 +27,6 @@ "import os\n", "from langchain.llms import OpenAI\n", "from langchain.docstore.document import Document\n", - "import requests\n", "from langchain.vectorstores import Vectara\n", "from langchain.text_splitter import CharacterTextSplitter\n", "from langchain.prompts import PromptTemplate\n", diff --git a/docs/docs/integrations/retrievers/Activeloop DeepMemory+LangChain.ipynb b/docs/docs/integrations/retrievers/Activeloop DeepMemory+LangChain.ipynb index 79e34456d33..1a91aeeb0f0 100644 --- a/docs/docs/integrations/retrievers/Activeloop DeepMemory+LangChain.ipynb +++ b/docs/docs/integrations/retrievers/Activeloop DeepMemory+LangChain.ipynb @@ -272,10 +272,9 @@ "metadata": {}, "outputs": [], "source": [ - "from typing import Optional, List\n", + "from typing import List\n", "\n", "from langchain.chains.openai_functions import (\n", - " create_openai_fn_chain,\n", " create_structured_output_chain,\n", ")\n", "from langchain.chat_models import ChatOpenAI\n", diff --git a/docs/docs/integrations/retrievers/amazon_kendra_retriever.ipynb b/docs/docs/integrations/retrievers/amazon_kendra_retriever.ipynb index 75cd9372a9d..41cc1d3d55b 100644 --- 
a/docs/docs/integrations/retrievers/amazon_kendra_retriever.ipynb +++ b/docs/docs/integrations/retrievers/amazon_kendra_retriever.ipynb @@ -35,7 +35,6 @@ "metadata": {}, "outputs": [], "source": [ - "import boto3\n", "from langchain.retrievers import AmazonKendraRetriever" ] }, diff --git a/docs/docs/integrations/retrievers/docarray_retriever.ipynb b/docs/docs/integrations/retrievers/docarray_retriever.ipynb index 4a6373af698..421ff66a988 100644 --- a/docs/docs/integrations/retrievers/docarray_retriever.ipynb +++ b/docs/docs/integrations/retrievers/docarray_retriever.ipynb @@ -33,7 +33,6 @@ "from langchain.retrievers import DocArrayRetriever\n", "from docarray import BaseDoc\n", "from docarray.typing import NdArray\n", - "import numpy as np\n", "from langchain.embeddings import FakeEmbeddings\n", "import random\n", "\n", diff --git a/docs/docs/integrations/retrievers/kay.ipynb b/docs/docs/integrations/retrievers/kay.ipynb index 6e7ac153bff..3b048cf8290 100644 --- a/docs/docs/integrations/retrievers/kay.ipynb +++ b/docs/docs/integrations/retrievers/kay.ipynb @@ -72,7 +72,6 @@ "source": [ "import os\n", "from langchain.retrievers import KayAiRetriever\n", - "from kay.rag.retrievers import KayRetriever\n", "\n", "os.environ[\"KAY_API_KEY\"] = KAY_API_KEY\n", "retriever = KayAiRetriever.create(\n", diff --git a/docs/docs/integrations/retrievers/metal.ipynb b/docs/docs/integrations/retrievers/metal.ipynb index 4526998e809..d467ea94fb1 100644 --- a/docs/docs/integrations/retrievers/metal.ipynb +++ b/docs/docs/integrations/retrievers/metal.ipynb @@ -37,7 +37,7 @@ "CLIENT_ID = \"\"\n", "INDEX_ID = \"\"\n", "\n", - "metal = Metal(API_KEY, CLIENT_ID, INDEX_ID);" + "metal = Metal(API_KEY, CLIENT_ID, INDEX_ID)" ] }, { diff --git a/docs/docs/integrations/retrievers/self_query/activeloop_deeplake_self_query.ipynb b/docs/docs/integrations/retrievers/self_query/activeloop_deeplake_self_query.ipynb index 0c70d7d0207..0739b5b41cd 100644 --- 
a/docs/docs/integrations/retrievers/self_query/activeloop_deeplake_self_query.ipynb +++ b/docs/docs/integrations/retrievers/self_query/activeloop_deeplake_self_query.ipynb @@ -35,7 +35,7 @@ "metadata": {}, "outputs": [], "source": [ - "# !pip install lark" + "# !pip install lark\n" ] }, { @@ -46,7 +46,7 @@ "outputs": [], "source": [ "# in case if some queries fail consider installing libdeeplake manually\n", - "# !pip install libdeeplake" + "# !pip install libdeeplake\n" ] }, { @@ -160,7 +160,6 @@ " page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n", " metadata={\n", " \"year\": 1979,\n", - " \"rating\": 9.9,\n", " \"director\": \"Andrei Tarkovsky\",\n", " \"genre\": \"science fiction\",\n", " \"rating\": 9.9,\n", diff --git a/docs/docs/integrations/retrievers/self_query/chroma_self_query.ipynb b/docs/docs/integrations/retrievers/self_query/chroma_self_query.ipynb index a1eeddd16d8..3429cb95eef 100644 --- a/docs/docs/integrations/retrievers/self_query/chroma_self_query.ipynb +++ b/docs/docs/integrations/retrievers/self_query/chroma_self_query.ipynb @@ -32,7 +32,7 @@ }, "outputs": [], "source": [ - "#!pip install lark" + "#!pip install lark\n" ] }, { @@ -44,7 +44,7 @@ }, "outputs": [], "source": [ - "#!pip install chromadb" + "#!pip install chromadb\n" ] }, { @@ -136,7 +136,6 @@ " page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n", " metadata={\n", " \"year\": 1979,\n", - " \"rating\": 9.9,\n", " \"director\": \"Andrei Tarkovsky\",\n", " \"genre\": \"science fiction\",\n", " \"rating\": 9.9,\n", diff --git a/docs/docs/integrations/retrievers/self_query/elasticsearch_self_query.ipynb b/docs/docs/integrations/retrievers/self_query/elasticsearch_self_query.ipynb index 183da98575e..4b806f3ce1b 100644 --- a/docs/docs/integrations/retrievers/self_query/elasticsearch_self_query.ipynb +++ b/docs/docs/integrations/retrievers/self_query/elasticsearch_self_query.ipynb @@ -45,7 +45,7 @@ } ], "source": [ - "#!pip 
install -qU lark elasticsearch" + "#!pip install -qU lark elasticsearch\n" ] }, { @@ -102,7 +102,6 @@ " page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n", " metadata={\n", " \"year\": 1979,\n", - " \"rating\": 9.9,\n", " \"director\": \"Andrei Tarkovsky\",\n", " \"genre\": \"science fiction\",\n", " \"rating\": 9.9,\n", diff --git a/docs/docs/integrations/retrievers/self_query/myscale_self_query.ipynb b/docs/docs/integrations/retrievers/self_query/myscale_self_query.ipynb index 0c02dbfcc68..70a327aecdf 100644 --- a/docs/docs/integrations/retrievers/self_query/myscale_self_query.ipynb +++ b/docs/docs/integrations/retrievers/self_query/myscale_self_query.ipynb @@ -130,7 +130,6 @@ " page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n", " metadata={\n", " \"date\": \"1979-09-10\",\n", - " \"rating\": 9.9,\n", " \"director\": \"Andrei Tarkovsky\",\n", " \"genre\": [\"science fiction\", \"adventure\"],\n", " \"rating\": 9.9,\n", diff --git a/docs/docs/integrations/retrievers/self_query/pinecone.ipynb b/docs/docs/integrations/retrievers/self_query/pinecone.ipynb index e52085e42e3..c3181fdd809 100644 --- a/docs/docs/integrations/retrievers/self_query/pinecone.ipynb +++ b/docs/docs/integrations/retrievers/self_query/pinecone.ipynb @@ -32,7 +32,7 @@ "metadata": {}, "outputs": [], "source": [ - "# !pip install lark" + "# !pip install lark\n" ] }, { @@ -42,7 +42,7 @@ "metadata": {}, "outputs": [], "source": [ - "#!pip install pinecone-client" + "#!pip install pinecone-client\n" ] }, { @@ -119,7 +119,6 @@ " page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n", " metadata={\n", " \"year\": 1979,\n", - " \"rating\": 9.9,\n", " \"director\": \"Andrei Tarkovsky\",\n", " \"genre\": [\"science fiction\", \"thriller\"],\n", " \"rating\": 9.9,\n", diff --git a/docs/docs/integrations/retrievers/self_query/supabase_self_query.ipynb 
b/docs/docs/integrations/retrievers/self_query/supabase_self_query.ipynb index 947d0bfeaca..b4208764557 100644 --- a/docs/docs/integrations/retrievers/self_query/supabase_self_query.ipynb +++ b/docs/docs/integrations/retrievers/self_query/supabase_self_query.ipynb @@ -270,7 +270,6 @@ " page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n", " metadata={\n", " \"year\": 1979,\n", - " \"rating\": 9.9,\n", " \"director\": \"Andrei Tarkovsky\",\n", " \"genre\": \"science fiction\",\n", " \"rating\": 9.9,\n", diff --git a/docs/docs/integrations/retrievers/self_query/timescalevector_self_query.ipynb b/docs/docs/integrations/retrievers/self_query/timescalevector_self_query.ipynb index 75e36eb6569..56682190ce3 100644 --- a/docs/docs/integrations/retrievers/self_query/timescalevector_self_query.ipynb +++ b/docs/docs/integrations/retrievers/self_query/timescalevector_self_query.ipynb @@ -55,7 +55,7 @@ }, "outputs": [], "source": [ - "#!pip install lark" + "#!pip install lark\n" ] }, { @@ -67,7 +67,7 @@ }, "outputs": [], "source": [ - "#!pip install timescale-vector" + "#!pip install timescale-vector\n" ] }, { @@ -192,7 +192,6 @@ " page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n", " metadata={\n", " \"year\": 1979,\n", - " \"rating\": 9.9,\n", " \"director\": \"Andrei Tarkovsky\",\n", " \"genre\": \"science fiction\",\n", " \"rating\": 9.9,\n", diff --git a/docs/docs/integrations/retrievers/self_query/vectara_self_query.ipynb b/docs/docs/integrations/retrievers/self_query/vectara_self_query.ipynb index 23eec589161..ec2f703ab3e 100644 --- a/docs/docs/integrations/retrievers/self_query/vectara_self_query.ipynb +++ b/docs/docs/integrations/retrievers/self_query/vectara_self_query.ipynb @@ -77,12 +77,9 @@ "source": [ "from langchain.embeddings import FakeEmbeddings\n", "from langchain.schema import Document\n", - "from langchain.text_splitter import CharacterTextSplitter\n", "from langchain.vectorstores import 
Vectara\n", - "from langchain.document_loaders import TextLoader\n", "\n", "from langchain.llms import OpenAI\n", - "from langchain.chains import ConversationalRetrievalChain\n", "from langchain.retrievers.self_query.base import SelfQueryRetriever\n", "from langchain.chains.query_constructor.base import AttributeInfo" ] diff --git a/docs/docs/integrations/retrievers/self_query/weaviate_self_query.ipynb b/docs/docs/integrations/retrievers/self_query/weaviate_self_query.ipynb index df11279c404..e5f6be8a7a0 100644 --- a/docs/docs/integrations/retrievers/self_query/weaviate_self_query.ipynb +++ b/docs/docs/integrations/retrievers/self_query/weaviate_self_query.ipynb @@ -33,7 +33,7 @@ }, "outputs": [], "source": [ - "#!pip install lark weaviate-client" + "#!pip install lark weaviate-client\n" ] }, { @@ -48,7 +48,6 @@ "from langchain.schema import Document\n", "from langchain.embeddings.openai import OpenAIEmbeddings\n", "from langchain.vectorstores import Weaviate\n", - "import os\n", "\n", "embeddings = OpenAIEmbeddings()" ] @@ -87,7 +86,6 @@ " page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n", " metadata={\n", " \"year\": 1979,\n", - " \"rating\": 9.9,\n", " \"director\": \"Andrei Tarkovsky\",\n", " \"genre\": \"science fiction\",\n", " \"rating\": 9.9,\n", diff --git a/docs/docs/integrations/text_embedding/baidu_qianfan_endpoint.ipynb b/docs/docs/integrations/text_embedding/baidu_qianfan_endpoint.ipynb index 270aed78dab..783ab807859 100644 --- a/docs/docs/integrations/text_embedding/baidu_qianfan_endpoint.ipynb +++ b/docs/docs/integrations/text_embedding/baidu_qianfan_endpoint.ipynb @@ -81,8 +81,6 @@ "\n", "await aioEmbed()\n", "\n", - "import asyncio\n", - "\n", "\n", "async def aioEmbedDocs():\n", " res = await embed.aembed_documents([\"hi\", \"world\"])\n", diff --git a/docs/docs/integrations/text_embedding/clarifai.ipynb b/docs/docs/integrations/text_embedding/clarifai.ipynb index 1fb53e49fc0..33f9ac0abca 100644 --- 
a/docs/docs/integrations/text_embedding/clarifai.ipynb +++ b/docs/docs/integrations/text_embedding/clarifai.ipynb @@ -82,7 +82,7 @@ "source": [ "# Import the required modules\n", "from langchain.embeddings import ClarifaiEmbeddings\n", - "from langchain.prompts import PromptTemplate\nfrom langchain.chains import LLMChain" + "from langchain.prompts import PromptTemplate" ] }, { diff --git a/docs/docs/integrations/text_embedding/edenai.ipynb b/docs/docs/integrations/text_embedding/edenai.ipynb index d7f813a6399..1b8983d21b6 100644 --- a/docs/docs/integrations/text_embedding/edenai.ipynb +++ b/docs/docs/integrations/text_embedding/edenai.ipynb @@ -32,16 +32,10 @@ "which you can get by creating an account https://app.edenai.run/user/register and heading here https://app.edenai.run/admin/account/settings\n", "\n", "Once we have a key we'll want to set it as an environment variable by running:\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "export EDENAI_API_KEY=\"...\"" + "\n", + "```shell\n", + "export EDENAI_API_KEY=\"...\"\n", + "```\n" ] }, { diff --git a/docs/docs/integrations/text_embedding/elasticsearch.ipynb b/docs/docs/integrations/text_embedding/elasticsearch.ipynb index 185811f4f5d..97922eff712 100644 --- a/docs/docs/integrations/text_embedding/elasticsearch.ipynb +++ b/docs/docs/integrations/text_embedding/elasticsearch.ipynb @@ -2,6 +2,7 @@ "cells": [ { "cell_type": "markdown", + "id": "72644940", "metadata": { "id": "1eZl1oaVUNeC" }, @@ -12,37 +13,36 @@ "The easiest way to instantiate the `ElasticsearchEmbeddings` class it either\n", "- using the `from_credentials` constructor if you are using Elastic Cloud\n", "- or using the `from_es_connection` constructor with any Elasticsearch cluster" - ], - "id": "72644940" + ] }, { "cell_type": "code", "execution_count": null, + "id": "298759cb", "metadata": { "id": "6dJxqebov4eU" }, "outputs": [], "source": [ "!pip -q install 
elasticsearch langchain" - ], - "id": "298759cb" + ] }, { "cell_type": "code", "execution_count": null, + "id": "76489aff", "metadata": { "id": "RV7C3DUmv4aq" }, "outputs": [], "source": [ - "import elasticsearch\n", "from langchain.embeddings.elasticsearch import ElasticsearchEmbeddings" - ], - "id": "76489aff" + ] }, { "cell_type": "code", "execution_count": null, + "id": "57bfdc82", "metadata": { "id": "MrT3jplJvp09" }, @@ -50,23 +50,23 @@ "source": [ "# Define the model ID\n", "model_id = \"your_model_id\"" - ], - "id": "57bfdc82" + ] }, { "cell_type": "markdown", + "id": "0ffad1ec", "metadata": { "id": "j5F-nwLVS_Zu" }, "source": [ "## Testing with `from_credentials`\n", "This required an Elastic Cloud `cloud_id`" - ], - "id": "0ffad1ec" + ] }, { "cell_type": "code", "execution_count": null, + "id": "fc2e9dcb", "metadata": { "id": "svtdnC-dvpxR" }, @@ -79,12 +79,12 @@ " es_user=\"your_user\",\n", " es_password=\"your_password\",\n", ")" - ], - "id": "fc2e9dcb" + ] }, { "cell_type": "code", "execution_count": null, + "id": "8ee7f1fc", "metadata": { "id": "7DXZAK7Kvpth" }, @@ -96,12 +96,12 @@ " \"Another example document to generate embeddings for.\",\n", "]\n", "document_embeddings = embeddings.embed_documents(documents)" - ], - "id": "8ee7f1fc" + ] }, { "cell_type": "code", "execution_count": null, + "id": "0b9d8471", "metadata": { "id": "K8ra75W_vpqy" }, @@ -110,12 +110,12 @@ "# Print document embeddings\n", "for i, embedding in enumerate(document_embeddings):\n", " print(f\"Embedding for document {i+1}: {embedding}\")" - ], - "id": "0b9d8471" + ] }, { "cell_type": "code", "execution_count": null, + "id": "3989ab23", "metadata": { "id": "V4Q5kQo9vpna" }, @@ -124,12 +124,12 @@ "# Create an embedding for a single query\n", "query = \"This is a single query.\"\n", "query_embedding = embeddings.embed_query(query)" - ], - "id": "3989ab23" + ] }, { "cell_type": "code", "execution_count": null, + "id": "0da6d2bf", "metadata": { "id": "O0oQDzGKvpkz" }, @@ -137,23 
+137,23 @@ "source": [ "# Print query embedding\n", "print(f\"Embedding for query: {query_embedding}\")" - ], - "id": "0da6d2bf" + ] }, { "cell_type": "markdown", + "id": "32700096", "metadata": { "id": "rHN03yV6TJ5q" }, "source": [ "## Testing with Existing Elasticsearch client connection\n", "This can be used with any Elasticsearch deployment" - ], - "id": "32700096" + ] }, { "cell_type": "code", "execution_count": null, + "id": "0bc60465", "metadata": { "id": "GMQcJDwBTJFm" }, @@ -163,12 +163,12 @@ "es_connection = Elasticsearch(\n", " hosts=[\"https://es_cluster_url:port\"], basic_auth=(\"user\", \"password\")\n", ")" - ], - "id": "0bc60465" + ] }, { "cell_type": "code", "execution_count": null, + "id": "8085843b", "metadata": { "id": "WTYIU4u3TJO1" }, @@ -179,12 +179,12 @@ " model_id,\n", " es_connection,\n", ")" - ], - "id": "8085843b" + ] }, { "cell_type": "code", "execution_count": null, + "id": "59a90bf3", "metadata": { "id": "4gdAUHwoTJO3" }, @@ -196,12 +196,12 @@ " \"Another example document to generate embeddings for.\",\n", "]\n", "document_embeddings = embeddings.embed_documents(documents)" - ], - "id": "59a90bf3" + ] }, { "cell_type": "code", "execution_count": null, + "id": "54b18673", "metadata": { "id": "RC_-tov6TJO3" }, @@ -210,12 +210,12 @@ "# Print document embeddings\n", "for i, embedding in enumerate(document_embeddings):\n", " print(f\"Embedding for document {i+1}: {embedding}\")" - ], - "id": "54b18673" + ] }, { "cell_type": "code", "execution_count": null, + "id": "a4812d5e", "metadata": { "id": "6GEnHBqETJO3" }, @@ -224,12 +224,12 @@ "# Create an embedding for a single query\n", "query = \"This is a single query.\"\n", "query_embedding = embeddings.embed_query(query)" - ], - "id": "a4812d5e" + ] }, { "cell_type": "code", "execution_count": null, + "id": "c6c69916", "metadata": { "id": "-kyUQAXDTJO4" }, @@ -237,8 +237,7 @@ "source": [ "# Print query embedding\n", "print(f\"Embedding for query: {query_embedding}\")" - ], - "id": "c6c69916" 
+ ] } ], "metadata": { diff --git a/docs/docs/integrations/text_embedding/fastembed.ipynb b/docs/docs/integrations/text_embedding/fastembed.ipynb index 9d6826f92d2..efea277b93d 100644 --- a/docs/docs/integrations/text_embedding/fastembed.ipynb +++ b/docs/docs/integrations/text_embedding/fastembed.ipynb @@ -2,6 +2,7 @@ "cells": [ { "cell_type": "markdown", + "id": "900fbd04-f6aa-4813-868f-1c54e3265385", "metadata": {}, "source": [ "# Qdrant FastEmbed\n", @@ -60,6 +61,7 @@ }, { "cell_type": "markdown", + "id": "8c77b0bb-2613-4167-a204-14d424b59105", "metadata": {}, "source": [ "## Instantiating FastEmbed\n", @@ -97,6 +99,7 @@ }, { "cell_type": "markdown", + "id": "119fbaad-9442-4fff-8214-c5f597bc8e77", "metadata": {}, "source": [ "## Usage\n", @@ -107,14 +110,18 @@ { "cell_type": "code", "execution_count": null, + "id": "62920051-cbd2-460d-ba24-0424c1ed395d", "metadata": {}, "outputs": [], "source": [ - "document_embeddings = embeddings.embed_documents([\"This is a document\", \"This is some other document\"])" + "document_embeddings = embeddings.embed_documents(\n", + " [\"This is a document\", \"This is some other document\"]\n", + ")" ] }, { "cell_type": "markdown", + "id": "7fd10d96-baee-468f-a532-b70b16b78d1f", "metadata": {}, "source": [ "### Generating query embeddings" @@ -123,6 +130,7 @@ { "cell_type": "code", "execution_count": null, + "id": "9f793bb6-609a-4a4a-a5c7-8e8597228915", "metadata": {}, "outputs": [], "source": [ diff --git a/docs/docs/integrations/text_embedding/open_clip.ipynb b/docs/docs/integrations/text_embedding/open_clip.ipynb index d3c3fd3766a..c1e1ba9ed81 100644 --- a/docs/docs/integrations/text_embedding/open_clip.ipynb +++ b/docs/docs/integrations/text_embedding/open_clip.ipynb @@ -48,6 +48,7 @@ "outputs": [], "source": [ "import open_clip\n", + "\n", "open_clip.list_pretrained()" ] }, @@ -147,8 +148,8 @@ " \"rocket\": \"a rocket standing on a launchpad\",\n", " \"motorcycle_right\": \"a red motorcycle standing in a garage\",\n", " 
\"camera\": \"a person looking at a camera on a tripod\",\n", - " \"horse\": \"a black-and-white silhouette of a horse\", \n", - " \"coffee\": \"a cup of coffee on a saucer\"\n", + " \"horse\": \"a black-and-white silhouette of a horse\",\n", + " \"coffee\": \"a cup of coffee on a saucer\",\n", "}\n", "\n", "original_images = []\n", @@ -158,14 +159,18 @@ "plt.figure(figsize=(16, 5))\n", "\n", "# Loop to display and prepare images and assemble URIs\n", - "for filename in [filename for filename in os.listdir(skimage.data_dir) if filename.endswith(\".png\") or filename.endswith(\".jpg\")]:\n", + "for filename in [\n", + " filename\n", + " for filename in os.listdir(skimage.data_dir)\n", + " if filename.endswith(\".png\") or filename.endswith(\".jpg\")\n", + "]:\n", " name = os.path.splitext(filename)[0]\n", " if name not in descriptions:\n", " continue\n", "\n", " image_path = os.path.join(skimage.data_dir, filename)\n", " image = Image.open(image_path).convert(\"RGB\")\n", - " \n", + "\n", " plt.subplot(2, 4, len(images) + 1)\n", " plt.imshow(image)\n", " plt.title(f\"{filename}\\n{descriptions[name]}\")\n", @@ -173,7 +178,7 @@ " plt.yticks([])\n", "\n", " original_images.append(image)\n", - " images.append(image) # Origional code does preprocessing here\n", + " images.append(image) # Origional code does preprocessing here\n", " texts.append(descriptions[name])\n", " image_uris.append(image_path) # Add the image URI to the list\n", "\n", @@ -216,7 +221,7 @@ "# Instantiate your model\n", "clip_embd = OpenCLIPEmbeddings()\n", "\n", - "# Embed images and text \n", + "# Embed images and text\n", "img_features = clip_embd.embed_image(image_uris)\n", "text_features = clip_embd.embed_documents([\"This is \" + desc for desc in texts])\n", "\n", @@ -241,7 +246,7 @@ " plt.text(x, y, f\"{similarity[y, x]:.2f}\", ha=\"center\", va=\"center\", size=12)\n", "\n", "for side in [\"left\", \"top\", \"right\", \"bottom\"]:\n", - " plt.gca().spines[side].set_visible(False)\n", + " 
plt.gca().spines[side].set_visible(False)\n", "\n", "plt.xlim([-0.5, count - 0.5])\n", "plt.ylim([count + 0.5, -2])\n", diff --git a/docs/docs/integrations/text_embedding/sagemaker-endpoint.ipynb b/docs/docs/integrations/text_embedding/sagemaker-endpoint.ipynb index 8bb72f41590..08d22af40cb 100644 --- a/docs/docs/integrations/text_embedding/sagemaker-endpoint.ipynb +++ b/docs/docs/integrations/text_embedding/sagemaker-endpoint.ipynb @@ -43,7 +43,6 @@ "from langchain.embeddings import SagemakerEndpointEmbeddings\n", "from langchain.embeddings.sagemaker_endpoint import EmbeddingsContentHandler\n", "import json\n", - "import boto3\n", "\n", "\n", "class ContentHandler(EmbeddingsContentHandler):\n", diff --git a/docs/docs/integrations/text_embedding/sentence_transformers.ipynb b/docs/docs/integrations/text_embedding/sentence_transformers.ipynb index e4649e6b719..e0b881ac8cc 100644 --- a/docs/docs/integrations/text_embedding/sentence_transformers.ipynb +++ b/docs/docs/integrations/text_embedding/sentence_transformers.ipynb @@ -39,7 +39,7 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings" + "from langchain.embeddings import HuggingFaceEmbeddings" ] }, { diff --git a/docs/docs/integrations/toolkits/github.ipynb b/docs/docs/integrations/toolkits/github.ipynb index 939618e2566..aec05c95051 100644 --- a/docs/docs/integrations/toolkits/github.ipynb +++ b/docs/docs/integrations/toolkits/github.ipynb @@ -241,7 +241,6 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.tools.github.tool import GitHubAction\n", "from langchain.tools import DuckDuckGoSearchRun\n", "from langchain.agents import Tool\n", "from langchain.chat_models import ChatOpenAI\n", diff --git a/docs/docs/integrations/toolkits/json.ipynb b/docs/docs/integrations/toolkits/json.ipynb index 89614101359..dc841ed1b25 100644 --- a/docs/docs/integrations/toolkits/json.ipynb +++ b/docs/docs/integrations/toolkits/json.ipynb 
@@ -32,14 +32,11 @@ }, "outputs": [], "source": [ - "import os\n", "import yaml\n", "\n", - "from langchain.agents import create_json_agent, AgentExecutor\n", + "from langchain.agents import create_json_agent\n", "from langchain.agents.agent_toolkits import JsonToolkit\n", - "from langchain.chains import LLMChain\n", "from langchain.llms.openai import OpenAI\n", - "from langchain.requests import TextRequestsWrapper\n", "from langchain.tools.json.tool import JsonSpec" ] }, diff --git a/docs/docs/integrations/toolkits/multion.ipynb b/docs/docs/integrations/toolkits/multion.ipynb index de4098fa006..7bde3fdd190 100644 --- a/docs/docs/integrations/toolkits/multion.ipynb +++ b/docs/docs/integrations/toolkits/multion.ipynb @@ -27,7 +27,6 @@ "outputs": [], "source": [ "from langchain.agents.agent_toolkits import MultionToolkit\n", - "import os\n", "\n", "\n", "toolkit = MultionToolkit()\n", diff --git a/docs/docs/integrations/toolkits/openapi.ipynb b/docs/docs/integrations/toolkits/openapi.ipynb index f97532e36d8..bebe8088def 100644 --- a/docs/docs/integrations/toolkits/openapi.ipynb +++ b/docs/docs/integrations/toolkits/openapi.ipynb @@ -41,7 +41,8 @@ "metadata": {}, "outputs": [], "source": [ - "import os, yaml" + "import os\n", + "import yaml" ] }, { @@ -586,7 +587,6 @@ "from langchain.agents import create_openapi_agent\n", "from langchain.agents.agent_toolkits import OpenAPIToolkit\n", "from langchain.llms.openai import OpenAI\n", - "from langchain.requests import TextRequestsWrapper\n", "from langchain.tools.json.tool import JsonSpec" ] }, diff --git a/docs/docs/integrations/toolkits/openapi_nla.ipynb b/docs/docs/integrations/toolkits/openapi_nla.ipynb index 09f54b5d5ac..eeae9df5303 100644 --- a/docs/docs/integrations/toolkits/openapi_nla.ipynb +++ b/docs/docs/integrations/toolkits/openapi_nla.ipynb @@ -25,13 +25,9 @@ }, "outputs": [], "source": [ - "from typing import List, Optional\n", - "from langchain.chains import LLMChain\n", "from langchain.llms import 
OpenAI\n", - "from langchain.prompts import PromptTemplate\n", "from langchain.requests import Requests\n", - "from langchain.tools import APIOperation, OpenAPISpec\n", - "from langchain.agents import AgentType, Tool, initialize_agent\n", + "from langchain.agents import AgentType, initialize_agent\n", "from langchain.agents.agent_toolkits import NLAToolkit" ] }, diff --git a/docs/docs/integrations/toolkits/playwright.ipynb b/docs/docs/integrations/toolkits/playwright.ipynb index ccf569506b7..3091039e2aa 100644 --- a/docs/docs/integrations/toolkits/playwright.ipynb +++ b/docs/docs/integrations/toolkits/playwright.ipynb @@ -45,8 +45,7 @@ "source": [ "from langchain.agents.agent_toolkits import PlayWrightBrowserToolkit\n", "from langchain.tools.playwright.utils import (\n", - " create_async_playwright_browser,\n", - " create_sync_playwright_browser, # A synchronous browser is available, though it isn't compatible with jupyter.\n", + " create_async_playwright_browser, # A synchronous browser is available, though it isn't compatible with jupyter.\n", ")" ] }, diff --git a/docs/docs/integrations/toolkits/powerbi.ipynb b/docs/docs/integrations/toolkits/powerbi.ipynb index 475e66e612f..41380c80f51 100644 --- a/docs/docs/integrations/toolkits/powerbi.ipynb +++ b/docs/docs/integrations/toolkits/powerbi.ipynb @@ -41,7 +41,6 @@ "from langchain.agents.agent_toolkits import PowerBIToolkit\n", "from langchain.utilities.powerbi import PowerBIDataset\n", "from langchain.chat_models import ChatOpenAI\n", - "from langchain.agents import AgentExecutor\n", "from azure.identity import DefaultAzureCredential" ] }, diff --git a/docs/docs/integrations/toolkits/python.ipynb b/docs/docs/integrations/toolkits/python.ipynb index 67edc85447f..e6bea2a73d5 100644 --- a/docs/docs/integrations/toolkits/python.ipynb +++ b/docs/docs/integrations/toolkits/python.ipynb @@ -21,7 +21,6 @@ "source": [ "from langchain_experimental.agents.agent_toolkits import create_python_agent\n", "from 
langchain_experimental.tools import PythonREPLTool\n", - "from langchain_experimental.utilities import PythonREPL\n", "from langchain.llms.openai import OpenAI\n", "from langchain.agents.agent_types import AgentType\n", "from langchain.chat_models import ChatOpenAI" diff --git a/docs/docs/integrations/toolkits/sql_database.ipynb b/docs/docs/integrations/toolkits/sql_database.ipynb index 76d50084ba1..d67c66eda76 100644 --- a/docs/docs/integrations/toolkits/sql_database.ipynb +++ b/docs/docs/integrations/toolkits/sql_database.ipynb @@ -38,9 +38,7 @@ "from langchain.agents.agent_toolkits import SQLDatabaseToolkit\n", "from langchain.sql_database import SQLDatabase\n", "from langchain.llms.openai import OpenAI\n", - "from langchain.agents import AgentExecutor\n", - "from langchain.agents.agent_types import AgentType\n", - "from langchain.chat_models import ChatOpenAI" + "from langchain.agents.agent_types import AgentType" ] }, { @@ -103,7 +101,7 @@ "# toolkit=toolkit,\n", "# verbose=True,\n", "# agent_type=AgentType.OPENAI_FUNCTIONS\n", - "# )" + "# )\n" ] }, { @@ -115,20 +113,14 @@ "\n", "The query chain may generate insert/update/delete queries. When this is not expected, use a custom prompt or create a SQL users without write permissions.\n", "\n", - "The final user might overload your SQL database by asking a simple question such as \"run the biggest query possible\". The generated query might look like:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "949772b9", - "metadata": {}, - "outputs": [], - "source": [ + "The final user might overload your SQL database by asking a simple question such as \"run the biggest query possible\". 
The generated query might look like:\n", + "\n", + "```sql\n", "SELECT * FROM \"public\".\"users\"\n", " JOIN \"public\".\"user_permissions\" ON \"public\".\"users\".id = \"public\".\"user_permissions\".user_id\n", " JOIN \"public\".\"projects\" ON \"public\".\"users\".id = \"public\".\"projects\".user_id\n", - " JOIN \"public\".\"events\" ON \"public\".\"projects\".id = \"public\".\"events\".project_id;" + " JOIN \"public\".\"events\" ON \"public\".\"projects\".id = \"public\".\"events\".project_id;\n", + "```" ] }, { diff --git a/docs/docs/integrations/toolkits/vectorstore.ipynb b/docs/docs/integrations/toolkits/vectorstore.ipynb index 22322494d82..ea137a56c1b 100644 --- a/docs/docs/integrations/toolkits/vectorstore.ipynb +++ b/docs/docs/integrations/toolkits/vectorstore.ipynb @@ -31,7 +31,6 @@ "from langchain.vectorstores import Chroma\n", "from langchain.text_splitter import CharacterTextSplitter\n", "from langchain.llms import OpenAI\n", - "from langchain.chains import VectorDBQA\n", "\n", "llm = OpenAI(temperature=0)" ] diff --git a/docs/docs/integrations/tools/dalle_image_generator.ipynb b/docs/docs/integrations/tools/dalle_image_generator.ipynb index e0430436e77..6a66aae71fb 100644 --- a/docs/docs/integrations/tools/dalle_image_generator.ipynb +++ b/docs/docs/integrations/tools/dalle_image_generator.ipynb @@ -100,8 +100,6 @@ "# Or you can try the options below to display the image inline in this notebook\n", "\n", "try:\n", - " import google.colab\n", - "\n", " IN_COLAB = True\n", "except:\n", " IN_COLAB = False\n", diff --git a/docs/docs/integrations/tools/filesystem.ipynb b/docs/docs/integrations/tools/filesystem.ipynb index 85462a88858..f09de3114bf 100644 --- a/docs/docs/integrations/tools/filesystem.ipynb +++ b/docs/docs/integrations/tools/filesystem.ipynb @@ -26,14 +26,6 @@ }, "outputs": [], "source": [ - "from langchain.tools.file_management import (\n", - " ReadFileTool,\n", - " CopyFileTool,\n", - " DeleteFileTool,\n", - " MoveFileTool,\n", - " 
WriteFileTool,\n", - " ListDirectoryTool,\n", - ")\n", "from langchain.agents.agent_toolkits import FileManagementToolkit\n", "from tempfile import TemporaryDirectory\n", "\n", diff --git a/docs/docs/integrations/tools/graphql.ipynb b/docs/docs/integrations/tools/graphql.ipynb index 76a3cf910fa..d45da8870b5 100644 --- a/docs/docs/integrations/tools/graphql.ipynb +++ b/docs/docs/integrations/tools/graphql.ipynb @@ -45,7 +45,6 @@ "source": [ "from langchain.llms import OpenAI\n", "from langchain.agents import load_tools, initialize_agent, AgentType\n", - "from langchain.utilities import GraphQLAPIWrapper\n", "\n", "llm = OpenAI(temperature=0)\n", "\n", diff --git a/docs/docs/integrations/vectorstores/activeloop_deeplake.ipynb b/docs/docs/integrations/vectorstores/activeloop_deeplake.ipynb index 95062b4fa86..5a05d6dcd04 100644 --- a/docs/docs/integrations/vectorstores/activeloop_deeplake.ipynb +++ b/docs/docs/integrations/vectorstores/activeloop_deeplake.ipynb @@ -753,7 +753,7 @@ } ], "source": [ - "dataset_path = f\"s3://BUCKET/langchain_test\" # could be also ./local/path (much faster locally), hub://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n", + "dataset_path = \"s3://BUCKET/langchain_test\" # could be also ./local/path (much faster locally), hub://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n", "\n", "embedding = OpenAIEmbeddings()\n", "db = DeepLake.from_documents(\n", diff --git a/docs/docs/integrations/vectorstores/atlas.ipynb b/docs/docs/integrations/vectorstores/atlas.ipynb index 162ac705a92..a493402d392 100644 --- a/docs/docs/integrations/vectorstores/atlas.ipynb +++ b/docs/docs/integrations/vectorstores/atlas.ipynb @@ -70,7 +70,6 @@ "outputs": [], "source": [ "import time\n", - "from langchain.embeddings.openai import OpenAIEmbeddings\n", "from langchain.text_splitter import SpacyTextSplitter\n", "from langchain.vectorstores import AtlasDB\n", "from langchain.document_loaders import TextLoader" diff --git 
a/docs/docs/integrations/vectorstores/awadb.ipynb b/docs/docs/integrations/vectorstores/awadb.ipynb index a0cad5d4809..5a466d510e6 100644 --- a/docs/docs/integrations/vectorstores/awadb.ipynb +++ b/docs/docs/integrations/vectorstores/awadb.ipynb @@ -128,21 +128,13 @@ "## Restore the table created and added data before" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "1bfa6e25", - "metadata": {}, - "outputs": [], - "source": [ - "AwaDB automatically persists added document data" - ] - }, { "cell_type": "markdown", "id": "2a0f3b35", "metadata": {}, "source": [ + "AwaDB automatically persists added document data.\n", + "\n", "If you can restore the table you created and added before, you can just do this as below:" ] }, @@ -166,7 +158,7 @@ "id": "aba255c2", "metadata": {}, "source": [ - "awadb load table success" + "awadb load table success\n" ] } ], diff --git a/docs/docs/integrations/vectorstores/azure_cosmos_db.ipynb b/docs/docs/integrations/vectorstores/azure_cosmos_db.ipynb index 7cb420c524e..90afa9a8cef 100644 --- a/docs/docs/integrations/vectorstores/azure_cosmos_db.ipynb +++ b/docs/docs/integrations/vectorstores/azure_cosmos_db.ipynb @@ -2,6 +2,10 @@ "cells": [ { "cell_type": "markdown", + "id": "245c0aa70db77606", + "metadata": { + "collapsed": false + }, "source": [ "# Azure Cosmos DB\n", "\n", @@ -17,15 +21,19 @@ "\n", "[Sign Up](https://azure.microsoft.com/en-us/free/) for free to get started today.\n", " " - ], - "metadata": { - "collapsed": false - }, - "id": "245c0aa70db77606" + ] }, { "cell_type": "code", "execution_count": 2, + "id": "ab8e45f5bd435ade", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-10T17:20:00.721985Z", + "start_time": "2023-10-10T17:19:57.996265Z" + }, + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -38,51 +46,50 @@ ], "source": [ "!pip install pymongo" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-10T17:20:00.721985Z", - "start_time": 
"2023-10-10T17:19:57.996265Z" - } - }, - "id": "ab8e45f5bd435ade" + ] }, { "cell_type": "code", "execution_count": 24, + "id": "9c7ce9e7b26efbb0", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-10T17:50:03.615234Z", + "start_time": "2023-10-10T17:50:03.604289Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "import os\n", - "import getpass\n", "\n", "CONNECTION_STRING = \"AZURE COSMOS DB MONGO vCORE connection string\"\n", "INDEX_NAME = \"izzy-test-index\"\n", "NAMESPACE = \"izzy_test_db.izzy_test_collection\"\n", "DB_NAME, COLLECTION_NAME = NAMESPACE.split(\".\")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-10T17:50:03.615234Z", - "start_time": "2023-10-10T17:50:03.604289Z" - } - }, - "id": "9c7ce9e7b26efbb0" + ] }, { "cell_type": "markdown", - "source": [ - "We want to use `OpenAIEmbeddings` so we need to set up our Azure OpenAI API Key alongside other environment variables. " - ], + "id": "f2e66b097c6ce2e3", "metadata": { "collapsed": false }, - "id": "f2e66b097c6ce2e3" + "source": [ + "We want to use `OpenAIEmbeddings` so we need to set up our Azure OpenAI API Key alongside other environment variables. 
" + ] }, { "cell_type": "code", "execution_count": 25, + "id": "4a052d99c6b8a2a7", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-10T17:50:11.712929Z", + "start_time": "2023-10-10T17:50:11.703871Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "# Set up the OpenAI Environment Variables\n", @@ -96,36 +103,34 @@ " \"OPENAI_EMBEDDINGS_DEPLOYMENT\"\n", "] = \"smart-agent-embedding-ada\" # the deployment name for the embedding model\n", "os.environ[\"OPENAI_EMBEDDINGS_MODEL_NAME\"] = \"text-embedding-ada-002\" # the model name" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-10T17:50:11.712929Z", - "start_time": "2023-10-10T17:50:11.703871Z" - } - }, - "id": "4a052d99c6b8a2a7" + ] }, { "cell_type": "markdown", + "id": "ebaa28c6e2b35063", + "metadata": { + "collapsed": false + }, "source": [ "Now, we need to load the documents into the collection, create the index and then run our queries against the index to retrieve matches.\n", "\n", "Please refer to the [documentation](https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/vector-search) if you have questions about certain parameters" - ], - "metadata": { - "collapsed": false - }, - "id": "ebaa28c6e2b35063" + ] }, { "cell_type": "code", "execution_count": 26, + "id": "183741cf8f4c7c53", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-10T17:50:16.732718Z", + "start_time": "2023-10-10T17:50:16.716642Z" + }, + "collapsed": false + }, "outputs": [], "source": [ - "from langchain.docstore.document import Document\n", "from langchain.embeddings import OpenAIEmbeddings\n", - "from langchain.schema.embeddings import Embeddings\n", "from langchain.vectorstores.azure_cosmos_db_vector_search import (\n", " AzureCosmosDBVectorSearch,\n", " CosmosDBSimilarityType,\n", @@ -150,19 +155,19 @@ "openai_embeddings: OpenAIEmbeddings = OpenAIEmbeddings(\n", " deployment=model_deployment, model=model_name, chunk_size=1\n", ")" - ], - "metadata": { - 
"collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-10T17:50:16.732718Z", - "start_time": "2023-10-10T17:50:16.716642Z" - } - }, - "id": "183741cf8f4c7c53" + ] }, { "cell_type": "code", "execution_count": 28, + "id": "39ae6058c2f7fdf1", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-10T17:51:17.980698Z", + "start_time": "2023-10-10T17:51:11.786336Z" + }, + "collapsed": false + }, "outputs": [ { "data": { @@ -200,37 +205,37 @@ "similarity_algorithm = CosmosDBSimilarityType.COS\n", "\n", "vectorstore.create_index(num_lists, dimensions, similarity_algorithm)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-10T17:51:17.980698Z", - "start_time": "2023-10-10T17:51:11.786336Z" - } - }, - "id": "39ae6058c2f7fdf1" + ] }, { "cell_type": "code", "execution_count": 29, + "id": "32c68d3246adc21f", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-10T17:51:44.840121Z", + "start_time": "2023-10-10T17:51:44.498639Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "# perform a similarity search between the embedding of the query and the embeddings of the documents\n", "query = \"What did the president say about Ketanji Brown Jackson\"\n", "docs = vectorstore.similarity_search(query)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-10T17:51:44.840121Z", - "start_time": "2023-10-10T17:51:44.498639Z" - } - }, - "id": "32c68d3246adc21f" + ] }, { "cell_type": "code", "execution_count": 31, + "id": "8feeeb4364efb204", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-10T17:52:08.049294Z", + "start_time": "2023-10-10T17:52:08.038511Z" + }, + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -248,29 +253,29 @@ ], "source": [ "print(docs[0].page_content)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-10T17:52:08.049294Z", - "start_time": "2023-10-10T17:52:08.038511Z" - } - }, - "id": "8feeeb4364efb204" + ] }, { "cell_type": 
"markdown", - "source": [ - "Once the documents have been loaded and the index has been created, you can now instantiate the vector store directly and run queries against the index" - ], + "id": "37e4df8c7d7db851", "metadata": { "collapsed": false }, - "id": "37e4df8c7d7db851" + "source": [ + "Once the documents have been loaded and the index has been created, you can now instantiate the vector store directly and run queries against the index" + ] }, { "cell_type": "code", "execution_count": 32, + "id": "3c218ab6f59301f7", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-10T17:52:14.994861Z", + "start_time": "2023-10-10T17:52:13.986379Z" + }, + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -296,19 +301,19 @@ "docs = vectorstore.similarity_search(query)\n", "\n", "print(docs[0].page_content)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-10T17:52:14.994861Z", - "start_time": "2023-10-10T17:52:13.986379Z" - } - }, - "id": "3c218ab6f59301f7" + ] }, { "cell_type": "code", "execution_count": 33, + "id": "fd67e4d92c9ab32f", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-10T17:53:21.145431Z", + "start_time": "2023-10-10T17:53:20.884531Z" + }, + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -334,25 +339,17 @@ "docs = vectorstore.similarity_search(query)\n", "\n", "print(docs[0].page_content)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-10T17:53:21.145431Z", - "start_time": "2023-10-10T17:53:20.884531Z" - } - }, - "id": "fd67e4d92c9ab32f" + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [], + "id": "b63c73c7e905001c", "metadata": { "collapsed": false }, - "id": "b63c73c7e905001c" + "outputs": [], + "source": [] } ], "metadata": { diff --git a/docs/docs/integrations/vectorstores/azuresearch.ipynb b/docs/docs/integrations/vectorstores/azuresearch.ipynb index 78a53dba3cb..8710beb7ad0 100644 --- 
a/docs/docs/integrations/vectorstores/azuresearch.ipynb +++ b/docs/docs/integrations/vectorstores/azuresearch.ipynb @@ -44,7 +44,6 @@ "metadata": {}, "outputs": [], "source": [ - "import openai\n", "import os\n", "from langchain.embeddings import OpenAIEmbeddings\n", "from langchain.vectorstores.azuresearch import AzureSearch" @@ -456,7 +455,6 @@ " SimpleField,\n", " ScoringProfile,\n", " TextWeights,\n", - " ScoringFunction,\n", " FreshnessScoringFunction,\n", " FreshnessScoringParameters,\n", ")\n", diff --git a/docs/docs/integrations/vectorstores/dingo.ipynb b/docs/docs/integrations/vectorstores/dingo.ipynb index 13d123b79e3..d4a82d32400 100644 --- a/docs/docs/integrations/vectorstores/dingo.ipynb +++ b/docs/docs/integrations/vectorstores/dingo.ipynb @@ -24,7 +24,7 @@ "outputs": [], "source": [ "!pip install dingodb\n", - "or install latest:\n", + "# or install latest:\n", "!pip install git+https://git@github.com/dingodb/pydingo.git" ] }, diff --git a/docs/docs/integrations/vectorstores/elasticsearch.ipynb b/docs/docs/integrations/vectorstores/elasticsearch.ipynb index 93343a499cc..8af2decbb51 100644 --- a/docs/docs/integrations/vectorstores/elasticsearch.ipynb +++ b/docs/docs/integrations/vectorstores/elasticsearch.ipynb @@ -778,6 +778,7 @@ }, { "cell_type": "markdown", + "id": "a125af82-1f45-4337-a085-6f393bca2de8", "metadata": {}, "source": [ "# Customize the Document Builder\n", @@ -788,19 +789,25 @@ { "cell_type": "code", "execution_count": null, + "id": "5bafd4a0-75d0-471e-885a-243312af182a", "metadata": {}, "outputs": [], "source": [ "from typing import Dict\n", "from langchain.docstore.document import Document\n", "\n", + "\n", "def custom_document_builder(hit: Dict) -> Document:\n", " src = hit.get(\"_source\", {})\n", " return Document(\n", " page_content=src.get(\"content\", \"Missing content!\"),\n", - " metadata={\"page_number\": src.get(\"page_number\", -1), \"original_filename\": src.get(\"original_filename\", \"Missing filename!\")},\n", + " 
metadata={\n", + " \"page_number\": src.get(\"page_number\", -1),\n", + " \"original_filename\": src.get(\"original_filename\", \"Missing filename!\"),\n", + " },\n", " )\n", "\n", + "\n", "results = db.similarity_search(\n", " \"What did the president say about Ketanji Brown Jackson\",\n", " k=4,\n", diff --git a/docs/docs/integrations/vectorstores/hippo.ipynb b/docs/docs/integrations/vectorstores/hippo.ipynb index e3a04b43ace..950b5cc3ba7 100644 --- a/docs/docs/integrations/vectorstores/hippo.ipynb +++ b/docs/docs/integrations/vectorstores/hippo.ipynb @@ -2,6 +2,10 @@ "cells": [ { "cell_type": "markdown", + "id": "357f24224a8e818f", + "metadata": { + "collapsed": false + }, "source": [ "## Hippo\n", "\n", @@ -11,27 +15,31 @@ "## Getting Started\n", "\n", "The only prerequisite here is an API key from the OpenAI website. Make sure you have already started a Hippo instance." - ], - "metadata": { - "collapsed": false - }, - "id": "357f24224a8e818f" + ] }, { "cell_type": "markdown", + "id": "a92d2ce26df7ac4c", + "metadata": { + "collapsed": false + }, "source": [ "## Installing Dependencies\n", "\n", "Initially, we require the installation of certain dependencies, such as OpenAI, Langchain, and Hippo-API. Please note, you should install the appropriate versions tailored to your environment." 
- ], - "metadata": { - "collapsed": false - }, - "id": "a92d2ce26df7ac4c" + ] }, { "cell_type": "code", "execution_count": 15, + "id": "13b1d1ae153ff434", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-30T06:47:54.718488Z", + "start_time": "2023-10-30T06:47:53.563129Z" + }, + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -45,121 +53,121 @@ "source": [ "!pip install langchain tiktoken openai\n", "!pip install hippo-api==1.1.0.rc3" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-30T06:47:54.718488Z", - "start_time": "2023-10-30T06:47:53.563129Z" - } - }, - "id": "13b1d1ae153ff434" + ] }, { "cell_type": "markdown", + "id": "554081137df2c252", + "metadata": { + "collapsed": false + }, "source": [ "Note: Python version needs to be >=3.8.\n", "\n", "## Best Practice\n", "### Importing Dependency Packages" - ], - "metadata": { - "collapsed": false - }, - "id": "554081137df2c252" + ] }, { "cell_type": "code", "execution_count": 16, + "id": "5ff3296ce812aeb8", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-30T06:47:56.003409Z", + "start_time": "2023-10-30T06:47:55.998839Z" + }, + "collapsed": false + }, "outputs": [], "source": [ - "from langchain.chat_models import AzureChatOpenAI, ChatOpenAI\n", + "from langchain.chat_models import ChatOpenAI\n", "from langchain.document_loaders import TextLoader\n", "from langchain.embeddings import OpenAIEmbeddings\n", "from langchain.text_splitter import CharacterTextSplitter\n", "from langchain.vectorstores.hippo import Hippo\n", "import os" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-30T06:47:56.003409Z", - "start_time": "2023-10-30T06:47:55.998839Z" - } - }, - "id": "5ff3296ce812aeb8" + ] }, { "cell_type": "markdown", - "source": [ - "### Loading Knowledge Documents" - ], + "id": "dad255dae8aea755", "metadata": { "collapsed": false }, - "id": "dad255dae8aea755" + "source": [ + "### Loading Knowledge Documents" + ] }, { 
"cell_type": "code", "execution_count": 17, + "id": "f02d66a7fd653dc1", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-30T06:47:59.027869Z", + "start_time": "2023-10-30T06:47:59.023934Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "os.environ[\"OPENAI_API_KEY\"] = \"YOUR OPENAI KEY\"\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-30T06:47:59.027869Z", - "start_time": "2023-10-30T06:47:59.023934Z" - } - }, - "id": "f02d66a7fd653dc1" + ] }, { "cell_type": "markdown", + "id": "e9b93c330f1c6160", + "metadata": { + "collapsed": false + }, "source": [ "### Segmenting the Knowledge Document\n", "\n", "Here, we use Langchain's CharacterTextSplitter for segmentation. The delimiter is a period. After segmentation, the text segment does not exceed 1000 characters, and the number of repeated characters is 0." - ], - "metadata": { - "collapsed": false - }, - "id": "e9b93c330f1c6160" + ] }, { "cell_type": "code", "execution_count": 18, + "id": "fe6b43175318331f", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-30T06:48:00.279351Z", + "start_time": "2023-10-30T06:48:00.275763Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-30T06:48:00.279351Z", - "start_time": "2023-10-30T06:48:00.275763Z" - } - }, - "id": "fe6b43175318331f" + ] }, { "cell_type": "markdown", - "source": [ - "### Declaring the Embedding Model\n", - "Below, we create the OpenAI or Azure embedding model using the OpenAIEmbeddings method from Langchain." 
- ], + "id": "eefe28c7c993ffdf", "metadata": { "collapsed": false }, - "id": "eefe28c7c993ffdf" + "source": [ + "### Declaring the Embedding Model\n", + "Below, we create the OpenAI or Azure embedding model using the OpenAIEmbeddings method from Langchain." + ] }, { "cell_type": "code", "execution_count": 19, + "id": "8619f16b9f7355ea", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-30T06:48:11.686166Z", + "start_time": "2023-10-30T06:48:11.664355Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "# openai\n", @@ -173,55 +181,55 @@ "# deployment=\"x x x\",\n", "# openai_api_key=\"x x x\"\n", "# )" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-30T06:48:11.686166Z", - "start_time": "2023-10-30T06:48:11.664355Z" - } - }, - "id": "8619f16b9f7355ea" + ] }, { "cell_type": "markdown", - "source": [ - "### Declaring Hippo Client" - ], + "id": "e60235602ed91d3c", "metadata": { "collapsed": false }, - "id": "e60235602ed91d3c" + "source": [ + "### Declaring Hippo Client" + ] }, { "cell_type": "code", "execution_count": 20, - "outputs": [], - "source": [ - "HIPPO_CONNECTION = {\"host\": \"IP\", \"port\": \"PORT\"}" - ], + "id": "c666b70dcab78129", "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2023-10-30T06:48:48.594298Z", "start_time": "2023-10-30T06:48:48.585267Z" - } + }, + "collapsed": false }, - "id": "c666b70dcab78129" + "outputs": [], + "source": [ + "HIPPO_CONNECTION = {\"host\": \"IP\", \"port\": \"PORT\"}" + ] }, { "cell_type": "markdown", - "source": [ - "### Storing the Document" - ], + "id": "43ee6dbd765c3172", "metadata": { "collapsed": false }, - "id": "43ee6dbd765c3172" + "source": [ + "### Storing the Document" + ] }, { "cell_type": "code", "execution_count": 23, + "id": "79372c869844bdc9", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-30T06:51:12.661741Z", + "start_time": "2023-10-30T06:51:06.257156Z" + }, + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ 
-242,31 +250,31 @@ " connection_args=HIPPO_CONNECTION,\n", ")\n", "print(\"success\")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-30T06:51:12.661741Z", - "start_time": "2023-10-30T06:51:06.257156Z" - } - }, - "id": "79372c869844bdc9" + ] }, { "cell_type": "markdown", + "id": "89077cc9763d5dd0", + "metadata": { + "collapsed": false + }, "source": [ "### Conducting Knowledge-based Question and Answer\n", "#### Creating a Large Language Question-Answering Model\n", "Below, we create the OpenAI or Azure large language question-answering model respectively using the AzureChatOpenAI and ChatOpenAI methods from Langchain." - ], - "metadata": { - "collapsed": false - }, - "id": "89077cc9763d5dd0" + ] }, { "cell_type": "code", "execution_count": 24, + "id": "c9f2c42e9884f628", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-30T06:51:28.329351Z", + "start_time": "2023-10-30T06:51:28.318713Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "# llm = AzureChatOpenAI(\n", @@ -278,29 +286,29 @@ "# )\n", "\n", "llm = ChatOpenAI(openai_api_key=\"YOUR OPENAI KEY\", model_name=\"gpt-3.5-turbo-16k\")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-30T06:51:28.329351Z", - "start_time": "2023-10-30T06:51:28.318713Z" - } - }, - "id": "c9f2c42e9884f628" + ] }, { "cell_type": "markdown", - "source": [ - "### Acquiring Related Knowledge Based on the Question:" - ], + "id": "a4c5d73016a9db0c", "metadata": { "collapsed": false }, - "id": "a4c5d73016a9db0c" + "source": [ + "### Acquiring Related Knowledge Based on the Question:" + ] }, { "cell_type": "code", "execution_count": 25, + "id": "8656e80519da1f97", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-30T06:51:33.195634Z", + "start_time": "2023-10-30T06:51:32.196493Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "query = \"Please introduce COVID-19\"\n", @@ -313,29 +321,29 @@ "res = 
vector_store.similarity_search(query, 2)\n", "content_list = [item.page_content for item in res]\n", "text = \"\".join(content_list)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-30T06:51:33.195634Z", - "start_time": "2023-10-30T06:51:32.196493Z" - } - }, - "id": "8656e80519da1f97" + ] }, { "cell_type": "markdown", - "source": [ - "### Constructing a Prompt Template" - ], + "id": "e5adbaaa7086d1ae", "metadata": { "collapsed": false }, - "id": "e5adbaaa7086d1ae" + "source": [ + "### Constructing a Prompt Template" + ] }, { "cell_type": "code", "execution_count": 26, + "id": "b915d3001a2741c1", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-30T06:51:35.649376Z", + "start_time": "2023-10-30T06:51:35.645763Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "prompt = f\"\"\"\n", @@ -343,29 +351,29 @@ "[Article]:{text}\n", "Please answer this question in conjunction with the above article:{query}\n", "\"\"\"" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-10-30T06:51:35.649376Z", - "start_time": "2023-10-30T06:51:35.645763Z" - } - }, - "id": "b915d3001a2741c1" + ] }, { "cell_type": "markdown", - "source": [ - "### Waiting for the Large Language Model to Generate an Answer" - ], + "id": "b36b6a9adbec8a82", "metadata": { "collapsed": false }, - "id": "b36b6a9adbec8a82" + "source": [ + "### Waiting for the Large Language Model to Generate an Answer" + ] }, { "cell_type": "code", "execution_count": 27, + "id": "58eb5d2396321001", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-30T06:52:17.967885Z", + "start_time": "2023-10-30T06:51:37.692819Z" + }, + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -383,28 +391,20 @@ "response = llm.predict(query)\n", "print(\"==========================================\")\n", "print(f\"response_without_hippo:{response}\")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": 
"2023-10-30T06:52:17.967885Z", - "start_time": "2023-10-30T06:51:37.692819Z" - } - }, - "id": "58eb5d2396321001" + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [], + "id": "b2b7ce4e1850ecf1", "metadata": { - "collapsed": false, "ExecuteTime": { "start_time": "2023-10-30T06:42:42.172639Z" - } + }, + "collapsed": false }, - "id": "b2b7ce4e1850ecf1" + "outputs": [], + "source": [] } ], "metadata": { diff --git a/docs/docs/integrations/vectorstores/matchingengine.ipynb b/docs/docs/integrations/vectorstores/matchingengine.ipynb index 4e2629ad726..0978ccdbbda 100644 --- a/docs/docs/integrations/vectorstores/matchingengine.ipynb +++ b/docs/docs/integrations/vectorstores/matchingengine.ipynb @@ -93,7 +93,7 @@ "!pip install tensorflow \\\n", " google-cloud-aiplatform \\\n", " tensorflow-hub \\\n", - " tensorflow-text \n" + " tensorflow-text " ] }, { @@ -103,12 +103,10 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", "import json\n", "\n", "from google.cloud import aiplatform\n", - "import tensorflow_hub as hub\n", - "import tensorflow_text" + "import tensorflow_hub as hub" ] }, { diff --git a/docs/docs/integrations/vectorstores/mongodb_atlas.ipynb b/docs/docs/integrations/vectorstores/mongodb_atlas.ipynb index 7e9c7d4e7e9..e9a4ae4c9fe 100644 --- a/docs/docs/integrations/vectorstores/mongodb_atlas.ipynb +++ b/docs/docs/integrations/vectorstores/mongodb_atlas.ipynb @@ -80,7 +80,6 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", "import getpass\n", "\n", "MONGODB_ATLAS_CLUSTER_URI = getpass.getpass(\"MongoDB Atlas Cluster URI:\")" @@ -329,7 +328,6 @@ "outputs": [], "source": [ "from langchain.chains import RetrievalQA\n", - "from langchain.chat_models import ChatOpenAI\n", "from langchain.llms import OpenAI\n", "\n", "qa = RetrievalQA.from_chain_type(\n", diff --git a/docs/docs/integrations/vectorstores/myscale.ipynb b/docs/docs/integrations/vectorstores/myscale.ipynb index c3090836f4e..7ab41ef85ee 100644 --- 
a/docs/docs/integrations/vectorstores/myscale.ipynb +++ b/docs/docs/integrations/vectorstores/myscale.ipynb @@ -219,7 +219,7 @@ } ], "source": [ - "from langchain.vectorstores import MyScale, MyScaleSettings\n", + "from langchain.vectorstores import MyScale\n", "from langchain.document_loaders import TextLoader\n", "\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", diff --git a/docs/docs/integrations/vectorstores/pgvecto_rs.ipynb b/docs/docs/integrations/vectorstores/pgvecto_rs.ipynb index 8e8150f2ed1..9b60f7de172 100644 --- a/docs/docs/integrations/vectorstores/pgvecto_rs.ipynb +++ b/docs/docs/integrations/vectorstores/pgvecto_rs.ipynb @@ -66,7 +66,7 @@ }, "outputs": [], "source": [ - "docker run --name pgvecto-rs-demo -e POSTGRES_PASSWORD=mysecretpassword -p 5432:5432 -d tensorchord/pgvecto-rs:latest" + "! docker run --name pgvecto-rs-demo -e POSTGRES_PASSWORD=mysecretpassword -p 5432:5432 -d tensorchord/pgvecto-rs:latest" ] }, { diff --git a/docs/docs/integrations/vectorstores/pgvector.ipynb b/docs/docs/integrations/vectorstores/pgvector.ipynb index a3c01d21e54..6c149b5eb1e 100644 --- a/docs/docs/integrations/vectorstores/pgvector.ipynb +++ b/docs/docs/integrations/vectorstores/pgvector.ipynb @@ -65,11 +65,11 @@ "cell_type": "code", "execution_count": 3, "metadata": { - "tags": [], "ExecuteTime": { "end_time": "2023-09-09T08:02:19.742896Z", "start_time": "2023-09-09T08:02:19.732527Z" - } + }, + "tags": [] }, "outputs": [ { @@ -83,7 +83,6 @@ ], "source": [ "## Loading Environment Variables\n", - "from typing import List, Tuple\n", "from dotenv import load_dotenv\n", "\n", "load_dotenv()" @@ -93,11 +92,11 @@ "cell_type": "code", "execution_count": 4, "metadata": { - "tags": [], "ExecuteTime": { "end_time": "2023-09-09T08:02:23.144824Z", "start_time": "2023-09-09T08:02:22.047801Z" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -156,12 +155,12 @@ }, { "cell_type": "markdown", - "source": [ - "## Similarity Search with Euclidean Distance 
(Default)" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "## Similarity Search with Euclidean Distance (Default)" + ] }, { "cell_type": "code", @@ -285,32 +284,39 @@ }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "## Maximal Marginal Relevance Search (MMR)\n", "Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents." - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": 10, - "outputs": [], - "source": [ - "docs_with_score = db.max_marginal_relevance_search_with_score(query)" - ], "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2023-09-09T08:05:23.276819Z", "start_time": "2023-09-09T08:05:21.972256Z" - } - } + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "docs_with_score = db.max_marginal_relevance_search_with_score(query)" + ] }, { "cell_type": "code", "execution_count": 11, + "metadata": { + "ExecuteTime": { + "end_time": "2023-09-09T08:05:27.478580Z", + "start_time": "2023-09-09T08:05:27.470138Z" + }, + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -399,14 +405,7 @@ " print(\"Score: \", score)\n", " print(doc.page_content)\n", " print(\"-\" * 80)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-09-09T08:05:27.478580Z", - "start_time": "2023-09-09T08:05:27.470138Z" - } - } + ] }, { "cell_type": "markdown", diff --git a/docs/docs/integrations/vectorstores/scann.ipynb b/docs/docs/integrations/vectorstores/scann.ipynb index 6fd218745ba..37ce6bb89e6 100644 --- a/docs/docs/integrations/vectorstores/scann.ipynb +++ b/docs/docs/integrations/vectorstores/scann.ipynb @@ -69,7 +69,6 @@ "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", "\n", - "from langchain.embeddings import TensorflowHubEmbeddings\n", "\n", "embeddings = HuggingFaceEmbeddings()\n", "\n", diff --git 
a/docs/docs/integrations/vectorstores/starrocks.ipynb b/docs/docs/integrations/vectorstores/starrocks.ipynb index 39d4de18ae1..f5554dbffe0 100644 --- a/docs/docs/integrations/vectorstores/starrocks.ipynb +++ b/docs/docs/integrations/vectorstores/starrocks.ipynb @@ -60,12 +60,11 @@ "from langchain.embeddings.openai import OpenAIEmbeddings\n", "from langchain.vectorstores import StarRocks\n", "from langchain.vectorstores.starrocks import StarRocksSettings\n", - "from langchain.vectorstores import Chroma\n", - "from langchain.text_splitter import CharacterTextSplitter, TokenTextSplitter\n", - "from langchain.llms import OpenAI\nfrom langchain.chains import VectorDBQA\n", + "from langchain.text_splitter import TokenTextSplitter\n", + "from langchain.llms import OpenAI\n", "from langchain.document_loaders import DirectoryLoader\n", "from langchain.chains import RetrievalQA\n", - "from langchain.document_loaders import TextLoader, UnstructuredMarkdownLoader\n", + "from langchain.document_loaders import UnstructuredMarkdownLoader\n", "\n", "update_vectordb = False" ] diff --git a/docs/docs/integrations/vectorstores/timescalevector.ipynb b/docs/docs/integrations/vectorstores/timescalevector.ipynb index eaad9526ec4..3377216b2a4 100644 --- a/docs/docs/integrations/vectorstores/timescalevector.ipynb +++ b/docs/docs/integrations/vectorstores/timescalevector.ipynb @@ -88,7 +88,7 @@ "# Get the API key and save it as an environment variable\n", "# import os\n", "# import getpass\n", - "# os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")" + "# os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n" ] }, { @@ -99,7 +99,7 @@ }, "outputs": [], "source": [ - "from typing import List, Tuple" + "from typing import Tuple" ] }, { @@ -118,7 +118,6 @@ }, "outputs": [], "source": [ - "import timescale_vector\n", "from datetime import datetime, timedelta\n", "from langchain.embeddings.openai import OpenAIEmbeddings\n", "from langchain.text_splitter import 
CharacterTextSplitter\n", @@ -439,9 +438,7 @@ "execution_count": 14, "metadata": {}, "outputs": [], - "source": [ - "import json" - ] + "source": [] }, { "attachments": {}, @@ -567,7 +564,7 @@ "source": [ "# Download the file using curl and save it as commit_history.csv\n", "# Note: Execute this command in your terminal, in the same directory as the notebook\n", - "curl -O https://s3.amazonaws.com/assets.timescale.com/ai/ts_git_log.json" + "!curl -O https://s3.amazonaws.com/assets.timescale.com/ai/ts_git_log.json" ] }, { diff --git a/docs/docs/integrations/vectorstores/vectara.ipynb b/docs/docs/integrations/vectorstores/vectara.ipynb index a7e920f9356..1de1bd00400 100644 --- a/docs/docs/integrations/vectorstores/vectara.ipynb +++ b/docs/docs/integrations/vectorstores/vectara.ipynb @@ -91,7 +91,6 @@ "from langchain.document_loaders import TextLoader\n", "\n", "from langchain.llms import OpenAI\n", - "from langchain.chains import ConversationalRetrievalChain\n", "from langchain.retrievers.self_query.base import SelfQueryRetriever\n", "from langchain.chains.query_constructor.base import AttributeInfo" ] diff --git a/docs/docs/integrations/vectorstores/vespa.ipynb b/docs/docs/integrations/vectorstores/vespa.ipynb index 72318091c64..505003fb735 100644 --- a/docs/docs/integrations/vectorstores/vespa.ipynb +++ b/docs/docs/integrations/vectorstores/vespa.ipynb @@ -30,6 +30,10 @@ }, { "cell_type": "markdown", + "id": "283b49c9", + "metadata": { + "collapsed": false + }, "source": [ "Using the `pyvespa` package, you can either connect to a\n", "[Vespa Cloud instance](https://pyvespa.readthedocs.io/en/latest/deploy-vespa-cloud.html)\n", @@ -40,15 +44,18 @@ "#### Creating a Vespa application\n", "\n", "First, we need to create an application package:" - ], - "metadata": { - "collapsed": false - }, - "id": "283b49c9" + ] }, { "cell_type": "code", "execution_count": null, + "id": "91150665", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, 
"outputs": [], "source": [ "from vespa.package import ApplicationPackage, Field, RankProfile\n", @@ -62,7 +69,7 @@ " name=\"embedding\",\n", " type=\"tensor(x[384])\",\n", " indexing=[\"attribute\", \"summary\"],\n", - " attribute=[f\"distance-metric: angular\"],\n", + " attribute=[\"distance-metric: angular\"],\n", " ),\n", ")\n", "app_package.schema.add_rank_profile(\n", @@ -72,17 +79,17 @@ " inputs=[(\"query(query_embedding)\", \"tensor(x[384])\")],\n", " )\n", ")" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "id": "91150665" + ] }, { "cell_type": "markdown", + "id": "15477106", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "This sets up a Vespa application with a schema for each document that contains\n", "two fields: `text` for holding the document text and `embedding` for holding\n", @@ -100,14 +107,7 @@ "[nearest neighbor search](https://docs.vespa.ai/en/nearest-neighbor-search.html).\n", "\n", "Now we can deploy this application locally:" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - }, - "id": "15477106" + ] }, { "cell_type": "code", @@ -141,6 +141,13 @@ { "cell_type": "code", "execution_count": null, + "id": "7abde491", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from langchain.document_loaders import TextLoader\n", @@ -154,17 +161,17 @@ "from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings\n", "\n", "embedding_function = SentenceTransformerEmbeddings(model_name=\"all-MiniLM-L6-v2\")" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "id": "7abde491" + ] }, { "cell_type": "markdown", + "id": "d42365c7", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "Here, we also set up local sentence embedder to transform the text to embedding\n", "vectors. 
One could also use OpenAI embeddings, but the vector length needs to\n", @@ -173,18 +180,18 @@ "To feed these to Vespa, we need to configure how the vector store should map to\n", "fields in the Vespa application. Then we create the vector store directly from\n", "this set of documents:" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - }, - "id": "d42365c7" + ] }, { "cell_type": "code", "execution_count": null, + "id": "0b647878", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "vespa_config = dict(\n", @@ -196,31 +203,24 @@ "from langchain.vectorstores import VespaStore\n", "\n", "db = VespaStore.from_documents(docs, embedding_function, app=vespa_app, **vespa_config)" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "id": "0b647878" + ] }, { "cell_type": "markdown", - "source": [ - "This creates a Vespa vector store and feeds that set of documents to Vespa.\n", - "The vector store takes care of calling the embedding function for each document\n", - "and inserts them into the database.\n", - "\n", - "We can now query the vector store:" - ], + "id": "d6bd0aab", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, - "id": "d6bd0aab" + "source": [ + "This creates a Vespa vector store and feeds that set of documents to Vespa.\n", + "The vector store takes care of calling the embedding function for each document\n", + "and inserts them into the database.\n", + "\n", + "We can now query the vector store:" + ] }, { "cell_type": "code", @@ -270,6 +270,13 @@ { "cell_type": "code", "execution_count": null, + "id": "a5256284", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "query = \"What did the president say about Ketanji Brown Jackson\"\n", @@ -281,17 +288,17 @@ "\n", "results = db.similarity_search(query)\n", "print(results[0].page_content)" - ], - "metadata": { - 
"collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "id": "a5256284" + ] }, { "cell_type": "markdown", + "id": "2526b50e", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "However, the `pyvespa` library contains methods to manipulate\n", "content on Vespa which you can use directly.\n", @@ -299,18 +306,18 @@ "#### Deleting documents\n", "\n", "You can delete documents using the `delete` function:" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - }, - "id": "2526b50e" + ] }, { "cell_type": "code", "execution_count": null, + "id": "52cab87e", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "result = db.similarity_search(query)\n", @@ -319,17 +326,17 @@ "db.delete([\"32\"])\n", "result = db.similarity_search(query)\n", "# docs[0].metadata[\"id\"] != \"id:testapp:testapp::32\"" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "id": "52cab87e" + ] }, { "cell_type": "markdown", + "id": "deffaba5", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "Again, the `pyvespa` connection contains methods to delete documents as well.\n", "\n", @@ -337,34 +344,34 @@ "\n", "The `similarity_search` method only returns the documents in order of\n", "relevancy. 
To retrieve the actual scores:" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - }, - "id": "deffaba5" + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "results = db.similarity_search_with_score(query)\n", - "result = results[0]\n", - "# result[1] ~= 0.463" - ], + "id": "cd9ae173", "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, - "id": "cd9ae173" + "outputs": [], + "source": [ + "results = db.similarity_search_with_score(query)\n", + "result = results[0]\n", + "# result[1] ~= 0.463" + ] }, { "cell_type": "markdown", + "id": "7257d67a", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "This is a result of using the `\"all-MiniLM-L6-v2\"` embedding model using the\n", "cosine distance function (as given by the argument `angular` in the\n", @@ -382,18 +389,18 @@ "[LangChain retriever](https://python.langchain.com/docs/modules/data_connection/retrievers/)\n", "simply call the `as_retriever` function, which is a standard vector store\n", "method:" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - }, - "id": "7257d67a" + ] }, { "cell_type": "code", "execution_count": null, + "id": "7fb717a9", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "db = VespaStore.from_documents(docs, embedding_function, app=vespa_app, **vespa_config)\n", @@ -402,17 +409,17 @@ "results = retriever.get_relevant_documents(query)\n", "\n", "# results[0].metadata[\"id\"] == \"id:testapp:testapp::32\"" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "id": "7fb717a9" + ] }, { "cell_type": "markdown", + "id": "fba7f07e", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "This allows for more general, unstructured, retrieval from the vector store.\n", "\n", @@ -424,18 +431,18 @@ "\n", 
"Vespa can contain many fields with different types by adding them to the application\n", "package:" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - }, - "id": "fba7f07e" + ] }, { "cell_type": "code", "execution_count": null, + "id": "59cffcf2", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "app_package.schema.add_fields(\n", @@ -446,31 +453,31 @@ " # ...\n", ")\n", "vespa_app = vespa_docker.deploy(application_package=app_package)" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "id": "59cffcf2" + ] }, { "cell_type": "markdown", - "source": [ - "We can add some metadata fields in the documents:" - ], + "id": "eebef70c", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, - "id": "eebef70c" + "source": [ + "We can add some metadata fields in the documents:" + ] }, { "cell_type": "code", "execution_count": null, + "id": "b21efbfa", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "# Add metadata\n", @@ -478,60 +485,60 @@ " doc.metadata[\"date\"] = f\"2023-{(i % 12)+1}-{(i % 28)+1}\"\n", " doc.metadata[\"rating\"] = range(1, 6)[i % 5]\n", " doc.metadata[\"author\"] = [\"Joe Biden\", \"Unknown\"][min(i, 1)]" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "id": "b21efbfa" + ] }, { "cell_type": "markdown", - "source": [ - "And let the Vespa vector store know about these fields:" - ], + "id": "9b42bd4d", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, - "id": "9b42bd4d" + "source": [ + "And let the Vespa vector store know about these fields:" + ] }, { "cell_type": "code", "execution_count": null, + "id": "6bb272f6", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "vespa_config.update(dict(metadata_fields=[\"date\", \"rating\", \"author\"]))" 
- ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "id": "6bb272f6" + ] }, { "cell_type": "markdown", - "source": [ - "Now, when searching for these documents, these fields will be returned.\n", - "Also, these fields can be filtered on:" - ], + "id": "43818655", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, - "id": "43818655" + "source": [ + "Now, when searching for these documents, these fields will be returned.\n", + "Also, these fields can be filtered on:" + ] }, { "cell_type": "code", "execution_count": null, + "id": "831759f3", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "db = VespaStore.from_documents(docs, embedding_function, app=vespa_app, **vespa_config)\n", @@ -539,17 +546,17 @@ "results = db.similarity_search(query, filter=\"rating > 3\")\n", "# results[0].metadata[\"id\"] == \"id:testapp:testapp::34\"\n", "# results[0].metadata[\"author\"] == \"Unknown\"" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "id": "831759f3" + ] }, { "cell_type": "markdown", + "id": "a49aad6e", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Custom query\n", "\n", @@ -559,18 +566,18 @@ "rather just write this yourself.\n", "\n", "First, let's add a BM25 ranking function to our application:" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - }, - "id": "a49aad6e" + ] }, { "cell_type": "code", "execution_count": null, + "id": "d0fb0562", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from vespa.package import FieldSet\n", @@ -579,36 +586,36 @@ "app_package.schema.add_rank_profile(RankProfile(name=\"bm25\", first_phase=\"bm25(text)\"))\n", "vespa_app = vespa_docker.deploy(application_package=app_package)\n", "db = VespaStore.from_documents(docs, embedding_function, 
app=vespa_app, **vespa_config)" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "id": "d0fb0562" + ] }, { "cell_type": "markdown", - "source": [ - "Then, to perform a regular text search based on BM25:" - ], + "id": "fe607747", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, - "id": "fe607747" + "source": [ + "Then, to perform a regular text search based on BM25:" + ] }, { "cell_type": "code", "execution_count": null, + "id": "cee245c3", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "query = \"What did the president say about Ketanji Brown Jackson\"\n", "custom_query = {\n", - " \"yql\": f\"select * from sources * where userQuery()\",\n", + " \"yql\": \"select * from sources * where userQuery()\",\n", " \"query\": query,\n", " \"type\": \"weakAnd\",\n", " \"ranking\": \"bm25\",\n", @@ -617,17 +624,17 @@ "results = db.similarity_search_with_score(query, custom_query=custom_query)\n", "# results[0][0].metadata[\"id\"] == \"id:testapp:testapp::32\"\n", "# results[0][1] ~= 14.384" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "id": "cee245c3" + ] }, { "cell_type": "markdown", + "id": "41a4c081", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "All of the powerful search and query capabilities of Vespa can be used\n", "by using a custom query. Please refer to the Vespa documentation on it's\n", @@ -638,18 +645,18 @@ "Hybrid search means using both a classic term-based search such as\n", "BM25 and a vector search and combining the results. 
We need to create\n", "a new rank profile for hybrid search on Vespa:" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - }, - "id": "41a4c081" + ] }, { "cell_type": "code", "execution_count": null, + "id": "bf73efc1", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "app_package.schema.add_rank_profile(\n", @@ -661,32 +668,32 @@ ")\n", "vespa_app = vespa_docker.deploy(application_package=app_package)\n", "db = VespaStore.from_documents(docs, embedding_function, app=vespa_app, **vespa_config)" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "id": "bf73efc1" + ] }, { "cell_type": "markdown", - "source": [ - "Here, we score each document as a combination of it's BM25 score and its\n", - "distance score. We can query using a custom query:" - ], + "id": "40f48711", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, - "id": "40f48711" + "source": [ + "Here, we score each document as a combination of it's BM25 score and its\n", + "distance score. 
We can query using a custom query:" + ] }, { "cell_type": "code", "execution_count": null, + "id": "d2e289f0", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "query = \"What did the president say about Ketanji Brown Jackson\"\n", @@ -705,17 +712,17 @@ "results = db.similarity_search_with_score(query, custom_query=custom_query)\n", "# results[0][0].metadata[\"id\"], \"id:testapp:testapp::32\")\n", "# results[0][1] ~= 2.897" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "id": "d2e289f0" + ] }, { "cell_type": "markdown", + "id": "958e269f", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Native embedders in Vespa\n", "\n", @@ -728,18 +735,18 @@ "for more information.\n", "\n", "First, we need to modify our application package:" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - }, - "id": "958e269f" + ] }, { "cell_type": "code", "execution_count": null, + "id": "56b9686c", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from vespa.package import Component, Parameter\n", @@ -759,7 +766,7 @@ " type=\"tensor(x[384])\",\n", " is_document_field=False,\n", " indexing=[\"input text\", \"embed hf-embedder\", \"attribute\", \"summary\"],\n", - " attribute=[f\"distance-metric: angular\"],\n", + " attribute=[\"distance-metric: angular\"],\n", ")\n", "app_package.schema.add_rank_profile(\n", " RankProfile(\n", @@ -768,35 +775,35 @@ " inputs=[(\"query(query_embedding)\", \"tensor(x[384])\")],\n", " )\n", ")" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "id": "56b9686c" + ] }, { "cell_type": "markdown", - "source": [ - "Please refer to the embeddings documentation on adding embedder models\n", - "and tokenizers to the application. 
Note that the `hfembedding` field\n", - "includes instructions for embedding using the `hf-embedder`.\n", - "\n", - "Now we can query with a custom query:" - ], + "id": "5cd721a8", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, - "id": "5cd721a8" + "source": [ + "Please refer to the embeddings documentation on adding embedder models\n", + "and tokenizers to the application. Note that the `hfembedding` field\n", + "includes instructions for embedding using the `hf-embedder`.\n", + "\n", + "Now we can query with a custom query:" + ] }, { "cell_type": "code", "execution_count": null, + "id": "da631d13", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "query = \"What did the president say about Ketanji Brown Jackson\"\n", @@ -812,17 +819,17 @@ "results = db.similarity_search_with_score(query, custom_query=custom_query)\n", "# results[0][0].metadata[\"id\"], \"id:testapp:testapp::32\")\n", "# results[0][1] ~= 0.630" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "id": "da631d13" + ] }, { "cell_type": "markdown", + "id": "a333b553", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "Note that the query here includes an `embed` instruction to embed the query\n", "using the same model as for the documents.\n", @@ -836,18 +843,18 @@ "[approximate nearest neighbors](https://docs.vespa.ai/en/approximate-nn-hnsw.html).\n", "\n", "First, we can change the embedding field to create a HNSW index:" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - }, - "id": "a333b553" + ] }, { "cell_type": "code", "execution_count": null, + "id": "9ee955c8", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from vespa.package import HNSW\n", @@ -864,60 +871,53 @@ " ),\n", " )\n", ")" - ], - "metadata": { - "collapsed": false, - "pycharm": { 
- "name": "#%%\n" - } - }, - "id": "9ee955c8" + ] }, { "cell_type": "markdown", - "source": [ - "This creates a HNSW index on the embedding data which allows for efficient\n", - "searching. With this set, we can easily search using ANN by setting\n", - "the `approximate` argument to `True`:" - ], + "id": "2ed1c224", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, - "id": "2ed1c224" + "source": [ + "This creates a HNSW index on the embedding data which allows for efficient\n", + "searching. With this set, we can easily search using ANN by setting\n", + "the `approximate` argument to `True`:" + ] }, { "cell_type": "code", "execution_count": null, + "id": "7981739a", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "query = \"What did the president say about Ketanji Brown Jackson\"\n", "results = db.similarity_search(query, approximate=True)\n", "# results[0][0].metadata[\"id\"], \"id:testapp:testapp::32\")" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - }, - "id": "7981739a" + ] }, { "cell_type": "markdown", - "source": [ - "This covers most of the functionality in the Vespa vector store in LangChain.\n", - "\n" - ], + "id": "24791204", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, - "id": "24791204" + "source": [ + "This covers most of the functionality in the Vespa vector store in LangChain.\n", + "\n" + ] } ], "metadata": { diff --git a/docs/docs/integrations/vectorstores/weaviate.ipynb b/docs/docs/integrations/vectorstores/weaviate.ipynb index dd3d6a2646c..03b9c933c6c 100644 --- a/docs/docs/integrations/vectorstores/weaviate.ipynb +++ b/docs/docs/integrations/vectorstores/weaviate.ipynb @@ -149,12 +149,7 @@ "metadata": {}, "outputs": [], "source": [ - "db = Weaviate.from_documents(\n", - " docs, \n", - " embeddings, \n", - " weaviate_url=WEAVIATE_URL, \n", - " by_text=False\n", - ")" + "db = Weaviate.from_documents(docs, 
embeddings, weaviate_url=WEAVIATE_URL, by_text=False)" ] }, { @@ -227,8 +222,7 @@ "import weaviate\n", "\n", "client = weaviate.Client(\n", - " url=WEAVIATE_URL, \n", - " auth_client_secret=weaviate.AuthApiKey(WEAVIATE_API_KEY)\n", + " url=WEAVIATE_URL, auth_client_secret=weaviate.AuthApiKey(WEAVIATE_API_KEY)\n", ")\n", "\n", "# client = weaviate.Client(\n", @@ -240,10 +234,7 @@ "# )\n", "\n", "vectorstore = Weaviate.from_documents(\n", - " documents, \n", - " embeddings, \n", - " client=client, \n", - " by_text=False\n", + " documents, embeddings, client=client, by_text=False\n", ")" ] }, @@ -378,6 +369,7 @@ ], "source": [ "from langchain.chat_models import ChatOpenAI\n", + "\n", "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", "llm.predict(\"What did the president say about Justice Breyer\")" ] @@ -575,10 +567,10 @@ "from langchain.schema.output_parser import StrOutputParser\n", "\n", "rag_chain = (\n", - " {\"context\": retriever, \"question\": RunnablePassthrough()} \n", - " | prompt \n", + " {\"context\": retriever, \"question\": RunnablePassthrough()}\n", + " | prompt\n", " | llm\n", - " | StrOutputParser() \n", + " | StrOutputParser()\n", ")\n", "\n", "rag_chain.invoke(\"What did the president say about Justice Breyer\")" diff --git a/docs/docs/modules/agents/agent_types/openai_functions_agent.ipynb b/docs/docs/modules/agents/agent_types/openai_functions_agent.ipynb index 0b90ae061fc..bd7decaae56 100644 --- a/docs/docs/modules/agents/agent_types/openai_functions_agent.ipynb +++ b/docs/docs/modules/agents/agent_types/openai_functions_agent.ipynb @@ -44,7 +44,6 @@ "from langchain.agents import initialize_agent, AgentType, Tool\n", "from langchain.chains import LLMMathChain\n", "from langchain.chat_models import ChatOpenAI\n", - "from langchain.llms import OpenAI\n", "from langchain.utilities import SerpAPIWrapper, SQLDatabase\n", "from langchain_experimental.sql import SQLDatabaseChain" ] diff --git 
a/docs/docs/modules/agents/agent_types/structured_chat.ipynb b/docs/docs/modules/agents/agent_types/structured_chat.ipynb index 2ba9ca9b56f..a65b47f3ac3 100644 --- a/docs/docs/modules/agents/agent_types/structured_chat.ipynb +++ b/docs/docs/modules/agents/agent_types/structured_chat.ipynb @@ -43,8 +43,7 @@ "source": [ "from langchain.agents.agent_toolkits import PlayWrightBrowserToolkit\n", "from langchain.tools.playwright.utils import (\n", - " create_async_playwright_browser,\n", - " create_sync_playwright_browser, # A synchronous browser is available, though it isn't compatible with jupyter.\n", + " create_async_playwright_browser, # A synchronous browser is available, though it isn't compatible with jupyter.\n", ")\n", "\n", "# This import is required only for jupyter notebooks, since they have their own eventloop\n", diff --git a/docs/docs/modules/agents/how_to/add_memory_openai_functions.ipynb b/docs/docs/modules/agents/how_to/add_memory_openai_functions.ipynb index 6530f477e5c..9c20536c2d3 100644 --- a/docs/docs/modules/agents/how_to/add_memory_openai_functions.ipynb +++ b/docs/docs/modules/agents/how_to/add_memory_openai_functions.ipynb @@ -18,7 +18,6 @@ "outputs": [], "source": [ "from langchain.chains import LLMMathChain\n", - "from langchain.llms import OpenAI\n", "from langchain.utilities import SerpAPIWrapper\n", "from langchain.utilities import SQLDatabase\n", "from langchain_experimental.sql import SQLDatabaseChain\n", diff --git a/docs/docs/modules/agents/how_to/agent_iter.ipynb b/docs/docs/modules/agents/how_to/agent_iter.ipynb index 280e73eabcf..89b3c952530 100644 --- a/docs/docs/modules/agents/how_to/agent_iter.ipynb +++ b/docs/docs/modules/agents/how_to/agent_iter.ipynb @@ -22,12 +22,8 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", - "\n", - "import dotenv\n", "import pydantic\n", - "from langchain.agents import AgentExecutor, initialize_agent, AgentType\n", - "from langchain.schema import AgentFinish\n", + "from langchain.agents 
import initialize_agent, AgentType\n", "from langchain.agents.tools import Tool\n", "from langchain.chains import LLMMathChain\n", "from langchain.chat_models import ChatOpenAI" diff --git a/docs/docs/modules/agents/how_to/agent_vectorstore.ipynb b/docs/docs/modules/agents/how_to/agent_vectorstore.ipynb index 611d61e70da..072e7bcf172 100644 --- a/docs/docs/modules/agents/how_to/agent_vectorstore.ipynb +++ b/docs/docs/modules/agents/how_to/agent_vectorstore.ipynb @@ -164,9 +164,7 @@ "# Import things that are needed generically\n", "from langchain.agents import initialize_agent, Tool\n", "from langchain.agents import AgentType\n", - "from langchain.tools import BaseTool\n", - "from langchain.llms import OpenAI\n", - "from langchain.chains import LLMMathChain\nfrom langchain.utilities import SerpAPIWrapper" + "from langchain.llms import OpenAI" ] }, { diff --git a/docs/docs/modules/agents/how_to/async_agent.ipynb b/docs/docs/modules/agents/how_to/async_agent.ipynb index ac9d0695fcc..dbbf456108b 100644 --- a/docs/docs/modules/agents/how_to/async_agent.ipynb +++ b/docs/docs/modules/agents/how_to/async_agent.ipynb @@ -45,9 +45,6 @@ "from langchain.agents import initialize_agent, load_tools\n", "from langchain.agents import AgentType\n", "from langchain.llms import OpenAI\n", - "from langchain.callbacks.stdout import StdOutCallbackHandler\n", - "from langchain.callbacks.tracers import LangChainTracer\n", - "from aiohttp import ClientSession\n", "\n", "questions = [\n", " \"Who won the US Open men's final in 2019? 
What is his age raised to the 0.334 power?\",\n", diff --git a/docs/docs/modules/agents/how_to/chatgpt_clone.ipynb b/docs/docs/modules/agents/how_to/chatgpt_clone.ipynb index 009ff2eb961..5dbf7db338c 100644 --- a/docs/docs/modules/agents/how_to/chatgpt_clone.ipynb +++ b/docs/docs/modules/agents/how_to/chatgpt_clone.ipynb @@ -47,7 +47,9 @@ } ], "source": [ - "from langchain.llms import OpenAI\nfrom langchain.chains import ConversationChain, LLMChain\nfrom langchain.prompts import PromptTemplate\n", + "from langchain.llms import OpenAI\n", + "from langchain.chains import LLMChain\n", + "from langchain.prompts import PromptTemplate\n", "from langchain.memory import ConversationBufferWindowMemory\n", "\n", "\n", diff --git a/docs/docs/modules/agents/how_to/custom_agent.ipynb b/docs/docs/modules/agents/how_to/custom_agent.ipynb index 071deb1486d..77f64f3047f 100644 --- a/docs/docs/modules/agents/how_to/custom_agent.ipynb +++ b/docs/docs/modules/agents/how_to/custom_agent.ipynb @@ -26,7 +26,7 @@ "outputs": [], "source": [ "from langchain.agents import Tool, AgentExecutor, BaseSingleActionAgent\n", - "from langchain.llms import OpenAI\nfrom langchain.utilities import SerpAPIWrapper" + "from langchain.utilities import SerpAPIWrapper" ] }, { diff --git a/docs/docs/modules/agents/how_to/custom_agent_with_tool_retrieval.ipynb b/docs/docs/modules/agents/how_to/custom_agent_with_tool_retrieval.ipynb index 8f3a0b4c6f2..f533d121f2d 100644 --- a/docs/docs/modules/agents/how_to/custom_agent_with_tool_retrieval.ipynb +++ b/docs/docs/modules/agents/how_to/custom_agent_with_tool_retrieval.ipynb @@ -38,8 +38,10 @@ " AgentOutputParser,\n", ")\n", "from langchain.prompts import StringPromptTemplate\n", - "from langchain.llms import OpenAI\nfrom langchain.utilities import SerpAPIWrapper\nfrom langchain.chains import LLMChain\n", - "from typing import List, Union\n", + "from langchain.llms import OpenAI\n", + "from langchain.utilities import SerpAPIWrapper\n", + "from langchain.chains 
import LLMChain\n", + "from typing import Union\n", "from langchain.schema import AgentAction, AgentFinish\n", "import re" ] diff --git a/docs/docs/modules/agents/how_to/custom_multi_action_agent.ipynb b/docs/docs/modules/agents/how_to/custom_multi_action_agent.ipynb index 11c204cce7e..7b4486815e8 100644 --- a/docs/docs/modules/agents/how_to/custom_multi_action_agent.ipynb +++ b/docs/docs/modules/agents/how_to/custom_multi_action_agent.ipynb @@ -26,7 +26,7 @@ "outputs": [], "source": [ "from langchain.agents import Tool, AgentExecutor, BaseMultiActionAgent\n", - "from langchain.llms import OpenAI\nfrom langchain.utilities import SerpAPIWrapper" + "from langchain.utilities import SerpAPIWrapper" ] }, { diff --git a/docs/docs/modules/agents/how_to/handle_parsing_errors.ipynb b/docs/docs/modules/agents/how_to/handle_parsing_errors.ipynb index 3d5fb1b2dcd..c54a65ddeab 100644 --- a/docs/docs/modules/agents/how_to/handle_parsing_errors.ipynb +++ b/docs/docs/modules/agents/how_to/handle_parsing_errors.ipynb @@ -25,15 +25,10 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.llms import OpenAI\n", - "from langchain.chains import LLMMathChain\n", "from langchain.utilities import SerpAPIWrapper\n", - "from langchain.utilities import SQLDatabase\n", - "from langchain_experimental.sql import SQLDatabaseChain\n", "from langchain.agents import initialize_agent, Tool\n", "from langchain.agents import AgentType\n", - "from langchain.chat_models import ChatOpenAI\n", - "from langchain.agents.types import AGENT_TO_CLASS" + "from langchain.chat_models import ChatOpenAI" ] }, { diff --git a/docs/docs/modules/agents/how_to/max_iterations.ipynb b/docs/docs/modules/agents/how_to/max_iterations.ipynb index 1fbae75b4b3..b1c02b1aebd 100644 --- a/docs/docs/modules/agents/how_to/max_iterations.ipynb +++ b/docs/docs/modules/agents/how_to/max_iterations.ipynb @@ -17,7 +17,6 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.agents import load_tools\n", "from 
langchain.agents import initialize_agent, Tool\n", "from langchain.agents import AgentType\n", "from langchain.llms import OpenAI" diff --git a/docs/docs/modules/agents/how_to/max_time_limit.ipynb b/docs/docs/modules/agents/how_to/max_time_limit.ipynb index 84534659460..234bd8c48e6 100644 --- a/docs/docs/modules/agents/how_to/max_time_limit.ipynb +++ b/docs/docs/modules/agents/how_to/max_time_limit.ipynb @@ -17,7 +17,6 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.agents import load_tools\n", "from langchain.agents import initialize_agent, Tool\n", "from langchain.agents import AgentType\n", "from langchain.llms import OpenAI" diff --git a/docs/docs/modules/agents/how_to/use_toolkits_with_openai_functions.ipynb b/docs/docs/modules/agents/how_to/use_toolkits_with_openai_functions.ipynb index 60a0442fdef..def4333b308 100644 --- a/docs/docs/modules/agents/how_to/use_toolkits_with_openai_functions.ipynb +++ b/docs/docs/modules/agents/how_to/use_toolkits_with_openai_functions.ipynb @@ -17,12 +17,8 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.chains import LLMMathChain\n", - "from langchain.llms import OpenAI\n", - "from langchain.utilities import SerpAPIWrapper\n", "from langchain.utilities import SQLDatabase\n", - "from langchain_experimental.sql import SQLDatabaseChain\n", - "from langchain.agents import initialize_agent, Tool\n", + "from langchain.agents import initialize_agent\n", "from langchain.agents import AgentType\n", "from langchain.chat_models import ChatOpenAI\n", "from langchain.agents.agent_toolkits import SQLDatabaseToolkit\n", diff --git a/docs/docs/modules/agents/index.ipynb b/docs/docs/modules/agents/index.ipynb index 0fe978bd0a0..ec321e6a883 100644 --- a/docs/docs/modules/agents/index.ipynb +++ b/docs/docs/modules/agents/index.ipynb @@ -198,6 +198,7 @@ "source": [ "from langchain.agents import tool\n", "\n", + "\n", "@tool\n", "def get_word_length(word: str) -> int:\n", " \"\"\"Returns the length of a 
word.\"\"\"\n", @@ -606,10 +607,12 @@ "source": [ "input1 = \"how many letters in the word educa?\"\n", "result = agent_executor.invoke({\"input\": input1, \"chat_history\": chat_history})\n", - "chat_history.extend([\n", - " HumanMessage(content=input1),\n", - " AIMessage(content=result[\"output\"]),\n", - "])\n", + "chat_history.extend(\n", + " [\n", + " HumanMessage(content=input1),\n", + " AIMessage(content=result[\"output\"]),\n", + " ]\n", + ")\n", "agent_executor.invoke({\"input\": \"is that a real word?\", \"chat_history\": chat_history})" ] }, diff --git a/docs/docs/modules/agents/tools/custom_tools.ipynb b/docs/docs/modules/agents/tools/custom_tools.ipynb index a7b4fb3e5c4..aa491c269f7 100644 --- a/docs/docs/modules/agents/tools/custom_tools.ipynb +++ b/docs/docs/modules/agents/tools/custom_tools.ipynb @@ -105,7 +105,7 @@ " Tool.from_function(\n", " func=search.run,\n", " name=\"Search\",\n", - " description=\"useful for when you need to answer questions about current events\"\n", + " description=\"useful for when you need to answer questions about current events\",\n", " # coroutine= ... <- you can specify an async method if desired as well\n", " ),\n", "]" @@ -139,7 +139,7 @@ " func=llm_math_chain.run,\n", " name=\"Calculator\",\n", " description=\"useful for when you need to answer questions about math\",\n", - " args_schema=CalculatorInput\n", + " args_schema=CalculatorInput,\n", " # coroutine= ... 
<- you can specify an async method if desired as well\n", " )\n", ")" diff --git a/docs/docs/modules/callbacks/multiple_callbacks.ipynb b/docs/docs/modules/callbacks/multiple_callbacks.ipynb index dda74647bb2..12207a3abf8 100644 --- a/docs/docs/modules/callbacks/multiple_callbacks.ipynb +++ b/docs/docs/modules/callbacks/multiple_callbacks.ipynb @@ -129,7 +129,6 @@ "from langchain.callbacks.base import BaseCallbackHandler\n", "from langchain.schema import AgentAction\n", "from langchain.agents import AgentType, initialize_agent, load_tools\n", - "from langchain.callbacks import tracing_enabled\n", "from langchain.llms import OpenAI\n", "\n", "\n", diff --git a/docs/docs/modules/chains/document/map_reduce.ipynb b/docs/docs/modules/chains/document/map_reduce.ipynb index d186993c188..2214fa75c44 100644 --- a/docs/docs/modules/chains/document/map_reduce.ipynb +++ b/docs/docs/modules/chains/document/map_reduce.ipynb @@ -31,7 +31,6 @@ "source": [ "from functools import partial\n", "\n", - "from langchain.callbacks.manager import CallbackManagerForChainRun\n", "from langchain.chains.combine_documents import collapse_docs, split_list_of_docs\n", "from langchain.chat_models import ChatAnthropic\n", "from langchain.prompts import PromptTemplate\n", diff --git a/docs/docs/modules/data_connection/document_transformers/post_retrieval/long_context_reorder.ipynb b/docs/docs/modules/data_connection/document_transformers/post_retrieval/long_context_reorder.ipynb index 7a67dd3c5ff..8d0e74db68d 100644 --- a/docs/docs/modules/data_connection/document_transformers/post_retrieval/long_context_reorder.ipynb +++ b/docs/docs/modules/data_connection/document_transformers/post_retrieval/long_context_reorder.ipynb @@ -42,8 +42,6 @@ } ], "source": [ - "import os\n", - "import chromadb\n", "from langchain.vectorstores import Chroma\n", "from langchain.embeddings import HuggingFaceEmbeddings\n", "from langchain.document_transformers import (\n", diff --git 
a/docs/docs/modules/data_connection/retrievers/self_query.ipynb b/docs/docs/modules/data_connection/retrievers/self_query.ipynb index c043a4eced5..f2c8fa17b56 100644 --- a/docs/docs/modules/data_connection/retrievers/self_query.ipynb +++ b/docs/docs/modules/data_connection/retrievers/self_query.ipynb @@ -28,7 +28,7 @@ "metadata": {}, "outputs": [], "source": [ - "# !pip install lark chromadb" + "# !pip install lark chromadb\n" ] }, { @@ -68,7 +68,6 @@ " page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n", " metadata={\n", " \"year\": 1979,\n", - " \"rating\": 9.9,\n", " \"director\": \"Andrei Tarkovsky\",\n", " \"genre\": \"thriller\",\n", " \"rating\": 9.9,\n", diff --git a/docs/docs/modules/data_connection/text_embedding/caching_embeddings.ipynb b/docs/docs/modules/data_connection/text_embedding/caching_embeddings.ipynb index 34498fd422f..94de55231ee 100644 --- a/docs/docs/modules/data_connection/text_embedding/caching_embeddings.ipynb +++ b/docs/docs/modules/data_connection/text_embedding/caching_embeddings.ipynb @@ -58,8 +58,7 @@ "source": [ "from langchain.document_loaders import TextLoader\n", "from langchain.embeddings.openai import OpenAIEmbeddings\n", - "from langchain.text_splitter import CharacterTextSplitter\n", - "from langchain.vectorstores import FAISS" + "from langchain.text_splitter import CharacterTextSplitter" ] }, { diff --git a/docs/docs/modules/memory/adding_memory_chain_multiple_inputs.ipynb b/docs/docs/modules/memory/adding_memory_chain_multiple_inputs.ipynb index d7fff83f8e5..62eb1eb678b 100644 --- a/docs/docs/modules/memory/adding_memory_chain_multiple_inputs.ipynb +++ b/docs/docs/modules/memory/adding_memory_chain_multiple_inputs.ipynb @@ -18,11 +18,8 @@ "outputs": [], "source": [ "from langchain.embeddings.openai import OpenAIEmbeddings\n", - "from langchain.embeddings.cohere import CohereEmbeddings\n", "from langchain.text_splitter import CharacterTextSplitter\n", - "from 
langchain.vectorstores.elastic_vector_search import ElasticVectorSearch\n", - "from langchain.vectorstores import Chroma\n", - "from langchain.docstore.document import Document" + "from langchain.vectorstores import Chroma" ] }, { diff --git a/docs/docs/modules/memory/agent_with_memory_in_db.ipynb b/docs/docs/modules/memory/agent_with_memory_in_db.ipynb index b1aae1b92e7..eec24d7af9f 100644 --- a/docs/docs/modules/memory/agent_with_memory_in_db.ipynb +++ b/docs/docs/modules/memory/agent_with_memory_in_db.ipynb @@ -35,9 +35,9 @@ "source": [ "from langchain.agents import ZeroShotAgent, Tool, AgentExecutor\n", "from langchain.memory import ConversationBufferMemory\n", - "from langchain.memory.chat_memory import ChatMessageHistory\n", "from langchain.memory.chat_message_histories import RedisChatMessageHistory\n", - "from langchain.llms import OpenAI\nfrom langchain.chains import LLMChain\n", + "from langchain.llms import OpenAI\n", + "from langchain.chains import LLMChain\n", "from langchain.utilities import GoogleSearchAPIWrapper" ] }, diff --git a/docs/docs/modules/model_io/chat/index.ipynb b/docs/docs/modules/model_io/chat/index.ipynb index b7b3fc658b4..74e826ae919 100644 --- a/docs/docs/modules/model_io/chat/index.ipynb +++ b/docs/docs/modules/model_io/chat/index.ipynb @@ -635,7 +635,7 @@ } ], "source": [ - "from langchain.schema import AIMessage, HumanMessage, SystemMessage\n", + "from langchain.schema import HumanMessage, SystemMessage\n", "\n", "chat(\n", " [\n", diff --git a/docs/docs/modules/model_io/output_parsers/index.ipynb b/docs/docs/modules/model_io/output_parsers/index.ipynb index 9118e2acb56..1ff426c6e2f 100644 --- a/docs/docs/modules/model_io/output_parsers/index.ipynb +++ b/docs/docs/modules/model_io/output_parsers/index.ipynb @@ -50,8 +50,6 @@ } ], "source": [ - "from typing import List\n", - "\n", "from langchain.llms import OpenAI\n", "from langchain.output_parsers import PydanticOutputParser\n", "from langchain.prompts import PromptTemplate\n", 
diff --git a/docs/docs/modules/model_io/output_parsers/retry.ipynb b/docs/docs/modules/model_io/output_parsers/retry.ipynb index 383b3eb0691..d9910142a14 100644 --- a/docs/docs/modules/model_io/output_parsers/retry.ipynb +++ b/docs/docs/modules/model_io/output_parsers/retry.ipynb @@ -19,18 +19,14 @@ "source": [ "from langchain.prompts import (\n", " PromptTemplate,\n", - " ChatPromptTemplate,\n", - " HumanMessagePromptTemplate,\n", ")\n", "from langchain.llms import OpenAI\n", "from langchain.chat_models import ChatOpenAI\n", "from langchain.output_parsers import (\n", " PydanticOutputParser,\n", " OutputFixingParser,\n", - " RetryOutputParser,\n", ")\n", - "from pydantic import BaseModel, Field, validator\n", - "from typing import List" + "from pydantic import BaseModel, Field" ] }, { diff --git a/docs/docs/modules/model_io/prompts/prompt_templates/connecting_to_a_feature_store.ipynb b/docs/docs/modules/model_io/prompts/prompt_templates/connecting_to_a_feature_store.ipynb index e4626a9d1d7..23bea0f672c 100644 --- a/docs/docs/modules/model_io/prompts/prompt_templates/connecting_to_a_feature_store.ipynb +++ b/docs/docs/modules/model_io/prompts/prompt_templates/connecting_to_a_feature_store.ipynb @@ -669,7 +669,7 @@ " subscription_id: str,\n", " resource_group: str,\n", " feature_store_name: str,\n", - " **kwargs\n", + " **kwargs,\n", " ):\n", " # this is an example template for proof of concept and can be changed to suit the developer needs\n", " template = \"\"\"\n", diff --git a/docs/docs/modules/model_io/prompts/prompt_templates/prompts_pipelining.ipynb b/docs/docs/modules/model_io/prompts/prompt_templates/prompts_pipelining.ipynb index 92765069992..2ad420b8b10 100644 --- a/docs/docs/modules/model_io/prompts/prompt_templates/prompts_pipelining.ipynb +++ b/docs/docs/modules/model_io/prompts/prompt_templates/prompts_pipelining.ipynb @@ -169,7 +169,6 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.prompts import ChatPromptTemplate, 
HumanMessagePromptTemplate\n", "from langchain.schema import HumanMessage, AIMessage, SystemMessage" ] }, diff --git a/docs/docs/use_cases/chatbots.ipynb b/docs/docs/use_cases/chatbots.ipynb index 2ac41956c23..5a5e1cc704d 100644 --- a/docs/docs/use_cases/chatbots.ipynb +++ b/docs/docs/use_cases/chatbots.ipynb @@ -94,7 +94,7 @@ } ], "source": [ - "from langchain.schema import AIMessage, HumanMessage, SystemMessage\n", + "from langchain.schema import HumanMessage, SystemMessage\n", "from langchain.chat_models import ChatOpenAI\n", "\n", "chat = ChatOpenAI()\n", diff --git a/docs/docs/use_cases/data_generation.ipynb b/docs/docs/use_cases/data_generation.ipynb index 7ffc49a5501..ded0e8196e9 100644 --- a/docs/docs/use_cases/data_generation.ipynb +++ b/docs/docs/use_cases/data_generation.ipynb @@ -67,7 +67,6 @@ "from langchain.prompts import FewShotPromptTemplate, PromptTemplate\n", "from langchain.chat_models import ChatOpenAI\n", "from langchain.pydantic_v1 import BaseModel\n", - "from langchain_experimental.tabular_synthetic_data.base import SyntheticDataGenerator\n", "from langchain_experimental.tabular_synthetic_data.openai import (\n", " create_openai_data_generator,\n", " OPENAI_TEMPLATE,\n", @@ -492,7 +491,7 @@ "from langchain.llms import OpenAI\n", "from langchain.prompts import PromptTemplate\n", "from langchain.output_parsers import PydanticOutputParser\n", - "from langchain.chains import create_extraction_chain_pydantic, SimpleSequentialChain\n", + "from langchain.chains import create_extraction_chain_pydantic\n", "from pydantic import BaseModel, Field\n", "from typing import List" ] diff --git a/docs/docs/use_cases/extraction.ipynb b/docs/docs/use_cases/extraction.ipynb index 143ce5c265e..0d720e4c6a1 100644 --- a/docs/docs/use_cases/extraction.ipynb +++ b/docs/docs/use_cases/extraction.ipynb @@ -448,8 +448,6 @@ "from typing import Sequence, Optional\n", "from langchain.prompts import (\n", " PromptTemplate,\n", - " ChatPromptTemplate,\n", - " 
HumanMessagePromptTemplate,\n", ")\n", "from langchain.llms import OpenAI\n", "from pydantic import BaseModel, Field, validator\n", @@ -529,8 +527,6 @@ "source": [ "from langchain.prompts import (\n", " PromptTemplate,\n", - " ChatPromptTemplate,\n", - " HumanMessagePromptTemplate,\n", ")\n", "from langchain.llms import OpenAI\n", "from pydantic import BaseModel, Field, validator\n", diff --git a/docs/docs/use_cases/graph/graph_networkx_qa.ipynb b/docs/docs/use_cases/graph/graph_networkx_qa.ipynb index 9aa650898ff..ad64042d3fc 100644 --- a/docs/docs/use_cases/graph/graph_networkx_qa.ipynb +++ b/docs/docs/use_cases/graph/graph_networkx_qa.ipynb @@ -28,8 +28,7 @@ "outputs": [], "source": [ "from langchain.indexes import GraphIndexCreator\n", - "from langchain.llms import OpenAI\n", - "from langchain.document_loaders import TextLoader" + "from langchain.llms import OpenAI" ] }, { diff --git a/docs/docs/use_cases/qa_structured/sql.ipynb b/docs/docs/use_cases/qa_structured/sql.ipynb index 4b6833b74ef..d9947df0bc7 100644 --- a/docs/docs/use_cases/qa_structured/sql.ipynb +++ b/docs/docs/use_cases/qa_structured/sql.ipynb @@ -7,7 +7,7 @@ "---\n", "title: SQL\n", "sidebar_position: 2\n", - "---" + "---\n" ] }, { @@ -754,18 +754,19 @@ "metadata": {}, "outputs": [], "source": [ - "# few_shots = {'List all artists.': 'SELECT * FROM artists;',\n", - "# \"Find all albums for the artist 'AC/DC'.\": \"SELECT * FROM albums WHERE ArtistId = (SELECT ArtistId FROM artists WHERE Name = 'AC/DC');\",\n", - "# \"List all tracks in the 'Rock' genre.\": \"SELECT * FROM tracks WHERE GenreId = (SELECT GenreId FROM genres WHERE Name = 'Rock');\",\n", - "# 'Find the total duration of all tracks.': 'SELECT SUM(Milliseconds) FROM tracks;',\n", - "# 'List all customers from Canada.': \"SELECT * FROM customers WHERE Country = 'Canada';\",\n", - "# 'How many tracks are there in the album with ID 5?': 'SELECT COUNT(*) FROM tracks WHERE AlbumId = 5;',\n", - "# 'Find the total number of invoices.': 
'SELECT COUNT(*) FROM invoices;',\n", - "# 'List all tracks that are longer than 5 minutes.': 'SELECT * FROM tracks WHERE Milliseconds > 300000;',\n", - "# 'Who are the top 5 customers by total purchase?': 'SELECT CustomerId, SUM(Total) AS TotalPurchase FROM invoices GROUP BY CustomerId ORDER BY TotalPurchase DESC LIMIT 5;',\n", - "# 'Which albums are from the year 2000?': \"SELECT * FROM albums WHERE strftime('%Y', ReleaseDate) = '2000';\",\n", - "# 'How many employees are there': 'SELECT COUNT(*) FROM \"employee\"'\n", - "# }" + "few_shots = {\n", + " \"List all artists.\": \"SELECT * FROM artists;\",\n", + " \"Find all albums for the artist 'AC/DC'.\": \"SELECT * FROM albums WHERE ArtistId = (SELECT ArtistId FROM artists WHERE Name = 'AC/DC');\",\n", + " \"List all tracks in the 'Rock' genre.\": \"SELECT * FROM tracks WHERE GenreId = (SELECT GenreId FROM genres WHERE Name = 'Rock');\",\n", + " \"Find the total duration of all tracks.\": \"SELECT SUM(Milliseconds) FROM tracks;\",\n", + " \"List all customers from Canada.\": \"SELECT * FROM customers WHERE Country = 'Canada';\",\n", + " \"How many tracks are there in the album with ID 5?\": \"SELECT COUNT(*) FROM tracks WHERE AlbumId = 5;\",\n", + " \"Find the total number of invoices.\": \"SELECT COUNT(*) FROM invoices;\",\n", + " \"List all tracks that are longer than 5 minutes.\": \"SELECT * FROM tracks WHERE Milliseconds > 300000;\",\n", + " \"Who are the top 5 customers by total purchase?\": \"SELECT CustomerId, SUM(Total) AS TotalPurchase FROM invoices GROUP BY CustomerId ORDER BY TotalPurchase DESC LIMIT 5;\",\n", + " \"Which albums are from the year 2000?\": \"SELECT * FROM albums WHERE strftime('%Y', ReleaseDate) = '2000';\",\n", + " \"How many employees are there\": 'SELECT COUNT(*) FROM \"employee\"',\n", + "}" ] }, { @@ -1196,7 +1197,7 @@ "# {\"firstname\": \"Jennifer\",\"lastname\":\"Takeda\"},\n", "# ]\n", "# for i, customer in enumerate(customers):\n", - "# db.create(index=\"customers\", 
document=customer, id=i)" + "# db.create(index=\"customers\", document=customer, id=i)\n" ] }, { @@ -1232,7 +1233,6 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.chains.elasticsearch_database.prompts import DEFAULT_DSL_TEMPLATE\n", "from langchain.prompts.prompt import PromptTemplate\n", "\n", "PROMPT_TEMPLATE = \"\"\"Given an input question, create a syntactically correct Elasticsearch query to run. Unless the user specifies in their question a specific number of examples they wish to obtain, always limit your query to at most {top_k} results. You can order the results by a relevant column to return the most interesting examples in the database.\n", diff --git a/docs/docs/use_cases/question_answering/code_understanding.ipynb b/docs/docs/use_cases/question_answering/code_understanding.ipynb index 569a704f971..d90d73586fe 100644 --- a/docs/docs/use_cases/question_answering/code_understanding.ipynb +++ b/docs/docs/use_cases/question_answering/code_understanding.ipynb @@ -367,7 +367,6 @@ "source": [ "from langchain.llms import LlamaCpp\n", "from langchain.prompts import PromptTemplate\n", - "from langchain.chains import LLMChain\n", "from langchain.callbacks.manager import CallbackManager\n", "from langchain.memory import ConversationSummaryMemory\n", "from langchain.chains import ConversationalRetrievalChain\n", diff --git a/docs/docs/use_cases/question_answering/vector_db_text_generation.ipynb b/docs/docs/use_cases/question_answering/vector_db_text_generation.ipynb index 20a26290e13..89f2246eedf 100644 --- a/docs/docs/use_cases/question_answering/vector_db_text_generation.ipynb +++ b/docs/docs/use_cases/question_answering/vector_db_text_generation.ipynb @@ -26,7 +26,6 @@ "source": [ "from langchain.llms import OpenAI\n", "from langchain.docstore.document import Document\n", - "import requests\n", "from langchain.embeddings.openai import OpenAIEmbeddings\n", "from langchain.vectorstores import Chroma\n", "from langchain.text_splitter import 
CharacterTextSplitter\n", diff --git a/docs/docs/use_cases/summarization.ipynb b/docs/docs/use_cases/summarization.ipynb index 3d108d47469..724c66360e1 100644 --- a/docs/docs/use_cases/summarization.ipynb +++ b/docs/docs/use_cases/summarization.ipynb @@ -298,7 +298,6 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.chains.mapreduce import MapReduceChain\n", "from langchain.text_splitter import CharacterTextSplitter\n", "from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain\n", "\n", diff --git a/docs/docs/use_cases/tagging.ipynb b/docs/docs/use_cases/tagging.ipynb index db12093db54..2c927cf37bc 100644 --- a/docs/docs/use_cases/tagging.ipynb +++ b/docs/docs/use_cases/tagging.ipynb @@ -64,7 +64,6 @@ "outputs": [], "source": [ "from langchain.chat_models import ChatOpenAI\n", - "from langchain.prompts import ChatPromptTemplate\n", "from langchain.chains import create_tagging_chain, create_tagging_chain_pydantic" ] }, @@ -324,7 +323,6 @@ "metadata": {}, "outputs": [], "source": [ - "from enum import Enum\n", "from pydantic import BaseModel, Field" ] }, diff --git a/docs/scripts/copy_templates.py b/docs/scripts/copy_templates.py index 11120d4d0fa..e1f1d6140e1 100644 --- a/docs/scripts/copy_templates.py +++ b/docs/scripts/copy_templates.py @@ -10,7 +10,7 @@ DOCS_TEMPLATES_DIR = Path(os.path.abspath(__file__)).parents[1] / "docs" / "temp readmes = list(glob.glob(str(TEMPLATES_DIR) + "/*/README.md")) -destinations = [readme[len(str(TEMPLATES_DIR)) + 1:-10] + ".md" for readme in readmes] +destinations = [readme[len(str(TEMPLATES_DIR)) + 1 : -10] + ".md" for readme in readmes] for source, destination in zip(readmes, destinations): full_destination = DOCS_TEMPLATES_DIR / destination shutil.copyfile(source, full_destination) @@ -33,4 +33,3 @@ with open(TEMPLATES_INDEX_DESTINATION, "r") as f: content = re.sub("\]\(\.\.\/", "](/docs/templates/", content) with open(TEMPLATES_INDEX_DESTINATION, "w") as f: f.write(sidebar_hidden + content) - diff 
--git a/poetry.lock b/poetry.lock index e186fc60df7..27fdc72ad8e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1627,7 +1627,7 @@ files = [ [[package]] name = "langchain" -version = "0.0.327" +version = "0.0.335" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -1640,7 +1640,7 @@ anyio = "<4.0" async-timeout = {version = "^4.0.0", markers = "python_version < \"3.11\""} dataclasses-json = ">= 0.5.7, < 0.7" jsonpatch = "^1.33" -langsmith = "~0.0.52" +langsmith = "~0.0.63" numpy = "^1" pydantic = ">=1,<3" PyYAML = ">=5.3" @@ -1649,7 +1649,7 @@ SQLAlchemy = ">=1.4,<3" tenacity = "^8.1.0" [package.extras] -all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.8.3,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.10.1,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<4)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (>=0,<1)", 
"openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.6.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"] +all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "amadeus (>=8.1.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.9,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (>=9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=4,<5)", "deeplake (>=3.8.3,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.6,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "librosa (>=0.10.0.post2,<0.11.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "marqo (>=1.2.4,<2.0.0)", "momento (>=1.13.0,<2.0.0)", "nebula3-python 
(>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<4)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "python-arango (>=7.5.9,<8.0.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.3.1,<2.0.0)", "rdflib (>=6.3.2,<7.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.6.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"] azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b8)", "openai (>=0,<1)"] clarifai = ["clarifai (>=9.1.0)"] cli = ["typer (>=0.9.0,<0.10.0)"] @@ -1669,13 +1669,13 @@ url = "libs/langchain" [[package]] name = "langsmith" -version = "0.0.53" +version = "0.0.63" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." 
optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langsmith-0.0.53-py3-none-any.whl", hash = "sha256:a090b1c7d7968fb8d2476ddd608a5171f0e812a82b1bca29ca136cdea375a74e"}, - {file = "langsmith-0.0.53.tar.gz", hash = "sha256:a426a1d39843207a5dd3d72787b5304376541eb818509ee7909bbb696b072488"}, + {file = "langsmith-0.0.63-py3-none-any.whl", hash = "sha256:43a521dd10d8405ac21a0b959e3de33e2270e4abe6c73cc4036232a6990a0793"}, + {file = "langsmith-0.0.63.tar.gz", hash = "sha256:ddb2dfadfad3e05151ed8ba1643d1c516024b80fbd0c6263024400ced06a3768"}, ] [package.dependencies] @@ -3093,28 +3093,28 @@ files = [ [[package]] name = "ruff" -version = "0.1.3" -description = "An extremely fast Python linter, written in Rust." +version = "0.1.5" +description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.1.3-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:b46d43d51f7061652eeadb426a9e3caa1e0002470229ab2fc19de8a7b0766901"}, - {file = "ruff-0.1.3-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:b8afeb9abd26b4029c72adc9921b8363374f4e7edb78385ffaa80278313a15f9"}, - {file = "ruff-0.1.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca3cf365bf32e9ba7e6db3f48a4d3e2c446cd19ebee04f05338bc3910114528b"}, - {file = "ruff-0.1.3-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4874c165f96c14a00590dcc727a04dca0cfd110334c24b039458c06cf78a672e"}, - {file = "ruff-0.1.3-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eec2dd31eed114e48ea42dbffc443e9b7221976554a504767ceaee3dd38edeb8"}, - {file = "ruff-0.1.3-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:dc3ec4edb3b73f21b4aa51337e16674c752f1d76a4a543af56d7d04e97769613"}, - {file = "ruff-0.1.3-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:2e3de9ed2e39160800281848ff4670e1698037ca039bda7b9274f849258d26ce"}, - {file = "ruff-0.1.3-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c595193881922cc0556a90f3af99b1c5681f0c552e7a2a189956141d8666fe8"}, - {file = "ruff-0.1.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f75e670d529aa2288cd00fc0e9b9287603d95e1536d7a7e0cafe00f75e0dd9d"}, - {file = "ruff-0.1.3-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:76dd49f6cd945d82d9d4a9a6622c54a994689d8d7b22fa1322983389b4892e20"}, - {file = "ruff-0.1.3-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:918b454bc4f8874a616f0d725590277c42949431ceb303950e87fef7a7d94cb3"}, - {file = "ruff-0.1.3-py3-none-musllinux_1_2_i686.whl", hash = "sha256:d8859605e729cd5e53aa38275568dbbdb4fe882d2ea2714c5453b678dca83784"}, - {file = "ruff-0.1.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:0b6c55f5ef8d9dd05b230bb6ab80bc4381ecb60ae56db0330f660ea240cb0d4a"}, - {file = "ruff-0.1.3-py3-none-win32.whl", hash = "sha256:3e7afcbdcfbe3399c34e0f6370c30f6e529193c731b885316c5a09c9e4317eef"}, - {file = "ruff-0.1.3-py3-none-win_amd64.whl", hash = "sha256:7a18df6638cec4a5bd75350639b2bb2a2366e01222825562c7346674bdceb7ea"}, - {file = "ruff-0.1.3-py3-none-win_arm64.whl", hash = "sha256:12fd53696c83a194a2db7f9a46337ce06445fb9aa7d25ea6f293cf75b21aca9f"}, - {file = "ruff-0.1.3.tar.gz", hash = "sha256:3ba6145369a151401d5db79f0a47d50e470384d0d89d0d6f7fab0b589ad07c34"}, + {file = "ruff-0.1.5-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:32d47fc69261c21a4c48916f16ca272bf2f273eb635d91c65d5cd548bf1f3d96"}, + {file = "ruff-0.1.5-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:171276c1df6c07fa0597fb946139ced1c2978f4f0b8254f201281729981f3c17"}, + {file = "ruff-0.1.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17ef33cd0bb7316ca65649fc748acc1406dfa4da96a3d0cde6d52f2e866c7b39"}, + {file = 
"ruff-0.1.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b2c205827b3f8c13b4a432e9585750b93fd907986fe1aec62b2a02cf4401eee6"}, + {file = "ruff-0.1.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bb408e3a2ad8f6881d0f2e7ad70cddb3ed9f200eb3517a91a245bbe27101d379"}, + {file = "ruff-0.1.5-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:f20dc5e5905ddb407060ca27267c7174f532375c08076d1a953cf7bb016f5a24"}, + {file = "ruff-0.1.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aafb9d2b671ed934998e881e2c0f5845a4295e84e719359c71c39a5363cccc91"}, + {file = "ruff-0.1.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a4894dddb476597a0ba4473d72a23151b8b3b0b5f958f2cf4d3f1c572cdb7af7"}, + {file = "ruff-0.1.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a00a7ec893f665ed60008c70fe9eeb58d210e6b4d83ec6654a9904871f982a2a"}, + {file = "ruff-0.1.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a8c11206b47f283cbda399a654fd0178d7a389e631f19f51da15cbe631480c5b"}, + {file = "ruff-0.1.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:fa29e67b3284b9a79b1a85ee66e293a94ac6b7bb068b307a8a373c3d343aa8ec"}, + {file = "ruff-0.1.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:9b97fd6da44d6cceb188147b68db69a5741fbc736465b5cea3928fdac0bc1aeb"}, + {file = "ruff-0.1.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:721f4b9d3b4161df8dc9f09aa8562e39d14e55a4dbaa451a8e55bdc9590e20f4"}, + {file = "ruff-0.1.5-py3-none-win32.whl", hash = "sha256:f80c73bba6bc69e4fdc73b3991db0b546ce641bdcd5b07210b8ad6f64c79f1ab"}, + {file = "ruff-0.1.5-py3-none-win_amd64.whl", hash = "sha256:c21fe20ee7d76206d290a76271c1af7a5096bc4c73ab9383ed2ad35f852a0087"}, + {file = "ruff-0.1.5-py3-none-win_arm64.whl", hash = "sha256:82bfcb9927e88c1ed50f49ac6c9728dab3ea451212693fe40d08d314663e412f"}, + {file = "ruff-0.1.5.tar.gz", hash = 
"sha256:5cbec0ef2ae1748fb194f420fb03fb2c25c3258c86129af7172ff8f198f125ab"}, ] [[package]] @@ -3895,4 +3895,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "2e33ec084f387f65ff58572cd90a3f5ec838d1e39eb05b622911c8e78b830945" +content-hash = "92d5bf0c2eb9ba31a2f618f1e8ef52587711274c9a64505a14d35f04b3830d3a" diff --git a/pyproject.toml b/pyproject.toml index 5d0662f7da3..fe0925580d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,8 +29,7 @@ sphinx-copybutton = "^0.5.1" nbdoc = "^0.0.82" [tool.poetry.group.lint.dependencies] -ruff = "^0.1.3" -black = { version = "23.10.1", extras = ["jupyter"] } +ruff = "^0.1.5" [tool.poetry.group.codespell.dependencies] codespell = "^2.2.0" @@ -52,3 +51,63 @@ ignore-regex = '.*(Stati Uniti|Tense=Pres).*' # aapply - async apply # unsecure - typo but part of API, decided to not bother for now ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogyny,unsecure,damon,crate,aadd,symbl,precesses,accademia,nin' + +[tool.ruff] +extend-include = ["*.ipynb"] + +[tool.ruff.lint.per-file-ignores] +"**/{cookbook,docs}/*" = [ + "E402", # allow imports to appear anywhere in docs + "F401", # allow "imported but unused" example code + "F811", # allow re-importing the same module, so that cells can stay independent + "F841", # allow assignments to variables that are never read -- it's example code +] + +# These files were failing the listed rules at the time ruff was adopted for notebooks. +# Don't require them to change at once, though we should look into them eventually. 
+"cookbook/gymnasium_agent_simulation.ipynb" = ["F821"] +"cookbook/multi_modal_output_agent.ipynb" = ["F821"] +"cookbook/multi_modal_RAG_chroma.ipynb" = ["F821"] +"cookbook/qianfan_baidu_elasticesearch_RAG.ipynb" = ["F821"] +"cookbook/retrieval_in_sql.ipynb" = ["F821"] +"cookbook/wikibase_agent.ipynb" = ["E722"] +"docs/docs/expression_language/how_to/configure.ipynb" = ["F821"] +"docs/docs/expression_language/how_to/fallbacks.ipynb" = ["E722"] +"docs/docs/guides/fallbacks.ipynb" = ["E722"] +"docs/docs/integrations/chat_loaders/imessage.ipynb" = ["F821"] +"docs/docs/integrations/chat_loaders/langsmith_dataset.ipynb" = ["F821"] +"docs/docs/integrations/chat/google_vertex_ai_palm.ipynb" = ["F821"] +"docs/docs/integrations/chat/promptlayer_chatopenai.ipynb" = ["F821"] +"docs/docs/integrations/document_loaders/arcgis.ipynb" = ["F821"] +"docs/docs/integrations/document_loaders/datadog_logs.ipynb" = ["F821"] +"docs/docs/integrations/document_loaders/embaas.ipynb" = ["F821"] +"docs/docs/integrations/document_loaders/etherscan.ipynb" = ["F821"] +"docs/docs/integrations/document_loaders/larksuite.ipynb" = ["F821"] +"docs/docs/integrations/document_loaders/tensorflow_datasets.ipynb" = ["F821"] +"docs/docs/integrations/llms/anyscale.ipynb" = ["F821"] +"docs/docs/integrations/llms/bittensor.ipynb" = ["F821"] +"docs/docs/integrations/llms/databricks.ipynb" = ["F821"] +"docs/docs/integrations/llms/llm_caching.ipynb" = ["F821"] +"docs/docs/integrations/llms/runhouse.ipynb" = ["F821"] +"docs/docs/integrations/retrievers/Activeloop DeepMemory+LangChain.ipynb" = ["F821"] +"docs/docs/integrations/text_embedding/cohere.ipynb" = ["F821"] +"docs/docs/integrations/text_embedding/elasticsearch.ipynb" = ["F821"] +"docs/docs/integrations/text_embedding/embaas.ipynb" = ["F821"] +"docs/docs/integrations/text_embedding/jina.ipynb" = ["F821"] +"docs/docs/integrations/text_embedding/localai.ipynb" = ["F821"] +"docs/docs/integrations/text_embedding/openai.ipynb" = ["F821"] 
+"docs/docs/integrations/tools/dalle_image_generator.ipynb" = ["E722"] +"docs/docs/integrations/tools/gradio_tools.ipynb" = ["F821"] +"docs/docs/integrations/vectorstores/async_faiss.ipynb" = ["F821"] +"docs/docs/integrations/vectorstores/awadb.ipynb" = ["F821"] +"docs/docs/integrations/vectorstores/baiducloud_vector_search.ipynb" = ["F821"] +"docs/docs/integrations/vectorstores/faiss.ipynb" = ["F821"] +"docs/docs/integrations/vectorstores/mongodb_atlas.ipynb" = ["F821"] +"docs/docs/integrations/vectorstores/nucliadb.ipynb" = ["F821"] +"docs/docs/integrations/vectorstores/opensearch.ipynb" = ["F821"] +"docs/docs/modules/agents/agent_types/chat_conversation_agent.ipynb" = ["F821"] +"docs/docs/modules/chains/how_to/call_methods.ipynb" = ["F821"] +"docs/docs/modules/data_connection/retrievers/ensemble.ipynb" = ["F821"] +"docs/docs/modules/data_connection/retrievers/multi_vector.ipynb" = ["E741"] +"docs/docs/modules/data_connection/retrievers/parent_document_retriever.ipynb" = ["E741"] +"docs/docs/modules/data_connection/text_embedding/caching_embeddings.ipynb" = ["F821"]