From eae358810b4f115150dc6004d57a36fd4ed4a902 Mon Sep 17 00:00:00 2001 From: Mark Kretschmann Date: Sat, 26 Nov 2022 17:34:16 +0100 Subject: [PATCH 01/24] Fix Unicode error on Windows (Issue #200) (#203) Fix Unicode error on Windows during setup, while trying to read contents of README.md. (Issue #200) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index fbc129daebe..99bebc37c8b 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ from setuptools import find_packages, setup with open(Path(__file__).absolute().parents[0] / "langchain" / "VERSION") as _f: __version__ = _f.read().strip() -with open("README.md", "r") as f: +with open("README.md", "r", encoding="utf-8") as f: long_description = f.read() LLM_DEPENDENCIES = ["cohere", "openai", "nlpcloud", "huggingface_hub"] From 287f1857ee153f700c32fe528c98ac403b223098 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Sat, 26 Nov 2022 15:15:43 -0800 Subject: [PATCH 02/24] fix self ask w search (#206) --- langchain/agents/self_ask_with_search/base.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/langchain/agents/self_ask_with_search/base.py b/langchain/agents/self_ask_with_search/base.py index 4a50ad416d6..ce4590a2dfb 100644 --- a/langchain/agents/self_ask_with_search/base.py +++ b/langchain/agents/self_ask_with_search/base.py @@ -1,5 +1,5 @@ """Chain that does self ask with search.""" -from typing import Any, ClassVar, List, Tuple +from typing import Any, ClassVar, List, Optional, Tuple from langchain.agents.agent import Agent from langchain.agents.self_ask_with_search.prompt import PROMPT @@ -25,7 +25,7 @@ class SelfAskWithSearchAgent(Agent): f"Tool name should be Intermediate Answer, got {tool_names}" ) - def _extract_tool_and_input(self, text: str) -> Tuple[str, str]: + def _extract_tool_and_input(self, text: str) -> Optional[Tuple[str, str]]: followup = "Follow up:" if "\n" not in text: last_line = text @@ -35,8 +35,8 @@ class SelfAskWithSearchAgent(Agent): if followup not in last_line: finish_string = "So the final answer is: " if finish_string not in last_line: - raise ValueError("We should probably never get here") - return "Final Answer", text[len(finish_string) :] + return None + return "Final Answer", last_line[len(finish_string) :] if ":" not in last_line: after_colon = last_line @@ -50,6 +50,9 @@ class SelfAskWithSearchAgent(Agent): return "Intermediate Answer", after_colon + def _fix_text(self, text: str) -> str: + return text + "\nSo the final answer is:" + @property def observation_prefix(self) -> str: """Prefix to append the observation with.""" From d0415952f71f74c14e10979aa363d9025be0a3a3 Mon Sep 17 00:00:00 2001 From: Dillon Chen <5334557+dillchen@users.noreply.github.com> Date: Sat, 26 Nov 2022 23:21:42 -0500 Subject: [PATCH 03/24] Update README.md memory now added as a feature (#208) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d7eb1f5614a..b8e5503b00a 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ These are, in increasing order of complexity: 1. LLM and Prompts 2. Chains 3. Agents -4. (Coming Soon) Memory +4. Memory Let's go through these categories and for each one identify key concepts (to clarify terminology) as well as the problems in this area LangChain helps solve. 
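A short aside on patch 01 above: Python's open() falls back to the platform's preferred locale encoding when no encoding is given, and on many Windows installations that is a legacy code page such as cp1252 rather than UTF-8. Reading a UTF-8 README.md that contains non-ASCII characters (curly quotes, emoji, accented names) can therefore raise UnicodeDecodeError during pip install. The snippet below is a minimal, standalone sketch of that failure mode; it is not part of the patch series, and the sample string is invented for illustration.

# Sketch of the bug patch 01 guards against. The UTF-8 bytes for a right
# curly quote include 0x9d, which has no mapping in cp1252.
data = "curly quotes: \u201cREADME\u201d".encode("utf-8")

try:
    # Roughly what a bare open(...).read() does under a cp1252 locale:
    data.decode("cp1252")
except UnicodeDecodeError as err:
    print("locale decode fails:", err)

# What open("README.md", "r", encoding="utf-8") does after the fix:
print(data.decode("utf-8"))

Passing encoding="utf-8" explicitly makes the read behave identically on every platform, which is why the one-line change in setup.py is sufficient.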
From b90e25f786af40e38fa795f4202c925af9190444 Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Sun, 27 Nov 2022 00:24:59 -0800 Subject: [PATCH 04/24] Add HuggingFace Hub Embeddings (#125) Add support for calling HuggingFace embedding models using the HuggingFaceHub Inference API. New class mirrors the existing HuggingFaceHub LLM implementation. Currently only supports 'sentence-transformers' models. Closes #86 --- langchain/embeddings/__init__.py | 8 +- langchain/embeddings/huggingface.py | 6 +- langchain/embeddings/huggingface_hub.py | 105 ++++++++++++++++++ langchain/llms/huggingface_hub.py | 2 +- .../embeddings/test_huggingface_hub.py | 19 ++++ tests/unit_tests/embeddings/__init__.py | 1 + .../embeddings/test_huggingface_hub.py | 11 ++ 7 files changed, 148 insertions(+), 4 deletions(-) create mode 100644 langchain/embeddings/huggingface_hub.py create mode 100644 tests/integration_tests/embeddings/test_huggingface_hub.py create mode 100644 tests/unit_tests/embeddings/__init__.py create mode 100644 tests/unit_tests/embeddings/test_huggingface_hub.py diff --git a/langchain/embeddings/__init__.py b/langchain/embeddings/__init__.py index 1967f6f3347..6a57deb1364 100644 --- a/langchain/embeddings/__init__.py +++ b/langchain/embeddings/__init__.py @@ -1,6 +1,12 @@ """Wrappers around embedding modules.""" from langchain.embeddings.cohere import CohereEmbeddings from langchain.embeddings.huggingface import HuggingFaceEmbeddings +from langchain.embeddings.huggingface_hub import HuggingFaceHubEmbeddings from langchain.embeddings.openai import OpenAIEmbeddings -__all__ = ["OpenAIEmbeddings", "HuggingFaceEmbeddings", "CohereEmbeddings"] +__all__ = [ + "OpenAIEmbeddings", + "HuggingFaceEmbeddings", + "CohereEmbeddings", + "HuggingFaceHubEmbeddings", +] diff --git a/langchain/embeddings/huggingface.py b/langchain/embeddings/huggingface.py index a32aef6561a..8d5d817b8a3 100644 --- a/langchain/embeddings/huggingface.py +++ b/langchain/embeddings/huggingface.py @@ -5,6 +5,8 @@ from pydantic import BaseModel, Extra from langchain.embeddings.base import Embeddings +DEFAULT_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2" + class HuggingFaceEmbeddings(BaseModel, Embeddings): """Wrapper around sentence_transformers embedding models. @@ -16,11 +18,11 @@ class HuggingFaceEmbeddings(BaseModel, Embeddings): from langchain.embeddings import HuggingFaceEmbeddings model_name = "sentence-transformers/all-mpnet-base-v2" - huggingface = HuggingFaceEmbeddings(model_name=model_name) + hf = HuggingFaceEmbeddings(model_name=model_name) """ client: Any #: :meta private: - model_name: str = "sentence-transformers/all-mpnet-base-v2" + model_name: str = DEFAULT_MODEL_NAME """Model name to use.""" def __init__(self, **kwargs: Any): diff --git a/langchain/embeddings/huggingface_hub.py b/langchain/embeddings/huggingface_hub.py new file mode 100644 index 00000000000..66c662f0554 --- /dev/null +++ b/langchain/embeddings/huggingface_hub.py @@ -0,0 +1,105 @@ +"""Wrapper around HuggingFace Hub embedding models.""" +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Extra, root_validator + +from langchain.embeddings.base import Embeddings +from langchain.utils import get_from_dict_or_env + +DEFAULT_REPO_ID = "sentence-transformers/all-mpnet-base-v2" +VALID_TASKS = ("feature-extraction",) + + +class HuggingFaceHubEmbeddings(BaseModel, Embeddings): + """Wrapper around HuggingFaceHub embedding models. 
+ + To use, you should have the ``huggingface_hub`` python package installed, and the + environment variable ``HUGGINGFACEHUB_API_TOKEN`` set with your API token, or pass + it as a named parameter to the constructor. + + Example: + .. code-block:: python + + from langchain.embeddings import HuggingFaceHubEmbeddings + repo_id = "sentence-transformers/all-mpnet-base-v2" + hf = HuggingFaceHubEmbeddings( + repo_id=repo_id, + task="feature-extraction", + huggingfacehub_api_token="my-api-key", + ) + """ + + client: Any #: :meta private: + repo_id: str = DEFAULT_REPO_ID + """Model name to use.""" + task: Optional[str] = "feature-extraction" + """Task to call the model with.""" + model_kwargs: Optional[dict] = None + """Key word arguments to pass to the model.""" + + huggingfacehub_api_token: Optional[str] = None + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + huggingfacehub_api_token = get_from_dict_or_env( + values, "huggingfacehub_api_token", "HUGGINGFACEHUB_API_TOKEN" + ) + try: + from huggingface_hub.inference_api import InferenceApi + + repo_id = values["repo_id"] + if not repo_id.startswith("sentence-transformers"): + raise ValueError( + "Currently only 'sentence-transformers' embedding models " + f"are supported. Got invalid 'repo_id' {repo_id}." + ) + client = InferenceApi( + repo_id=repo_id, + token=huggingfacehub_api_token, + task=values.get("task"), + ) + if client.task not in VALID_TASKS: + raise ValueError( + f"Got invalid task {client.task}, " + f"currently only {VALID_TASKS} are supported" + ) + values["client"] = client + except ImportError: + raise ValueError( + "Could not import huggingface_hub python package. " + "Please it install it with `pip install huggingface_hub`." + ) + return values + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Call out to HuggingFaceHub's embedding endpoint for embedding search docs. + + Args: + texts: The list of texts to embed. + + Returns: + List of embeddings, one for each text. + """ + # replace newlines, which can negatively affect performance. + texts = [text.replace("\n", " ") for text in texts] + _model_kwargs = self.model_kwargs or {} + responses = self.client(inputs=texts, params=_model_kwargs) + return responses + + def embed_query(self, text: str) -> List[float]: + """Call out to HuggingFaceHub's embedding endpoint for embedding query text. + + Args: + text: The text to embed. + + Returns: + Embeddings for the text. 
+ """ + response = self.embed_documents([text])[0] + return response diff --git a/langchain/llms/huggingface_hub.py b/langchain/llms/huggingface_hub.py index c67c9720a4e..5cded677376 100644 --- a/langchain/llms/huggingface_hub.py +++ b/langchain/llms/huggingface_hub.py @@ -51,7 +51,7 @@ class HuggingFaceHub(LLM, BaseModel): try: from huggingface_hub.inference_api import InferenceApi - repo_id = values.get("repo_id", DEFAULT_REPO_ID) + repo_id = values["repo_id"] client = InferenceApi( repo_id=repo_id, token=huggingfacehub_api_token, diff --git a/tests/integration_tests/embeddings/test_huggingface_hub.py b/tests/integration_tests/embeddings/test_huggingface_hub.py new file mode 100644 index 00000000000..ed57bcccd8a --- /dev/null +++ b/tests/integration_tests/embeddings/test_huggingface_hub.py @@ -0,0 +1,19 @@ +"""Test HuggingFaceHub embeddings.""" +from langchain.embeddings import HuggingFaceHubEmbeddings + + +def test_huggingfacehub_embedding_documents() -> None: + """Test huggingfacehub embeddings.""" + documents = ["foo bar"] + embedding = HuggingFaceHubEmbeddings() + output = embedding.embed_documents(documents) + assert len(output) == 1 + assert len(output[0]) == 768 + + +def test_huggingfacehub_embedding_query() -> None: + """Test huggingfacehub embeddings.""" + document = "foo bar" + embedding = HuggingFaceHubEmbeddings() + output = embedding.embed_query(document) + assert len(output) == 768 diff --git a/tests/unit_tests/embeddings/__init__.py b/tests/unit_tests/embeddings/__init__.py new file mode 100644 index 00000000000..9aaef73a027 --- /dev/null +++ b/tests/unit_tests/embeddings/__init__.py @@ -0,0 +1 @@ +"""All unit tests for Embeddings objects.""" diff --git a/tests/unit_tests/embeddings/test_huggingface_hub.py b/tests/unit_tests/embeddings/test_huggingface_hub.py new file mode 100644 index 00000000000..ecc1fbfbe21 --- /dev/null +++ b/tests/unit_tests/embeddings/test_huggingface_hub.py @@ -0,0 +1,11 @@ +"""Test HuggingFaceHub embeddings.""" +import pytest + +from langchain.embeddings import HuggingFaceHubEmbeddings + + +def test_huggingfacehub_embedding_invalid_repo() -> None: + """Test huggingfacehub embedding repo id validation.""" + # Only sentence-transformers models are currently supported. + with pytest.raises(ValueError): + HuggingFaceHubEmbeddings(repo_id="allenai/specter") From ae72cf84b872394c2c81b971f7d35c6b56d02104 Mon Sep 17 00:00:00 2001 From: Akash Samant <70665700+asamant21@users.noreply.github.com> Date: Sun, 27 Nov 2022 09:10:35 -0800 Subject: [PATCH 05/24] Save Prompts (#194) --- langchain/prompts/base.py | 41 +++++++++++++++++++++++- langchain/prompts/few_shot.py | 9 ++++++ tests/unit_tests/prompts/test_loading.py | 28 ++++++++++++++++ 3 files changed, 77 insertions(+), 1 deletion(-) diff --git a/langchain/prompts/base.py b/langchain/prompts/base.py index b44fec9a641..5090acc313f 100644 --- a/langchain/prompts/base.py +++ b/langchain/prompts/base.py @@ -1,7 +1,10 @@ """BasePrompt schema definition.""" +import json from abc import ABC, abstractmethod -from typing import Any, Dict, List +from pathlib import Path +from typing import Any, Dict, List, Union +import yaml from pydantic import BaseModel, root_validator from langchain.formatting import formatter @@ -61,3 +64,39 @@ class BasePromptTemplate(BaseModel, ABC): prompt.format(variable1="foo") """ + + def _prompt_dict(self) -> Dict: + """Return a dictionary of the prompt.""" + return self.dict() + + def save(self, file_path: Union[Path, str]) -> None: + """Save the prompt. 
+ + Args: + file_path: Path to directory to save prompt to. + + Example: + .. code-block:: python + + prompt.save(file_path="path/prompt.yaml") + """ + # Convert file to Path object. + if isinstance(file_path, str): + save_path = Path(file_path) + else: + save_path = file_path + + directory_path = save_path.parent + directory_path.mkdir(parents=True, exist_ok=True) + + # Fetch dictionary to save + prompt_dict = self._prompt_dict() + + if save_path.suffix == ".json": + with open(file_path, "w") as f: + f.write(json.dumps(prompt_dict, indent=4)) + elif save_path.suffix == ".yaml": + with open(file_path, "w") as f: + yaml.dump(prompt_dict, f, default_flow_style=False) + else: + raise ValueError(f"{save_path} must be json or yaml") diff --git a/langchain/prompts/few_shot.py b/langchain/prompts/few_shot.py index a73c1256cab..98dffd6578f 100644 --- a/langchain/prompts/few_shot.py +++ b/langchain/prompts/few_shot.py @@ -108,3 +108,12 @@ class FewShotPromptTemplate(BasePromptTemplate, BaseModel): template = self.example_separator.join([piece for piece in pieces if piece]) # Format the template with the input variables. return DEFAULT_FORMATTER_MAPPING[self.template_format](template, **kwargs) + + def _prompt_dict(self) -> Dict: + """Return a dictionary of the prompt.""" + if self.example_selector: + raise ValueError("Saving an example selector is not currently supported") + + prompt_dict = self.dict() + prompt_dict["_type"] = "few_shot" + return prompt_dict diff --git a/tests/unit_tests/prompts/test_loading.py b/tests/unit_tests/prompts/test_loading.py index 6af7f24c9c6..11dbec971cf 100644 --- a/tests/unit_tests/prompts/test_loading.py +++ b/tests/unit_tests/prompts/test_loading.py @@ -43,6 +43,34 @@ def test_loading_from_JSON() -> None: assert prompt == expected_prompt +def test_saving_loading_round_trip(tmp_path: Path) -> None: + """Test equality when saving and loading a prompt.""" + simple_prompt = PromptTemplate( + input_variables=["adjective", "content"], + template="Tell me a {adjective} joke about {content}.", + ) + simple_prompt.save(file_path=tmp_path / "prompt.yaml") + loaded_prompt = load_prompt(tmp_path / "prompt.yaml") + assert loaded_prompt == simple_prompt + + few_shot_prompt = FewShotPromptTemplate( + input_variables=["adjective"], + prefix="Write antonyms for the following words.", + example_prompt=PromptTemplate( + input_variables=["input", "output"], + template="Input: {input}\nOutput: {output}", + ), + examples=[ + {"input": "happy", "output": "sad"}, + {"input": "tall", "output": "short"}, + ], + suffix="Input: {adjective}\nOutput:", + ) + few_shot_prompt.save(file_path=tmp_path / "few_shot.yaml") + loaded_prompt = load_prompt(tmp_path / "few_shot.yaml") + assert loaded_prompt == few_shot_prompt + + def test_loading_with_template_as_file() -> None: """Test loading when the template is a file.""" with change_directory(): From b94244eb12d8a46c0c01d24ae59f4834b33ebbc6 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Sun, 27 Nov 2022 13:03:09 -0800 Subject: [PATCH 06/24] nits (#210) use json.dump move test to integration tests (since it requires huggingface_hub) --- langchain/prompts/base.py | 2 +- .../embeddings/test_huggingface_hub.py | 9 +++++++++ tests/unit_tests/embeddings/__init__.py | 1 - tests/unit_tests/embeddings/test_huggingface_hub.py | 11 ----------- 4 files changed, 10 insertions(+), 13 deletions(-) delete mode 100644 tests/unit_tests/embeddings/__init__.py delete mode 100644 tests/unit_tests/embeddings/test_huggingface_hub.py diff --git 
a/langchain/prompts/base.py b/langchain/prompts/base.py index 5090acc313f..26b6cc2311e 100644 --- a/langchain/prompts/base.py +++ b/langchain/prompts/base.py @@ -94,7 +94,7 @@ class BasePromptTemplate(BaseModel, ABC): if save_path.suffix == ".json": with open(file_path, "w") as f: - f.write(json.dumps(prompt_dict, indent=4)) + json.dump(prompt_dict, f, indent=4) elif save_path.suffix == ".yaml": with open(file_path, "w") as f: yaml.dump(prompt_dict, f, default_flow_style=False) diff --git a/tests/integration_tests/embeddings/test_huggingface_hub.py b/tests/integration_tests/embeddings/test_huggingface_hub.py index ed57bcccd8a..42dd55dbe63 100644 --- a/tests/integration_tests/embeddings/test_huggingface_hub.py +++ b/tests/integration_tests/embeddings/test_huggingface_hub.py @@ -1,4 +1,6 @@ """Test HuggingFaceHub embeddings.""" +import pytest + from langchain.embeddings import HuggingFaceHubEmbeddings @@ -17,3 +19,10 @@ def test_huggingfacehub_embedding_query() -> None: embedding = HuggingFaceHubEmbeddings() output = embedding.embed_query(document) assert len(output) == 768 + + +def test_huggingfacehub_embedding_invalid_repo() -> None: + """Test huggingfacehub embedding repo id validation.""" + # Only sentence-transformers models are currently supported. + with pytest.raises(ValueError): + HuggingFaceHubEmbeddings(repo_id="allenai/specter") diff --git a/tests/unit_tests/embeddings/__init__.py b/tests/unit_tests/embeddings/__init__.py deleted file mode 100644 index 9aaef73a027..00000000000 --- a/tests/unit_tests/embeddings/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""All unit tests for Embeddings objects.""" diff --git a/tests/unit_tests/embeddings/test_huggingface_hub.py b/tests/unit_tests/embeddings/test_huggingface_hub.py deleted file mode 100644 index ecc1fbfbe21..00000000000 --- a/tests/unit_tests/embeddings/test_huggingface_hub.py +++ /dev/null @@ -1,11 +0,0 @@ -"""Test HuggingFaceHub embeddings.""" -import pytest - -from langchain.embeddings import HuggingFaceHubEmbeddings - - -def test_huggingfacehub_embedding_invalid_repo() -> None: - """Test huggingfacehub embedding repo id validation.""" - # Only sentence-transformers models are currently supported. 
- with pytest.raises(ValueError): - HuggingFaceHubEmbeddings(repo_id="allenai/specter") From 261029cef3e7c30277027f5d5283b87197eab520 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Sun, 27 Nov 2022 19:51:11 -0800 Subject: [PATCH 07/24] bump version to 0.0.23 (#211) --- langchain/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langchain/VERSION b/langchain/VERSION index 818944f5b82..df5db66fed3 100644 --- a/langchain/VERSION +++ b/langchain/VERSION @@ -1 +1 @@ -0.0.22 +0.0.23 From a39c9983421b099c7479395604fed701cf31e96e Mon Sep 17 00:00:00 2001 From: Hansen Qian Date: Mon, 28 Nov 2022 14:11:30 -0500 Subject: [PATCH 08/24] Add chain name to verbose logging (#214) Adds some context over what chain is running, thereby making it more obvious how different chains are entered and existed Screen Shot 2022-11-28 at 11 55 34 AM (note that the `...` is because the output is too long and VSCode truncated it) --- langchain/chains/base.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/langchain/chains/base.py b/langchain/chains/base.py index 8e8005e7740..529e86dda41 100644 --- a/langchain/chains/base.py +++ b/langchain/chains/base.py @@ -81,10 +81,12 @@ class Chain(BaseModel, ABC): inputs = dict(inputs, **external_context) self._validate_inputs(inputs) if self.verbose: - print("\n\n\033[1m> Entering new chain...\033[0m") + print( + f"\n\n\033[1m> Entering new {self.__class__.__name__} chain...\033[0m" + ) outputs = self._call(inputs) if self.verbose: - print("\n\033[1m> Finished chain.\033[0m") + print(f"\n\033[1m> Finished {self.__class__.__name__} chain.\033[0m") self._validate_outputs(outputs) if self.memory is not None: self.memory.save_context(inputs, outputs) From cf3569fb1be7935dcdc4d3b0db53bf26f7dcbfaa Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Mon, 28 Nov 2022 17:27:11 -0800 Subject: [PATCH 09/24] remove check (#217) doesnt do much --- langchain/agents/self_ask_with_search/base.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/langchain/agents/self_ask_with_search/base.py b/langchain/agents/self_ask_with_search/base.py index ce4590a2dfb..ab0b6c62cfa 100644 --- a/langchain/agents/self_ask_with_search/base.py +++ b/langchain/agents/self_ask_with_search/base.py @@ -45,8 +45,6 @@ class SelfAskWithSearchAgent(Agent): if " " == after_colon[0]: after_colon = after_colon[1:] - if "?" != after_colon[-1]: - print("we probably should never get here..." + text) return "Intermediate Answer", after_colon From 03c71402285103c7b0cee06568f2577edc01734c Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Mon, 28 Nov 2022 17:27:26 -0800 Subject: [PATCH 10/24] fix self ask template (#216) --- docs/examples/agents/self_ask_with_search.ipynb | 6 +++++- langchain/agents/self_ask_with_search/prompt.py | 8 ++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/docs/examples/agents/self_ask_with_search.ipynb b/docs/examples/agents/self_ask_with_search.ipynb index aa3ab58eabf..7be3b59fdca 100644 --- a/docs/examples/agents/self_ask_with_search.ipynb +++ b/docs/examples/agents/self_ask_with_search.ipynb @@ -20,13 +20,17 @@ "name": "stdout", "output_type": "stream", "text": [ + "\n", + "\n", + "\u001b[1m> Entering new SelfAskWithSearchAgent chain...\u001b[0m\n", "What is the hometown of the reigning men's U.S. Open champion?\n", "Are follow up questions needed here:\u001b[32;1m\u001b[1;3m Yes.\n", "Follow up: Who is the reigning men's U.S. 
Open champion?\u001b[0m\n", "Intermediate answer: \u001b[36;1m\u001b[1;3mCarlos Alcaraz\u001b[0m\n", "\u001b[32;1m\u001b[1;3mFollow up: Where is Carlos Alcaraz from?\u001b[0m\n", "Intermediate answer: \u001b[36;1m\u001b[1;3mEl Palmar, Spain\u001b[0m\n", - "\u001b[32;1m\u001b[1;3mSo the final answer is: El Palmar, Spain\u001b[0m" + "\u001b[32;1m\u001b[1;3mSo the final answer is: El Palmar, Spain\u001b[0m\n", + "\u001b[1m> Finished SelfAskWithSearchAgent chain.\u001b[0m\n" ] }, { diff --git a/langchain/agents/self_ask_with_search/prompt.py b/langchain/agents/self_ask_with_search/prompt.py index f9d0c5b6d68..0a387457d2c 100644 --- a/langchain/agents/self_ask_with_search/prompt.py +++ b/langchain/agents/self_ask_with_search/prompt.py @@ -28,13 +28,13 @@ So the final answer is: Joseph Ball Question: Are both the directors of Jaws and Casino Royale from the same country? Are follow up questions needed here: Yes. Follow up: Who is the director of Jaws? -Intermediate Answer: The director of Jaws is Steven Spielberg. +Intermediate answer: The director of Jaws is Steven Spielberg. Follow up: Where is Steven Spielberg from? -Intermediate Answer: The United States. +Intermediate answer: The United States. Follow up: Who is the director of Casino Royale? -Intermediate Answer: The director of Casino Royale is Martin Campbell. +Intermediate answer: The director of Casino Royale is Martin Campbell. Follow up: Where is Martin Campbell from? -Intermediate Answer: New Zealand. +Intermediate answer: New Zealand. So the final answer is: No Question: {input}""" From 05689981666971e67e8b809cc34b84ec5c60792d Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Mon, 28 Nov 2022 18:03:04 -0800 Subject: [PATCH 11/24] Harrison/fix react stateful (#219) fix issue with react being stateful --- langchain/agents/agent.py | 5 +++++ langchain/agents/react/base.py | 3 +++ 2 files changed, 8 insertions(+) diff --git a/langchain/agents/agent.py b/langchain/agents/agent.py index a3aebe2fdb5..eda22924c28 100644 --- a/langchain/agents/agent.py +++ b/langchain/agents/agent.py @@ -87,6 +87,9 @@ class Agent(Chain, BaseModel, ABC): """Create a prompt for this class.""" return cls.prompt + def _prepare_for_new_call(self) -> None: + pass + @classmethod def from_llm_and_tools(cls, llm: LLM, tools: List[Tool], **kwargs: Any) -> "Agent": """Construct an agent from an LLM and tools.""" @@ -119,6 +122,8 @@ class Agent(Chain, BaseModel, ABC): def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: """Run text through and get agent response.""" text = inputs[self.input_key] + # Do any preparation necessary when receiving a new input. + self._prepare_for_new_call() # Construct a mapping of tool name to tool for easy lookup name_to_tool_map = {tool.name: tool.func for tool in self.tools} # Construct the initial string to pass into the LLM. 
This is made up diff --git a/langchain/agents/react/base.py b/langchain/agents/react/base.py index bb5dac044b1..a9f1d4cae63 100644 --- a/langchain/agents/react/base.py +++ b/langchain/agents/react/base.py @@ -31,6 +31,9 @@ class ReActDocstoreAgent(Agent, BaseModel): f"Tool names should be Lookup and Search, got {tool_names}" ) + def _prepare_for_new_call(self) -> None: + self.i = 1 + def _fix_text(self, text: str) -> str: return text + f"\nAction {self.i}:" From de4b255c1f35e55663377afdfeebce2654e8c835 Mon Sep 17 00:00:00 2001 From: Shyamal H Anadkat Date: Mon, 28 Nov 2022 18:03:34 -0800 Subject: [PATCH 12/24] Switch default openai model to text-davinci-003 (#215) --- langchain/llms/openai.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/langchain/llms/openai.py b/langchain/llms/openai.py index c80bcbc0190..879dafe8673 100644 --- a/langchain/llms/openai.py +++ b/langchain/llms/openai.py @@ -20,11 +20,11 @@ class OpenAI(LLM, BaseModel): .. code-block:: python from langchain import OpenAI - openai = OpenAI(model="text-davinci-002") + openai = OpenAI(model="text-davinci-003") """ client: Any #: :meta private: - model_name: str = "text-davinci-002" + model_name: str = "text-davinci-003" """Model name to use.""" temperature: float = 0.7 """What sampling temperature to use.""" From 1b9b8efbc9ad6736c58f1decf3c8cbc1a16d4a1f Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Mon, 28 Nov 2022 21:38:34 -0800 Subject: [PATCH 13/24] pal chain (#207) from https://arxiv.org/pdf/2211.10435.pdf --- docs/examples/chains/pal.ipynb | 180 ++++++++++++++++++ langchain/chains/pal/__init__.py | 4 + langchain/chains/pal/base.py | 79 ++++++++ langchain/chains/pal/colored_object_prompt.py | 77 ++++++++ langchain/chains/pal/math_prompt.py | 157 +++++++++++++++ tests/integration_tests/chains/test_pal.py | 31 +++ 6 files changed, 528 insertions(+) create mode 100644 docs/examples/chains/pal.ipynb create mode 100644 langchain/chains/pal/__init__.py create mode 100644 langchain/chains/pal/base.py create mode 100644 langchain/chains/pal/colored_object_prompt.py create mode 100644 langchain/chains/pal/math_prompt.py create mode 100644 tests/integration_tests/chains/test_pal.py diff --git a/docs/examples/chains/pal.ipynb b/docs/examples/chains/pal.ipynb new file mode 100644 index 00000000000..272f7541a75 --- /dev/null +++ b/docs/examples/chains/pal.ipynb @@ -0,0 +1,180 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "32e022a2", + "metadata": {}, + "source": [ + "# PAL\n", + "\n", + "Implements Program-Aided Language Models, as in https://arxiv.org/pdf/2211.10435.pdf.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "1370e40f", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains.pal.base import PALChain\n", + "from langchain import OpenAI" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "beddcac7", + "metadata": {}, + "outputs": [], + "source": [ + "llm = OpenAI(model_name='code-davinci-002', temperature=0, max_tokens=512)\n", + "pal_chain = PALChain.from_math_prompt(llm, verbose=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e2eab9d4", + "metadata": {}, + "outputs": [], + "source": [ + "question = \"Jan has three times the number of pets as Marcia. Marcia has two more pets than Cindy. 
If Cindy has four pets, how many total pets do the three have?\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "3ef64b27", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mdef solution():\n", + " \"\"\"Jan has three times the number of pets as Marcia. Marcia has two more pets than Cindy. If Cindy has four pets, how many total pets do the three have?\"\"\"\n", + " cindy_pets = 4\n", + " marcia_pets = cindy_pets + 2\n", + " jan_pets = marcia_pets * 3\n", + " total_pets = cindy_pets + marcia_pets + jan_pets\n", + " result = total_pets\n", + " return result\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'28'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pal_chain.run(question)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "e524f81f", + "metadata": {}, + "outputs": [], + "source": [ + "llm = OpenAI(model_name='code-davinci-002', temperature=0, max_tokens=512)\n", + "pal_chain = PALChain.from_colored_object_prompt(llm, verbose=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "03a237b8", + "metadata": {}, + "outputs": [], + "source": [ + "question = \"On the desk, you see two blue booklets, two purple booklets, and two yellow pairs of sunglasses. If I remove all the pairs of sunglasses from the desk, how many purple items remain on it?\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a84a4352", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m# Put objects into a list to record ordering\n", + "objects = []\n", + "objects += [('booklet', 'blue')] * 2\n", + "objects += [('booklet', 'purple')] * 2\n", + "objects += [('sunglasses', 'yellow')] * 2\n", + "\n", + "# Remove all pairs of sunglasses\n", + "objects = [object for object in objects if object[0] != 'sunglasses']\n", + "\n", + "# Count number of purple objects\n", + "num_purple = len([object for object in objects if object[1] == 'purple'])\n", + "answer = num_purple\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'2'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pal_chain.run(question)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ab20fec", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/langchain/chains/pal/__init__.py b/langchain/chains/pal/__init__.py new file mode 100644 index 00000000000..ac79f404ae3 --- /dev/null +++ b/langchain/chains/pal/__init__.py @@ -0,0 +1,4 @@ +"""Implements Program-Aided Language Models. + +As in https://arxiv.org/pdf/2211.10435.pdf. 
+""" diff --git a/langchain/chains/pal/base.py b/langchain/chains/pal/base.py new file mode 100644 index 00000000000..4ebeb0ea454 --- /dev/null +++ b/langchain/chains/pal/base.py @@ -0,0 +1,79 @@ +"""Implements Program-Aided Language Models. + +As in https://arxiv.org/pdf/2211.10435.pdf. +""" +from typing import Any, Dict, List + +from pydantic import BaseModel, Extra + +from langchain.chains.base import Chain +from langchain.chains.llm import LLMChain +from langchain.chains.pal.colored_object_prompt import COLORED_OBJECT_PROMPT +from langchain.chains.pal.math_prompt import MATH_PROMPT +from langchain.chains.python import PythonChain +from langchain.input import print_text +from langchain.llms.base import LLM +from langchain.prompts.base import BasePromptTemplate + + +class PALChain(Chain, BaseModel): + """Implements Program-Aided Language Models.""" + + llm: LLM + prompt: BasePromptTemplate + stop: str = "\n\n" + get_answer_expr: str = "print(solution())" + output_key: str = "result" #: :meta private: + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @property + def input_keys(self) -> List[str]: + """Return the singular input key. + + :meta private: + """ + return self.prompt.input_variables + + @property + def output_keys(self) -> List[str]: + """Return the singular output key. + + :meta private: + """ + return [self.output_key] + + def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: + llm_chain = LLMChain(llm=self.llm, prompt=self.prompt) + code = llm_chain.predict(stop=[self.stop], **inputs) + if self.verbose: + print_text(code, color="green", end="\n") + repl = PythonChain() + res = repl.run(code + f"\n{self.get_answer_expr}") + return {self.output_key: res.strip()} + + @classmethod + def from_math_prompt(cls, llm: LLM, **kwargs: Any) -> "PALChain": + """Load PAL from math prompt.""" + return cls( + llm=llm, + prompt=MATH_PROMPT, + stop="\n\n", + get_answer_expr="print(solution())", + **kwargs, + ) + + @classmethod + def from_colored_object_prompt(cls, llm: LLM, **kwargs: Any) -> "PALChain": + """Load PAL from colored object prompt.""" + return cls( + llm=llm, + prompt=COLORED_OBJECT_PROMPT, + stop="\n\n\n", + get_answer_expr="print(answer)", + **kwargs, + ) diff --git a/langchain/chains/pal/colored_object_prompt.py b/langchain/chains/pal/colored_object_prompt.py new file mode 100644 index 00000000000..49a3e43f180 --- /dev/null +++ b/langchain/chains/pal/colored_object_prompt.py @@ -0,0 +1,77 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate + +template = ( + """ +# Generate Python3 Code to solve problems +# Q: On the nightstand, there is a red pencil, a purple mug, a burgundy keychain, a fuchsia teddy bear, a black plate, and a blue stress ball. What color is the stress ball? +# Put objects into a dictionary for quick look up +objects = dict() +objects['pencil'] = 'red' +objects['mug'] = 'purple' +objects['keychain'] = 'burgundy' +objects['teddy bear'] = 'fuchsia' +objects['plate'] = 'black' +objects['stress ball'] = 'blue' + +# Look up the color of stress ball +stress_ball_color = objects['stress ball'] +answer = stress_ball_color + + +# Q: On the table, you see a bunch of objects arranged in a row: a purple paperclip, a pink stress ball, a brown keychain, a green scrunchiephone charger, a mauve fidget spinner, and a burgundy pen. What is the color of the object directly to the right of the stress ball? 
+# Put objects into a list to record ordering +objects = [] +objects += [('paperclip', 'purple')] * 1 +objects += [('stress ball', 'pink')] * 1 +objects += [('keychain', 'brown')] * 1 +objects += [('scrunchiephone charger', 'green')] * 1 +objects += [('fidget spinner', 'mauve')] * 1 +objects += [('pen', 'burgundy')] * 1 + +# Find the index of the stress ball +stress_ball_idx = None +for i, object in enumerate(objects): + if object[0] == 'stress ball': + stress_ball_idx = i + break + +# Find the directly right object +direct_right = objects[i+1] + +# Check the directly right object's color +direct_right_color = direct_right[1] +answer = direct_right_color + + +# Q: On the nightstand, you see the following items arranged in a row: a teal plate, a burgundy keychain, a yellow scrunchiephone charger, an orange mug, a pink notebook, and a grey cup. How many non-orange items do you see to the left of the teal item? +# Put objects into a list to record ordering +objects = [] +objects += [('plate', 'teal')] * 1 +objects += [('keychain', 'burgundy')] * 1 +objects += [('scrunchiephone charger', 'yellow')] * 1 +objects += [('mug', 'orange')] * 1 +objects += [('notebook', 'pink')] * 1 +objects += [('cup', 'grey')] * 1 + +# Find the index of the teal item +teal_idx = None +for i, object in enumerate(objects): + if object[1] == 'teal': + teal_idx = i + break + +# Find non-orange items to the left of the teal item +non_orange = [object for object in objects[:i] if object[1] != 'orange'] + +# Count number of non-orange objects +num_non_orange = len(non_orange) +answer = num_non_orange + + +# Q: {question} +""".strip() + + "\n" +) + +COLORED_OBJECT_PROMPT = PromptTemplate(input_variables=["question"], template=template) diff --git a/langchain/chains/pal/math_prompt.py b/langchain/chains/pal/math_prompt.py new file mode 100644 index 00000000000..95e3537189b --- /dev/null +++ b/langchain/chains/pal/math_prompt.py @@ -0,0 +1,157 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate + +template = ( + ''' +Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left? + +# solution in Python: + + +def solution(): + """Olivia has $23. She bought five bagels for $3 each. How much money does she have left?""" + money_initial = 23 + bagels = 5 + bagel_cost = 3 + money_spent = bagels * bagel_cost + money_left = money_initial - money_spent + result = money_left + return result + + + + + +Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday? + +# solution in Python: + + +def solution(): + """Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?""" + golf_balls_initial = 58 + golf_balls_lost_tuesday = 23 + golf_balls_lost_wednesday = 2 + golf_balls_left = golf_balls_initial - golf_balls_lost_tuesday - golf_balls_lost_wednesday + result = golf_balls_left + return result + + + + + +Q: There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room? + +# solution in Python: + + +def solution(): + """There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. 
How many computers are now in the server room?""" + computers_initial = 9 + computers_per_day = 5 + num_days = 4 # 4 days between monday and thursday + computers_added = computers_per_day * num_days + computers_total = computers_initial + computers_added + result = computers_total + return result + + + + + +Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now? + +# solution in Python: + + +def solution(): + """Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?""" + toys_initial = 5 + mom_toys = 2 + dad_toys = 2 + total_received = mom_toys + dad_toys + total_toys = toys_initial + total_received + result = total_toys + return result + + + + + +Q: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny? + +# solution in Python: + + +def solution(): + """Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?""" + jason_lollipops_initial = 20 + jason_lollipops_after = 12 + denny_lollipops = jason_lollipops_initial - jason_lollipops_after + result = denny_lollipops + return result + + + + + +Q: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total? + +# solution in Python: + + +def solution(): + """Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?""" + leah_chocolates = 32 + sister_chocolates = 42 + total_chocolates = leah_chocolates + sister_chocolates + chocolates_eaten = 35 + chocolates_left = total_chocolates - chocolates_eaten + result = chocolates_left + return result + + + + + +Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot? + +# solution in Python: + + +def solution(): + """If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?""" + cars_initial = 3 + cars_arrived = 2 + total_cars = cars_initial + cars_arrived + result = total_cars + return result + + + + + +Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today? + +# solution in Python: + + +def solution(): + """There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?""" + trees_initial = 15 + trees_after = 21 + trees_added = trees_after - trees_initial + result = trees_added + return result + + + + + +Q: {question} + +# solution in Python: +'''.strip() + + "\n\n\n" +) +MATH_PROMPT = PromptTemplate(input_variables=["question"], template=template) diff --git a/tests/integration_tests/chains/test_pal.py b/tests/integration_tests/chains/test_pal.py new file mode 100644 index 00000000000..9bbf6f8d886 --- /dev/null +++ b/tests/integration_tests/chains/test_pal.py @@ -0,0 +1,31 @@ +"""Test PAL chain.""" + +from langchain import OpenAI +from langchain.chains.pal.base import PALChain + + +def test_math_prompt() -> None: + """Test math prompt.""" + llm = OpenAI(model_name="code-davinci-002", temperature=0, max_tokens=512) + pal_chain = PALChain.from_math_prompt(llm) + question = ( + "Jan has three times the number of pets as Marcia. " + "Marcia has two more pets than Cindy. " + "If Cindy has four pets, how many total pets do the three have?" 
+ ) + output = pal_chain.run(question) + assert output == "28" + + +def test_colored_object_prompt() -> None: + """Test colored object prompt.""" + llm = OpenAI(model_name="code-davinci-002", temperature=0, max_tokens=512) + pal_chain = PALChain.from_colored_object_prompt(llm) + question = ( + "On the desk, you see two blue booklets, " + "two purple booklets, and two yellow pairs of sunglasses. " + "If I remove all the pairs of sunglasses from the desk, " + "how many purple items remain on it?" + ) + output = pal_chain.run(question) + assert output == "2" From 1db7b18341ed291dde398552248df5ef96e95d7a Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Mon, 28 Nov 2022 22:20:30 -0800 Subject: [PATCH 14/24] bump version to 0.0.24 (#220) --- langchain/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langchain/VERSION b/langchain/VERSION index df5db66fed3..b056f4120ed 100644 --- a/langchain/VERSION +++ b/langchain/VERSION @@ -1 +1 @@ -0.0.23 +0.0.24 From d368c436489ab644aa2352527ab7bde6479408e3 Mon Sep 17 00:00:00 2001 From: Akash Samant <70665700+asamant21@users.noreply.github.com> Date: Tue, 29 Nov 2022 07:03:40 -0800 Subject: [PATCH 15/24] Bug Fix (#221) Quick bug fix for semantic similarity vector injection --- .../prompts/example_selector/semantic_similarity.py | 11 ++++++++--- tests/unit_tests/prompts/test_utils.py | 9 +++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 tests/unit_tests/prompts/test_utils.py diff --git a/langchain/prompts/example_selector/semantic_similarity.py b/langchain/prompts/example_selector/semantic_similarity.py index 499bd9fc7a8..a78ca12daae 100644 --- a/langchain/prompts/example_selector/semantic_similarity.py +++ b/langchain/prompts/example_selector/semantic_similarity.py @@ -8,6 +8,11 @@ from langchain.prompts.example_selector.base import BaseExampleSelector from langchain.vectorstores.base import VectorStore +def sorted_values(values: Dict[str, str]) -> List[Any]: + """Return a list of values in dict sorted by key.""" + return [values[val] for val in sorted(values)] + + class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel): """Example selector that selects examples based on SemanticSimilarity.""" @@ -26,13 +31,13 @@ class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel): def add_example(self, example: Dict[str, str]) -> None: """Add new example to vectorstore.""" - string_example = " ".join(example.values()) + string_example = " ".join(sorted_values(example)) self.vectorstore.add_texts([string_example], metadatas=[example]) def select_examples(self, input_variables: Dict[str, str]) -> List[dict]: """Select which examples to use based on semantic similarity.""" # Get the docs with the highest similarity. - query = " ".join(input_variables.values()) + query = " ".join(sorted_values(input_variables)) example_docs = self.vectorstore.similarity_search(query, k=self.k) # Get the examples from the metadata. # This assumes that examples are stored in metadata. @@ -73,7 +78,7 @@ class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel): Returns: The ExampleSelector instantiated, backed by a vector store. 
""" - string_examples = [" ".join(eg.values()) for eg in examples] + string_examples = [" ".join(sorted_values(eg)) for eg in examples] vectorstore = vectorstore_cls.from_texts( string_examples, embeddings, metadatas=examples, **vectorstore_cls_kwargs ) diff --git a/tests/unit_tests/prompts/test_utils.py b/tests/unit_tests/prompts/test_utils.py new file mode 100644 index 00000000000..479d02e8bd9 --- /dev/null +++ b/tests/unit_tests/prompts/test_utils.py @@ -0,0 +1,9 @@ +"""Test functionality related to prompt utils.""" +from langchain.prompts.example_selector.semantic_similarity import sorted_values + + +def test_sorted_vals() -> None: + """Test sorted values from dictionary.""" + test_dict = {"key2": "val2", "key1": "val1"} + expected_response = ["val1", "val2"] + assert sorted_values(test_dict) == expected_response From ea67c049f03c041d10a4f3c5e1478da8a80a1686 Mon Sep 17 00:00:00 2001 From: Andrew Gleave Date: Tue, 29 Nov 2022 16:28:45 +0000 Subject: [PATCH 16/24] Support SQL statements that return no results (#222) Adds support for statements such as insert, update etc which do not return any rows. `engine.execute` is deprecated and so execution has been updated to use `connection.exec_driver_sql` as-per: https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Engine.execute --- langchain/sql_database.py | 14 +++++++++++--- .../chains/test_sql_database.py | 17 +++++++++++++++++ tests/unit_tests/test_sql_database.py | 14 ++++++++++++++ 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/langchain/sql_database.py b/langchain/sql_database.py index a04ab15aafa..2afcebc146f 100644 --- a/langchain/sql_database.py +++ b/langchain/sql_database.py @@ -66,6 +66,14 @@ class SQLDatabase: return "\n".join(tables) def run(self, command: str) -> str: - """Execute a SQL command and return a string of the results.""" - result = self._engine.execute(command).fetchall() - return str(result) + """Execute a SQL command and return a string representing the results. + + If the statement returns rows, a string of the results is returned. + If the statement returns no rows, an empty string is returned. + """ + with self._engine.connect() as connection: + cursor = connection.exec_driver_sql(command) + if cursor.returns_rows: + result = cursor.fetchall() + return str(result) + return "" diff --git a/tests/integration_tests/chains/test_sql_database.py b/tests/integration_tests/chains/test_sql_database.py index 8f6ab55bb4c..67d82f023c2 100644 --- a/tests/integration_tests/chains/test_sql_database.py +++ b/tests/integration_tests/chains/test_sql_database.py @@ -28,3 +28,20 @@ def test_sql_database_run() -> None: output = db_chain.run("What company does Harrison work at?") expected_output = " Harrison works at Foo." assert output == expected_output + + +def test_sql_database_run_update() -> None: + """Test that update commands run successfully and returned in correct format.""" + engine = create_engine("sqlite:///:memory:") + metadata_obj.create_all(engine) + stmt = insert(user).values(user_id=13, user_name="Harrison", user_company="Foo") + with engine.connect() as conn: + conn.execute(stmt) + db = SQLDatabase(engine) + db_chain = SQLDatabaseChain(llm=OpenAI(temperature=0), database=db) + output = db_chain.run("Update Harrison's workplace to Bar") + expected_output = " Harrison's workplace has been updated to Bar." + assert output == expected_output + output = db_chain.run("What company does Harrison work at?") + expected_output = " Harrison works at Bar." 
+ assert output == expected_output diff --git a/tests/unit_tests/test_sql_database.py b/tests/unit_tests/test_sql_database.py index 1a536fe5d1d..d9ce84f521d 100644 --- a/tests/unit_tests/test_sql_database.py +++ b/tests/unit_tests/test_sql_database.py @@ -47,3 +47,17 @@ def test_sql_database_run() -> None: output = db.run(command) expected_output = "[('Harrison',)]" assert output == expected_output + + +def test_sql_database_run_update() -> None: + """Test commands which return no rows return an empty string.""" + engine = create_engine("sqlite:///:memory:") + metadata_obj.create_all(engine) + stmt = insert(user).values(user_id=13, user_name="Harrison") + with engine.connect() as conn: + conn.execute(stmt) + db = SQLDatabase(engine) + command = "update user set user_name='Updated' where user_id = 13" + output = db.run(command) + expected_output = "" + assert output == expected_output From b19a73be268938051b27a0fac9872c82658437f5 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Tue, 29 Nov 2022 18:13:21 -0800 Subject: [PATCH 17/24] pal chain touch ups (#225) expose PAL in main entrypoint --- docs/examples/chains/pal.ipynb | 4 ++-- langchain/__init__.py | 2 ++ langchain/chains/__init__.py | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/examples/chains/pal.ipynb b/docs/examples/chains/pal.ipynb index 272f7541a75..f15b8afbe06 100644 --- a/docs/examples/chains/pal.ipynb +++ b/docs/examples/chains/pal.ipynb @@ -17,7 +17,7 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain.chains.pal.base import PALChain\n", + "from langchain.chains import PALChain\n", "from langchain import OpenAI" ] }, @@ -172,7 +172,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.8.7" } }, "nbformat": 4, diff --git a/langchain/__init__.py b/langchain/__init__.py index 75f767e83b9..dde7aa2259a 100644 --- a/langchain/__init__.py +++ b/langchain/__init__.py @@ -10,6 +10,7 @@ from langchain.chains import ( ConversationChain, LLMChain, LLMMathChain, + PALChain, PythonChain, SerpAPIChain, SQLDatabaseChain, @@ -49,4 +50,5 @@ __all__ = [ "ElasticVectorSearch", "InMemoryDocstore", "ConversationChain", + "PALChain", ] diff --git a/langchain/chains/__init__.py b/langchain/chains/__init__.py index 8b1d6ef89b3..eceb11d40aa 100644 --- a/langchain/chains/__init__.py +++ b/langchain/chains/__init__.py @@ -2,6 +2,7 @@ from langchain.chains.conversation.base import ConversationChain from langchain.chains.llm import LLMChain from langchain.chains.llm_math.base import LLMMathChain +from langchain.chains.pal.base import PALChain from langchain.chains.python import PythonChain from langchain.chains.sequential import SequentialChain, SimpleSequentialChain from langchain.chains.serpapi import SerpAPIChain @@ -18,4 +19,5 @@ __all__ = [ "SequentialChain", "SimpleSequentialChain", "ConversationChain", + "PALChain", ] From ca2394028fc6a1698deff9672cc103cdcd30471f Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Tue, 29 Nov 2022 20:07:44 -0800 Subject: [PATCH 18/24] move search to not be a chain (#226) --- docs/examples/agents/custom_agent.ipynb | 4 +- docs/examples/agents/mrkl.ipynb | 4 +- .../agents/self_ask_with_search.ipynb | 4 +- docs/examples/memory/agent_with_memory.ipynb | 4 +- docs/examples/model_laboratory.ipynb | 6 +-- docs/getting_started/agents.ipynb | 4 +- langchain/__init__.py | 5 ++- langchain/agents/mrkl/base.py | 4 +- langchain/agents/self_ask_with_search/base.py | 8 ++-- langchain/chains/__init__.py | 2 - 
langchain/{chains => }/serpapi.py | 41 +++++++------------ .../chains/test_self_ask_with_search.py | 4 +- .../{chains => }/test_serpapi.py | 4 +- 13 files changed, 40 insertions(+), 54 deletions(-) rename langchain/{chains => }/serpapi.py (77%) rename tests/integration_tests/{chains => }/test_serpapi.py (73%) diff --git a/docs/examples/agents/custom_agent.ipynb b/docs/examples/agents/custom_agent.ipynb index b97921e4397..cc6f135bec5 100644 --- a/docs/examples/agents/custom_agent.ipynb +++ b/docs/examples/agents/custom_agent.ipynb @@ -48,7 +48,7 @@ "outputs": [], "source": [ "from langchain.agents import ZeroShotAgent, Tool\n", - "from langchain import OpenAI, SerpAPIChain, LLMChain" + "from langchain import OpenAI, SerpAPIWrapper, LLMChain" ] }, { @@ -58,7 +58,7 @@ "metadata": {}, "outputs": [], "source": [ - "search = SerpAPIChain()\n", + "search = SerpAPIWrapper()\n", "tools = [\n", " Tool(\n", " name = \"Search\",\n", diff --git a/docs/examples/agents/mrkl.ipynb b/docs/examples/agents/mrkl.ipynb index c0dcb817b3e..71bc463ddcb 100644 --- a/docs/examples/agents/mrkl.ipynb +++ b/docs/examples/agents/mrkl.ipynb @@ -26,7 +26,7 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain import LLMMathChain, OpenAI, SerpAPIChain, SQLDatabase, SQLDatabaseChain\n", + "from langchain import LLMMathChain, OpenAI, SerpAPIWrapper, SQLDatabase, SQLDatabaseChain\n", "from langchain.agents import initialize_agent, Tool" ] }, @@ -38,7 +38,7 @@ "outputs": [], "source": [ "llm = OpenAI(temperature=0)\n", - "search = SerpAPIChain()\n", + "search = SerpAPIWrapper()\n", "llm_math_chain = LLMMathChain(llm=llm, verbose=True)\n", "db = SQLDatabase.from_uri(\"sqlite:///../../../notebooks/Chinook.db\")\n", "db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True)\n", diff --git a/docs/examples/agents/self_ask_with_search.ipynb b/docs/examples/agents/self_ask_with_search.ipynb index 7be3b59fdca..d4a56ab2a8d 100644 --- a/docs/examples/agents/self_ask_with_search.ipynb +++ b/docs/examples/agents/self_ask_with_search.ipynb @@ -45,11 +45,11 @@ } ], "source": [ - "from langchain import OpenAI, SerpAPIChain\n", + "from langchain import OpenAI, SerpAPIWrapper\n", "from langchain.agents import initialize_agent, Tool\n", "\n", "llm = OpenAI(temperature=0)\n", - "search = SerpAPIChain()\n", + "search = SerpAPIWrapper()\n", "tools = [\n", " Tool(\n", " name=\"Intermediate Answer\",\n", diff --git a/docs/examples/memory/agent_with_memory.ipynb b/docs/examples/memory/agent_with_memory.ipynb index fa93a7e0dc1..7527907c154 100644 --- a/docs/examples/memory/agent_with_memory.ipynb +++ b/docs/examples/memory/agent_with_memory.ipynb @@ -29,7 +29,7 @@ "source": [ "from langchain.agents import ZeroShotAgent, Tool\n", "from langchain.chains.conversation.memory import ConversationBufferMemory\n", - "from langchain import OpenAI, SerpAPIChain, LLMChain" + "from langchain import OpenAI, SerpAPIWrapper, LLMChain" ] }, { @@ -39,7 +39,7 @@ "metadata": {}, "outputs": [], "source": [ - "search = SerpAPIChain()\n", + "search = SerpAPIWrapper()\n", "tools = [\n", " Tool(\n", " name = \"Search\",\n", diff --git a/docs/examples/model_laboratory.ipynb b/docs/examples/model_laboratory.ipynb index 13892a73d6a..56490197889 100644 --- a/docs/examples/model_laboratory.ipynb +++ b/docs/examples/model_laboratory.ipynb @@ -135,14 +135,14 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain import SelfAskWithSearchChain, SerpAPIChain\n", + "from langchain import SelfAskWithSearchChain, SerpAPIWrapper\n", "\n", "open_ai_llm = 
OpenAI(temperature=0)\n",
-    "search = SerpAPIChain()\n",
+    "search = SerpAPIWrapper()\n",
     "self_ask_with_search_openai = SelfAskWithSearchChain(llm=open_ai_llm, search_chain=search, verbose=True)\n",
     "\n",
     "cohere_llm = Cohere(temperature=0, model=\"command-xlarge-20221108\")\n",
-    "search = SerpAPIChain()\n",
+    "search = SerpAPIWrapper()\n",
     "self_ask_with_search_cohere = SelfAskWithSearchChain(llm=cohere_llm, search_chain=search, verbose=True)"
   ]
  },
diff --git a/docs/getting_started/agents.ipynb b/docs/getting_started/agents.ipynb
index 51d0fe78bb4..7f7b22aea44 100644
--- a/docs/getting_started/agents.ipynb
+++ b/docs/getting_started/agents.ipynb
@@ -77,9 +77,9 @@
    "outputs": [],
    "source": [
     "# Load the tool configs that are needed.\n",
-    "from langchain import LLMMathChain, SerpAPIChain\n",
+    "from langchain import LLMMathChain, SerpAPIWrapper\n",
     "llm = OpenAI(temperature=0)\n",
-    "search = SerpAPIChain()\n",
+    "search = SerpAPIWrapper()\n",
     "llm_math_chain = LLMMathChain(llm=llm, verbose=True)\n",
     "tools = [\n",
     "    Tool(\n",
diff --git a/langchain/__init__.py b/langchain/__init__.py
index dde7aa2259a..35b6446ca6f 100644
--- a/langchain/__init__.py
+++ b/langchain/__init__.py
@@ -12,7 +12,6 @@ from langchain.chains import (
     LLMMathChain,
     PALChain,
     PythonChain,
-    SerpAPIChain,
     SQLDatabaseChain,
     VectorDBQA,
 )
@@ -24,6 +23,7 @@ from langchain.prompts import (
     Prompt,
     PromptTemplate,
 )
+from langchain.serpapi import SerpAPIChain, SerpAPIWrapper
 from langchain.sql_database import SQLDatabase
 from langchain.vectorstores import FAISS, ElasticVectorSearch
 
@@ -32,7 +32,8 @@ __all__ = [
     "LLMMathChain",
     "PythonChain",
     "SelfAskWithSearchChain",
-    "SerpAPIChain",
+    "SerpAPIWrapper",
+    "SerpAPIChain",
     "Cohere",
     "OpenAI",
     "BasePromptTemplate",
diff --git a/langchain/agents/mrkl/base.py b/langchain/agents/mrkl/base.py
index 5474b2a9072..28eac3db1d7 100644
--- a/langchain/agents/mrkl/base.py
+++ b/langchain/agents/mrkl/base.py
@@ -131,10 +131,10 @@ class MRKLChain(ZeroShotAgent):
     Example:
         .. code-block:: python

-            from langchain import LLMMathChain, OpenAI, SerpAPIChain, MRKLChain
+            from langchain import LLMMathChain, OpenAI, SerpAPIWrapper, MRKLChain
             from langchain.chains.mrkl.base import ChainConfig
             llm = OpenAI(temperature=0)
-            search = SerpAPIChain()
+            search = SerpAPIWrapper()
             llm_math_chain = LLMMathChain(llm=llm)
             chains = [
                 ChainConfig(
diff --git a/langchain/agents/self_ask_with_search/base.py b/langchain/agents/self_ask_with_search/base.py
index ab0b6c62cfa..1273308db58 100644
--- a/langchain/agents/self_ask_with_search/base.py
+++ b/langchain/agents/self_ask_with_search/base.py
@@ -5,9 +5,9 @@ from langchain.agents.agent import Agent
 from langchain.agents.self_ask_with_search.prompt import PROMPT
 from langchain.agents.tools import Tool
 from langchain.chains.llm import LLMChain
-from langchain.chains.serpapi import SerpAPIChain
 from langchain.llms.base import LLM
 from langchain.prompts.base import BasePromptTemplate
+from langchain.serpapi import SerpAPIWrapper


 class SelfAskWithSearchAgent(Agent):
@@ -73,12 +73,12 @@ class SelfAskWithSearchChain(SelfAskWithSearchAgent):

     Example:
         ..
code-block:: python - from langchain import SelfAskWithSearchChain, OpenAI, SerpAPIChain - search_chain = SerpAPIChain() + from langchain import SelfAskWithSearchChain, OpenAI, SerpAPIWrapper + search_chain = SerpAPIWrapper() self_ask = SelfAskWithSearchChain(llm=OpenAI(), search_chain=search_chain) """ - def __init__(self, llm: LLM, search_chain: SerpAPIChain, **kwargs: Any): + def __init__(self, llm: LLM, search_chain: SerpAPIWrapper, **kwargs: Any): """Initialize with just an LLM and a search chain.""" search_tool = Tool(name="Intermediate Answer", func=search_chain.run) llm_chain = LLMChain(llm=llm, prompt=PROMPT) diff --git a/langchain/chains/__init__.py b/langchain/chains/__init__.py index eceb11d40aa..62018b9393e 100644 --- a/langchain/chains/__init__.py +++ b/langchain/chains/__init__.py @@ -5,7 +5,6 @@ from langchain.chains.llm_math.base import LLMMathChain from langchain.chains.pal.base import PALChain from langchain.chains.python import PythonChain from langchain.chains.sequential import SequentialChain, SimpleSequentialChain -from langchain.chains.serpapi import SerpAPIChain from langchain.chains.sql_database.base import SQLDatabaseChain from langchain.chains.vector_db_qa.base import VectorDBQA @@ -13,7 +12,6 @@ __all__ = [ "LLMChain", "LLMMathChain", "PythonChain", - "SerpAPIChain", "SQLDatabaseChain", "VectorDBQA", "SequentialChain", diff --git a/langchain/chains/serpapi.py b/langchain/serpapi.py similarity index 77% rename from langchain/chains/serpapi.py rename to langchain/serpapi.py index 30ac632a3c8..99affb42466 100644 --- a/langchain/chains/serpapi.py +++ b/langchain/serpapi.py @@ -4,11 +4,10 @@ Heavily borrowed from https://github.com/ofirpress/self-ask """ import os import sys -from typing import Any, Dict, List, Optional +from typing import Any, Dict, Optional from pydantic import BaseModel, Extra, root_validator -from langchain.chains.base import Chain from langchain.utils import get_from_dict_or_env @@ -26,8 +25,8 @@ class HiddenPrints: sys.stdout = self._original_stdout -class SerpAPIChain(Chain, BaseModel): - """Chain that calls SerpAPI. +class SerpAPIWrapper(BaseModel): + """Wrapper around SerpAPI. To use, you should have the ``google-search-results`` python package installed, and the environment variable ``SERPAPI_API_KEY`` set with your API key, or pass @@ -36,13 +35,11 @@ class SerpAPIChain(Chain, BaseModel): Example: .. code-block:: python - from langchain import SerpAPIChain - serpapi = SerpAPIChain() + from langchain import SerpAPIWrapper + serpapi = SerpAPIWrapper() """ search_engine: Any #: :meta private: - input_key: str = "search_query" #: :meta private: - output_key: str = "search_result" #: :meta private: serpapi_api_key: Optional[str] = None @@ -51,22 +48,6 @@ class SerpAPIChain(Chain, BaseModel): extra = Extra.forbid - @property - def input_keys(self) -> List[str]: - """Return the singular input key. - - :meta private: - """ - return [self.input_key] - - @property - def output_keys(self) -> List[str]: - """Return the singular output key. 
- 
-        :meta private:
-        """
-        return [self.output_key]
-
     @root_validator()
     def validate_environment(cls, values: Dict) -> Dict:
         """Validate that api key and python package exists in environment."""
@@ -85,11 +66,12 @@ class SerpAPIChain(Chain, BaseModel):
             )
         return values
 
-    def _call(self, inputs: Dict[str, Any]) -> Dict[str, str]:
+    def run(self, query: str) -> str:
+        """Run query through SerpAPI and parse result."""
         params = {
             "api_key": self.serpapi_api_key,
             "engine": "google",
-            "q": inputs[self.input_key],
+            "q": query,
             "google_domain": "google.com",
             "gl": "us",
             "hl": "en",
@@ -112,4 +94,9 @@ class SerpAPIChain(Chain, BaseModel):
             toret = res["organic_results"][0]["snippet"]
         else:
             toret = "No good search result found"
-        return {self.output_key: toret}
+        return toret
+
+
+# For backwards compatibility
+
+SerpAPIWrapper = SerpAPIWrapper
diff --git a/tests/integration_tests/chains/test_self_ask_with_search.py b/tests/integration_tests/chains/test_self_ask_with_search.py
index 8873be8bf87..e4536f75863 100644
--- a/tests/integration_tests/chains/test_self_ask_with_search.py
+++ b/tests/integration_tests/chains/test_self_ask_with_search.py
@@ -1,7 +1,7 @@
 """Integration test for self ask with search."""
 from langchain.agents.self_ask_with_search.base import SelfAskWithSearchChain
-from langchain.chains.serpapi import SerpAPIChain
 from langchain.llms.openai import OpenAI
+from langchain.serpapi import SerpAPIWrapper
 
 
 def test_self_ask_with_search() -> None:
@@ -9,7 +9,7 @@ def test_self_ask_with_search() -> None:
     question = "What is the hometown of the reigning men's U.S. Open champion?"
     chain = SelfAskWithSearchChain(
         llm=OpenAI(temperature=0),
-        search_chain=SerpAPIChain(),
+        search_chain=SerpAPIWrapper(),
         input_key="q",
         output_key="a",
     )
diff --git a/tests/integration_tests/chains/test_serpapi.py b/tests/integration_tests/test_serpapi.py
similarity index 73%
rename from tests/integration_tests/chains/test_serpapi.py
rename to tests/integration_tests/test_serpapi.py
index 60cda1aa62d..cd2b63437a1 100644
--- a/tests/integration_tests/chains/test_serpapi.py
+++ b/tests/integration_tests/test_serpapi.py
@@ -1,9 +1,9 @@
 """Integration test for SerpAPI."""
-from langchain.chains.serpapi import SerpAPIChain
+from langchain.serpapi import SerpAPIWrapper
 
 
 def test_call() -> None:
     """Test that call gives the correct answer."""
-    chain = SerpAPIChain()
+    chain = SerpAPIWrapper()
     output = chain.run("What was Obama's first name?")
     assert output == "Barack Hussein Obama II"
From 3bda0019ae069ab3671e7012a0213e49ea474fb2 Mon Sep 17 00:00:00 2001
From: Harrison Chase
Date: Tue, 29 Nov 2022 20:08:00 -0800
Subject: [PATCH 19/24] Harrison/list of examples (#218)

---
 docs/explanation/cool_demos.md | 39 ++++++++++++++++++++++++++++++++++
 docs/index.rst                 |  1 +
 2 files changed, 40 insertions(+)
 create mode 100644 docs/explanation/cool_demos.md

diff --git a/docs/explanation/cool_demos.md b/docs/explanation/cool_demos.md
new file mode 100644
index 00000000000..47758aeec50
--- /dev/null
+++ b/docs/explanation/cool_demos.md
@@ -0,0 +1,39 @@
+# Cool Demos
+
+Lots of people have built some pretty awesome stuff with LangChain.
+This is a collection of our favorites.
+If you see any other demos that you think we should highlight, be sure to let us know!
+
+## Open Source
+
+### [ThoughtSource](https://github.com/OpenBioLink/ThoughtSource)
+A central, open resource and community around data and tools related to chain-of-thought reasoning in large language models. 
+ 
+### [Notion Database Question-Answering Bot](https://github.com/hwchase17/notion-qa)
+An open-source GitHub project showing how to use LangChain to create a
+chatbot that can answer questions about an arbitrary Notion database.
+
+### [GPT Index](https://github.com/jerryjliu/gpt_index)
+GPT Index is a project consisting of a set of data structures that are created using GPT-3 and can be traversed using GPT-3 to answer queries.
+
+### [Grover's Algorithm](https://github.com/JavaFXpert/llm-grovers-search-party)
+Leveraging Qiskit, OpenAI, and LangChain to demonstrate Grover's algorithm.
+
+
+## Not Open Source
+
+### [Daimon](https://twitter.com/sjwhitmore/status/1580593217153531908?s=20&t=neQvtZZTlp623U3LZwz3bQ)
+A chat-based AI personal assistant with long-term memory about you.
+
+### [Clerkie](https://twitter.com/krrish_dh/status/1581028925618106368?s=20&t=neQvtZZTlp623U3LZwz3bQ)
+A stack-trace QA bot that helps debug complex stack traces (especially ones that go multiple functions or files deep).
+
+### [Sales Email Writer](https://twitter.com/Raza_Habib496/status/1596880140490838017?s=20&t=6MqEQYWfSqmJwsKahjCVOA)
+By Raza Habib, this demo utilizes LangChain + SerpAPI + HumanLoop to write sales emails.
+Given a company name and a person, this application uses Google Search (via SerpAPI) to get
+more information on the company and the person, and then writes them a sales message.
+
+### [Question-Answering on a Web Browser](https://twitter.com/chillzaza_/status/1592961099384905730?s=20&t=EhU8jl0KyCPJ7vE9Rnz-cQ)
+By Zahid Khawaja, this demo uses question answering to respond to questions about a given website.
+A follow-up added support for [YouTube videos](https://twitter.com/chillzaza_/status/1593739682013220865?s=20&t=EhU8jl0KyCPJ7vE9Rnz-cQ),
+and then another follow-up added support for [Wikipedia](https://twitter.com/chillzaza_/status/1594847151238037505?s=20&t=EhU8jl0KyCPJ7vE9Rnz-cQ).
\ No newline at end of file
diff --git a/docs/index.rst b/docs/index.rst
index 5dc98488dea..a2e3ca6eb23 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -158,6 +158,7 @@ see detailed information about the various classes, methods, and APIs.
    explanation/core_concepts.md
    explanation/agents.md
    explanation/glossary.md
+   explanation/cool_demos.md
    Discord
 
Higher level, conceptual explanations of the LangChain components. 
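
With the rename in patch 18 above, SerpAPI is exposed as a plain utility rather than a Chain: there are no input/output keys anymore, just a `run(query) -> str` method that agents wrap as a Tool. A minimal usage sketch, assuming the `google-search-results` package is installed and `SERPAPI_API_KEY` is set in the environment (the Tool wiring mirrors the updated notebooks above):

```python
from langchain.agents import Tool
from langchain.serpapi import SerpAPIWrapper

# The root_validator checks the API key and package at construction time,
# so a bad environment fails fast here rather than at query time.
search = SerpAPIWrapper()

# run() takes a raw query string and returns the parsed answer/snippet.
print(search.run("What was Obama's first name?"))

# Agents consume the wrapper by binding its run method to a Tool, as the
# self-ask-with-search notebook does for its "Intermediate Answer" tool.
tools = [
    Tool(
        name="Intermediate Answer",
        func=search.run,
    )
]
```
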
From ab9abf53b706980ee90eb12eddc95481794b7e46 Mon Sep 17 00:00:00 2001
From: Harrison Chase
Date: Wed, 30 Nov 2022 06:48:22 -0800
Subject: [PATCH 20/24] Harrison/version 0025 (#227)

---
 langchain/VERSION     | 2 +-
 langchain/__init__.py | 4 ++--
 langchain/serpapi.py  | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/langchain/VERSION b/langchain/VERSION
index b056f4120ed..2678ff8d63d 100644
--- a/langchain/VERSION
+++ b/langchain/VERSION
@@ -1 +1 @@
-0.0.24
+0.0.25
diff --git a/langchain/__init__.py b/langchain/__init__.py
index 35b6446ca6f..10811af544a 100644
--- a/langchain/__init__.py
+++ b/langchain/__init__.py
@@ -23,7 +23,7 @@ from langchain.prompts import (
     Prompt,
     PromptTemplate,
 )
-from langchain.serpapi import SerpAPIWrapper
+from langchain.serpapi import SerpAPIChain, SerpAPIWrapper
 from langchain.sql_database import SQLDatabase
 from langchain.vectorstores import FAISS, ElasticVectorSearch
 
@@ -33,7 +33,7 @@ __all__ = [
     "PythonChain",
     "SelfAskWithSearchChain",
     "SerpAPIWrapper",
-    "SerpAPIWrapper",
+    "SerpAPIChain",
     "Cohere",
     "OpenAI",
     "BasePromptTemplate",
diff --git a/langchain/serpapi.py b/langchain/serpapi.py
index 99affb42466..6224939d581 100644
--- a/langchain/serpapi.py
+++ b/langchain/serpapi.py
@@ -99,4 +99,4 @@ class SerpAPIWrapper(BaseModel):
 
 # For backwards compatibility
 
-SerpAPIWrapper = SerpAPIWrapper
+SerpAPIChain = SerpAPIWrapper
From 347fc49d4d4257937fa5ff871d03fc4e45a18321 Mon Sep 17 00:00:00 2001
From: Harrison Chase
Date: Wed, 30 Nov 2022 22:00:02 -0800
Subject: [PATCH 21/24] Harrison/combine documents chain (#212)

Add a combine documents chain, powering the vector DB QA with sources chain.
---
 docs/examples/chains/combine documents.ipynb  | 200 ++++++++++++++++++
 docs/examples/chains/map_reduce.ipynb         |   6 +-
 langchain/__init__.py                         |   4 +
 langchain/chains/__init__.py                  |   4 +
 langchain/chains/combine_documents.py         |  94 ++++++++
 langchain/chains/mapreduce.py                 |  11 +-
 langchain/chains/qa_with_sources/__init__.py  |   1 +
 langchain/chains/qa_with_sources/base.py      | 143 +++++++++++++
 langchain/chains/qa_with_sources/prompt.py    |  55 +++++
 langchain/chains/qa_with_sources/vector_db.py |  20 ++
 10 files changed, 530 insertions(+), 8 deletions(-)
 create mode 100644 docs/examples/chains/combine documents.ipynb
 create mode 100644 langchain/chains/combine_documents.py
 create mode 100644 langchain/chains/qa_with_sources/__init__.py
 create mode 100644 langchain/chains/qa_with_sources/base.py
 create mode 100644 langchain/chains/qa_with_sources/prompt.py
 create mode 100644 langchain/chains/qa_with_sources/vector_db.py

diff --git a/docs/examples/chains/combine documents.ipynb b/docs/examples/chains/combine documents.ipynb
new file mode 100644
index 00000000000..63dfda97411
--- /dev/null
+++ b/docs/examples/chains/combine documents.ipynb
@@ -0,0 +1,200 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "efc5be67",
+   "metadata": {},
+   "source": [
+    "# Question-Answering with Sources\n",
+    "\n",
+    "This notebook goes over how to do question-answering with sources. It does this in a few different ways: first showing how you can use the `QAWithSourcesChain` to take in documents and use them directly, and next showing the `VectorDBQAWithSourcesChain`, which also handles looking up the documents from a vector database. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "1c613960", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.embeddings.openai import OpenAIEmbeddings\n", + "from langchain.embeddings.cohere import CohereEmbeddings\n", + "from langchain.text_splitter import CharacterTextSplitter\n", + "from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch\n", + "from langchain.vectorstores.faiss import FAISS" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "17d1306e", + "metadata": {}, + "outputs": [], + "source": [ + "with open('../state_of_the_union.txt') as f:\n", + " state_of_the_union = f.read()\n", + "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", + "texts = text_splitter.split_text(state_of_the_union)\n", + "\n", + "embeddings = OpenAIEmbeddings()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "0e745d99", + "metadata": {}, + "outputs": [], + "source": [ + "docsearch = FAISS.from_texts(texts, embeddings)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "f42d79dc", + "metadata": {}, + "outputs": [], + "source": [ + "# Add in a fake source information\n", + "for i, d in enumerate(docsearch.docstore._dict.values()):\n", + " d.metadata = {'source': f\"{i}-pl\"}" + ] + }, + { + "cell_type": "markdown", + "id": "aa1c1b60", + "metadata": {}, + "source": [ + "### QAWithSourcesChain\n", + "This shows how to use the `QAWithSourcesChain`, which takes in document objects and uses them directly." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "61bce191", + "metadata": {}, + "outputs": [], + "source": [ + "query = \"What did the president say about Justice Breyer\"\n", + "docs = docsearch.similarity_search(query)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "57ddf8c7", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains import QAWithSourcesChain\n", + "from langchain.llms import OpenAI, Cohere\n", + "from langchain.docstore.document import Document" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "f908a92a", + "metadata": {}, + "outputs": [], + "source": [ + "chain = QAWithSourcesChain.from_llm(OpenAI(temperature=0))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a505ac89", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'answer': ' The president thanked Justice Breyer for his service.',\n", + " 'sources': '27-pl'}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chain({\"docs\": docs, \"question\": query}, return_only_outputs=True)" + ] + }, + { + "cell_type": "markdown", + "id": "e6fc81de", + "metadata": {}, + "source": [ + "### VectorDBQAWithSourcesChain\n", + "\n", + "This shows how to use the `VectorDBQAWithSourcesChain`, which uses a vector database to look up relevant documents." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "8aa571ae", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains import VectorDBQAWithSourcesChain" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "aa859d4c", + "metadata": {}, + "outputs": [], + "source": [ + "chain = VectorDBQAWithSourcesChain.from_llm(OpenAI(temperature=0), vectorstore=docsearch)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ba36fa7", + "metadata": {}, + "outputs": [], + "source": [ + "chain({\"question\": \"What did the president say about Justice Breyer\"}, return_only_outputs=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "980fae3b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/examples/chains/map_reduce.ipynb b/docs/examples/chains/map_reduce.ipynb index 2862b1a7e1f..212bb5eb85d 100644 --- a/docs/examples/chains/map_reduce.ipynb +++ b/docs/examples/chains/map_reduce.ipynb @@ -39,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "99bbe19b", "metadata": {}, "outputs": [ @@ -49,7 +49,7 @@ "\"\\n\\nThe President discusses the recent aggression by Russia, and the response by the United States and its allies. He announces new sanctions against Russia, and says that the free world is united in holding Putin accountable. The President also discusses the American Rescue Plan, the Bipartisan Infrastructure Law, and the Bipartisan Innovation Act. 
Finally, the President addresses the need for women's rights and equality for LGBTQ+ Americans.\"" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -63,7 +63,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b581501e", + "id": "baa6e808", "metadata": {}, "outputs": [], "source": [] diff --git a/langchain/__init__.py b/langchain/__init__.py index 10811af544a..ebf276a4e98 100644 --- a/langchain/__init__.py +++ b/langchain/__init__.py @@ -12,8 +12,10 @@ from langchain.chains import ( LLMMathChain, PALChain, PythonChain, + QAWithSourcesChain, SQLDatabaseChain, VectorDBQA, + VectorDBQAWithSourcesChain, ) from langchain.docstore import InMemoryDocstore, Wikipedia from langchain.llms import Cohere, HuggingFaceHub, OpenAI @@ -51,5 +53,7 @@ __all__ = [ "ElasticVectorSearch", "InMemoryDocstore", "ConversationChain", + "VectorDBQAWithSourcesChain", + "QAWithSourcesChain", "PALChain", ] diff --git a/langchain/chains/__init__.py b/langchain/chains/__init__.py index 62018b9393e..bc8e42d4f4b 100644 --- a/langchain/chains/__init__.py +++ b/langchain/chains/__init__.py @@ -4,6 +4,8 @@ from langchain.chains.llm import LLMChain from langchain.chains.llm_math.base import LLMMathChain from langchain.chains.pal.base import PALChain from langchain.chains.python import PythonChain +from langchain.chains.qa_with_sources.base import QAWithSourcesChain +from langchain.chains.qa_with_sources.vector_db import VectorDBQAWithSourcesChain from langchain.chains.sequential import SequentialChain, SimpleSequentialChain from langchain.chains.sql_database.base import SQLDatabaseChain from langchain.chains.vector_db_qa.base import VectorDBQA @@ -17,5 +19,7 @@ __all__ = [ "SequentialChain", "SimpleSequentialChain", "ConversationChain", + "QAWithSourcesChain", + "VectorDBQAWithSourcesChain", "PALChain", ] diff --git a/langchain/chains/combine_documents.py b/langchain/chains/combine_documents.py new file mode 100644 index 00000000000..211f0c45b1c --- /dev/null +++ b/langchain/chains/combine_documents.py @@ -0,0 +1,94 @@ +"""Document combining chain.""" + +from typing import Any, Dict, List + +from pydantic import BaseModel, Extra, Field, root_validator + +from langchain.chains.base import Chain +from langchain.chains.llm import LLMChain +from langchain.prompts.base import BasePromptTemplate +from langchain.prompts.prompt import Prompt + + +def _get_default_document_prompt() -> Prompt: + return Prompt(input_variables=["page_content"], template="{page_content}") + + +class CombineDocumentsChain(Chain, BaseModel): + """Combine documents.""" + + llm_chain: LLMChain + """LLM wrapper to use after formatting documents.""" + document_prompt: BasePromptTemplate = Field( + default_factory=_get_default_document_prompt + ) + """Prompt to use to format each document.""" + document_variable_name: str + """The variable name in the llm_chain to put the documents in. + If only one variable in the llm_chain, this need not be provided.""" + input_key: str = "input_documents" #: :meta private: + output_key: str = "output_text" #: :meta private: + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @property + def input_keys(self) -> List[str]: + """Expect input key. + + :meta private: + """ + return [self.input_key] + + @property + def output_keys(self) -> List[str]: + """Return output key. 
+ + :meta private: + """ + return [self.output_key] + + @root_validator(pre=True) + def get_default_document_variable_name(cls, values: Dict) -> Dict: + """Get default document variable name, if not provided.""" + if "document_variable_name" not in values: + llm_chain_variables = values["llm_chain"].prompt.input_variables + if len(llm_chain_variables) == 1: + values["document_variable_name"] = llm_chain_variables[0] + else: + raise ValueError( + "document_variable_name must be provided if there are " + "multiple llm_chain_variables" + ) + else: + llm_chain_variables = values["llm_chain"].prompt.input_variables + if values["document_variable_name"] not in llm_chain_variables: + raise ValueError( + f"document_variable_name {values['document_variable_name']} was " + f"not found in llm_chain input_variables: {llm_chain_variables}" + ) + return values + + def _call(self, inputs: Dict[str, Any]) -> Dict[str, str]: + docs = inputs[self.input_key] + # Other keys are assumed to be needed for LLM prediction + other_keys = {k: v for k, v in inputs.items() if k != self.input_key} + # Get relevant information from each document. + doc_dicts = [] + for doc in docs: + base_info = {"page_content": doc.page_content} + base_info.update(doc.metadata) + document_info = { + k: base_info[k] for k in self.document_prompt.input_variables + } + doc_dicts.append(document_info) + # Format each document according to the prompt + doc_strings = [self.document_prompt.format(**doc) for doc in doc_dicts] + # Join the documents together to put them in the prompt. + other_keys[self.document_variable_name] = "\n".join(doc_strings) + # Call predict on the LLM. + output = self.llm_chain.predict(**other_keys) + return {self.output_key: output} diff --git a/langchain/chains/mapreduce.py b/langchain/chains/mapreduce.py index 8ec5a0b469a..8a0792d7ea8 100644 --- a/langchain/chains/mapreduce.py +++ b/langchain/chains/mapreduce.py @@ -9,7 +9,9 @@ from typing import Dict, List from pydantic import BaseModel, Extra from langchain.chains.base import Chain +from langchain.chains.combine_documents import CombineDocumentsChain from langchain.chains.llm import LLMChain +from langchain.docstore.document import Document from langchain.llms.base import LLM from langchain.prompts.base import BasePromptTemplate from langchain.text_splitter import TextSplitter @@ -66,10 +68,9 @@ class MapReduceChain(Chain, BaseModel): input_list = [{self.map_llm.prompt.input_variables[0]: d} for d in docs] summary_results = self.map_llm.apply(input_list) summaries = [res[self.map_llm.output_key] for res in summary_results] - + summary_docs = [Document(page_content=text) for text in summaries] # We then need to combine these individual parts into one. # This is the reduce part. 
- summary_str = "\n".join(summaries) - inputs = {self.reduce_llm.prompt.input_variables[0]: summary_str} - output = self.reduce_llm.predict(**inputs) - return {self.output_key: output} + reduce_chain = CombineDocumentsChain(llm_chain=self.reduce_llm) + outputs = reduce_chain({reduce_chain.input_key: summary_docs}) + return {self.output_key: outputs[self.output_key]} diff --git a/langchain/chains/qa_with_sources/__init__.py b/langchain/chains/qa_with_sources/__init__.py new file mode 100644 index 00000000000..f1d6b4e7279 --- /dev/null +++ b/langchain/chains/qa_with_sources/__init__.py @@ -0,0 +1 @@ +"""Question answering with sources over documents.""" diff --git a/langchain/chains/qa_with_sources/base.py b/langchain/chains/qa_with_sources/base.py new file mode 100644 index 00000000000..9b778985fc5 --- /dev/null +++ b/langchain/chains/qa_with_sources/base.py @@ -0,0 +1,143 @@ +"""Question answering with sources over documents.""" + +from abc import ABC, abstractmethod +from typing import Any, Dict, List + +from pydantic import BaseModel, Extra, root_validator + +from langchain.chains.base import Chain +from langchain.chains.combine_documents import CombineDocumentsChain +from langchain.chains.llm import LLMChain +from langchain.chains.qa_with_sources.prompt import ( + COMBINE_PROMPT, + EXAMPLE_PROMPT, + QUESTION_PROMPT, +) +from langchain.docstore.document import Document +from langchain.llms.base import LLM +from langchain.prompts.base import BasePromptTemplate + + +class BaseQAWithSourcesChain(Chain, BaseModel, ABC): + """Question answering with sources over documents.""" + + llm_question_chain: LLMChain + """LLM wrapper to use for asking questions to each document.""" + combine_document_chain: CombineDocumentsChain + """Chain to use to combine documents.""" + doc_source_key: str = "source" + """Key in document.metadata to use as source information""" + question_key: str = "question" #: :meta private: + input_docs_key: str = "docs" #: :meta private: + answer_key: str = "answer" #: :meta private: + sources_answer_key: str = "sources" #: :meta private: + + @classmethod + def from_llm( + cls, + llm: LLM, + combine_document_prompt: BasePromptTemplate = EXAMPLE_PROMPT, + question_prompt: BasePromptTemplate = QUESTION_PROMPT, + combine_prompt: BasePromptTemplate = COMBINE_PROMPT, + **kwargs: Any, + ) -> "BaseQAWithSourcesChain": + """Construct the chain from an LLM.""" + llm_question_chain = LLMChain(llm=llm, prompt=question_prompt) + llm_combine_chain = LLMChain(llm=llm, prompt=combine_prompt) + combine_document_chain = CombineDocumentsChain( + llm_chain=llm_combine_chain, + document_prompt=combine_document_prompt, + document_variable_name="summaries", + ) + return cls( + llm_question_chain=llm_question_chain, + combine_document_chain=combine_document_chain, + **kwargs, + ) + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @property + def input_keys(self) -> List[str]: + """Expect input key. + + :meta private: + """ + return [self.question_key] + + @property + def output_keys(self) -> List[str]: + """Return output key. 
+ + :meta private: + """ + return [self.answer_key, self.sources_answer_key] + + @root_validator(pre=True) + def validate_question_chain(cls, values: Dict) -> Dict: + """Validate question chain.""" + llm_question_chain = values["llm_question_chain"] + if len(llm_question_chain.input_keys) != 2: + raise ValueError( + f"The llm_question_chain should have two inputs: a content key " + f"(the first one) and a question key (the second one). Got " + f"{llm_question_chain.input_keys}." + ) + return values + + @root_validator() + def validate_combine_chain_can_be_constructed(cls, values: Dict) -> Dict: + """Validate that the combine chain can be constructed.""" + # Try to construct the combine documents chains. + + return values + + @abstractmethod + def _get_docs(self, inputs: Dict[str, Any]) -> List[Document]: + """Get docs to run questioning over.""" + + def _call(self, inputs: Dict[str, Any]) -> Dict[str, str]: + docs = self._get_docs(inputs) + query = inputs[self.question_key] + content_key, query_key = self.llm_question_chain.input_keys + results = self.llm_question_chain.apply( + [{content_key: d.page_content, query_key: query} for d in docs] + ) + question_result_key = self.llm_question_chain.output_key + result_docs = [ + Document(page_content=r[question_result_key], metadata=docs[i].metadata) + for i, r in enumerate(results) + ] + answer_dict = self.combine_document_chain( + { + self.combine_document_chain.input_key: result_docs, + self.question_key: query, + } + ) + answer = answer_dict[self.combine_document_chain.output_key] + if "\nSOURCES: " in answer: + answer, sources = answer.split("\nSOURCES: ") + else: + sources = "" + return {self.answer_key: answer, self.sources_answer_key: sources} + + +class QAWithSourcesChain(BaseQAWithSourcesChain, BaseModel): + """Question answering with sources over documents.""" + + input_docs_key: str = "docs" #: :meta private: + + @property + def input_keys(self) -> List[str]: + """Expect input key. + + :meta private: + """ + return [self.input_docs_key, self.question_key] + + def _get_docs(self, inputs: Dict[str, Any]) -> List[Document]: + return inputs[self.input_docs_key] diff --git a/langchain/chains/qa_with_sources/prompt.py b/langchain/chains/qa_with_sources/prompt.py new file mode 100644 index 00000000000..8cafe7ecfbf --- /dev/null +++ b/langchain/chains/qa_with_sources/prompt.py @@ -0,0 +1,55 @@ +# flake8: noqa +from langchain.prompts import PromptTemplate + +question_prompt_template = """Use the following portion of a long document to see if any of the text is relevant to answer the question. +Return any relevant text verbatim. +{context} +Question: {question} +Relevant text, if any:""" +QUESTION_PROMPT = PromptTemplate( + template=question_prompt_template, input_variables=["context", "question"] +) + +combine_prompt_template = """Given the following extracted parts of a long document and a question, create a final answer with references ("SOURCES"). +If you don't know the answer, just say that you don't know. Don't try to make up an answer. +ALWAYS return a "SOURCES" part in your answer. + +QUESTION: Which state/country's law governs the interpretation of the contract? +========= +Content: This Agreement is governed by English law and the parties submit to the exclusive jurisdiction of the English courts in relation to any dispute (contractual or non-contractual) concerning this Agreement save that either party may apply to any court for an injunction or other relief to protect its Intellectual Property Rights. 
+Source: 28-pl +Content: No Waiver. Failure or delay in exercising any right or remedy under this Agreement shall not constitute a waiver of such (or any other) right or remedy.\n\n11.7 Severability. The invalidity, illegality or unenforceability of any term (or part of a term) of this Agreement shall not affect the continuation in force of the remainder of the term (if any) and this Agreement.\n\n11.8 No Agency. Except as expressly stated otherwise, nothing in this Agreement shall create an agency, partnership or joint venture of any kind between the parties.\n\n11.9 No Third-Party Beneficiaries. +Source: 30-pl +Content: (b) if Google believes, in good faith, that the Distributor has violated or caused Google to violate any Anti-Bribery Laws (as defined in Clause 8.5) or that such a violation is reasonably likely to occur, +Source: 4-pl +========= +FINAL ANSWER: This Agreement is governed by English law. +SOURCES: 28-pl + +QUESTION: What did the president say about Michael Jackson? +========= +Content: Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \n\nLast year COVID-19 kept us apart. This year we are finally together again. \n\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \n\nWith a duty to one another to the American people to the Constitution. \n\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \n\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \n\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \n\nHe met the Ukrainian people. \n\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. \n\nGroups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. +Source: 0-pl +Content: And we won’t stop. \n\nWe have lost so much to COVID-19. Time with one another. And worst of all, so much loss of life. \n\nLet’s use this moment to reset. Let’s stop looking at COVID-19 as a partisan dividing line and see it for what it is: A God-awful disease. \n\nLet’s stop seeing each other as enemies, and start seeing each other for who we really are: Fellow Americans. \n\nWe can’t change how divided we’ve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \n\nI recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \n\nThey were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \n\nOfficer Mora was 27 years old. \n\nOfficer Rivera was 22. \n\nBoth Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. \n\nI spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. +Source: 24-pl +Content: And a proud Ukrainian people, who have known 30 years of independence, have repeatedly shown that they will not tolerate anyone who tries to take their country backwards. \n\nTo all Americans, I will be honest with you, as I’ve always promised. 
A Russian dictator, invading a foreign country, has costs around the world. \n\nAnd I’m taking robust action to make sure the pain of our sanctions is targeted at Russia’s economy. And I will use every tool at our disposal to protect American businesses and consumers. \n\nTonight, I can announce that the United States has worked with 30 other countries to release 60 Million barrels of oil from reserves around the world. \n\nAmerica will lead that effort, releasing 30 Million barrels from our own Strategic Petroleum Reserve. And we stand ready to do more if necessary, unified with our allies. \n\nThese steps will help blunt gas prices here at home. And I know the news about what’s happening can seem alarming. \n\nBut I want you to know that we are going to be okay. +Source: 5-pl +Content: More support for patients and families. \n\nTo get there, I call on Congress to fund ARPA-H, the Advanced Research Projects Agency for Health. \n\nIt’s based on DARPA—the Defense Department project that led to the Internet, GPS, and so much more. \n\nARPA-H will have a singular purpose—to drive breakthroughs in cancer, Alzheimer’s, diabetes, and more. \n\nA unity agenda for the nation. \n\nWe can do this. \n\nMy fellow Americans—tonight , we have gathered in a sacred space—the citadel of our democracy. \n\nIn this Capitol, generation after generation, Americans have debated great questions amid great strife, and have done great things. \n\nWe have fought for freedom, expanded liberty, defeated totalitarianism and terror. \n\nAnd built the strongest, freest, and most prosperous nation the world has ever known. \n\nNow is the hour. \n\nOur moment of responsibility. \n\nOur test of resolve and conscience, of history itself. \n\nIt is in this moment that our character is formed. Our purpose is found. Our future is forged. \n\nWell I know this nation. +Source: 34-pl +========= +FINAL ANSWER: The president did not mention Michael Jackson. 
+SOURCES:
+
+QUESTION: {question}
+=========
+{summaries}
+=========
+FINAL ANSWER:"""
+COMBINE_PROMPT = PromptTemplate(
+    template=combine_prompt_template, input_variables=["summaries", "question"]
+)
+
+EXAMPLE_PROMPT = PromptTemplate(
+    template="Content: {page_content}\nSource: {source}",
+    input_variables=["page_content", "source"],
+)
diff --git a/langchain/chains/qa_with_sources/vector_db.py b/langchain/chains/qa_with_sources/vector_db.py
new file mode 100644
index 00000000000..14fbf004a6f
--- /dev/null
+++ b/langchain/chains/qa_with_sources/vector_db.py
@@ -0,0 +1,20 @@
+"""Question-answering with sources over a vector database."""
+from typing import Any, Dict, List
+
+from pydantic import BaseModel
+
+from langchain.chains.qa_with_sources.base import BaseQAWithSourcesChain
+from langchain.docstore.document import Document
+from langchain.vectorstores.base import VectorStore
+
+
+class VectorDBQAWithSourcesChain(BaseQAWithSourcesChain, BaseModel):
+    """Question-answering with sources over a vector database."""
+
+    vectorstore: VectorStore
+    """Vector Database to connect to."""
+    k: int = 4
+
+    def _get_docs(self, inputs: Dict[str, Any]) -> List[Document]:
+        question = inputs[self.question_key]
+        return self.vectorstore.similarity_search(question, k=self.k)
From 3ca2c8d6c5c2ba5ae1c900e34ae4e3f7ef08b68d Mon Sep 17 00:00:00 2001
From: Harrison Chase
Date: Wed, 30 Nov 2022 22:20:13 -0800
Subject: [PATCH 22/24] allow passing of stop params into openai (#232)

---
 langchain/llms/openai.py                    | 11 +++++++----
 tests/integration_tests/llms/test_openai.py | 18 ++++++++++++++++++
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/langchain/llms/openai.py b/langchain/llms/openai.py
index 879dafe8673..8ed2af8a1e2 100644
--- a/langchain/llms/openai.py
+++ b/langchain/llms/openai.py
@@ -82,7 +82,7 @@ class OpenAI(LLM, BaseModel):
         return values
 
     @property
-    def _default_params(self) -> Mapping[str, Any]:
+    def _default_params(self) -> Dict[str, Any]:
         """Get the default parameters for calling OpenAI API."""
         normal_params = {
             "temperature": self.temperature,
@@ -115,7 +115,10 @@
 
             response = openai("Tell me a joke.")
     """
-        response = self.client.create(
-            model=self.model_name, prompt=prompt, stop=stop, **self._default_params
-        )
+        params = self._default_params
+        if stop is not None:
+            if "stop" in params:
+                raise ValueError("`stop` found in both the input and default params.")
+            params["stop"] = stop
+        response = self.client.create(model=self.model_name, prompt=prompt, **params)
         return response["choices"][0]["text"]
diff --git a/tests/integration_tests/llms/test_openai.py b/tests/integration_tests/llms/test_openai.py
index 3519f928599..4050e42ed84 100644
--- a/tests/integration_tests/llms/test_openai.py
+++ b/tests/integration_tests/llms/test_openai.py
@@ -26,3 +26,21 @@
     # Test that if provided twice it errors
     with pytest.raises(ValueError):
         OpenAI(foo=3, model_kwargs={"foo": 2})
+
+
+def test_openai_stop_valid() -> None:
+    """Test openai stop logic on valid configuration."""
+    query = "write an ordered list of five items"
+    first_llm = OpenAI(stop="3", temperature=0)
+    first_output = first_llm(query)
+    second_llm = OpenAI(temperature=0)
+    second_output = second_llm(query, stop=["3"])
+    # Both calls stop at "3", so with temperature 0 the outputs should match
+    assert first_output == second_output
+
+
+def test_openai_stop_error() -> None:
+    """Test openai stop logic on bad configuration."""
+    llm = OpenAI(stop="3", temperature=0)
+    with 
pytest.raises(ValueError):
+        llm("write an ordered list of five items", stop=["\n"])
From 473943643eaee80b18a6ad7e6ab54398437519b4 Mon Sep 17 00:00:00 2001
From: Harrison Chase
Date: Thu, 1 Dec 2022 09:01:15 -0800
Subject: [PATCH 23/24] bump version 0026 (#235)

---
 langchain/VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/langchain/VERSION b/langchain/VERSION
index 2678ff8d63d..c4475d3bb78 100644
--- a/langchain/VERSION
+++ b/langchain/VERSION
@@ -1 +1 @@
-0.0.25
+0.0.26
From bb4bf9d6d00edf62b957b47f7014162db1b1575a Mon Sep 17 00:00:00 2001
From: "Xupeng (Tony) Tong"
Date: Fri, 2 Dec 2022 02:50:36 +0800
Subject: [PATCH 24/24] chore: minor clean up / formatting (#233)

to get familiar with the project
---
 langchain/agents/agent.py                            |  4 +++-
 langchain/agents/mrkl/base.py                        |  4 +++-
 langchain/agents/self_ask_with_search/base.py        | 12 +++---------
 langchain/chains/conversation/memory.py              |  4 ++--
 langchain/chains/mapreduce.py                        |  3 ++-
 langchain/chains/natbot/base.py                      |  4 +++-
 langchain/chains/pal/base.py                         |  6 ++++--
 langchain/chains/qa_with_sources/base.py             |  4 +++-
 langchain/chains/sql_database/base.py                |  2 +-
 langchain/model_laboratory.py                        |  4 +++-
 .../prompts/example_selector/semantic_similarity.py  |  4 +++-
 langchain/prompts/prompt.py                          |  6 ++++--
 langchain/sql_database.py                            |  4 +++-
 langchain/text_splitter.py                           |  6 +++---
 langchain/vectorstores/base.py                       |  6 ++++--
 langchain/vectorstores/elastic_vector_search.py      |  4 +++-
 langchain/vectorstores/faiss.py                      |  4 +++-
 tests/unit_tests/chains/test_sequential.py           |  2 +-
 18 files changed, 51 insertions(+), 32 deletions(-)

diff --git a/langchain/agents/agent.py b/langchain/agents/agent.py
index eda22924c28..80ee3ab1dc4 100644
--- a/langchain/agents/agent.py
+++ b/langchain/agents/agent.py
@@ -1,4 +1,6 @@
 """Chain that takes in an input and produces an action and action input."""
+from __future__ import annotations
+
 from abc import ABC, abstractmethod
 from typing import Any, ClassVar, Dict, List, NamedTuple, Optional, Tuple
@@ -91,7 +93,7 @@ class Agent(Chain, BaseModel, ABC):
         pass
 
     @classmethod
-    def from_llm_and_tools(cls, llm: LLM, tools: List[Tool], **kwargs: Any) -> "Agent":
+    def from_llm_and_tools(cls, llm: LLM, tools: List[Tool], **kwargs: Any) -> Agent:
         """Construct an agent from an LLM and tools."""
         cls._validate_tools(tools)
         llm_chain = LLMChain(llm=llm, prompt=cls.create_prompt(tools))
diff --git a/langchain/agents/mrkl/base.py b/langchain/agents/mrkl/base.py
index 28eac3db1d7..1519c38d137 100644
--- a/langchain/agents/mrkl/base.py
+++ b/langchain/agents/mrkl/base.py
@@ -1,4 +1,6 @@
 """Attempt to implement MRKL systems as described in arxiv.org/pdf/2205.00445.pdf."""
+from __future__ import annotations
+
 from typing import Any, Callable, List, NamedTuple, Optional, Tuple
 
 from langchain.agents.agent import Agent
@@ -114,7 +116,7 @@ class MRKLChain(ZeroShotAgent):
     """
 
     @classmethod
-    def from_chains(cls, llm: LLM, chains: List[ChainConfig], **kwargs: Any) -> "Agent":
+    def from_chains(cls, llm: LLM, chains: List[ChainConfig], **kwargs: Any) -> Agent:
         """User friendly way to initialize the MRKL chain. 
This is intended to be an easy way to get up and running with the diff --git a/langchain/agents/self_ask_with_search/base.py b/langchain/agents/self_ask_with_search/base.py index 1273308db58..d8184fb4fb7 100644 --- a/langchain/agents/self_ask_with_search/base.py +++ b/langchain/agents/self_ask_with_search/base.py @@ -27,10 +27,7 @@ class SelfAskWithSearchAgent(Agent): def _extract_tool_and_input(self, text: str) -> Optional[Tuple[str, str]]: followup = "Follow up:" - if "\n" not in text: - last_line = text - else: - last_line = text.split("\n")[-1] + last_line = text.split("\n")[-1] if followup not in last_line: finish_string = "So the final answer is: " @@ -38,10 +35,7 @@ class SelfAskWithSearchAgent(Agent): return None return "Final Answer", last_line[len(finish_string) :] - if ":" not in last_line: - after_colon = last_line - else: - after_colon = text.split(":")[-1] + after_colon = text.split(":")[-1] if " " == after_colon[0]: after_colon = after_colon[1:] @@ -49,7 +43,7 @@ class SelfAskWithSearchAgent(Agent): return "Intermediate Answer", after_colon def _fix_text(self, text: str) -> str: - return text + "\nSo the final answer is:" + return f"{text}\nSo the final answer is:" @property def observation_prefix(self) -> str: diff --git a/langchain/chains/conversation/memory.py b/langchain/chains/conversation/memory.py index b6bf9e2654a..758bbbfbbd2 100644 --- a/langchain/chains/conversation/memory.py +++ b/langchain/chains/conversation/memory.py @@ -84,8 +84,8 @@ class ConversationSummaryMemory(Memory, BaseModel): prompt_input_key = _get_prompt_input_key(inputs, self.memory_variables) if len(outputs) != 1: raise ValueError(f"One output key expected, got {outputs.keys()}") - human = "Human: " + inputs[prompt_input_key] - ai = "AI: " + list(outputs.values())[0] + human = f"Human: {inputs[prompt_input_key]}" + ai = f"AI: {list(outputs.values())[0]}" new_lines = "\n".join([human, ai]) chain = LLMChain(llm=self.llm, prompt=self.prompt) self.buffer = chain.predict(summary=self.buffer, new_lines=new_lines) diff --git a/langchain/chains/mapreduce.py b/langchain/chains/mapreduce.py index 8a0792d7ea8..aea2baa4d47 100644 --- a/langchain/chains/mapreduce.py +++ b/langchain/chains/mapreduce.py @@ -3,6 +3,7 @@ Splits up a document, sends the smaller parts to the LLM with one prompt, then combines the results with another one. 
""" +from __future__ import annotations from typing import Dict, List @@ -32,7 +33,7 @@ class MapReduceChain(Chain, BaseModel): @classmethod def from_params( cls, llm: LLM, prompt: BasePromptTemplate, text_splitter: TextSplitter - ) -> "MapReduceChain": + ) -> MapReduceChain: """Construct a map-reduce chain that uses the chain for map and reduce.""" llm_chain = LLMChain(llm=llm, prompt=prompt) return cls(map_llm=llm_chain, reduce_llm=llm_chain, text_splitter=text_splitter) diff --git a/langchain/chains/natbot/base.py b/langchain/chains/natbot/base.py index 67c69363fad..744c4bc80b0 100644 --- a/langchain/chains/natbot/base.py +++ b/langchain/chains/natbot/base.py @@ -1,4 +1,6 @@ """Implement an LLM driven browser.""" +from __future__ import annotations + from typing import Dict, List from pydantic import BaseModel, Extra @@ -36,7 +38,7 @@ class NatBotChain(Chain, BaseModel): arbitrary_types_allowed = True @classmethod - def from_default(cls, objective: str) -> "NatBotChain": + def from_default(cls, objective: str) -> NatBotChain: """Load with default LLM.""" llm = OpenAI(temperature=0.5, best_of=10, n=3, max_tokens=50) return cls(llm=llm, objective=objective) diff --git a/langchain/chains/pal/base.py b/langchain/chains/pal/base.py index 4ebeb0ea454..04573625a0a 100644 --- a/langchain/chains/pal/base.py +++ b/langchain/chains/pal/base.py @@ -2,6 +2,8 @@ As in https://arxiv.org/pdf/2211.10435.pdf. """ +from __future__ import annotations + from typing import Any, Dict, List from pydantic import BaseModel, Extra @@ -57,7 +59,7 @@ class PALChain(Chain, BaseModel): return {self.output_key: res.strip()} @classmethod - def from_math_prompt(cls, llm: LLM, **kwargs: Any) -> "PALChain": + def from_math_prompt(cls, llm: LLM, **kwargs: Any) -> PALChain: """Load PAL from math prompt.""" return cls( llm=llm, @@ -68,7 +70,7 @@ class PALChain(Chain, BaseModel): ) @classmethod - def from_colored_object_prompt(cls, llm: LLM, **kwargs: Any) -> "PALChain": + def from_colored_object_prompt(cls, llm: LLM, **kwargs: Any) -> PALChain: """Load PAL from colored object prompt.""" return cls( llm=llm, diff --git a/langchain/chains/qa_with_sources/base.py b/langchain/chains/qa_with_sources/base.py index 9b778985fc5..5e295e6a218 100644 --- a/langchain/chains/qa_with_sources/base.py +++ b/langchain/chains/qa_with_sources/base.py @@ -1,5 +1,7 @@ """Question answering with sources over documents.""" +from __future__ import annotations + from abc import ABC, abstractmethod from typing import Any, Dict, List @@ -40,7 +42,7 @@ class BaseQAWithSourcesChain(Chain, BaseModel, ABC): question_prompt: BasePromptTemplate = QUESTION_PROMPT, combine_prompt: BasePromptTemplate = COMBINE_PROMPT, **kwargs: Any, - ) -> "BaseQAWithSourcesChain": + ) -> BaseQAWithSourcesChain: """Construct the chain from an LLM.""" llm_question_chain = LLMChain(llm=llm, prompt=question_prompt) llm_combine_chain = LLMChain(llm=llm, prompt=combine_prompt) diff --git a/langchain/chains/sql_database/base.py b/langchain/chains/sql_database/base.py index 32bb1c9ce76..4a13f3938ff 100644 --- a/langchain/chains/sql_database/base.py +++ b/langchain/chains/sql_database/base.py @@ -54,7 +54,7 @@ class SQLDatabaseChain(Chain, BaseModel): def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: llm_chain = LLMChain(llm=self.llm, prompt=PROMPT) chained_input = ChainedInput( - inputs[self.input_key] + "\nSQLQuery:", verbose=self.verbose + f"{inputs[self.input_key]} \nSQLQuery:", verbose=self.verbose ) llm_inputs = { "input": chained_input.input, diff --git 
a/langchain/model_laboratory.py b/langchain/model_laboratory.py index 614bba344f9..e31d6654782 100644 --- a/langchain/model_laboratory.py +++ b/langchain/model_laboratory.py @@ -1,4 +1,6 @@ """Experiment with different models.""" +from __future__ import annotations + from typing import List, Optional, Sequence, Union from langchain.agents.agent import Agent @@ -49,7 +51,7 @@ class ModelLaboratory: @classmethod def from_llms( cls, llms: List[LLM], prompt: Optional[PromptTemplate] = None - ) -> "ModelLaboratory": + ) -> ModelLaboratory: """Initialize with LLMs to experiment with and optional prompt. Args: diff --git a/langchain/prompts/example_selector/semantic_similarity.py b/langchain/prompts/example_selector/semantic_similarity.py index a78ca12daae..620e0dfa42e 100644 --- a/langchain/prompts/example_selector/semantic_similarity.py +++ b/langchain/prompts/example_selector/semantic_similarity.py @@ -1,4 +1,6 @@ """Example selector that selects examples based on SemanticSimilarity.""" +from __future__ import annotations + from typing import Any, Dict, List, Optional from pydantic import BaseModel, Extra @@ -55,7 +57,7 @@ class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel): vectorstore_cls: VectorStore, k: int = 4, **vectorstore_cls_kwargs: Any, - ) -> "SemanticSimilarityExampleSelector": + ) -> SemanticSimilarityExampleSelector: """Create k-shot example selector using example list and embeddings. Reshuffles examples dynamically based on query similarity. diff --git a/langchain/prompts/prompt.py b/langchain/prompts/prompt.py index bb1d331fb3e..4a24c2de4e2 100644 --- a/langchain/prompts/prompt.py +++ b/langchain/prompts/prompt.py @@ -1,4 +1,6 @@ """Prompt schema definition.""" +from __future__ import annotations + from typing import Any, Dict, List from pydantic import BaseModel, Extra, root_validator @@ -67,7 +69,7 @@ class PromptTemplate(BasePromptTemplate, BaseModel): input_variables: List[str], example_separator: str = "\n\n", prefix: str = "", - ) -> "PromptTemplate": + ) -> PromptTemplate: """Take examples in list format with prefix and suffix to create a prompt. Intended be used as a way to dynamically create a prompt from examples. @@ -92,7 +94,7 @@ class PromptTemplate(BasePromptTemplate, BaseModel): @classmethod def from_file( cls, template_file: str, input_variables: List[str] - ) -> "PromptTemplate": + ) -> PromptTemplate: """Load a prompt from a file. 
Args: diff --git a/langchain/sql_database.py b/langchain/sql_database.py index 2afcebc146f..56e76d6cf4a 100644 --- a/langchain/sql_database.py +++ b/langchain/sql_database.py @@ -1,4 +1,6 @@ """SQLAlchemy wrapper around a database.""" +from __future__ import annotations + from typing import Any, Iterable, List, Optional from sqlalchemy import create_engine, inspect @@ -37,7 +39,7 @@ class SQLDatabase: ) @classmethod - def from_uri(cls, database_uri: str, **kwargs: Any) -> "SQLDatabase": + def from_uri(cls, database_uri: str, **kwargs: Any) -> SQLDatabase: """Construct a SQLAlchemy engine from URI.""" return cls(create_engine(database_uri), **kwargs) diff --git a/langchain/text_splitter.py b/langchain/text_splitter.py index dbae51cf4db..c2da1745499 100644 --- a/langchain/text_splitter.py +++ b/langchain/text_splitter.py @@ -1,4 +1,6 @@ """Functionality for splitting text.""" +from __future__ import annotations + from abc import ABC, abstractmethod from typing import Any, Callable, Iterable, List @@ -46,9 +48,7 @@ class TextSplitter(ABC): return docs @classmethod - def from_huggingface_tokenizer( - cls, tokenizer: Any, **kwargs: Any - ) -> "TextSplitter": + def from_huggingface_tokenizer(cls, tokenizer: Any, **kwargs: Any) -> TextSplitter: """Text splitter than uses HuggingFace tokenizer to count length.""" try: from transformers import PreTrainedTokenizerBase diff --git a/langchain/vectorstores/base.py b/langchain/vectorstores/base.py index 066c9a01d85..429f82469b0 100644 --- a/langchain/vectorstores/base.py +++ b/langchain/vectorstores/base.py @@ -1,4 +1,6 @@ """Interface for vector stores.""" +from __future__ import annotations + from abc import ABC, abstractmethod from typing import Any, Iterable, List, Optional @@ -26,6 +28,6 @@ class VectorStore(ABC): texts: List[str], embedding: Embeddings, metadatas: Optional[List[dict]] = None, - **kwargs: Any - ) -> "VectorStore": + **kwargs: Any, + ) -> VectorStore: """Return VectorStore initialized from texts and embeddings.""" diff --git a/langchain/vectorstores/elastic_vector_search.py b/langchain/vectorstores/elastic_vector_search.py index 8620c559fc4..20703f57913 100644 --- a/langchain/vectorstores/elastic_vector_search.py +++ b/langchain/vectorstores/elastic_vector_search.py @@ -1,4 +1,6 @@ """Wrapper around Elasticsearch vector database.""" +from __future__ import annotations + import uuid from typing import Any, Callable, Dict, Iterable, List, Optional @@ -117,7 +119,7 @@ class ElasticVectorSearch(VectorStore): embedding: Embeddings, metadatas: Optional[List[dict]] = None, **kwargs: Any, - ) -> "ElasticVectorSearch": + ) -> ElasticVectorSearch: """Construct ElasticVectorSearch wrapper from raw documents. This is a user-friendly interface that: diff --git a/langchain/vectorstores/faiss.py b/langchain/vectorstores/faiss.py index 7d26ad12aba..61ffdf530f5 100644 --- a/langchain/vectorstores/faiss.py +++ b/langchain/vectorstores/faiss.py @@ -1,4 +1,6 @@ """Wrapper around FAISS vector database.""" +from __future__ import annotations + import uuid from typing import Any, Callable, Dict, Iterable, List, Optional @@ -96,7 +98,7 @@ class FAISS(VectorStore): embedding: Embeddings, metadatas: Optional[List[dict]] = None, **kwargs: Any, - ) -> "FAISS": + ) -> FAISS: """Construct FAISS wrapper from raw documents. 
This is a user friendly interface that: diff --git a/tests/unit_tests/chains/test_sequential.py b/tests/unit_tests/chains/test_sequential.py index aa83f2aca29..f231a74090c 100644 --- a/tests/unit_tests/chains/test_sequential.py +++ b/tests/unit_tests/chains/test_sequential.py @@ -28,7 +28,7 @@ class FakeChain(Chain, BaseModel): outputs = {} for var in self.output_variables: variables = [inputs[k] for k in self.input_variables] - outputs[var] = " ".join(variables) + "foo" + outputs[var] = f"{' '.join(variables)}foo" return outputs