diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 8d56deda34a..68e023356b7 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,6 +1,6 @@ name: lint -on: [push, pull_request_target] +on: [push, pull_request] jobs: build: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ad0def9e912..54b61276fb8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,6 +1,6 @@ name: test -on: [push, pull_request_target] +on: [push, pull_request] jobs: build: diff --git a/README.md b/README.md index b195840a220..a7bded5508f 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ This project was largely inspired by a few projects seen on Twitter for which we **[Self-ask-with-search](https://ofir.io/self-ask.pdf)** -To recreate this paper, use the following code snippet or checkout the [example notebook](https://github.com/hwchase17/langchain/blob/master/examples/self_ask_with_search.ipynb). +To recreate this paper, use the following code snippet or check out the [example notebook](https://github.com/hwchase17/langchain/blob/master/docs/examples/demos/self_ask_with_search.ipynb). ```python from langchain import SelfAskWithSearchChain, OpenAI, SerpAPIChain @@ -52,7 +52,7 @@ self_ask_with_search.run("What is the hometown of the reigning men's U.S. Open c **[LLM Math](https://twitter.com/amasad/status/1568824744367259648?s=20&t=-7wxpXBJinPgDuyHLouP1w)** -To recreate this example, use the following code snippet or check out the [example notebook](https://github.com/hwchase17/langchain/blob/master/examples/llm_math.ipynb). +To recreate this example, use the following code snippet or check out the [example notebook](https://github.com/hwchase17/langchain/blob/master/docs/examples/demos/llm_math.ipynb). ```python from langchain import OpenAI, LLMMathChain @@ -65,7 +65,7 @@ llm_math.run("How many of the integers between 0 and 99 inclusive are divisible **Generic Prompting** -You can also use this for simple prompting pipelines, as in the below example and this [example notebook](https://github.com/hwchase17/langchain/blob/master/examples/simple_prompts.ipynb). +You can also use this for simple prompting pipelines, as in the below example and this [example notebook](https://github.com/hwchase17/langchain/blob/master/docs/examples/demos/simple_prompts.ipynb). ```python from langchain import Prompt, OpenAI, LLMChain @@ -84,7 +84,7 @@ llm_chain.predict(question=question) **Embed & Search Documents** -We support two vector databases to store and search embeddings -- FAISS and Elasticsearch. Here's a code snippet showing how to use FAISS to store embeddings and search for text similar to a query. Both database backends are featured in this [example notebook](https://github.com/hwchase17/langchain/blob/master/examples/embeddings.ipynb). +We support two vector databases to store and search embeddings -- FAISS and Elasticsearch. Here's a code snippet showing how to use FAISS to store embeddings and search for text similar to a query. Both database backends are featured in this [example notebook](https://github.com/hwchase17/langchain/blob/master/docs/examples/integrations/embeddings.ipynb). 
```python from langchain.embeddings.openai import OpenAIEmbeddings diff --git a/docs/examples/model_laboratory.ipynb b/docs/examples/model_laboratory.ipynb index 0646386e56b..8c5af92f172 100644 --- a/docs/examples/model_laboratory.ipynb +++ b/docs/examples/model_laboratory.ipynb @@ -42,7 +42,7 @@ "metadata": {}, "outputs": [], "source": [ - "model_lab = ModelLaboratory(llms)" + "model_lab = ModelLaboratory.from_llms(llms)" ] }, { @@ -60,19 +60,19 @@ "\n", "\u001b[1mOpenAI\u001b[0m\n", "Params: {'model': 'text-davinci-002', 'temperature': 0.0, 'max_tokens': 256, 'top_p': 1, 'frequency_penalty': 0, 'presence_penalty': 0, 'n': 1, 'best_of': 1}\n", - "\u001b[104m\n", + "\u001b[36;1m\u001b[1;3m\n", "\n", "Flamingos are pink.\u001b[0m\n", "\n", "\u001b[1mCohere\u001b[0m\n", "Params: {'model': 'command-xlarge-20221108', 'max_tokens': 20, 'temperature': 0.0, 'k': 0, 'p': 1, 'frequency_penalty': 0, 'presence_penalty': 0}\n", - "\u001b[103m\n", + "\u001b[33;1m\u001b[1;3m\n", "\n", "Pink\u001b[0m\n", "\n", "\u001b[1mHuggingFaceHub\u001b[0m\n", "Params: {'repo_id': 'google/flan-t5-xl', 'temperature': 1}\n", - "\u001b[101mpink\u001b[0m\n", + "\u001b[38;5;200m\u001b[1;3mpink\u001b[0m\n", "\n" ] } @@ -89,7 +89,7 @@ "outputs": [], "source": [ "prompt = Prompt(template=\"What is the capital of {state}?\", input_variables=[\"state\"])\n", - "model_lab_with_prompt = ModelLaboratory(llms, prompt=prompt)" + "model_lab_with_prompt = ModelLaboratory.from_llms(llms, prompt=prompt)" ] }, { @@ -107,19 +107,19 @@ "\n", "\u001b[1mOpenAI\u001b[0m\n", "Params: {'model': 'text-davinci-002', 'temperature': 0.0, 'max_tokens': 256, 'top_p': 1, 'frequency_penalty': 0, 'presence_penalty': 0, 'n': 1, 'best_of': 1}\n", - "\u001b[104m\n", + "\u001b[36;1m\u001b[1;3m\n", "\n", "The capital of New York is Albany.\u001b[0m\n", "\n", "\u001b[1mCohere\u001b[0m\n", "Params: {'model': 'command-xlarge-20221108', 'max_tokens': 20, 'temperature': 0.0, 'k': 0, 'p': 1, 'frequency_penalty': 0, 'presence_penalty': 0}\n", - "\u001b[103m\n", + "\u001b[33;1m\u001b[1;3m\n", "\n", "The capital of New York is Albany.\u001b[0m\n", "\n", "\u001b[1mHuggingFaceHub\u001b[0m\n", "Params: {'repo_id': 'google/flan-t5-xl', 'temperature': 1}\n", - "\u001b[101mst john s\u001b[0m\n", + "\u001b[38;5;200m\u001b[1;3mst john s\u001b[0m\n", "\n" ] } @@ -130,10 +130,103 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "54336dbf", "metadata": {}, "outputs": [], + "source": [ + "from langchain import SelfAskWithSearchChain, SerpAPIChain\n", + "\n", + "open_ai_llm = OpenAI(temperature=0)\n", + "search = SerpAPIChain()\n", + "self_ask_with_search_openai = SelfAskWithSearchChain(llm=open_ai_llm, search_chain=search, verbose=True)\n", + "\n", + "cohere_llm = Cohere(temperature=0, model=\"command-xlarge-20221108\")\n", + "search = SerpAPIChain()\n", + "self_ask_with_search_cohere = SelfAskWithSearchChain(llm=cohere_llm, search_chain=search, verbose=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "6a50a9f1", + "metadata": {}, + "outputs": [], + "source": [ + "chains = [self_ask_with_search_openai, self_ask_with_search_cohere]\n", + "names = [str(open_ai_llm), str(cohere_llm)]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "d3549e99", + "metadata": {}, + "outputs": [], + "source": [ + "model_lab = ModelLaboratory(chains, names=names)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "362f7f57", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "\u001b[1mInput:\u001b[0m\n", + "What is the hometown of the reigning men's U.S. Open champion?\n", + "\n", + "\u001b[1mOpenAI\u001b[0m\n", + "Params: {'model': 'text-davinci-002', 'temperature': 0.0, 'max_tokens': 256, 'top_p': 1, 'frequency_penalty': 0, 'presence_penalty': 0, 'n': 1, 'best_of': 1}\n", + "\n", + "\n", + "\u001b[1m> Entering new chain...\u001b[0m\n", + "What is the hometown of the reigning men's U.S. Open champion?\n", + "Are follow up questions needed here:\u001b[32;1m\u001b[1;3m Yes.\n", + "Follow up: Who is the reigning men's U.S. Open champion?\u001b[0m\n", + "Intermediate answer: \u001b[33;1m\u001b[1;3mCarlos Alcaraz.\u001b[0m\u001b[32;1m\u001b[1;3m\n", + "Follow up: Where is Carlos Alcaraz from?\u001b[0m\n", + "Intermediate answer: \u001b[33;1m\u001b[1;3mEl Palmar, Spain.\u001b[0m\u001b[32;1m\u001b[1;3m\n", + "So the final answer is: El Palmar, Spain\u001b[0m\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "\u001b[36;1m\u001b[1;3m\n", + "So the final answer is: El Palmar, Spain\u001b[0m\n", + "\n", + "\u001b[1mCohere\u001b[0m\n", + "Params: {'model': 'command-xlarge-20221108', 'max_tokens': 256, 'temperature': 0.0, 'k': 0, 'p': 1, 'frequency_penalty': 0, 'presence_penalty': 0}\n", + "\n", + "\n", + "\u001b[1m> Entering new chain...\u001b[0m\n", + "What is the hometown of the reigning men's U.S. Open champion?\n", + "Are follow up questions needed here:\u001b[32;1m\u001b[1;3m Yes.\n", + "Follow up: Who is the reigning men's U.S. Open champion?\u001b[0m\n", + "Intermediate answer: \u001b[33;1m\u001b[1;3mCarlos Alcaraz.\u001b[0m\u001b[32;1m\u001b[1;3m\n", + "So the final answer is:\n", + "\n", + "Carlos Alcaraz\u001b[0m\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "\u001b[33;1m\u001b[1;3m\n", + "So the final answer is:\n", + "\n", + "Carlos Alcaraz\u001b[0m\n", + "\n" + ] + } + ], + "source": [ + "model_lab.compare(\"What is the hometown of the reigning men's U.S. Open champion?\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "94159131", + "metadata": {}, + "outputs": [], "source": [] } ], diff --git a/langchain/VERSION b/langchain/VERSION index 43b29618309..e3b86dd9cc1 100644 --- a/langchain/VERSION +++ b/langchain/VERSION @@ -1 +1 @@ -0.0.13 +0.0.16 diff --git a/langchain/chains/base.py b/langchain/chains/base.py index 15827f6577c..0f9edecb057 100644 --- a/langchain/chains/base.py +++ b/langchain/chains/base.py @@ -9,7 +9,7 @@ class Chain(BaseModel, ABC): """Base interface that all chains should implement.""" verbose: bool = False - """Whether to print out the code that was executed.""" + """Whether to print out response text.""" @property @abstractmethod @@ -49,6 +49,10 @@ class Chain(BaseModel, ABC): self._validate_outputs(outputs) return {**inputs, **outputs} + def apply(self, input_list: List[Dict[str, Any]]) -> List[Dict[str, str]]: + """Call the chain on all inputs in the list.""" + return [self(inputs) for inputs in input_list] + def run(self, text: str) -> str: """Run text in, text out (if applicable).""" if len(self.input_keys) != 1: diff --git a/langchain/chains/mapreduce.py b/langchain/chains/mapreduce.py index 0a88f945c61..623b95cdfcd 100644 --- a/langchain/chains/mapreduce.py +++ b/langchain/chains/mapreduce.py @@ -59,16 +59,13 @@ class MapReduceChain(Chain, BaseModel): def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: # Split the larger text into smaller chunks. 
- docs = self.text_splitter.split_text( - inputs[self.input_key], - ) + docs = self.text_splitter.split_text(inputs[self.input_key]) + # Now that we have the chunks, we send them to the LLM and track results. # This is the "map" part. - summaries = [] - for d in docs: - inputs = {self.map_llm.prompt.input_variables[0]: d} - res = self.map_llm.predict(**inputs) - summaries.append(res) + input_list = [{self.map_llm.prompt.input_variables[0]: d} for d in docs] + summary_results = self.map_llm.apply(input_list) + summaries = [res[self.map_llm.output_key] for res in summary_results] # We then need to combine these individual parts into one. # This is the reduce part. diff --git a/langchain/chains/natbot/crawler.py b/langchain/chains/natbot/crawler.py index 341b890b2fa..b15e0eace97 100644 --- a/langchain/chains/natbot/crawler.py +++ b/langchain/chains/natbot/crawler.py @@ -28,14 +28,7 @@ class Crawler: "Could not import playwright python package. " "Please it install it with `pip install playwright`." ) - self.browser = ( - sync_playwright() - .start() - .chromium.launch( - headless=False, - ) - ) - + self.browser = sync_playwright().start().chromium.launch(headless=False) self.page = self.browser.new_page() self.page.set_viewport_size({"width": 1280, "height": 1080}) diff --git a/langchain/chains/react/prompt.py b/langchain/chains/react/prompt.py index e0e16299f86..8a3b2cfe811 100644 --- a/langchain/chains/react/prompt.py +++ b/langchain/chains/react/prompt.py @@ -109,8 +109,4 @@ Action 3: Finish[yes]""", ] SUFFIX = """\n\nQuestion: {input}""" -PROMPT = Prompt.from_examples( - EXAMPLES, - SUFFIX, - ["input"], -) +PROMPT = Prompt.from_examples(EXAMPLES, SUFFIX, ["input"]) diff --git a/langchain/chains/self_ask_with_search/prompt.py b/langchain/chains/self_ask_with_search/prompt.py index 003e68dd7fd..02f7ab3f51f 100644 --- a/langchain/chains/self_ask_with_search/prompt.py +++ b/langchain/chains/self_ask_with_search/prompt.py @@ -38,7 +38,4 @@ Intermediate Answer: New Zealand. So the final answer is: No Question: {input}""" -PROMPT = Prompt( - input_variables=["input"], - template=_DEFAULT_TEMPLATE, -) +PROMPT = Prompt(input_variables=["input"], template=_DEFAULT_TEMPLATE) diff --git a/langchain/chains/serpapi.py b/langchain/chains/serpapi.py index dbac148b898..50e086e13be 100644 --- a/langchain/chains/serpapi.py +++ b/langchain/chains/serpapi.py @@ -9,6 +9,7 @@ from typing import Any, Dict, List, Optional from pydantic import BaseModel, Extra, root_validator from langchain.chains.base import Chain +from langchain.utils import get_from_dict_or_env class HiddenPrints: @@ -43,7 +44,7 @@ class SerpAPIChain(Chain, BaseModel): input_key: str = "search_query" #: :meta private: output_key: str = "search_result" #: :meta private: - serpapi_api_key: Optional[str] = os.environ.get("SERPAPI_API_KEY") + serpapi_api_key: Optional[str] = None class Config: """Configuration for this pydantic object.""" @@ -69,14 +70,10 @@ class SerpAPIChain(Chain, BaseModel): @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" - serpapi_api_key = values.get("serpapi_api_key") - - if serpapi_api_key is None or serpapi_api_key == "": - raise ValueError( - "Did not find SerpAPI API key, please add an environment variable" - " `SERPAPI_API_KEY` which contains it, or pass `serpapi_api_key` " - "as a named parameter to the constructor." 
- ) + serpapi_api_key = get_from_dict_or_env( + values, "serpapi_api_key", "SERPAPI_API_KEY" + ) + values["serpapi_api_key"] = serpapi_api_key try: from serpapi import GoogleSearch diff --git a/langchain/chains/sql_database/prompt.py b/langchain/chains/sql_database/prompt.py index c35c92e4b57..43bb3fcfb67 100644 --- a/langchain/chains/sql_database/prompt.py +++ b/langchain/chains/sql_database/prompt.py @@ -15,6 +15,5 @@ Only use the following tables: Question: {input}""" PROMPT = Prompt( - input_variables=["input", "table_info", "dialect"], - template=_DEFAULT_TEMPLATE, + input_variables=["input", "table_info", "dialect"], template=_DEFAULT_TEMPLATE ) diff --git a/langchain/chains/vector_db_qa/base.py b/langchain/chains/vector_db_qa/base.py index 3e010710576..d54de11ca22 100644 --- a/langchain/chains/vector_db_qa/base.py +++ b/langchain/chains/vector_db_qa/base.py @@ -27,6 +27,8 @@ class VectorDBQA(Chain, BaseModel): """LLM wrapper to use.""" vectorstore: VectorStore """Vector Database to connect to.""" + k: int = 4 + """Number of documents to query for.""" input_key: str = "query" #: :meta private: output_key: str = "result" #: :meta private: @@ -55,7 +57,7 @@ class VectorDBQA(Chain, BaseModel): def _call(self, inputs: Dict[str, str]) -> Dict[str, str]: question = inputs[self.input_key] llm_chain = LLMChain(llm=self.llm, prompt=prompt) - docs = self.vectorstore.similarity_search(question) + docs = self.vectorstore.similarity_search(question, k=self.k) contexts = [] for j, doc in enumerate(docs): contexts.append(f"Context {j}:\n{doc.page_content}") diff --git a/langchain/docstore/document.py b/langchain/docstore/document.py index 2c6e04bb0ba..cd6349d5312 100644 --- a/langchain/docstore/document.py +++ b/langchain/docstore/document.py @@ -1,7 +1,7 @@ """Interface for interacting with a document.""" from typing import List -from pydantic import BaseModel +from pydantic import BaseModel, Field class Document(BaseModel): @@ -10,6 +10,7 @@ class Document(BaseModel): page_content: str lookup_str: str = "" lookup_index = 0 + metadata: dict = Field(default_factory=dict) @property def paragraphs(self) -> List[str]: diff --git a/langchain/embeddings/cohere.py b/langchain/embeddings/cohere.py index 3ff641bfb02..9a4f2ffe6e1 100644 --- a/langchain/embeddings/cohere.py +++ b/langchain/embeddings/cohere.py @@ -1,10 +1,10 @@ """Wrapper around Cohere embedding models.""" -import os from typing import Any, Dict, List, Optional from pydantic import BaseModel, Extra, root_validator from langchain.embeddings.base import Embeddings +from langchain.utils import get_from_dict_or_env class CohereEmbeddings(BaseModel, Embeddings): @@ -25,7 +25,7 @@ class CohereEmbeddings(BaseModel, Embeddings): model: str = "medium" """Model name to use.""" - cohere_api_key: Optional[str] = os.environ.get("COHERE_API_KEY") + cohere_api_key: Optional[str] = None class Config: """Configuration for this pydantic object.""" @@ -35,14 +35,9 @@ class CohereEmbeddings(BaseModel, Embeddings): @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" - cohere_api_key = values.get("cohere_api_key") - - if cohere_api_key is None or cohere_api_key == "": - raise ValueError( - "Did not find Cohere API key, please add an environment variable" - " `COHERE_API_KEY` which contains it, or pass `cohere_api_key` as a" - " named parameter." 
- ) + cohere_api_key = get_from_dict_or_env( + values, "cohere_api_key", "COHERE_API_KEY" + ) try: import cohere diff --git a/langchain/embeddings/openai.py b/langchain/embeddings/openai.py index 0bb97e58ae0..864e7758f37 100644 --- a/langchain/embeddings/openai.py +++ b/langchain/embeddings/openai.py @@ -1,10 +1,10 @@ """Wrapper around OpenAI embedding models.""" -import os from typing import Any, Dict, List, Optional from pydantic import BaseModel, Extra, root_validator from langchain.embeddings.base import Embeddings +from langchain.utils import get_from_dict_or_env class OpenAIEmbeddings(BaseModel, Embeddings): @@ -25,7 +25,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings): model_name: str = "babbage" """Model name to use.""" - openai_api_key: Optional[str] = os.environ.get("OPENAI_API_KEY") + openai_api_key: Optional[str] = None class Config: """Configuration for this pydantic object.""" @@ -35,14 +35,9 @@ class OpenAIEmbeddings(BaseModel, Embeddings): @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" - openai_api_key = values.get("openai_api_key") - - if openai_api_key is None or openai_api_key == "": - raise ValueError( - "Did not find OpenAI API key, please add an environment variable" - " `OPENAI_API_KEY` which contains it, or pass `openai_api_key` as a" - " named parameter." - ) + openai_api_key = get_from_dict_or_env( + values, "openai_api_key", "OPENAI_API_KEY" + ) try: import openai diff --git a/langchain/input.py b/langchain/input.py index 94fad908280..ef7053ad315 100644 --- a/langchain/input.py +++ b/langchain/input.py @@ -1,14 +1,19 @@ """Handle chained inputs.""" from typing import Dict, List, Optional -_COLOR_MAPPING = {"blue": 104, "yellow": 103, "red": 101, "green": 102} +_TEXT_COLOR_MAPPING = { + "blue": "36;1", + "yellow": "33;1", + "pink": "38;5;200", + "green": "32;1", +} def get_color_mapping( items: List[str], excluded_colors: Optional[List] = None ) -> Dict[str, str]: """Get mapping for items to a support color.""" - colors = list(_COLOR_MAPPING.keys()) + colors = list(_TEXT_COLOR_MAPPING.keys()) if excluded_colors is not None: colors = [c for c in colors if c not in excluded_colors] color_mapping = {item: colors[i % len(colors)] for i, item in enumerate(items)} @@ -20,8 +25,8 @@ def print_text(text: str, color: Optional[str] = None, end: str = "") -> None: if color is None: print(text, end=end) else: - color_str = _COLOR_MAPPING[color] - print(f"\x1b[{color_str}m{text}\x1b[0m", end=end) + color_str = _TEXT_COLOR_MAPPING[color] + print(f"\u001b[{color_str}m\033[1;3m{text}\u001b[0m", end=end) class ChainedInput: @@ -29,14 +34,14 @@ class ChainedInput: def __init__(self, text: str, verbose: bool = False): """Initialize with verbose flag and initial text.""" - self.verbose = verbose - if self.verbose: + self._verbose = verbose + if self._verbose: print_text(text, None) self._input = text def add(self, text: str, color: Optional[str] = None) -> None: """Add text to input, print if in verbose mode.""" - if self.verbose: + if self._verbose: print_text(text, color) self._input += text diff --git a/langchain/llms/ai21.py b/langchain/llms/ai21.py index b1dac08b32f..a870d9e4bfd 100644 --- a/langchain/llms/ai21.py +++ b/langchain/llms/ai21.py @@ -1,11 +1,11 @@ """Wrapper around AI21 APIs.""" -import os from typing import Any, Dict, List, Mapping, Optional import requests from pydantic import BaseModel, Extra, root_validator from langchain.llms.base import LLM +from langchain.utils 
import get_from_dict_or_env class AI21PenaltyData(BaseModel): @@ -62,7 +62,7 @@ class AI21(BaseModel, LLM): logitBias: Optional[Dict[str, float]] = None """Adjust the probability of specific tokens being generated.""" - ai21_api_key: Optional[str] = os.environ.get("AI21_API_KEY") + ai21_api_key: Optional[str] = None class Config: """Configuration for this pydantic object.""" @@ -72,14 +72,8 @@ class AI21(BaseModel, LLM): @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that api key exists in environment.""" - ai21_api_key = values.get("ai21_api_key") - - if ai21_api_key is None or ai21_api_key == "": - raise ValueError( - "Did not find AI21 API key, please add an environment variable" - " `AI21_API_KEY` which contains it, or pass `ai21_api_key`" - " as a named parameter." - ) + ai21_api_key = get_from_dict_or_env(values, "ai21_api_key", "AI21_API_KEY") + values["ai21_api_key"] = ai21_api_key return values @property @@ -122,11 +116,7 @@ class AI21(BaseModel, LLM): response = requests.post( url=f"https://api.ai21.com/studio/v1/{self.model}/complete", headers={"Authorization": f"Bearer {self.ai21_api_key}"}, - json={ - "prompt": prompt, - "stopSequences": stop, - **self._default_params, - }, + json={"prompt": prompt, "stopSequences": stop, **self._default_params}, ) if response.status_code != 200: optional_detail = response.json().get("error") diff --git a/langchain/llms/cohere.py b/langchain/llms/cohere.py index 2a41b807ea3..e051ba47ff7 100644 --- a/langchain/llms/cohere.py +++ b/langchain/llms/cohere.py @@ -1,11 +1,11 @@ """Wrapper around Cohere APIs.""" -import os from typing import Any, Dict, List, Mapping, Optional from pydantic import BaseModel, Extra, root_validator from langchain.llms.base import LLM from langchain.llms.utils import enforce_stop_tokens +from langchain.utils import get_from_dict_or_env class Cohere(LLM, BaseModel): @@ -44,7 +44,7 @@ class Cohere(LLM, BaseModel): presence_penalty: int = 0 """Penalizes repeated tokens.""" - cohere_api_key: Optional[str] = os.environ.get("COHERE_API_KEY") + cohere_api_key: Optional[str] = None class Config: """Configuration for this pydantic object.""" @@ -54,14 +54,9 @@ class Cohere(LLM, BaseModel): @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" - cohere_api_key = values.get("cohere_api_key") - - if cohere_api_key is None or cohere_api_key == "": - raise ValueError( - "Did not find Cohere API key, please add an environment variable" - " `COHERE_API_KEY` which contains it, or pass `cohere_api_key`" - " as a named parameter." - ) + cohere_api_key = get_from_dict_or_env( + values, "cohere_api_key", "COHERE_API_KEY" + ) try: import cohere diff --git a/langchain/llms/huggingface_hub.py b/langchain/llms/huggingface_hub.py index 8d584558ae9..c67c9720a4e 100644 --- a/langchain/llms/huggingface_hub.py +++ b/langchain/llms/huggingface_hub.py @@ -1,11 +1,11 @@ """Wrapper around HuggingFace APIs.""" -import os from typing import Any, Dict, List, Mapping, Optional from pydantic import BaseModel, Extra, root_validator from langchain.llms.base import LLM from langchain.llms.utils import enforce_stop_tokens +from langchain.utils import get_from_dict_or_env DEFAULT_REPO_ID = "gpt2" VALID_TASKS = ("text2text-generation", "text-generation") @@ -18,7 +18,7 @@ class HuggingFaceHub(LLM, BaseModel): environment variable ``HUGGINGFACEHUB_API_TOKEN`` set with your API token, or pass it as a named parameter to the constructor. 
- Only supports task `text-generation` for now. + Only supports `text-generation` and `text2text-generation` for now. Example: .. code-block:: python @@ -35,7 +35,7 @@ class HuggingFaceHub(LLM, BaseModel): model_kwargs: Optional[dict] = None """Key word arguments to pass to the model.""" - huggingfacehub_api_token: Optional[str] = os.environ.get("HUGGINGFACEHUB_API_TOKEN") + huggingfacehub_api_token: Optional[str] = None class Config: """Configuration for this pydantic object.""" @@ -45,13 +45,9 @@ class HuggingFaceHub(LLM, BaseModel): @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" - huggingfacehub_api_token = values.get("huggingfacehub_api_token") - if huggingfacehub_api_token is None or huggingfacehub_api_token == "": - raise ValueError( - "Did not find HuggingFace API token, please add an environment variable" - " `HUGGINGFACEHUB_API_TOKEN` which contains it, or pass" - " `huggingfacehub_api_token` as a named parameter." - ) + huggingfacehub_api_token = get_from_dict_or_env( + values, "huggingfacehub_api_token", "HUGGINGFACEHUB_API_TOKEN" + ) try: from huggingface_hub.inference_api import InferenceApi diff --git a/langchain/llms/nlpcloud.py b/langchain/llms/nlpcloud.py index 771cc815a56..d9e4c54e206 100644 --- a/langchain/llms/nlpcloud.py +++ b/langchain/llms/nlpcloud.py @@ -1,10 +1,10 @@ """Wrapper around NLPCloud APIs.""" -import os from typing import Any, Dict, List, Mapping, Optional from pydantic import BaseModel, Extra, root_validator from langchain.llms.base import LLM +from langchain.utils import get_from_dict_or_env class NLPCloud(LLM, BaseModel): @@ -54,7 +54,7 @@ class NLPCloud(LLM, BaseModel): num_return_sequences: int = 1 """How many completions to generate for each prompt.""" - nlpcloud_api_key: Optional[str] = os.environ.get("NLPCLOUD_API_KEY") + nlpcloud_api_key: Optional[str] = None class Config: """Configuration for this pydantic object.""" @@ -64,14 +64,9 @@ class NLPCloud(LLM, BaseModel): @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" - nlpcloud_api_key = values.get("nlpcloud_api_key") - - if nlpcloud_api_key is None or nlpcloud_api_key == "": - raise ValueError( - "Did not find NLPCloud API key, please add an environment variable" - " `NLPCLOUD_API_KEY` which contains it, or pass `nlpcloud_api_key`" - " as a named parameter." 
- ) + nlpcloud_api_key = get_from_dict_or_env( + values, "nlpcloud_api_key", "NLPCLOUD_API_KEY" + ) try: import nlpcloud diff --git a/langchain/llms/openai.py b/langchain/llms/openai.py index 2355015b64d..2affb86de5e 100644 --- a/langchain/llms/openai.py +++ b/langchain/llms/openai.py @@ -1,10 +1,10 @@ """Wrapper around OpenAI APIs.""" -import os from typing import Any, Dict, List, Mapping, Optional from pydantic import BaseModel, Extra, root_validator from langchain.llms.base import LLM +from langchain.utils import get_from_dict_or_env class OpenAI(LLM, BaseModel): @@ -38,7 +38,7 @@ class OpenAI(LLM, BaseModel): best_of: int = 1 """Generates best_of completions server-side and returns the "best".""" - openai_api_key: Optional[str] = os.environ.get("OPENAI_API_KEY") + openai_api_key: Optional[str] = None class Config: """Configuration for this pydantic object.""" @@ -48,14 +48,9 @@ class OpenAI(LLM, BaseModel): @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" - openai_api_key = values.get("openai_api_key") - - if openai_api_key is None or openai_api_key == "": - raise ValueError( - "Did not find OpenAI API key, please add an environment variable" - " `OPENAI_API_KEY` which contains it, or pass `openai_api_key`" - " as a named parameter." - ) + openai_api_key = get_from_dict_or_env( + values, "openai_api_key", "OPENAI_API_KEY" + ) try: import openai diff --git a/langchain/model_laboratory.py b/langchain/model_laboratory.py index 0243f70e889..d61265c01df 100644 --- a/langchain/model_laboratory.py +++ b/langchain/model_laboratory.py @@ -1,6 +1,7 @@ """Experiment with different models.""" -from typing import List, Optional +from typing import List, Optional, Sequence +from langchain.chains.base import Chain from langchain.chains.llm import LLMChain from langchain.input import get_color_mapping, print_text from langchain.llms.base import LLM @@ -10,7 +11,41 @@ from langchain.prompts.prompt import Prompt class ModelLaboratory: """Experiment with different models.""" - def __init__(self, llms: List[LLM], prompt: Optional[Prompt] = None): + def __init__(self, chains: Sequence[Chain], names: Optional[List[str]] = None): + """Initialize with chains to experiment with. + + Args: + chains: list of chains to experiment with. + """ + if not isinstance(chains[0], Chain): + raise ValueError( + "ModelLaboratory should now be initialized with Chains. " + "If you want to initialize with LLMs, use the `from_llms` method " + "instead (`ModelLaboratory.from_llms(...)`)" + ) + for chain in chains: + if len(chain.input_keys) != 1: + raise ValueError( + "Currently only support chains with one input variable, " + f"got {chain.input_keys}" + ) + if len(chain.output_keys) != 1: + raise ValueError( + "Currently only support chains with one output variable, " + f"got {chain.output_keys}" + ) + if names is not None: + if len(names) != len(chains): + raise ValueError("Length of chains does not match length of names.") + self.chains = chains + chain_range = [str(i) for i in range(len(self.chains))] + self.chain_colors = get_color_mapping(chain_range) + self.names = names + + @classmethod + def from_llms( + cls, llms: List[LLM], prompt: Optional[Prompt] = None + ) -> "ModelLaboratory": """Initialize with LLMs to experiment with and optional prompt. Args: @@ -18,18 +53,11 @@ class ModelLaboratory: prompt: Optional prompt to use to prompt the LLMs. Defaults to None. If a prompt was provided, it should only have one input variable. 
""" - self.llms = llms - llm_range = [str(i) for i in range(len(self.llms))] - self.llm_colors = get_color_mapping(llm_range) if prompt is None: - self.prompt = Prompt(input_variables=["_input"], template="{_input}") - else: - if len(prompt.input_variables) != 1: - raise ValueError( - "Currently only support prompts with one input variable, " - f"got {prompt}" - ) - self.prompt = prompt + prompt = Prompt(input_variables=["_input"], template="{_input}") + chains = [LLMChain(llm=llm, prompt=prompt) for llm in llms] + names = [str(llm) for llm in llms] + return cls(chains, names=names) def compare(self, text: str) -> None: """Compare model outputs on an input text. @@ -42,9 +70,11 @@ class ModelLaboratory: text: input text to run all models on. """ print(f"\033[1mInput:\033[0m\n{text}\n") - for i, llm in enumerate(self.llms): - print_text(str(llm), end="\n") - chain = LLMChain(llm=llm, prompt=self.prompt) - llm_inputs = {self.prompt.input_variables[0]: text} - output = chain.predict(**llm_inputs) - print_text(output, color=self.llm_colors[str(i)], end="\n\n") + for i, chain in enumerate(self.chains): + if self.names is not None: + name = self.names[i] + else: + name = str(chain) + print_text(name, end="\n") + output = chain.run(text) + print_text(output, color=self.chain_colors[str(i)], end="\n\n") diff --git a/langchain/prompts/prompt.py b/langchain/prompts/prompt.py index b84b7718149..f27c678f04d 100644 --- a/langchain/prompts/prompt.py +++ b/langchain/prompts/prompt.py @@ -94,8 +94,7 @@ class Prompt(BaseModel, BasePrompt): Returns: The final prompt generated. """ - example_str = example_separator.join(examples) - template = prefix + example_str + suffix + template = example_separator.join([prefix, *examples, suffix]) return cls(input_variables=input_variables, template=template) @classmethod diff --git a/langchain/sql_database.py b/langchain/sql_database.py index 138839bb3dc..a04ab15aafa 100644 --- a/langchain/sql_database.py +++ b/langchain/sql_database.py @@ -1,4 +1,6 @@ """SQLAlchemy wrapper around a database.""" +from typing import Any, Iterable, List, Optional + from sqlalchemy import create_engine, inspect from sqlalchemy.engine import Engine @@ -6,29 +8,57 @@ from sqlalchemy.engine import Engine class SQLDatabase: """SQLAlchemy wrapper around a database.""" - def __init__(self, engine: Engine): + def __init__( + self, + engine: Engine, + ignore_tables: Optional[List[str]] = None, + include_tables: Optional[List[str]] = None, + ): """Create engine from database URI.""" self._engine = engine + if include_tables and ignore_tables: + raise ValueError("Cannot specify both include_tables and ignore_tables") + + self._inspector = inspect(self._engine) + self._all_tables = self._inspector.get_table_names() + self._include_tables = include_tables or [] + if self._include_tables: + missing_tables = set(self._include_tables).difference(self._all_tables) + if missing_tables: + raise ValueError( + f"include_tables {missing_tables} not found in database" + ) + self._ignore_tables = ignore_tables or [] + if self._ignore_tables: + missing_tables = set(self._ignore_tables).difference(self._all_tables) + if missing_tables: + raise ValueError( + f"ignore_tables {missing_tables} not found in database" + ) @classmethod - def from_uri(cls, database_uri: str) -> "SQLDatabase": + def from_uri(cls, database_uri: str, **kwargs: Any) -> "SQLDatabase": """Construct a SQLAlchemy engine from URI.""" - return cls(create_engine(database_uri)) + return cls(create_engine(database_uri), **kwargs) @property def 
dialect(self) -> str: """Return string representation of dialect to use.""" return self._engine.dialect.name + def _get_table_names(self) -> Iterable[str]: + if self._include_tables: + return self._include_tables + return set(self._all_tables) - set(self._ignore_tables) + @property def table_info(self) -> str: """Information about all tables in the database.""" - template = "The '{table_name}' table has columns: {columns}." + template = "Table '{table_name}' has columns: {columns}." tables = [] - inspector = inspect(self._engine) - for table_name in inspector.get_table_names(): + for table_name in self._get_table_names(): columns = [] - for column in inspector.get_columns(table_name): + for column in self._inspector.get_columns(table_name): columns.append(f"{column['name']} ({str(column['type'])})") column_str = ", ".join(columns) table_str = template.format(table_name=table_name, columns=column_str) diff --git a/langchain/utils.py b/langchain/utils.py new file mode 100644 index 00000000000..8588f4e940e --- /dev/null +++ b/langchain/utils.py @@ -0,0 +1,17 @@ +"""Generic utility functions.""" +import os +from typing import Any, Dict + + +def get_from_dict_or_env(data: Dict[str, Any], key: str, env_key: str) -> str: + """Get a value from a dictionary or an environment variable.""" + if key in data and data[key]: + return data[key] + elif env_key in os.environ and os.environ[env_key]: + return os.environ[env_key] + else: + raise ValueError( + f"Did not find {key}, please add an environment variable" + f" `{env_key}` which contains it, or pass" + f" `{key}` as a named parameter." + ) diff --git a/langchain/vectorstores/elastic_vector_search.py b/langchain/vectorstores/elastic_vector_search.py index d8aed5f67d2..549277b3f94 100644 --- a/langchain/vectorstores/elastic_vector_search.py +++ b/langchain/vectorstores/elastic_vector_search.py @@ -1,10 +1,10 @@ """Wrapper around Elasticsearch vector database.""" -import os import uuid from typing import Any, Callable, Dict, List from langchain.docstore.document import Document from langchain.embeddings.base import Embeddings +from langchain.utils import get_from_dict_or_env from langchain.vectorstores.base import VectorStore @@ -45,10 +45,7 @@ class ElasticVectorSearch(VectorStore): """ def __init__( - self, - elasticsearch_url: str, - index_name: str, - embedding_function: Callable, + self, elasticsearch_url: str, index_name: str, embedding_function: Callable ): """Initialize with necessary components.""" try: @@ -110,16 +107,9 @@ class ElasticVectorSearch(VectorStore): elasticsearch_url="http://localhost:9200" ) """ - elasticsearch_url = kwargs.get("elasticsearch_url") - if not elasticsearch_url: - elasticsearch_url = os.environ.get("ELASTICSEARCH_URL") - - if elasticsearch_url is None or elasticsearch_url == "": - raise ValueError( - "Did not find Elasticsearch URL, please add an environment variable" - " `ELASTICSEARCH_URL` which contains it, or pass" - " `elasticsearch_url` as a named parameter." 
- ) + elasticsearch_url = get_from_dict_or_env( + kwargs, "elasticsearch_url", "ELASTICSEARCH_URL" + ) try: import elasticsearch from elasticsearch.helpers import bulk diff --git a/tests/integration_tests/llms/test_manifest.py b/tests/integration_tests/llms/test_manifest.py index 3416a332862..eca4a94b0fa 100644 --- a/tests/integration_tests/llms/test_manifest.py +++ b/tests/integration_tests/llms/test_manifest.py @@ -6,9 +6,7 @@ def test_manifest_wrapper() -> None: """Test manifest wrapper.""" from manifest import Manifest - manifest = Manifest( - client_name="openai", - ) + manifest = Manifest(client_name="openai") llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0}) output = llm("The capital of New York is:") assert output == "Albany" diff --git a/tests/unit_tests/test_input.py b/tests/unit_tests/test_input.py index dd17bfc5deb..43dd3b080b8 100644 --- a/tests/unit_tests/test_input.py +++ b/tests/unit_tests/test_input.py @@ -48,7 +48,7 @@ def test_chained_input_verbose() -> None: chained_input.add("baz", color="blue") sys.stdout = old_stdout output = mystdout.getvalue() - assert output == "\x1b[104mbaz\x1b[0m" + assert output == "\x1b[36;1m\x1b[1;3mbaz\x1b[0m" assert chained_input.input == "foobarbaz" @@ -70,5 +70,5 @@ def test_get_color_mapping_excluded_colors() -> None: """Test getting of color mapping with excluded colors.""" items = ["foo", "bar"] output = get_color_mapping(items, excluded_colors=["blue"]) - expected_output = {"foo": "yellow", "bar": "red"} + expected_output = {"foo": "yellow", "bar": "pink"} assert output == expected_output diff --git a/tests/unit_tests/test_prompt.py b/tests/unit_tests/test_prompt.py index 80a7fca4a75..7265cae3470 100644 --- a/tests/unit_tests/test_prompt.py +++ b/tests/unit_tests/test_prompt.py @@ -51,8 +51,8 @@ Question: {question} Answer:""" input_variables = ["question"] example_separator = "\n\n" - prefix = """Test Prompt:\n\n""" - suffix = """\n\nQuestion: {question}\nAnswer:""" + prefix = """Test Prompt:""" + suffix = """Question: {question}\nAnswer:""" examples = [ """Question: who are you?\nAnswer: foo""", """Question: what are you?\nAnswer: bar""", diff --git a/tests/unit_tests/test_sql_database.py b/tests/unit_tests/test_sql_database.py index c18d5deb63a..1a536fe5d1d 100644 --- a/tests/unit_tests/test_sql_database.py +++ b/tests/unit_tests/test_sql_database.py @@ -28,11 +28,11 @@ def test_table_info() -> None: db = SQLDatabase(engine) output = db.table_info expected_output = ( - "The 'company' table has columns: company_id (INTEGER), " - "company_location (VARCHAR).\n" - "The 'user' table has columns: user_id (INTEGER), user_name (VARCHAR(16))." + "Table 'company' has columns: company_id (INTEGER), " + "company_location (VARCHAR).", + "Table 'user' has columns: user_id (INTEGER), user_name (VARCHAR(16)).", ) - assert output == expected_output + assert sorted(output.split("\n")) == sorted(expected_output) def test_sql_database_run() -> None:
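
Reviewer note: the recurring "did not find API key" validation blocks removed throughout this diff are all replaced by the single `get_from_dict_or_env` helper added in `langchain/utils.py`. One behavioral consequence worth flagging: the old `os.environ.get(...)` field defaults were evaluated once at import time, whereas the helper resolves keys at validation time, so keys exported after import are now picked up. Below is a minimal, self-contained sketch of the helper's resolution order (explicit kwarg, then environment variable, then a uniform error); the helper body is copied from the diff, and the surrounding assertions are illustrative only.

```python
import os
from typing import Any, Dict


def get_from_dict_or_env(data: Dict[str, Any], key: str, env_key: str) -> str:
    """Get a value from a dictionary or an environment variable."""
    if key in data and data[key]:
        return data[key]
    elif env_key in os.environ and os.environ[env_key]:
        return os.environ[env_key]
    else:
        raise ValueError(
            f"Did not find {key}, please add an environment variable"
            f" `{env_key}` which contains it, or pass"
            f" `{key}` as a named parameter."
        )


# An explicit constructor kwarg wins over the environment.
os.environ["SERPAPI_API_KEY"] = "env-key"
assert get_from_dict_or_env(
    {"serpapi_api_key": "ctor-key"}, "serpapi_api_key", "SERPAPI_API_KEY"
) == "ctor-key"

# An empty or missing kwarg falls back to the environment variable.
assert get_from_dict_or_env(
    {"serpapi_api_key": None}, "serpapi_api_key", "SERPAPI_API_KEY"
) == "env-key"

# Neither source set: one uniform error message across all wrappers.
del os.environ["SERPAPI_API_KEY"]
try:
    get_from_dict_or_env({}, "serpapi_api_key", "SERPAPI_API_KEY")
except ValueError as err:
    print(err)
```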
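Reviewer note: `ModelLaboratory` is a breaking change in this diff — the constructor now takes `Chain` objects (single input and output key) plus optional display names, and plain LLM lists must migrate to the new `from_llms` classmethod, as the updated notebook above does. A hedged migration sketch, assuming valid `OPENAI_API_KEY` and `COHERE_API_KEY` are exported (the `compare` call hits live APIs):

```python
from langchain.llms.cohere import Cohere
from langchain.llms.openai import OpenAI
from langchain.model_laboratory import ModelLaboratory

llms = [OpenAI(temperature=0), Cohere(temperature=0)]

# Before this change: ModelLaboratory(llms)
# After: LLM lists go through the classmethod, which wraps each LLM in an
# LLMChain and uses str(llm) as its display name in compare() output.
model_lab = ModelLaboratory.from_llms(llms)
model_lab.compare("What color are flamingos?")
```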