From 634358db5e9d0f091c66c82b8ed1379ec6531f88 Mon Sep 17 00:00:00 2001 From: dev2049 <130488702+dev2049@users.noreply.github.com> Date: Fri, 14 Apr 2023 11:09:36 -0700 Subject: [PATCH 01/10] Fix OpenAI LLM docstring (#2910) --- langchain/llms/openai.py | 45 ++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/langchain/llms/openai.py b/langchain/llms/openai.py index ff99ee48bf0..8a54c5d3703 100644 --- a/langchain/llms/openai.py +++ b/langchain/llms/openai.py @@ -114,20 +114,7 @@ async def acompletion_with_retry( class BaseOpenAI(BaseLLM): - """Wrapper around OpenAI large language models. - - To use, you should have the ``openai`` python package installed, and the - environment variable ``OPENAI_API_KEY`` set with your API key. - - Any parameters that are valid to be passed to the openai.create call can be passed - in, even if not explicitly saved on this class. - - Example: - .. code-block:: python - - from langchain.llms import OpenAI - openai = OpenAI(model_name="text-davinci-003") - """ + """Wrapper around OpenAI large language models.""" client: Any #: :meta private: model_name: str = "text-davinci-003" @@ -541,7 +528,20 @@ class BaseOpenAI(BaseLLM): class OpenAI(BaseOpenAI): - """Generic OpenAI class that uses model name.""" + """Wrapper around OpenAI large language models. + + To use, you should have the ``openai`` python package installed, and the + environment variable ``OPENAI_API_KEY`` set with your API key. + + Any parameters that are valid to be passed to the openai.create call can be passed + in, even if not explicitly saved on this class. + + Example: + .. code-block:: python + + from langchain.llms import OpenAI + openai = OpenAI(model_name="text-davinci-003") + """ @property def _invocation_params(self) -> Dict[str, Any]: @@ -549,7 +549,20 @@ class OpenAI(BaseOpenAI): class AzureOpenAI(BaseOpenAI): - """Azure specific OpenAI class that uses deployment name.""" + """Wrapper around Azure-specific OpenAI large language models. + + To use, you should have the ``openai`` python package installed, and the + environment variable ``OPENAI_API_KEY`` set with your API key. + + Any parameters that are valid to be passed to the openai.create call can be passed + in, even if not explicitly saved on this class. + + Example: + .. code-block:: python + + from langchain.llms import AzureOpenAI + openai = AzureOpenAI(model_name="text-davinci-003") + """ deployment_name: str = "" """Deployment name to use.""" From 7ee87eb0c8df10315b45ebbddcad36a72b7fe7b9 Mon Sep 17 00:00:00 2001 From: Boris Feld Date: Fri, 14 Apr 2023 22:19:58 +0200 Subject: [PATCH 02/10] Comet callback updates (#2889) I'm working with @DN6 and I made some small fixes and improvements after playing with the integration. --- docs/ecosystem/comet_tracking.ipynb | 52 ++++++++++++------------ langchain/callbacks/comet_ml_callback.py | 12 +++--- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/docs/ecosystem/comet_tracking.ipynb b/docs/ecosystem/comet_tracking.ipynb index fa8b1217dbd..4d33bd00ab5 100644 --- a/docs/ecosystem/comet_tracking.ipynb +++ b/docs/ecosystem/comet_tracking.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -9,7 +8,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -17,7 +15,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -31,7 +28,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -39,7 +35,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -52,14 +47,13 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install comet_ml\n", - "!pip install langchain\n", - "!pip install openai\n", - "!pip install google-search-results" + "%pip install comet_ml langchain openai google-search-results spacy textstat pandas\n", + "\n", + "import sys\n", + "!{sys.executable} -m spacy download en_core_web_sm" ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -67,7 +61,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -86,7 +79,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -94,7 +86,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -109,12 +100,12 @@ "source": [ "import os\n", "\n", - "%env OPENAI_API_KEY=\"...\"\n", - "%env SERPAPI_API_KEY=\"...\"" + "os.environ[\"OPENAI_API_KEY\"] = \"...\"\n", + "#os.environ[\"OPENAI_ORGANIZATION\"] = \"...\"\n", + "os.environ[\"SERPAPI_API_KEY\"] = \"...\"" ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -149,7 +140,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -185,12 +175,11 @@ "synopsis_chain = LLMChain(llm=llm, prompt=prompt_template, callback_manager=manager)\n", "\n", "test_prompts = [{\"title\": \"Documentary about Bigfoot in Paris\"}]\n", - "synopsis_chain.apply(test_prompts)\n", + "print(synopsis_chain.apply(test_prompts))\n", "comet_callback.flush_tracker(synopsis_chain, finish=True)" ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -232,7 +221,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -240,7 +228,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -256,7 +243,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install rouge-score" + "%pip install rouge-score" ] }, { @@ -336,16 +323,29 @@ " \"\"\"\n", " }\n", "]\n", - "synopsis_chain.apply(test_prompts)\n", + "print(synopsis_chain.apply(test_prompts))\n", "comet_callback.flush_tracker(synopsis_chain, finish=True)" ] } ], "metadata": { - "language_info": { - "name": "python" + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" }, - "orig_nbformat": 4 + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.15" + } }, "nbformat": 4, "nbformat_minor": 2 diff --git a/langchain/callbacks/comet_ml_callback.py b/langchain/callbacks/comet_ml_callback.py index c716d43d3f4..6f061f14aa4 100644 --- a/langchain/callbacks/comet_ml_callback.py +++ b/langchain/callbacks/comet_ml_callback.py @@ -34,12 +34,10 @@ def _get_experiment( ) -> Any: comet_ml = import_comet_ml() - experiment = comet_ml.config.get_global_experiment() - if experiment is None: - experiment = comet_ml.Experiment( # type: ignore - workspace=workspace, - project_name=project_name, - ) + experiment = comet_ml.Experiment( # type: ignore + workspace=workspace, + project_name=project_name, + ) return experiment @@ -132,7 +130,7 @@ class CometCallbackHandler(BaseMetadataCallbackHandler, BaseCallbackHandler): warning = ( "The comet_ml callback is currently in beta and is subject to change " "based on updates to `langchain`. Please report any issues to " - "https://github.com/comet_ml/issue_tracking/issues with the tag " + "https://github.com/comet-ml/issue_tracking/issues with the tag " "`langchain`." ) comet_ml.LOGGER.warning(warning) From 66bef1d7ed17f00e7b554ca5413e336970489253 Mon Sep 17 00:00:00 2001 From: Kwuang Tang <10319942+cktang88@users.noreply.github.com> Date: Fri, 14 Apr 2023 18:02:21 -0400 Subject: [PATCH 03/10] Ignore files from .gitignore in Git loader (#2909) fixes #2905 extends #2851 --- langchain/document_loaders/git.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/langchain/document_loaders/git.py b/langchain/document_loaders/git.py index eb10dcde8fb..a862e8f0312 100644 --- a/langchain/document_loaders/git.py +++ b/langchain/document_loaders/git.py @@ -54,6 +54,10 @@ class GitLoader(BaseLoader): file_path = os.path.join(self.repo_path, item.path) + ignored_files = repo.ignored([file_path]) + if len(ignored_files): + continue + # uses filter to skip files if self.file_filter and not self.file_filter(file_path): continue From 392f1b32188d40e45adabb85a1641780eedd006b Mon Sep 17 00:00:00 2001 From: Mike Lambert Date: Fri, 14 Apr 2023 18:09:07 -0400 Subject: [PATCH 04/10] Add Anthropic ChatModel to langchain (#2293) * Adds an Anthropic ChatModel * Factors out common code in our LLMModel and ChatModel * Supports streaming llm-tokens to the callbacks on a delta basis (until a future V2 API does that for us) * Some fixes --- langchain/chat_models/anthropic.py | 145 +++++++++++++++ langchain/llms/anthropic.py | 172 +++++++++--------- .../chat_models/test_anthropic.py | 81 +++++++++ .../integration_tests/llms/test_anthropic.py | 2 - 4 files changed, 316 insertions(+), 84 deletions(-) create mode 100644 langchain/chat_models/anthropic.py create mode 100644 tests/integration_tests/chat_models/test_anthropic.py diff --git a/langchain/chat_models/anthropic.py b/langchain/chat_models/anthropic.py new file mode 100644 index 00000000000..b63fbf052d9 --- /dev/null +++ b/langchain/chat_models/anthropic.py @@ -0,0 +1,145 @@ +from typing import List, Optional + +from pydantic import Extra + +from langchain.chat_models.base import BaseChatModel +from langchain.llms.anthropic import _AnthropicCommon +from langchain.schema import ( + AIMessage, + BaseMessage, + ChatGeneration, + ChatMessage, + ChatResult, + HumanMessage, + SystemMessage, +) + + +class ChatAnthropic(BaseChatModel, _AnthropicCommon): + r"""Wrapper around Anthropic's large language model. + + To use, you should have the ``anthropic`` python package installed, and the + environment variable ``ANTHROPIC_API_KEY`` set with your API key, or pass + it as a named parameter to the constructor. + + Example: + .. code-block:: python + import anthropic + from langchain.llms import Anthropic + model = Anthropic(model="", anthropic_api_key="my-api-key") + + # Simplest invocation, automatically wrapped with HUMAN_PROMPT + # and AI_PROMPT. + response = model("What are the biggest risks facing humanity?") + + # Or if you want to use the chat mode, build a few-shot-prompt, or + # put words in the Assistant's mouth, use HUMAN_PROMPT and AI_PROMPT: + raw_prompt = "What are the biggest risks facing humanity?" + prompt = f"{anthropic.HUMAN_PROMPT} {prompt}{anthropic.AI_PROMPT}" + response = model(prompt) + """ + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @property + def _llm_type(self) -> str: + """Return type of chat model.""" + return "anthropic-chat" + + def _convert_one_message_to_text(self, message: BaseMessage) -> str: + if isinstance(message, ChatMessage): + message_text = f"\n\n{message.role.capitalize()}: {message.content}" + elif isinstance(message, HumanMessage): + message_text = f"{self.HUMAN_PROMPT} {message.content}" + elif isinstance(message, AIMessage): + message_text = f"{self.AI_PROMPT} {message.content}" + elif isinstance(message, SystemMessage): + message_text = f"{self.HUMAN_PROMPT} {message.content}" + else: + raise ValueError(f"Got unknown type {message}") + return message_text + + def _convert_messages_to_text(self, messages: List[BaseMessage]) -> str: + """Format a list of strings into a single string with necessary newlines. + + Args: + messages (List[BaseMessage]): List of BaseMessage to combine. + + Returns: + str: Combined string with necessary newlines. + """ + return "".join( + self._convert_one_message_to_text(message) for message in messages + ) + + def _convert_messages_to_prompt(self, messages: List[BaseMessage]) -> str: + """Format a list of messages into a full prompt for the Anthropic model + + Args: + messages (List[BaseMessage]): List of BaseMessage to combine. + + Returns: + str: Combined string with necessary HUMAN_PROMPT and AI_PROMPT tags. + """ + if not self.AI_PROMPT: + raise NameError("Please ensure the anthropic package is loaded") + + if not isinstance(messages[-1], AIMessage): + messages.append(AIMessage(content="")) + text = self._convert_messages_to_text(messages) + return ( + text.rstrip() + ) # trim off the trailing ' ' that might come from the "Assistant: " + + def _generate( + self, messages: List[BaseMessage], stop: Optional[List[str]] = None + ) -> ChatResult: + prompt = self._convert_messages_to_prompt(messages) + params = {"prompt": prompt, "stop_sequences": stop, **self._default_params} + + if self.streaming: + completion = "" + stream_resp = self.client.completion_stream(**params) + for data in stream_resp: + delta = data["completion"][len(completion) :] + completion = data["completion"] + self.callback_manager.on_llm_new_token( + delta, + verbose=self.verbose, + ) + else: + response = self.client.completion(**params) + completion = response["completion"] + message = AIMessage(content=completion) + return ChatResult(generations=[ChatGeneration(message=message)]) + + async def _agenerate( + self, messages: List[BaseMessage], stop: Optional[List[str]] = None + ) -> ChatResult: + prompt = self._convert_messages_to_prompt(messages) + params = {"prompt": prompt, "stop_sequences": stop, **self._default_params} + + if self.streaming: + completion = "" + stream_resp = await self.client.acompletion_stream(**params) + async for data in stream_resp: + delta = data["completion"][len(completion) :] + completion = data["completion"] + if self.callback_manager.is_async: + await self.callback_manager.on_llm_new_token( + delta, + verbose=self.verbose, + ) + else: + self.callback_manager.on_llm_new_token( + delta, + verbose=self.verbose, + ) + else: + response = await self.client.acompletion(**params) + completion = response["completion"] + message = AIMessage(content=completion) + return ChatResult(generations=[ChatGeneration(message=message)]) diff --git a/langchain/llms/anthropic.py b/langchain/llms/anthropic.py index bc4cfd42032..24d9def1eb7 100644 --- a/langchain/llms/anthropic.py +++ b/langchain/llms/anthropic.py @@ -1,15 +1,100 @@ """Wrapper around Anthropic APIs.""" import re -from typing import Any, Dict, Generator, List, Mapping, Optional +from typing import Any, Callable, Dict, Generator, List, Mapping, Optional -from pydantic import Extra, root_validator +from pydantic import BaseModel, Extra, root_validator from langchain.llms.base import LLM from langchain.utils import get_from_dict_or_env -class Anthropic(LLM): - r"""Wrapper around Anthropic large language models. +class _AnthropicCommon(BaseModel): + client: Any = None #: :meta private: + model: str = "claude-latest" + """Model name to use.""" + + max_tokens_to_sample: int = 256 + """Denotes the number of tokens to predict per generation.""" + + temperature: Optional[float] = None + """A non-negative float that tunes the degree of randomness in generation.""" + + top_k: Optional[int] = None + """Number of most likely tokens to consider at each step.""" + + top_p: Optional[float] = None + """Total probability mass of tokens to consider at each step.""" + + streaming: bool = False + """Whether to stream the results.""" + + anthropic_api_key: Optional[str] = None + + HUMAN_PROMPT: Optional[str] = None + AI_PROMPT: Optional[str] = None + count_tokens: Optional[Callable[[str], int]] = None + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + anthropic_api_key = get_from_dict_or_env( + values, "anthropic_api_key", "ANTHROPIC_API_KEY" + ) + try: + import anthropic + + values["client"] = anthropic.Client(anthropic_api_key) + values["HUMAN_PROMPT"] = anthropic.HUMAN_PROMPT + values["AI_PROMPT"] = anthropic.AI_PROMPT + values["count_tokens"] = anthropic.count_tokens + except ImportError: + raise ValueError( + "Could not import anthropic python package. " + "Please it install it with `pip install anthropic`." + ) + return values + + @property + def _default_params(self) -> Mapping[str, Any]: + """Get the default parameters for calling Anthropic API.""" + d = { + "max_tokens_to_sample": self.max_tokens_to_sample, + "model": self.model, + } + if self.temperature is not None: + d["temperature"] = self.temperature + if self.top_k is not None: + d["top_k"] = self.top_k + if self.top_p is not None: + d["top_p"] = self.top_p + return d + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return {**{}, **self._default_params} + + def _get_anthropic_stop(self, stop: Optional[List[str]] = None) -> List[str]: + if not self.HUMAN_PROMPT or not self.AI_PROMPT: + raise NameError("Please ensure the anthropic package is loaded") + + if stop is None: + stop = [] + + # Never want model to invent new turns of Human / Assistant dialog. + stop.extend([self.HUMAN_PROMPT]) + + return stop + + def get_num_tokens(self, text: str) -> int: + """Calculate number of tokens.""" + if not self.count_tokens: + raise NameError("Please ensure the anthropic package is loaded") + return self.count_tokens(text) + + +class Anthropic(LLM, _AnthropicCommon): + r"""Wrapper around Anthropic's large language models. To use, you should have the ``anthropic`` python package installed, and the environment variable ``ANTHROPIC_API_KEY`` set with your API key, or pass @@ -32,73 +117,15 @@ class Anthropic(LLM): response = model(prompt) """ - client: Any #: :meta private: - model: str = "claude-v1" - """Model name to use.""" - - max_tokens_to_sample: int = 256 - """Denotes the number of tokens to predict per generation.""" - - temperature: float = 1.0 - """A non-negative float that tunes the degree of randomness in generation.""" - - top_k: int = 0 - """Number of most likely tokens to consider at each step.""" - - top_p: float = 1 - """Total probability mass of tokens to consider at each step.""" - - streaming: bool = False - """Whether to stream the results.""" - - anthropic_api_key: Optional[str] = None - - HUMAN_PROMPT: Optional[str] = None - AI_PROMPT: Optional[str] = None - class Config: """Configuration for this pydantic object.""" extra = Extra.forbid - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key and python package exists in environment.""" - anthropic_api_key = get_from_dict_or_env( - values, "anthropic_api_key", "ANTHROPIC_API_KEY" - ) - try: - import anthropic - - values["client"] = anthropic.Client(anthropic_api_key) - values["HUMAN_PROMPT"] = anthropic.HUMAN_PROMPT - values["AI_PROMPT"] = anthropic.AI_PROMPT - except ImportError: - raise ValueError( - "Could not import anthropic python package. " - "Please install it with `pip install anthropic`." - ) - return values - - @property - def _default_params(self) -> Mapping[str, Any]: - """Get the default parameters for calling Anthropic API.""" - return { - "max_tokens_to_sample": self.max_tokens_to_sample, - "temperature": self.temperature, - "top_k": self.top_k, - "top_p": self.top_p, - } - - @property - def _identifying_params(self) -> Mapping[str, Any]: - """Get the identifying parameters.""" - return {**{"model": self.model}, **self._default_params} - @property def _llm_type(self) -> str: """Return type of llm.""" - return "anthropic" + return "anthropic-llm" def _wrap_prompt(self, prompt: str) -> str: if not self.HUMAN_PROMPT or not self.AI_PROMPT: @@ -115,18 +142,6 @@ class Anthropic(LLM): # As a last resort, wrap the prompt ourselves to emulate instruct-style. return f"{self.HUMAN_PROMPT} {prompt}{self.AI_PROMPT} Sure, here you go:\n" - def _get_anthropic_stop(self, stop: Optional[List[str]] = None) -> List[str]: - if not self.HUMAN_PROMPT or not self.AI_PROMPT: - raise NameError("Please ensure the anthropic package is loaded") - - if stop is None: - stop = [] - - # Never want model to invent new turns of Human / Assistant dialog. - stop.extend([self.HUMAN_PROMPT, self.AI_PROMPT]) - - return stop - def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: r"""Call out to Anthropic's completion endpoint. @@ -148,10 +163,8 @@ class Anthropic(LLM): stop = self._get_anthropic_stop(stop) if self.streaming: stream_resp = self.client.completion_stream( - model=self.model, prompt=self._wrap_prompt(prompt), stop_sequences=stop, - stream=True, **self._default_params, ) current_completion = "" @@ -163,7 +176,6 @@ class Anthropic(LLM): ) return current_completion response = self.client.completion( - model=self.model, prompt=self._wrap_prompt(prompt), stop_sequences=stop, **self._default_params, @@ -175,10 +187,8 @@ class Anthropic(LLM): stop = self._get_anthropic_stop(stop) if self.streaming: stream_resp = await self.client.acompletion_stream( - model=self.model, prompt=self._wrap_prompt(prompt), stop_sequences=stop, - stream=True, **self._default_params, ) current_completion = "" @@ -195,7 +205,6 @@ class Anthropic(LLM): ) return current_completion response = await self.client.acompletion( - model=self.model, prompt=self._wrap_prompt(prompt), stop_sequences=stop, **self._default_params, @@ -227,7 +236,6 @@ class Anthropic(LLM): """ stop = self._get_anthropic_stop(stop) return self.client.completion_stream( - model=self.model, prompt=self._wrap_prompt(prompt), stop_sequences=stop, **self._default_params, diff --git a/tests/integration_tests/chat_models/test_anthropic.py b/tests/integration_tests/chat_models/test_anthropic.py new file mode 100644 index 00000000000..f04b30e2514 --- /dev/null +++ b/tests/integration_tests/chat_models/test_anthropic.py @@ -0,0 +1,81 @@ +"""Test Anthropic API wrapper.""" +from typing import List + +import pytest + +from langchain.callbacks.base import CallbackManager +from langchain.chat_models.anthropic import ChatAnthropic +from langchain.schema import ( + AIMessage, + BaseMessage, + ChatGeneration, + HumanMessage, + LLMResult, +) +from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler + + +def test_anthropic_call() -> None: + """Test valid call to anthropic.""" + chat = ChatAnthropic(model="bare-nano-0") + message = HumanMessage(content="Hello") + response = chat([message]) + assert isinstance(response, AIMessage) + assert isinstance(response.content, str) + + +def test_anthropic_streaming() -> None: + """Test streaming tokens from anthropic.""" + chat = ChatAnthropic(model="bare-nano-0", streaming=True) + message = HumanMessage(content="Hello") + response = chat([message]) + assert isinstance(response, AIMessage) + assert isinstance(response.content, str) + + +def test_anthropic_streaming_callback() -> None: + """Test that streaming correctly invokes on_llm_new_token callback.""" + callback_handler = FakeCallbackHandler() + callback_manager = CallbackManager([callback_handler]) + chat = ChatAnthropic( + streaming=True, + callback_manager=callback_manager, + verbose=True, + ) + message = HumanMessage(content="Write me a sentence with 100 words.") + chat([message]) + assert callback_handler.llm_streams > 1 + + +@pytest.mark.asyncio +async def test_anthropic_async_streaming_callback() -> None: + """Test that streaming correctly invokes on_llm_new_token callback.""" + callback_handler = FakeCallbackHandler() + callback_manager = CallbackManager([callback_handler]) + chat = ChatAnthropic( + streaming=True, + callback_manager=callback_manager, + verbose=True, + ) + chat_messages: List[BaseMessage] = [ + HumanMessage(content="How many toes do dogs have?") + ] + result: LLMResult = await chat.agenerate([chat_messages]) + assert callback_handler.llm_streams > 1 + assert isinstance(result, LLMResult) + for response in result.generations[0]: + assert isinstance(response, ChatGeneration) + assert isinstance(response.text, str) + assert response.text == response.message.content + + +def test_formatting() -> None: + chat = ChatAnthropic() + + chat_messages: List[BaseMessage] = [HumanMessage(content="Hello")] + result = chat._convert_messages_to_prompt(chat_messages) + assert result == "\n\nHuman: Hello\n\nAssistant:" + + chat_messages = [HumanMessage(content="Hello"), AIMessage(content="Answer:")] + result = chat._convert_messages_to_prompt(chat_messages) + assert result == "\n\nHuman: Hello\n\nAssistant: Answer:" diff --git a/tests/integration_tests/llms/test_anthropic.py b/tests/integration_tests/llms/test_anthropic.py index eaa509bf644..8c7717cfc7d 100644 --- a/tests/integration_tests/llms/test_anthropic.py +++ b/tests/integration_tests/llms/test_anthropic.py @@ -32,7 +32,6 @@ def test_anthropic_streaming_callback() -> None: callback_handler = FakeCallbackHandler() callback_manager = CallbackManager([callback_handler]) llm = Anthropic( - model="claude-v1", streaming=True, callback_manager=callback_manager, verbose=True, @@ -55,7 +54,6 @@ async def test_anthropic_async_streaming_callback() -> None: callback_handler = FakeCallbackHandler() callback_manager = CallbackManager([callback_handler]) llm = Anthropic( - model="claude-v1", streaming=True, callback_manager=callback_manager, verbose=True, From 13a0ed064b4c4c4b5978d61cc6ec0950dcdcf870 Mon Sep 17 00:00:00 2001 From: Akash NP <91617769+9akashnp8@users.noreply.github.com> Date: Sat, 15 Apr 2023 05:06:03 +0530 Subject: [PATCH 05/10] add encoding to avoid UnicodeDecodeError (#2908) **About** Specify encoding to avoid UnicodeDecodeError when reading .txt for users who are following the tutorial. **Reference** ``` return codecs.charmap_decode(input,self.errors,decoding_table)[0] UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 1205: character maps to ``` **Environment** OS: Win 11 Python: 3.8 --- docs/modules/indexes/getting_started.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/indexes/getting_started.ipynb b/docs/modules/indexes/getting_started.ipynb index 58a133ada0f..0c6a9d593aa 100644 --- a/docs/modules/indexes/getting_started.ipynb +++ b/docs/modules/indexes/getting_started.ipynb @@ -99,7 +99,7 @@ "outputs": [], "source": [ "from langchain.document_loaders import TextLoader\n", - "loader = TextLoader('../state_of_the_union.txt')" + "loader = TextLoader('../state_of_the_union.txt', encoding='utf8')" ] }, { From ec59e9d886904e27e6436a50a2cb755a621382d6 Mon Sep 17 00:00:00 2001 From: Ankush Gola <9536492+agola11@users.noreply.github.com> Date: Fri, 14 Apr 2023 17:22:01 -0700 Subject: [PATCH 06/10] Fix ChatAnthropic stop_sequences error (#2919) (#2920) Note to self: Always run integration tests, even on "that last minute change you thought would be safe" :) --------- Co-authored-by: Mike Lambert --- .../models/chat/integrations/anthropic.ipynb | 171 ++++++++++++++++++ langchain/chat_models/__init__.py | 3 +- langchain/chat_models/anthropic.py | 22 +-- langchain/llms/anthropic.py | 2 +- poetry.lock | 2 +- pyproject.toml | 2 +- .../chat_models/test_anthropic.py | 8 +- 7 files changed, 189 insertions(+), 21 deletions(-) create mode 100644 docs/modules/models/chat/integrations/anthropic.ipynb diff --git a/docs/modules/models/chat/integrations/anthropic.ipynb b/docs/modules/models/chat/integrations/anthropic.ipynb new file mode 100644 index 00000000000..57cbf48821d --- /dev/null +++ b/docs/modules/models/chat/integrations/anthropic.ipynb @@ -0,0 +1,171 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "bf733a38-db84-4363-89e2-de6735c37230", + "metadata": {}, + "source": [ + "# Anthropic\n", + "\n", + "This notebook covers how to get started with Anthropic chat models." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "d4a7c55d-b235-4ca4-a579-c90cc9570da9", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.chat_models import ChatAnthropic\n", + "from langchain.prompts.chat import (\n", + " ChatPromptTemplate,\n", + " SystemMessagePromptTemplate,\n", + " AIMessagePromptTemplate,\n", + " HumanMessagePromptTemplate,\n", + ")\n", + "from langchain.schema import (\n", + " AIMessage,\n", + " HumanMessage,\n", + " SystemMessage\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "70cf04e8-423a-4ff6-8b09-f11fb711c817", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "chat = ChatAnthropic()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8199ef8f-eb8b-4253-9ea0-6c24a013ca4c", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content=\" J'adore programmer.\", additional_kwargs={})" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "messages = [\n", + " HumanMessage(content=\"Translate this sentence from English to French. I love programming.\")\n", + "]\n", + "chat(messages)" + ] + }, + { + "cell_type": "markdown", + "id": "c361ab1e-8c0c-4206-9e3c-9d1424a12b9c", + "metadata": {}, + "source": [ + "## `ChatAnthropic` also supports async and streaming functionality:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "93a21c5c-6ef9-4688-be60-b2e1f94842fb", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.callbacks.base import CallbackManager\n", + "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "c5fac0e9-05a4-4fc1-a3b3-e5bbb24b971b", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "LLMResult(generations=[[ChatGeneration(text=\" J'aime programmer.\", generation_info=None, message=AIMessage(content=\" J'aime programmer.\", additional_kwargs={}))]], llm_output={})" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "await chat.agenerate([messages])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "025be980-e50d-4a68-93dc-c9c7b500ce34", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " J'aime la programmation." + ] + }, + { + "data": { + "text/plain": [ + "AIMessage(content=\" J'aime la programmation.\", additional_kwargs={})" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chat = ChatAnthropic(streaming=True, verbose=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))\n", + "chat(messages)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/langchain/chat_models/__init__.py b/langchain/chat_models/__init__.py index 88bafc7e7a2..fdfe5e7d355 100644 --- a/langchain/chat_models/__init__.py +++ b/langchain/chat_models/__init__.py @@ -1,5 +1,6 @@ +from langchain.chat_models.anthropic import ChatAnthropic from langchain.chat_models.azure_openai import AzureChatOpenAI from langchain.chat_models.openai import ChatOpenAI from langchain.chat_models.promptlayer_openai import PromptLayerChatOpenAI -__all__ = ["ChatOpenAI", "AzureChatOpenAI", "PromptLayerChatOpenAI"] +__all__ = ["ChatOpenAI", "AzureChatOpenAI", "PromptLayerChatOpenAI", "ChatAnthropic"] diff --git a/langchain/chat_models/anthropic.py b/langchain/chat_models/anthropic.py index b63fbf052d9..f56c606361e 100644 --- a/langchain/chat_models/anthropic.py +++ b/langchain/chat_models/anthropic.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Any, Dict, List, Optional from pydantic import Extra @@ -26,17 +26,7 @@ class ChatAnthropic(BaseChatModel, _AnthropicCommon): .. code-block:: python import anthropic from langchain.llms import Anthropic - model = Anthropic(model="", anthropic_api_key="my-api-key") - - # Simplest invocation, automatically wrapped with HUMAN_PROMPT - # and AI_PROMPT. - response = model("What are the biggest risks facing humanity?") - - # Or if you want to use the chat mode, build a few-shot-prompt, or - # put words in the Assistant's mouth, use HUMAN_PROMPT and AI_PROMPT: - raw_prompt = "What are the biggest risks facing humanity?" - prompt = f"{anthropic.HUMAN_PROMPT} {prompt}{anthropic.AI_PROMPT}" - response = model(prompt) + model = ChatAnthropic(model="", anthropic_api_key="my-api-key") """ class Config: @@ -98,7 +88,9 @@ class ChatAnthropic(BaseChatModel, _AnthropicCommon): self, messages: List[BaseMessage], stop: Optional[List[str]] = None ) -> ChatResult: prompt = self._convert_messages_to_prompt(messages) - params = {"prompt": prompt, "stop_sequences": stop, **self._default_params} + params: Dict[str, Any] = {"prompt": prompt, **self._default_params} + if stop: + params["stop_sequences"] = stop if self.streaming: completion = "" @@ -120,7 +112,9 @@ class ChatAnthropic(BaseChatModel, _AnthropicCommon): self, messages: List[BaseMessage], stop: Optional[List[str]] = None ) -> ChatResult: prompt = self._convert_messages_to_prompt(messages) - params = {"prompt": prompt, "stop_sequences": stop, **self._default_params} + params: Dict[str, Any] = {"prompt": prompt, **self._default_params} + if stop: + params["stop_sequences"] = stop if self.streaming: completion = "" diff --git a/langchain/llms/anthropic.py b/langchain/llms/anthropic.py index 24d9def1eb7..e609627967e 100644 --- a/langchain/llms/anthropic.py +++ b/langchain/llms/anthropic.py @@ -10,7 +10,7 @@ from langchain.utils import get_from_dict_or_env class _AnthropicCommon(BaseModel): client: Any = None #: :meta private: - model: str = "claude-latest" + model: str = "claude-v1" """Model name to use.""" max_tokens_to_sample: int = 256 diff --git a/poetry.lock b/poetry.lock index 7a88ee2a9f7..6b0e37aa1a5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -9035,4 +9035,4 @@ qdrant = ["qdrant-client"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "373f68ef16e7f3d5d9cde8b81c5f261096cc537ddca4f6a36711d7215b63f226" +content-hash = "7e343fa8e31d8fcf1023cbda592f64c05e80015c4e0e23c1d387d2e9671ce995" diff --git a/pyproject.toml b/pyproject.toml index 3cc2d497615..351d6e43b77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ pinecone-text = {version = "^0.4.2", optional = true} weaviate-client = {version = "^3", optional = true} google-api-python-client = {version = "2.70.0", optional = true} wolframalpha = {version = "5.0.0", optional = true} -anthropic = {version = "^0.2.4", optional = true} +anthropic = {version = "^0.2.6", optional = true} qdrant-client = {version = "^1.1.2", optional = true, python = ">=3.8.1,<3.12"} dataclasses-json = "^0.5.7" tensorflow-text = {version = "^2.11.0", optional = true, python = "^3.10, <3.12"} diff --git a/tests/integration_tests/chat_models/test_anthropic.py b/tests/integration_tests/chat_models/test_anthropic.py index f04b30e2514..60fe58f319f 100644 --- a/tests/integration_tests/chat_models/test_anthropic.py +++ b/tests/integration_tests/chat_models/test_anthropic.py @@ -17,7 +17,7 @@ from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler def test_anthropic_call() -> None: """Test valid call to anthropic.""" - chat = ChatAnthropic(model="bare-nano-0") + chat = ChatAnthropic(model="test") message = HumanMessage(content="Hello") response = chat([message]) assert isinstance(response, AIMessage) @@ -26,7 +26,7 @@ def test_anthropic_call() -> None: def test_anthropic_streaming() -> None: """Test streaming tokens from anthropic.""" - chat = ChatAnthropic(model="bare-nano-0", streaming=True) + chat = ChatAnthropic(model="test", streaming=True) message = HumanMessage(content="Hello") response = chat([message]) assert isinstance(response, AIMessage) @@ -38,11 +38,12 @@ def test_anthropic_streaming_callback() -> None: callback_handler = FakeCallbackHandler() callback_manager = CallbackManager([callback_handler]) chat = ChatAnthropic( + model="test", streaming=True, callback_manager=callback_manager, verbose=True, ) - message = HumanMessage(content="Write me a sentence with 100 words.") + message = HumanMessage(content="Write me a sentence with 10 words.") chat([message]) assert callback_handler.llm_streams > 1 @@ -53,6 +54,7 @@ async def test_anthropic_async_streaming_callback() -> None: callback_handler = FakeCallbackHandler() callback_manager = CallbackManager([callback_handler]) chat = ChatAnthropic( + model="test", streaming=True, callback_manager=callback_manager, verbose=True, From 0aa828b1dc72b8e577500d3f1c9aaef28dee604f Mon Sep 17 00:00:00 2001 From: Hai Nguyen Mau Date: Sat, 15 Apr 2023 17:31:43 +0200 Subject: [PATCH 07/10] typo fix (#2937) missing w in link --- .../models/chat/integrations/promptlayer_chatopenai.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/models/chat/integrations/promptlayer_chatopenai.ipynb b/docs/modules/models/chat/integrations/promptlayer_chatopenai.ipynb index fbd23379c02..d75c3a0a3e4 100644 --- a/docs/modules/models/chat/integrations/promptlayer_chatopenai.ipynb +++ b/docs/modules/models/chat/integrations/promptlayer_chatopenai.ipynb @@ -115,7 +115,7 @@ "id": "a2d76826", "metadata": {}, "source": [ - "**The above request should now appear on your [PromptLayer dashboard](https://ww.promptlayer.com).**" + "**The above request should now appear on your [PromptLayer dashboard](https://www.promptlayer.com).**" ] }, { From cf2789d86d16cf33ff0cef03de40cdfdf6ec83af Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Sat, 15 Apr 2023 08:48:51 -0700 Subject: [PATCH 08/10] delete antropic chat notebook (#2945) --- .../models/chat/integrations/anthropic.ipynb | 171 ------------------ 1 file changed, 171 deletions(-) delete mode 100644 docs/modules/models/chat/integrations/anthropic.ipynb diff --git a/docs/modules/models/chat/integrations/anthropic.ipynb b/docs/modules/models/chat/integrations/anthropic.ipynb deleted file mode 100644 index 57cbf48821d..00000000000 --- a/docs/modules/models/chat/integrations/anthropic.ipynb +++ /dev/null @@ -1,171 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "bf733a38-db84-4363-89e2-de6735c37230", - "metadata": {}, - "source": [ - "# Anthropic\n", - "\n", - "This notebook covers how to get started with Anthropic chat models." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "d4a7c55d-b235-4ca4-a579-c90cc9570da9", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from langchain.chat_models import ChatAnthropic\n", - "from langchain.prompts.chat import (\n", - " ChatPromptTemplate,\n", - " SystemMessagePromptTemplate,\n", - " AIMessagePromptTemplate,\n", - " HumanMessagePromptTemplate,\n", - ")\n", - "from langchain.schema import (\n", - " AIMessage,\n", - " HumanMessage,\n", - " SystemMessage\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "70cf04e8-423a-4ff6-8b09-f11fb711c817", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "chat = ChatAnthropic()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "8199ef8f-eb8b-4253-9ea0-6c24a013ca4c", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "AIMessage(content=\" J'adore programmer.\", additional_kwargs={})" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "messages = [\n", - " HumanMessage(content=\"Translate this sentence from English to French. I love programming.\")\n", - "]\n", - "chat(messages)" - ] - }, - { - "cell_type": "markdown", - "id": "c361ab1e-8c0c-4206-9e3c-9d1424a12b9c", - "metadata": {}, - "source": [ - "## `ChatAnthropic` also supports async and streaming functionality:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "93a21c5c-6ef9-4688-be60-b2e1f94842fb", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from langchain.callbacks.base import CallbackManager\n", - "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "c5fac0e9-05a4-4fc1-a3b3-e5bbb24b971b", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "LLMResult(generations=[[ChatGeneration(text=\" J'aime programmer.\", generation_info=None, message=AIMessage(content=\" J'aime programmer.\", additional_kwargs={}))]], llm_output={})" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "await chat.agenerate([messages])" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "025be980-e50d-4a68-93dc-c9c7b500ce34", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " J'aime la programmation." - ] - }, - { - "data": { - "text/plain": [ - "AIMessage(content=\" J'aime la programmation.\", additional_kwargs={})" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "chat = ChatAnthropic(streaming=True, verbose=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))\n", - "chat(messages)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From ad3973a3b8369315a5e780b57594cebe2b64aed5 Mon Sep 17 00:00:00 2001 From: Nahin Khan Date: Sat, 15 Apr 2023 18:53:25 +0300 Subject: [PATCH 09/10] Fix typo (#2942) --- docs/modules/memory/examples/agent_with_memory_in_db.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/memory/examples/agent_with_memory_in_db.ipynb b/docs/modules/memory/examples/agent_with_memory_in_db.ipynb index 201a6533d84..b96760bbbe2 100644 --- a/docs/modules/memory/examples/agent_with_memory_in_db.ipynb +++ b/docs/modules/memory/examples/agent_with_memory_in_db.ipynb @@ -16,7 +16,7 @@ "In order to add a memory with an external message store to an agent we are going to do the following steps:\n", "\n", "1. We are going to create a `RedisChatMessageHistory` to connect to an external database to store the messages in.\n", - "2. We are going to create an `LLMChain` useing that chat history as memory.\n", + "2. We are going to create an `LLMChain` using that chat history as memory.\n", "3. We are going to use that `LLMChain` to create a custom Agent.\n", "\n", "For the purposes of this exercise, we are going to create a simple custom Agent that has access to a search tool and utilizes the `ConversationBufferMemory` class." From c4ae8c1d243a30c0e7cb5aceb53a232b7559c869 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Sat, 15 Apr 2023 09:23:19 -0700 Subject: [PATCH 10/10] bump ver to 140 (#2895) --- docs/use_cases/code.md | 1 + pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/use_cases/code.md b/docs/use_cases/code.md index 01d79f3d80b..0977b7269a4 100644 --- a/docs/use_cases/code.md +++ b/docs/use_cases/code.md @@ -23,3 +23,4 @@ Query Understanding: GPT-4 processes user queries, grasping the context and extr The full tutorial is available below. - [Twitter the-algorithm codebase analysis with Deep Lake](code/twitter-the-algorithm-analysis-deeplake.ipynb): A notebook walking through how to parse github source code and run queries conversation. +- [LangChain codebase analysis with Deep Lake](code/code-analysis-deeplake.ipynb): A notebook walking through how to analyze and do question answering over THIS code base. diff --git a/pyproject.toml b/pyproject.toml index 351d6e43b77..c7e292649c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain" -version = "0.0.139" +version = "0.0.140" description = "Building applications with LLMs through composability" authors = [] license = "MIT"