From 9d200e6cbe8f85ac27d70933d0b6b54861efacf7 Mon Sep 17 00:00:00 2001
From: David <127131037+david-huge@users.noreply.github.com>
Date: Fri, 13 Oct 2023 15:25:11 -0400
Subject: [PATCH] Create ChatEverlyAI (#11357)

- Description: Adds the ChatEverlyAI class, serving Llama 2 7B chat on
  [EverlyAI Hosted Endpoints](https://everlyai.xyz/)
- It inherits from ChatOpenAI and requires the openai package (probably
  unnecessary, but it made for a quick and easy implementation)

---------

Co-authored-by: everly-studio <127131037+everly-studio@users.noreply.github.com>
---
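A minimal usage sketch, placed here in the notes area so it stays out of the
commit message. Assumptions: `openai` is installed and `EVERLYAI_API_KEY` is
exported; the model name is the default from the diff below, so this is
illustrative rather than part of the change:

    from langchain.chat_models import ChatEverlyAI
    from langchain.schema import HumanMessage

    # The API base defaults to the EverlyAI hosted endpoint (DEFAULT_API_BASE).
    chat = ChatEverlyAI(model_name="meta-llama/Llama-2-7b-chat-hf")
    print(chat([HumanMessage(content="Hello!")]).content)
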
 docs/extras/integrations/chat/everlyai.ipynb  | 214 ++++++++++++++++++
 .../langchain/chat_models/__init__.py         |   2 +
 .../langchain/chat_models/everlyai.py         | 154 +++++++++++++
 3 files changed, 370 insertions(+)
 create mode 100644 docs/extras/integrations/chat/everlyai.ipynb
 create mode 100644 libs/langchain/langchain/chat_models/everlyai.py

diff --git a/docs/extras/integrations/chat/everlyai.ipynb b/docs/extras/integrations/chat/everlyai.ipynb
new file mode 100644
index 00000000000..b75557d56b7
--- /dev/null
+++ b/docs/extras/integrations/chat/everlyai.ipynb
@@ -0,0 +1,214 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "642fd21c-600a-47a1-be96-6e1438b421a9",
+   "metadata": {},
+   "source": [
+    "# EverlyAI\n",
+    "\n",
+    ">[EverlyAI](https://everlyai.xyz) allows you to run your ML models at scale in the cloud. It also provides API access to [several LLM models](https://everlyai.xyz).\n",
+    "\n",
+    "This notebook demonstrates the use of `langchain.chat_models.ChatEverlyAI` for [EverlyAI Hosted Endpoints](https://everlyai.xyz/).\n",
+    "\n",
+    "* Set the `EVERLYAI_API_KEY` environment variable\n",
+    "* or use the `everlyai_api_key` keyword argument"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d00d850917865298",
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# !pip install openai"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "72340871-ae2f-415f-b399-0777d32dc379",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from getpass import getpass\n",
+    "\n",
+    "os.environ[\"EVERLYAI_API_KEY\"] = getpass()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5d7fc704-3ea0-4c35-96e7-89fcae6c73fa",
+   "metadata": {},
+   "source": [
+    "# Let's try out the LLAMA model offered on EverlyAI Hosted Endpoints"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "0dc9428d-4217-47d2-97de-f784b1764186",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " Hello! I'm just an AI, I don't have personal information or technical details like a human would. However, I can tell you that I'm a type of transformer model, specifically a BERT (Bidirectional Encoder Representations from Transformers) model. B\n"
+     ]
+    }
+   ],
+   "source": [
+    "from langchain.chat_models import ChatEverlyAI\n",
+    "from langchain.schema import SystemMessage, HumanMessage\n",
+    "\n",
+    "messages = [\n",
+    "    SystemMessage(\n",
+    "        content=\"You are a helpful AI that shares everything you know.\"\n",
+    "    ),\n",
+    "    HumanMessage(\n",
+    "        content=\"Tell me technical facts about yourself. Are you a transformer model? How many billions of parameters do you have?\"\n",
+    "    ),\n",
+    "]\n",
+    "\n",
+    "chat = ChatEverlyAI(model_name=\"meta-llama/Llama-2-7b-chat-hf\", temperature=0.3, max_tokens=64)\n",
+    "print(chat(messages).content)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7c4f124a-eaf7-4d78-a2c0-b0aa23fb25c4",
+   "metadata": {},
+   "source": [
+    "# EverlyAI also supports streaming responses"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "1f94f5d2-569e-4a2c-965e-de53c2845fbb",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " Ah, a joke, you say? *adjusts glasses* Well, I've got a doozy for you! *winks*\n",
+      " *pauses for dramatic effect*\n",
+      "Why did the AI go to therapy?\n",
+      "*drumroll*\n",
+      "Because"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "AIMessageChunk(content=\" Ah, a joke, you say? *adjusts glasses* Well, I've got a doozy for you! *winks*\\n *pauses for dramatic effect*\\nWhy did the AI go to therapy?\\n*drumroll*\\nBecause\")"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from langchain.chat_models import ChatEverlyAI\n",
+    "from langchain.schema import SystemMessage, HumanMessage\n",
+    "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
+    "\n",
+    "messages = [\n",
+    "    SystemMessage(\n",
+    "        content=\"You are a humorous AI that delights people.\"\n",
+    "    ),\n",
+    "    HumanMessage(\n",
+    "        content=\"Tell me a joke?\"\n",
+    "    ),\n",
+    "]\n",
+    "\n",
+    "chat = ChatEverlyAI(model_name=\"meta-llama/Llama-2-7b-chat-hf\", temperature=0.3, max_tokens=64, streaming=True, callbacks=[StreamingStdOutCallbackHandler()])\n",
+    "chat(messages)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7de56d98",
+   "metadata": {},
+   "source": [
+    "# Let's try a different language model on EverlyAI"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "d8a44114",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " OH HO HO! *adjusts monocle* Well, well, well! Look who's here! *winks*\n",
+      "\n",
+      "You want a joke, huh? *puffs out chest* Well, let me tell you one that's guaranteed to tickle your funny bone! *clears throat*\n",
+      "\n",
+      "Why couldn't the bicycle stand up by itself? *pauses for dramatic effect* Because it was two-tired! *winks*\n",
+      "\n",
+      "Hope that one put a spring in your step, my dear! *"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "AIMessageChunk(content=\" OH HO HO! *adjusts monocle* Well, well, well! Look who's here! *winks*\\n\\nYou want a joke, huh? *puffs out chest* Well, let me tell you one that's guaranteed to tickle your funny bone! *clears throat*\\n\\nWhy couldn't the bicycle stand up by itself? *pauses for dramatic effect* Because it was two-tired! *winks*\\n\\nHope that one put a spring in your step, my dear! *\")"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from langchain.chat_models import ChatEverlyAI\n",
+    "from langchain.schema import SystemMessage, HumanMessage\n",
+    "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
+    "\n",
+    "messages = [\n",
+    "    SystemMessage(\n",
+    "        content=\"You are a humorous AI that delights people.\"\n",
+    "    ),\n",
+    "    HumanMessage(\n",
+    "        content=\"Tell me a joke?\"\n",
+    "    ),\n",
+    "]\n",
+    "\n",
+    "chat = ChatEverlyAI(model_name=\"meta-llama/Llama-2-13b-chat-hf-quantized\", temperature=0.3, max_tokens=128, streaming=True, callbacks=[StreamingStdOutCallbackHandler()])\n",
+    "chat(messages)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/libs/langchain/langchain/chat_models/__init__.py b/libs/langchain/langchain/chat_models/__init__.py
index 92fb6728e02..06332d95d59 100644
--- a/libs/langchain/langchain/chat_models/__init__.py
+++ b/libs/langchain/langchain/chat_models/__init__.py
@@ -24,6 +24,7 @@ from langchain.chat_models.baidu_qianfan_endpoint import QianfanChatEndpoint
 from langchain.chat_models.bedrock import BedrockChat
 from langchain.chat_models.cohere import ChatCohere
 from langchain.chat_models.ernie import ErnieBotChat
+from langchain.chat_models.everlyai import ChatEverlyAI
 from langchain.chat_models.fake import FakeListChatModel
 from langchain.chat_models.fireworks import ChatFireworks
 from langchain.chat_models.google_palm import ChatGooglePalm
@@ -45,6 +46,7 @@ __all__ = [
     "AzureChatOpenAI",
     "FakeListChatModel",
     "PromptLayerChatOpenAI",
+    "ChatEverlyAI",
     "ChatAnthropic",
     "ChatCohere",
     "ChatGooglePalm",
diff --git a/libs/langchain/langchain/chat_models/everlyai.py b/libs/langchain/langchain/chat_models/everlyai.py
new file mode 100644
index 00000000000..0b5dbd85d06
--- /dev/null
+++ b/libs/langchain/langchain/chat_models/everlyai.py
@@ -0,0 +1,154 @@
+"""EverlyAI Endpoints chat wrapper. Relies heavily on ChatOpenAI."""
+from __future__ import annotations
+
+import logging
+import sys
+from typing import TYPE_CHECKING, Dict, Optional, Set
+
+from langchain.adapters.openai import convert_message_to_dict
+from langchain.chat_models.openai import (
+    ChatOpenAI,
+    _import_tiktoken,
+)
+from langchain.pydantic_v1 import Field, root_validator
+from langchain.schema.messages import BaseMessage
+from langchain.utils import get_from_dict_or_env
+
+if TYPE_CHECKING:
+    import tiktoken
+
+logger = logging.getLogger(__name__)
+
+
+DEFAULT_API_BASE = "https://everlyai.xyz/hosted"
+DEFAULT_MODEL = "meta-llama/Llama-2-7b-chat-hf"
+
+
+class ChatEverlyAI(ChatOpenAI):
+    """`EverlyAI` Chat large language models.
+
+    To use, you should have the ``openai`` python package installed, and the
+    environment variable ``EVERLYAI_API_KEY`` set with your API key.
+    Alternatively, you can use the everlyai_api_key keyword argument.
+
+    Any parameters that are valid to be passed to the `openai.create` call
+    can be passed in, even if not explicitly saved on this class.
+
+    Example:
+        .. code-block:: python
+
+            from langchain.chat_models import ChatEverlyAI
+            chat = ChatEverlyAI(model_name="meta-llama/Llama-2-7b-chat-hf")
+    """
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of chat model."""
+        return "everlyai-chat"
+
+    @property
+    def lc_secrets(self) -> Dict[str, str]:
+        return {"everlyai_api_key": "EVERLYAI_API_KEY"}
+
+    everlyai_api_key: Optional[str] = None
+    """EverlyAI Endpoints API key."""
+    model_name: str = Field(default=DEFAULT_MODEL, alias="model")
+    """Model name to use."""
+    everlyai_api_base: str = DEFAULT_API_BASE
+    """Base URL path for API requests."""
+    available_models: Optional[Set[str]] = None
+    """Available models from EverlyAI API."""
+
+    @staticmethod
+    def get_available_models() -> Set[str]:
+        """Get available models from EverlyAI API."""
+        # EverlyAI doesn't yet support dynamically querying available models.
+        return set(
+            [
+                "meta-llama/Llama-2-7b-chat-hf",
+                "meta-llama/Llama-2-13b-chat-hf-quantized",
+            ]
+        )
+
+    @root_validator(pre=True)
+    def validate_environment_override(cls, values: dict) -> dict:
+        """Validate that the API key and python package exist in the environment."""
+        values["openai_api_key"] = get_from_dict_or_env(
+            values,
+            "everlyai_api_key",
+            "EVERLYAI_API_KEY",
+        )
+        values["openai_api_base"] = DEFAULT_API_BASE
+
+        try:
+            import openai
+
+        except ImportError as e:
+            raise ValueError(
+                "Could not import openai python package. "
+                "Please install it with `pip install openai`.",
+            ) from e
+        try:
+            values["client"] = openai.ChatCompletion
+        except AttributeError as exc:
+            raise ValueError(
+                "`openai` has no `ChatCompletion` attribute, this is likely "
+                "due to an old version of the openai package. Try upgrading it "
+                "with `pip install --upgrade openai`.",
+            ) from exc
+
+        if "model_name" not in values.keys():
+            values["model_name"] = DEFAULT_MODEL
+
+        model_name = values["model_name"]
+
+        available_models = cls.get_available_models()
+
+        if model_name not in available_models:
+            raise ValueError(
+                f"Model name {model_name} not found in available models: "
+                f"{available_models}.",
+            )
+
+        values["available_models"] = available_models
+
+        return values
+
+    def _get_encoding_model(self) -> tuple[str, tiktoken.Encoding]:
+        tiktoken_ = _import_tiktoken()
+        if self.tiktoken_model_name is not None:
+            model = self.tiktoken_model_name
+        else:
+            model = self.model_name
+        # Approximate Llama token counts with the gpt-3.5-turbo-0301 encoding.
+        try:
+            encoding = tiktoken_.encoding_for_model("gpt-3.5-turbo-0301")
+        except KeyError:
+            logger.warning("Warning: model not found. Using cl100k_base encoding.")
+            model = "cl100k_base"
+            encoding = tiktoken_.get_encoding(model)
+        return model, encoding
+
+    def get_num_tokens_from_messages(self, messages: list[BaseMessage]) -> int:
+        """Calculate num tokens with tiktoken package.
+
+        Official documentation: https://github.com/openai/openai-cookbook/blob/
+        main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb"""
+        if sys.version_info[1] <= 7:  # tiktoken requires Python 3.8+
+            return super().get_num_tokens_from_messages(messages)
+        model, encoding = self._get_encoding_model()
+        tokens_per_message = 3
+        tokens_per_name = 1
+        num_tokens = 0
+        messages_dict = [convert_message_to_dict(m) for m in messages]
+        for message in messages_dict:
+            num_tokens += tokens_per_message
+            for key, value in message.items():
+                # Cast str(value) in case the message value is not a string.
+                # This occurs with function messages.
+                num_tokens += len(encoding.encode(str(value)))
+                if key == "name":
+                    num_tokens += tokens_per_name
+        # Every reply is primed with the assistant role, hence the extra 3.
+        num_tokens += 3
+        return num_tokens
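
A short token-counting sketch against the override above (assumptions: Python
3.8+, `tiktoken` installed, and `EVERLYAI_API_KEY` exported; counts are
approximate, since the code reuses the gpt-3.5-turbo-0301 encoding for Llama 2
models):

    from langchain.chat_models import ChatEverlyAI
    from langchain.schema import HumanMessage, SystemMessage

    chat = ChatEverlyAI()  # defaults to meta-llama/Llama-2-7b-chat-hf
    messages = [
        SystemMessage(content="You are a helpful AI."),
        HumanMessage(content="Tell me a joke?"),
    ]
    # Cookbook scheme: 3 tokens per message, plus a 3-token reply primer.
    print(chat.get_num_tokens_from_messages(messages))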