From 9d200e6cbe8f85ac27d70933d0b6b54861efacf7 Mon Sep 17 00:00:00 2001
From: David <127131037+david-huge@users.noreply.github.com>
Date: Fri, 13 Oct 2023 15:25:11 -0400
Subject: [PATCH] Create ChatEverlyAI (#11357)

- Description: Adds the ChatEverlyAI class, serving Llama 2 7B chat on
  [EverlyAI Hosted Endpoints](https://everlyai.xyz/)
- It inherits from ChatOpenAI and requires the openai package (probably
  unnecessary, but it made for a quick and easy implementation)

---------

Co-authored-by: everly-studio <127131037+everly-studio@users.noreply.github.com>
---
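A minimal usage sketch, placed here in the notes area so it stays out of the
commit message. Assumptions: `openai` is installed and `EVERLYAI_API_KEY` is
exported; the model name is the default from the diff below, so this is
illustrative rather than part of the change:

    from langchain.chat_models import ChatEverlyAI
    from langchain.schema import HumanMessage

    # The API base defaults to the EverlyAI hosted endpoint (DEFAULT_API_BASE).
    chat = ChatEverlyAI(model_name="meta-llama/Llama-2-7b-chat-hf")
    print(chat([HumanMessage(content="Hello!")]).content)
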
 docs/extras/integrations/chat/everlyai.ipynb  | 214 ++++++++++++++++++
 .../langchain/chat_models/__init__.py         |   2 +
 .../langchain/chat_models/everlyai.py         | 154 +++++++++++++
 3 files changed, 370 insertions(+)
 create mode 100644 docs/extras/integrations/chat/everlyai.ipynb
 create mode 100644 libs/langchain/langchain/chat_models/everlyai.py

diff --git a/docs/extras/integrations/chat/everlyai.ipynb b/docs/extras/integrations/chat/everlyai.ipynb
new file mode 100644
index 00000000000..b75557d56b7
--- /dev/null
+++ b/docs/extras/integrations/chat/everlyai.ipynb
@@ -0,0 +1,214 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "642fd21c-600a-47a1-be96-6e1438b421a9",
+   "metadata": {},
+   "source": [
+    "# EverlyAI\n",
+    "\n",
+    ">[EverlyAI](https://everlyai.xyz) allows you to run your ML models at scale in the cloud. It also provides API access to [several LLM models](https://everlyai.xyz).\n",
+    "\n",
+    "This notebook demonstrates the use of `langchain.chat_models.ChatEverlyAI` for [EverlyAI Hosted Endpoints](https://everlyai.xyz/).\n",
+    "\n",
+    "* Set the `EVERLYAI_API_KEY` environment variable\n",
+    "* or use the `everlyai_api_key` keyword argument"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d00d850917865298",
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# !pip install openai"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "72340871-ae2f-415f-b399-0777d32dc379",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from getpass import getpass\n",
+    "\n",
+    "os.environ[\"EVERLYAI_API_KEY\"] = getpass()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5d7fc704-3ea0-4c35-96e7-89fcae6c73fa",
+   "metadata": {},
+   "source": [
+    "# Let's try out the LLAMA model offered on EverlyAI Hosted Endpoints"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "0dc9428d-4217-47d2-97de-f784b1764186",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " Hello! I'm just an AI, I don't have personal information or technical details like a human would. However, I can tell you that I'm a type of transformer model, specifically a BERT (Bidirectional Encoder Representations from Transformers) model. B\n"
+     ]
+    }
+   ],
+   "source": [
+    "from langchain.chat_models import ChatEverlyAI\n",
+    "from langchain.schema import SystemMessage, HumanMessage\n",
+    "\n",
+    "messages = [\n",
+    "    SystemMessage(\n",
+    "        content=\"You are a helpful AI that shares everything you know.\"\n",
+    "    ),\n",
+    "    HumanMessage(\n",
+    "        content=\"Tell me technical facts about yourself. Are you a transformer model? How many billions of parameters do you have?\"\n",
+    "    ),\n",
+    "]\n",
+    "\n",
+    "chat = ChatEverlyAI(model_name=\"meta-llama/Llama-2-7b-chat-hf\", temperature=0.3, max_tokens=64)\n",
+    "print(chat(messages).content)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7c4f124a-eaf7-4d78-a2c0-b0aa23fb25c4",
+   "metadata": {},
+   "source": [
+    "# EverlyAI also supports streaming responses"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "1f94f5d2-569e-4a2c-965e-de53c2845fbb",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " Ah, a joke, you say? *adjusts glasses* Well, I've got a doozy for you! *winks*\n",
+      " *pauses for dramatic effect*\n",
+      "Why did the AI go to therapy?\n",
+      "*drumroll*\n",
+      "Because"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "AIMessageChunk(content=\" Ah, a joke, you say? *adjusts glasses* Well, I've got a doozy for you! *winks*\\n *pauses for dramatic effect*\\nWhy did the AI go to therapy?\\n*drumroll*\\nBecause\")"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from langchain.chat_models import ChatEverlyAI\n",
+    "from langchain.schema import SystemMessage, HumanMessage\n",
+    "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
+    "\n",
+    "messages = [\n",
+    "    SystemMessage(\n",
+    "        content=\"You are a humorous AI that delights people.\"\n",
+    "    ),\n",
+    "    HumanMessage(\n",
+    "        content=\"Tell me a joke?\"\n",
+    "    ),\n",
+    "]\n",
+    "\n",
+    "chat = ChatEverlyAI(model_name=\"meta-llama/Llama-2-7b-chat-hf\", temperature=0.3, max_tokens=64, streaming=True, callbacks=[StreamingStdOutCallbackHandler()])\n",
+    "chat(messages)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7de56d98",
+   "metadata": {},
+   "source": [
+    "# Let's try a different language model on EverlyAI"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "d8a44114",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " OH HO HO! *adjusts monocle* Well, well, well! Look who's here! *winks*\n",
+      "\n",
+      "You want a joke, huh? *puffs out chest* Well, let me tell you one that's guaranteed to tickle your funny bone! *clears throat*\n",
+      "\n",
+      "Why couldn't the bicycle stand up by itself? *pauses for dramatic effect* Because it was two-tired! *winks*\n",
+      "\n",
+      "Hope that one put a spring in your step, my dear! *"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "AIMessageChunk(content=\" OH HO HO! *adjusts monocle* Well, well, well! Look who's here! *winks*\\n\\nYou want a joke, huh? *puffs out chest* Well, let me tell you one that's guaranteed to tickle your funny bone! *clears throat*\\n\\nWhy couldn't the bicycle stand up by itself? *pauses for dramatic effect* Because it was two-tired! *winks*\\n\\nHope that one put a spring in your step, my dear! *\")"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from langchain.chat_models import ChatEverlyAI\n",
+    "from langchain.schema import SystemMessage, HumanMessage\n",
+    "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
+    "\n",
+    "messages = [\n",
+    "    SystemMessage(\n",
+    "        content=\"You are a humorous AI that delights people.\"\n",
+    "    ),\n",
+    "    HumanMessage(\n",
+    "        content=\"Tell me a joke?\"\n",
+    "    ),\n",
+    "]\n",
+    "\n",
+    "chat = ChatEverlyAI(model_name=\"meta-llama/Llama-2-13b-chat-hf-quantized\", temperature=0.3, max_tokens=128, streaming=True, callbacks=[StreamingStdOutCallbackHandler()])\n",
+    "chat(messages)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/libs/langchain/langchain/chat_models/__init__.py b/libs/langchain/langchain/chat_models/__init__.py
index 92fb6728e02..06332d95d59 100644
--- a/libs/langchain/langchain/chat_models/__init__.py
+++ b/libs/langchain/langchain/chat_models/__init__.py
@@ -24,6 +24,7 @@ from langchain.chat_models.baidu_qianfan_endpoint import QianfanChatEndpoint
 from langchain.chat_models.bedrock import BedrockChat
 from langchain.chat_models.cohere import ChatCohere
 from langchain.chat_models.ernie import ErnieBotChat
+from langchain.chat_models.everlyai import ChatEverlyAI
 from langchain.chat_models.fake import FakeListChatModel
 from langchain.chat_models.fireworks import ChatFireworks
 from langchain.chat_models.google_palm import ChatGooglePalm
@@ -45,6 +46,7 @@ __all__ = [
     "AzureChatOpenAI",
     "FakeListChatModel",
     "PromptLayerChatOpenAI",
+    "ChatEverlyAI",
     "ChatAnthropic",
     "ChatCohere",
     "ChatGooglePalm",
diff --git a/libs/langchain/langchain/chat_models/everlyai.py b/libs/langchain/langchain/chat_models/everlyai.py
new file mode 100644
index 00000000000..0b5dbd85d06
--- /dev/null
+++ b/libs/langchain/langchain/chat_models/everlyai.py
@@ -0,0 +1,154 @@
+"""EverlyAI Endpoints chat wrapper. Relies heavily on ChatOpenAI."""
+from __future__ import annotations
+
+import logging
+import sys
+from typing import TYPE_CHECKING, Dict, Optional, Set
+
+from langchain.adapters.openai import convert_message_to_dict
+from langchain.chat_models.openai import (
+    ChatOpenAI,
+    _import_tiktoken,
+)
+from langchain.pydantic_v1 import Field, root_validator
+from langchain.schema.messages import BaseMessage
+from langchain.utils import get_from_dict_or_env
+
+if TYPE_CHECKING:
+    import tiktoken
+
+logger = logging.getLogger(__name__)
+
+
+DEFAULT_API_BASE = "https://everlyai.xyz/hosted"
+DEFAULT_MODEL = "meta-llama/Llama-2-7b-chat-hf"
+
+
+class ChatEverlyAI(ChatOpenAI):
+    """`EverlyAI` Chat large language models.
+
+    To use, you should have the ``openai`` python package installed, and the
+    environment variable ``EVERLYAI_API_KEY`` set with your API key.
+    Alternatively, you can use the everlyai_api_key keyword argument.
+
+    Any parameters that are valid to be passed to the `openai.create` call
+    can be passed in, even if not explicitly saved on this class.
+
+    Example:
+        .. code-block:: python
+
+            from langchain.chat_models import ChatEverlyAI
+            chat = ChatEverlyAI(model_name="meta-llama/Llama-2-7b-chat-hf")
+    """
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of chat model."""
+        return "everlyai-chat"
+
+    @property
+    def lc_secrets(self) -> Dict[str, str]:
+        return {"everlyai_api_key": "EVERLYAI_API_KEY"}
+
+    everlyai_api_key: Optional[str] = None
+    """EverlyAI Endpoints API key."""
+    model_name: str = Field(default=DEFAULT_MODEL, alias="model")
+    """Model name to use."""
+    everlyai_api_base: str = DEFAULT_API_BASE
+    """Base URL path for API requests."""
+    available_models: Optional[Set[str]] = None
+    """Available models from EverlyAI API."""
+
+    @staticmethod
+    def get_available_models() -> Set[str]:
+        """Get available models from EverlyAI API."""
+        # EverlyAI doesn't yet support dynamically querying available models.
+        return set(
+            [
+                "meta-llama/Llama-2-7b-chat-hf",
+                "meta-llama/Llama-2-13b-chat-hf-quantized",
+            ]
+        )
+
+    @root_validator(pre=True)
+    def validate_environment_override(cls, values: dict) -> dict:
+        """Validate that the API key and python package exist in the environment."""
+        values["openai_api_key"] = get_from_dict_or_env(
+            values,
+            "everlyai_api_key",
+            "EVERLYAI_API_KEY",
+        )
+        values["openai_api_base"] = DEFAULT_API_BASE
+
+        try:
+            import openai
+
+        except ImportError as e:
+            raise ValueError(
+                "Could not import openai python package. "
+                "Please install it with `pip install openai`.",
+            ) from e
+        try:
+            values["client"] = openai.ChatCompletion
+        except AttributeError as exc:
+            raise ValueError(
+                "`openai` has no `ChatCompletion` attribute, this is likely "
+                "due to an old version of the openai package. Try upgrading it "
+                "with `pip install --upgrade openai`.",
+            ) from exc
+
+        if "model_name" not in values.keys():
+            values["model_name"] = DEFAULT_MODEL
+
+        model_name = values["model_name"]
+
+        available_models = cls.get_available_models()
+
+        if model_name not in available_models:
+            raise ValueError(
+                f"Model name {model_name} not found in available models: "
+                f"{available_models}.",
+            )
+
+        values["available_models"] = available_models
+
+        return values
+
+    def _get_encoding_model(self) -> tuple[str, tiktoken.Encoding]:
+        tiktoken_ = _import_tiktoken()
+        if self.tiktoken_model_name is not None:
+            model = self.tiktoken_model_name
+        else:
+            model = self.model_name
+        # Approximate Llama token counts with the gpt-3.5-turbo-0301 encoding.
+        try:
+            encoding = tiktoken_.encoding_for_model("gpt-3.5-turbo-0301")
+        except KeyError:
+            logger.warning("Warning: model not found. Using cl100k_base encoding.")
+            model = "cl100k_base"
+            encoding = tiktoken_.get_encoding(model)
+        return model, encoding
+
+    def get_num_tokens_from_messages(self, messages: list[BaseMessage]) -> int:
+        """Calculate num tokens with tiktoken package.
+
+        Official documentation: https://github.com/openai/openai-cookbook/blob/
+        main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb"""
+        if sys.version_info[1] <= 7:  # tiktoken requires Python 3.8+
+            return super().get_num_tokens_from_messages(messages)
+        model, encoding = self._get_encoding_model()
+        tokens_per_message = 3
+        tokens_per_name = 1
+        num_tokens = 0
+        messages_dict = [convert_message_to_dict(m) for m in messages]
+        for message in messages_dict:
+            num_tokens += tokens_per_message
+            for key, value in message.items():
+                # Cast str(value) in case the message value is not a string.
+                # This occurs with function messages.
+                num_tokens += len(encoding.encode(str(value)))
+                if key == "name":
+                    num_tokens += tokens_per_name
+        # Every reply is primed with the assistant role, hence the extra 3.
+        num_tokens += 3
+        return num_tokens
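
A short token-counting sketch against the override above (assumptions: Python
3.8+, `tiktoken` installed, and `EVERLYAI_API_KEY` exported; counts are
approximate, since the code reuses the gpt-3.5-turbo-0301 encoding for Llama 2
models):

    from langchain.chat_models import ChatEverlyAI
    from langchain.schema import HumanMessage, SystemMessage

    chat = ChatEverlyAI()  # defaults to meta-llama/Llama-2-7b-chat-hf
    messages = [
        SystemMessage(content="You are a helpful AI."),
        HumanMessage(content="Tell me a joke?"),
    ]
    # Cookbook scheme: 3 tokens per message, plus a 3-token reply primer.
    print(chat.get_num_tokens_from_messages(messages))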