openai[minor]: implement langchain-openai package (#15503)

Todo
- [x] copy over integration tests
- [x] update docs with new instructions in #15513
- [x] add linear ticket to bump core -> community, community->langchain, and core->openai deps
- [ ] (optional): add `pip install langchain-openai` command to each notebook using it
- [x] Update docstrings to not need `openai` install
- [x] Add serialization
- [x] deprecate old models

Contributor steps:
- [x] Add secret names to manual integrations workflow in .github/workflows/_integration_test.yml
- [x] Add secrets to release workflow (for pre-release testing) in .github/workflows/_release.yml

Maintainer steps (Contributors should not do these):
- [x] set up pypi and test pypi projects
- [x] add credential secrets to Github Actions
- [ ] add package to conda-forge

Functional changes to existing classes:
- now relies on openai client v1 (1.6.1) via concrete dep in langchain-openai package

Codebase organization:
- some function calling stuff moved to `langchain_core.utils.function_calling` in order to be used in both community and langchain-openai
2025-09-08 22:42:05 +00:00 · 2024-01-05 15:03:28 -08:00
parent a7d023aaf0
commit ebc75c5ca7
64 changed files with 5997 additions and 387 deletions
--- a/libs/partners/openai/langchain_openai/__init__.py
+++ b/libs/partners/openai/langchain_openai/__init__.py
@@ -0,0 +1,18 @@
+from langchain_openai.chat_models import (
+    AzureChatOpenAI,
+    ChatOpenAI,
+)
+from langchain_openai.embeddings import (
+    AzureOpenAIEmbeddings,
+    OpenAIEmbeddings,
+)
+from langchain_openai.llms import AzureOpenAI, OpenAI
+
+# Public API of the `langchain_openai` package: the LLM, chat-model and
+# embeddings classes imported above, in both plain-OpenAI and Azure flavours.
+__all__ = [
+    "OpenAI",
+    "ChatOpenAI",
+    "OpenAIEmbeddings",
+    "AzureOpenAI",
+    "AzureChatOpenAI",
+    "AzureOpenAIEmbeddings",
+]
--- a/libs/partners/openai/langchain_openai/chat_models/__init__.py
+++ b/libs/partners/openai/langchain_openai/chat_models/__init__.py
@@ -0,0 +1,7 @@
+from langchain_openai.chat_models.azure import AzureChatOpenAI
+from langchain_openai.chat_models.base import ChatOpenAI
+
+# Public API of `langchain_openai.chat_models`: the two chat wrappers
+# imported above.
+__all__ = [
+    "ChatOpenAI",
+    "AzureChatOpenAI",
+]
--- a/libs/partners/openai/langchain_openai/chat_models/azure.py
+++ b/libs/partners/openai/langchain_openai/chat_models/azure.py
@@ -0,0 +1,218 @@
+"""Azure OpenAI chat wrapper."""
+from __future__ import annotations
+
+import logging
+import os
+from typing import Any, Callable, Dict, List, Union
+
+import openai
+from langchain_core.outputs import ChatResult
+from langchain_core.pydantic_v1 import BaseModel, Field, root_validator
+from langchain_core.utils import get_from_dict_or_env
+
+from langchain_openai.chat_models.base import ChatOpenAI
+
+logger = logging.getLogger(__name__)
+
+
+class AzureChatOpenAI(ChatOpenAI):
+    """`Azure OpenAI` Chat Completion API.
+
+    To use this class you
+    must have a deployed model on Azure OpenAI. Use `deployment_name` in the
+    constructor to refer to the "Model deployment name" in the Azure portal.
+
+    In addition, you should have the
+    following environment variables set or passed in constructor in lower case:
+    - ``AZURE_OPENAI_API_KEY``
+    - ``AZURE_OPENAI_ENDPOINT``
+    - ``AZURE_OPENAI_AD_TOKEN``
+    - ``OPENAI_API_VERSION``
+    - ``OPENAI_PROXY``
+
+    For example, if you have `gpt-3.5-turbo` deployed, with the deployment name
+    `35-turbo-dev`, the constructor should look like:
+
+    .. code-block:: python
+
+        AzureChatOpenAI(
+            azure_deployment="35-turbo-dev",
+            openai_api_version="2023-05-15",
+        )
+
+    Be aware the API version may change.
+
+    You can also specify the version of the model using ``model_version`` constructor
+    parameter, as Azure OpenAI doesn't return model version with the response.
+
+    Default is empty. When you specify the version, it will be appended to the
+    model name in the response. Setting correct version will help you to calculate the
+    cost properly. Model version is not validated, so make sure you set it correctly
+    to get the correct cost.
+
+    Any parameters that are valid to be passed to the openai.create call can be passed
+    in, even if not explicitly saved on this class.
+    """
+
+    azure_endpoint: Union[str, None] = None
+    """Your Azure endpoint, including the resource.
+    
+        Automatically inferred from env var `AZURE_OPENAI_ENDPOINT` if not provided.
+    
+        Example: `https://example-resource.azure.openai.com/`
+    """
+    deployment_name: Union[str, None] = Field(default=None, alias="azure_deployment")
+    """A model deployment. 
+    
+        If given sets the base client URL to include `/deployments/{azure_deployment}`.
+        Note: this means you won't be able to use non-deployment endpoints.
+    """
+    openai_api_version: str = Field(default="", alias="api_version")
+    """Automatically inferred from env var `OPENAI_API_VERSION` if not provided."""
+    openai_api_key: Union[str, None] = Field(default=None, alias="api_key")
+    """Automatically inferred from env var `AZURE_OPENAI_API_KEY` if not provided."""
+    azure_ad_token: Union[str, None] = None
+    """Your Azure Active Directory token.
+    
+        Automatically inferred from env var `AZURE_OPENAI_AD_TOKEN` if not provided.
+        
+        For more: 
+        https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id.
+    """  # noqa: E501
+    azure_ad_token_provider: Union[Callable[[], str], None] = None
+    """A function that returns an Azure Active Directory token.
+        
+        Will be invoked on every request.
+    """
+    model_version: str = ""
+    """Legacy, for openai<1.0.0 support."""
+    openai_api_type: str = ""
+    """Legacy, for openai<1.0.0 support."""
+    validate_base_url: bool = True
+    """For backwards compatibility. If legacy val openai_api_base is passed in, try to 
+        infer if it is a base_url or azure_endpoint and update accordingly.
+    """
+
+    @classmethod
+    def get_lc_namespace(cls) -> List[str]:
+        """Get the namespace of the langchain object."""
+        return ["langchain", "chat_models", "azure_openai"]
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that api key and python package exists in environment."""
+        if values["n"] < 1:
+            raise ValueError("n must be at least 1.")
+        if values["n"] > 1 and values["streaming"]:
+            raise ValueError("n must be 1 when streaming.")
+
+        # Check OPENAI_KEY for backwards compatibility.
+        # TODO: Remove OPENAI_API_KEY support to avoid possible conflict when using
+        # other forms of azure credentials.
+        values["openai_api_key"] = (
+            values["openai_api_key"]
+            or os.getenv("AZURE_OPENAI_API_KEY")
+            or os.getenv("OPENAI_API_KEY")
+        )
+        values["openai_api_base"] = values["openai_api_base"] or os.getenv(
+            "OPENAI_API_BASE"
+        )
+        values["openai_api_version"] = values["openai_api_version"] or os.getenv(
+            "OPENAI_API_VERSION"
+        )
+        # Check OPENAI_ORGANIZATION for backwards compatibility.
+        values["openai_organization"] = (
+            values["openai_organization"]
+            or os.getenv("OPENAI_ORG_ID")
+            or os.getenv("OPENAI_ORGANIZATION")
+        )
+        values["azure_endpoint"] = values["azure_endpoint"] or os.getenv(
+            "AZURE_OPENAI_ENDPOINT"
+        )
+        values["azure_ad_token"] = values["azure_ad_token"] or os.getenv(
+            "AZURE_OPENAI_AD_TOKEN"
+        )
+
+        values["openai_api_type"] = get_from_dict_or_env(
+            values, "openai_api_type", "OPENAI_API_TYPE", default="azure"
+        )
+        values["openai_proxy"] = get_from_dict_or_env(
+            values, "openai_proxy", "OPENAI_PROXY", default=""
+        )
+        # For backwards compatibility. Before openai v1, no distinction was made
+        # between azure_endpoint and base_url (openai_api_base).
+        openai_api_base = values["openai_api_base"]
+        if openai_api_base and values["validate_base_url"]:
+            if "/openai" not in openai_api_base:
+                raise ValueError(
+                    "As of openai>=1.0.0, Azure endpoints should be specified via "
+                    "the `azure_endpoint` param not `openai_api_base` "
+                    "(or alias `base_url`)."
+                )
+            if values["deployment_name"]:
+                raise ValueError(
+                    "As of openai>=1.0.0, if `deployment_name` (or alias "
+                    "`azure_deployment`) is specified then "
+                    "`openai_api_base` (or alias `base_url`) should not be. "
+                    "Instead use `deployment_name` (or alias `azure_deployment`) "
+                    "and `azure_endpoint`."
+                )
+        client_params = {
+            "api_version": values["openai_api_version"],
+            "azure_endpoint": values["azure_endpoint"],
+            "azure_deployment": values["deployment_name"],
+            "api_key": values["openai_api_key"],
+            "azure_ad_token": values["azure_ad_token"],
+            "azure_ad_token_provider": values["azure_ad_token_provider"],
+            "organization": values["openai_organization"],
+            "base_url": values["openai_api_base"],
+            "timeout": values["request_timeout"],
+            "max_retries": values["max_retries"],
+            "default_headers": values["default_headers"],
+            "default_query": values["default_query"],
+            "http_client": values["http_client"],
+        }
+        values["client"] = openai.AzureOpenAI(**client_params).chat.completions
+        values["async_client"] = openai.AsyncAzureOpenAI(
+            **client_params
+        ).chat.completions
+        return values
+
+    @property
+    def _identifying_params(self) -> Dict[str, Any]:
+        """Get the identifying parameters."""
+        return {**self._default_params}
+
+    @property
+    def _llm_type(self) -> str:
+        return "azure-openai-chat"
+
+    @property
+    def lc_attributes(self) -> Dict[str, Any]:
+        return {
+            "openai_api_type": self.openai_api_type,
+            "openai_api_version": self.openai_api_version,
+        }
+
+    def _create_chat_result(self, response: Union[dict, BaseModel]) -> ChatResult:
+        if not isinstance(response, dict):
+            response = response.dict()
+        for res in response["choices"]:
+            if res.get("finish_reason", None) == "content_filter":
+                raise ValueError(
+                    "Azure has not provided the response due to a content filter "
+                    "being triggered"
+                )
+        chat_result = super()._create_chat_result(response)
+
+        if "model" in response:
+            model = response["model"]
+            if self.model_version:
+                model = f"{model}-{self.model_version}"
+
+            if chat_result.llm_output is not None and isinstance(
+                chat_result.llm_output, dict
+            ):
+                chat_result.llm_output["model_name"] = model
+
+        return chat_result
--- a/libs/partners/openai/langchain_openai/chat_models/base.py
+++ b/libs/partners/openai/langchain_openai/chat_models/base.py
@@ -0,0 +1,655 @@
+"""OpenAI chat wrapper."""
+from __future__ import annotations
+
+import logging
+import os
+import sys
+import warnings
+from typing import (
+    Any,
+    AsyncIterator,
+    Callable,
+    Dict,
+    Iterator,
+    List,
+    Mapping,
+    Optional,
+    Sequence,
+    Tuple,
+    Type,
+    Union,
+    cast,
+)
+
+import openai
+import tiktoken
+from langchain_core.callbacks import (
+    AsyncCallbackManagerForLLMRun,
+    CallbackManagerForLLMRun,
+)
+from langchain_core.language_models import LanguageModelInput
+from langchain_core.language_models.chat_models import (
+    BaseChatModel,
+    agenerate_from_stream,
+    generate_from_stream,
+)
+from langchain_core.messages import (
+    AIMessage,
+    AIMessageChunk,
+    BaseMessage,
+    BaseMessageChunk,
+    ChatMessage,
+    ChatMessageChunk,
+    FunctionMessage,
+    FunctionMessageChunk,
+    HumanMessage,
+    HumanMessageChunk,
+    SystemMessage,
+    SystemMessageChunk,
+    ToolMessage,
+    ToolMessageChunk,
+)
+from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
+from langchain_core.pydantic_v1 import BaseModel, Field, root_validator
+from langchain_core.runnables import Runnable
+from langchain_core.utils import (
+    get_from_dict_or_env,
+    get_pydantic_field_names,
+)
+from langchain_core.utils.function_calling import convert_to_openai_function
+
+logger = logging.getLogger(__name__)
+
+
+def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
+    """Convert a dictionary to a LangChain message.
+
+    Args:
+        _dict: The dictionary.
+
+    Returns:
+        The LangChain message.
+    """
+    role = _dict.get("role")
+    if role == "user":
+        return HumanMessage(content=_dict.get("content", ""))
+    elif role == "assistant":
+        # Fix for azure
+        # Also OpenAI returns None for tool invocations
+        content = _dict.get("content", "") or ""
+        additional_kwargs: Dict = {}
+        if function_call := _dict.get("function_call"):
+            additional_kwargs["function_call"] = dict(function_call)
+        if tool_calls := _dict.get("tool_calls"):
+            additional_kwargs["tool_calls"] = tool_calls
+        return AIMessage(content=content, additional_kwargs=additional_kwargs)
+    elif role == "system":
+        return SystemMessage(content=_dict.get("content", ""))
+    elif role == "function":
+        return FunctionMessage(content=_dict.get("content", ""), name=_dict.get("name"))
+    elif role == "tool":
+        additional_kwargs = {}
+        if "name" in _dict:
+            additional_kwargs["name"] = _dict["name"]
+        return ToolMessage(
+            content=_dict.get("content", ""),
+            tool_call_id=_dict.get("tool_call_id"),
+            additional_kwargs=additional_kwargs,
+        )
+    else:
+        return ChatMessage(content=_dict.get("content", ""), role=role)
+
+
+def _convert_message_to_dict(message: BaseMessage) -> dict:
+    """Convert a LangChain message to a dictionary.
+
+    Args:
+        message: The LangChain message.
+
+    Returns:
+        The dictionary.
+    """
+    message_dict: Dict[str, Any]
+    if isinstance(message, ChatMessage):
+        message_dict = {"role": message.role, "content": message.content}
+    elif isinstance(message, HumanMessage):
+        message_dict = {"role": "user", "content": message.content}
+    elif isinstance(message, AIMessage):
+        message_dict = {"role": "assistant", "content": message.content}
+        if "function_call" in message.additional_kwargs:
+            message_dict["function_call"] = message.additional_kwargs["function_call"]
+            # If function call only, content is None not empty string
+            if message_dict["content"] == "":
+                message_dict["content"] = None
+        if "tool_calls" in message.additional_kwargs:
+            message_dict["tool_calls"] = message.additional_kwargs["tool_calls"]
+            # If tool calls only, content is None not empty string
+            if message_dict["content"] == "":
+                message_dict["content"] = None
+    elif isinstance(message, SystemMessage):
+        message_dict = {"role": "system", "content": message.content}
+    elif isinstance(message, FunctionMessage):
+        message_dict = {
+            "role": "function",
+            "content": message.content,
+            "name": message.name,
+        }
+    elif isinstance(message, ToolMessage):
+        message_dict = {
+            "role": "tool",
+            "content": message.content,
+            "tool_call_id": message.tool_call_id,
+        }
+    else:
+        raise TypeError(f"Got unknown type {message}")
+    if "name" in message.additional_kwargs:
+        message_dict["name"] = message.additional_kwargs["name"]
+    return message_dict
+
+
+def _convert_delta_to_message_chunk(
+    _dict: Mapping[str, Any], default_class: Type[BaseMessageChunk]
+) -> BaseMessageChunk:
+    role = cast(str, _dict.get("role"))
+    content = cast(str, _dict.get("content") or "")
+    additional_kwargs: Dict = {}
+    if _dict.get("function_call"):
+        function_call = dict(_dict["function_call"])
+        if "name" in function_call and function_call["name"] is None:
+            function_call["name"] = ""
+        additional_kwargs["function_call"] = function_call
+    if _dict.get("tool_calls"):
+        additional_kwargs["tool_calls"] = _dict["tool_calls"]
+
+    if role == "user" or default_class == HumanMessageChunk:
+        return HumanMessageChunk(content=content)
+    elif role == "assistant" or default_class == AIMessageChunk:
+        return AIMessageChunk(content=content, additional_kwargs=additional_kwargs)
+    elif role == "system" or default_class == SystemMessageChunk:
+        return SystemMessageChunk(content=content)
+    elif role == "function" or default_class == FunctionMessageChunk:
+        return FunctionMessageChunk(content=content, name=_dict["name"])
+    elif role == "tool" or default_class == ToolMessageChunk:
+        return ToolMessageChunk(content=content, tool_call_id=_dict["tool_call_id"])
+    elif role or default_class == ChatMessageChunk:
+        return ChatMessageChunk(content=content, role=role)
+    else:
+        return default_class(content=content)  # type: ignore
+
+
+class ChatOpenAI(BaseChatModel):
+    """`OpenAI` Chat large language models API.
+
+    To use, you should have the
+    environment variable ``OPENAI_API_KEY`` set with your API key.
+
+    Any parameters that are valid to be passed to the openai.create call can be passed
+    in, even if not explicitly saved on this class.
+
+    Example:
+        .. code-block:: python
+
+            from langchain_openai import ChatOpenAI
+            openai = ChatOpenAI(model_name="gpt-3.5-turbo")
+    """
+
+    @property
+    def lc_secrets(self) -> Dict[str, str]:
+        return {"openai_api_key": "OPENAI_API_KEY"}
+
+    @classmethod
+    def get_lc_namespace(cls) -> List[str]:
+        """Get the namespace of the langchain object."""
+        return ["langchain", "chat_models", "openai"]
+
+    @property
+    def lc_attributes(self) -> Dict[str, Any]:
+        attributes: Dict[str, Any] = {}
+
+        if self.openai_organization:
+            attributes["openai_organization"] = self.openai_organization
+
+        if self.openai_api_base:
+            attributes["openai_api_base"] = self.openai_api_base
+
+        if self.openai_proxy:
+            attributes["openai_proxy"] = self.openai_proxy
+
+        return attributes
+
+    @classmethod
+    def is_lc_serializable(cls) -> bool:
+        """Return whether this model can be serialized by Langchain."""
+        return True
+
+    client: Any = Field(default=None, exclude=True)  #: :meta private:
+    async_client: Any = Field(default=None, exclude=True)  #: :meta private:
+    model_name: str = Field(default="gpt-3.5-turbo", alias="model")
+    """Model name to use."""
+    temperature: float = 0.7
+    """What sampling temperature to use."""
+    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
+    """Holds any model parameters valid for `create` call not explicitly specified."""
+    # When updating this to use a SecretStr
+    # Check for classes that derive from this class (as some of them
+    # may assume openai_api_key is a str)
+    openai_api_key: Optional[str] = Field(default=None, alias="api_key")
+    """Automatically inferred from env var `OPENAI_API_KEY` if not provided."""
+    openai_api_base: Optional[str] = Field(default=None, alias="base_url")
+    """Base URL path for API requests, leave blank if not using a proxy or service 
+        emulator."""
+    openai_organization: Optional[str] = Field(default=None, alias="organization")
+    """Automatically inferred from env var `OPENAI_ORG_ID` if not provided."""
+    # to support explicit proxy for OpenAI
+    openai_proxy: Optional[str] = None
+    request_timeout: Union[float, Tuple[float, float], Any, None] = Field(
+        default=None, alias="timeout"
+    )
+    """Timeout for requests to OpenAI completion API. Can be float, httpx.Timeout or 
+        None."""
+    max_retries: int = 2
+    """Maximum number of retries to make when generating."""
+    streaming: bool = False
+    """Whether to stream the results or not."""
+    n: int = 1
+    """Number of chat completions to generate for each prompt."""
+    max_tokens: Optional[int] = None
+    """Maximum number of tokens to generate."""
+    tiktoken_model_name: Optional[str] = None
+    """The model name to pass to tiktoken when using this class.
+    Tiktoken is used to count the number of tokens in messages, e.g. to keep
+    them under a certain limit. By default, when set to None, this will
+    be the same as the chat model name (`model_name`). However, there are some
+    cases where you may want to use this class with a model name not
+    supported by tiktoken. This can include when using Azure deployments or
+    when using one of the many model providers that expose an OpenAI-like
+    API but with different models. In those cases, in order to avoid erroring
+    when tiktoken is called, you can specify a model name to use here."""
+    default_headers: Union[Mapping[str, str], None] = None
+    default_query: Union[Mapping[str, object], None] = None
+    # Configure a custom httpx client. See the
+    # [httpx documentation](https://www.python-httpx.org/api/#client) for more details.
+    http_client: Union[Any, None] = None
+    """Optional httpx.Client."""
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        # Several fields declare an alias (e.g. `model_name` has alias `model`);
+        # this lets them be populated by their attribute name as well.
+        allow_population_by_field_name = True
+
+    @root_validator(pre=True)
+    def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+        """Build extra kwargs from additional params that were passed in."""
+        all_required_field_names = get_pydantic_field_names(cls)
+        extra = values.get("model_kwargs", {})
+        for field_name in list(values):
+            if field_name in extra:
+                raise ValueError(f"Found {field_name} supplied twice.")
+            if field_name not in all_required_field_names:
+                warnings.warn(
+                    f"""WARNING! {field_name} is not default parameter.
+                    {field_name} was transferred to model_kwargs.
+                    Please confirm that {field_name} is what you intended."""
+                )
+                extra[field_name] = values.pop(field_name)
+
+        invalid_model_kwargs = all_required_field_names.intersection(extra.keys())
+        if invalid_model_kwargs:
+            raise ValueError(
+                f"Parameters {invalid_model_kwargs} should be specified explicitly. "
+                f"Instead they were passed in as part of `model_kwargs` parameter."
+            )
+
+        values["model_kwargs"] = extra
+        return values
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that api key and python package exists in environment."""
+        if values["n"] < 1:
+            raise ValueError("n must be at least 1.")
+        if values["n"] > 1 and values["streaming"]:
+            raise ValueError("n must be 1 when streaming.")
+
+        values["openai_api_key"] = get_from_dict_or_env(
+            values, "openai_api_key", "OPENAI_API_KEY"
+        )
+        # Check OPENAI_ORGANIZATION for backwards compatibility.
+        values["openai_organization"] = (
+            values["openai_organization"]
+            or os.getenv("OPENAI_ORG_ID")
+            or os.getenv("OPENAI_ORGANIZATION")
+        )
+        values["openai_api_base"] = values["openai_api_base"] or os.getenv(
+            "OPENAI_API_BASE"
+        )
+        values["openai_proxy"] = get_from_dict_or_env(
+            values,
+            "openai_proxy",
+            "OPENAI_PROXY",
+            default="",
+        )
+
+        client_params = {
+            "api_key": values["openai_api_key"],
+            "organization": values["openai_organization"],
+            "base_url": values["openai_api_base"],
+            "timeout": values["request_timeout"],
+            "max_retries": values["max_retries"],
+            "default_headers": values["default_headers"],
+            "default_query": values["default_query"],
+            "http_client": values["http_client"],
+        }
+
+        if not values.get("client"):
+            values["client"] = openai.OpenAI(**client_params).chat.completions
+        if not values.get("async_client"):
+            values["async_client"] = openai.AsyncOpenAI(
+                **client_params
+            ).chat.completions
+        return values
+
+    @property
+    def _default_params(self) -> Dict[str, Any]:
+        """Get the default parameters for calling OpenAI API."""
+        params = {
+            "model": self.model_name,
+            "stream": self.streaming,
+            "n": self.n,
+            "temperature": self.temperature,
+            **self.model_kwargs,
+        }
+        if self.max_tokens is not None:
+            params["max_tokens"] = self.max_tokens
+        return params
+
+    def _combine_llm_outputs(self, llm_outputs: List[Optional[dict]]) -> dict:
+        overall_token_usage: dict = {}
+        system_fingerprint = None
+        for output in llm_outputs:
+            if output is None:
+                # Happens in streaming
+                continue
+            token_usage = output["token_usage"]
+            if token_usage is not None:
+                for k, v in token_usage.items():
+                    if k in overall_token_usage:
+                        overall_token_usage[k] += v
+                    else:
+                        overall_token_usage[k] = v
+            if system_fingerprint is None:
+                system_fingerprint = output.get("system_fingerprint")
+        combined = {"token_usage": overall_token_usage, "model_name": self.model_name}
+        if system_fingerprint:
+            combined["system_fingerprint"] = system_fingerprint
+        return combined
+
+    def _stream(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> Iterator[ChatGenerationChunk]:
+        message_dicts, params = self._create_message_dicts(messages, stop)
+        params = {**params, **kwargs, "stream": True}
+
+        default_chunk_class = AIMessageChunk
+        for chunk in self.client.create(messages=message_dicts, **params):
+            if not isinstance(chunk, dict):
+                chunk = chunk.dict()
+            if len(chunk["choices"]) == 0:
+                continue
+            choice = chunk["choices"][0]
+            chunk = _convert_delta_to_message_chunk(
+                choice["delta"], default_chunk_class
+            )
+            finish_reason = choice.get("finish_reason")
+            generation_info = (
+                dict(finish_reason=finish_reason) if finish_reason is not None else None
+            )
+            default_chunk_class = chunk.__class__
+            chunk = ChatGenerationChunk(message=chunk, generation_info=generation_info)
+            yield chunk
+            if run_manager:
+                run_manager.on_llm_new_token(chunk.text, chunk=chunk)
+
+    def _generate(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        stream: Optional[bool] = None,
+        **kwargs: Any,
+    ) -> ChatResult:
+        should_stream = stream if stream is not None else self.streaming
+        if should_stream:
+            stream_iter = self._stream(
+                messages, stop=stop, run_manager=run_manager, **kwargs
+            )
+            return generate_from_stream(stream_iter)
+        message_dicts, params = self._create_message_dicts(messages, stop)
+        params = {
+            **params,
+            **({"stream": stream} if stream is not None else {}),
+            **kwargs,
+        }
+        response = self.client.create(messages=message_dicts, **params)
+        return self._create_chat_result(response)
+
+    def _create_message_dicts(
+        self, messages: List[BaseMessage], stop: Optional[List[str]]
+    ) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
+        params = self._default_params
+        if stop is not None:
+            if "stop" in params:
+                raise ValueError("`stop` found in both the input and default params.")
+            params["stop"] = stop
+        message_dicts = [_convert_message_to_dict(m) for m in messages]
+        return message_dicts, params
+
+    def _create_chat_result(self, response: Union[dict, BaseModel]) -> ChatResult:
+        generations = []
+        if not isinstance(response, dict):
+            response = response.dict()
+        for res in response["choices"]:
+            message = _convert_dict_to_message(res["message"])
+            generation_info = dict(finish_reason=res.get("finish_reason"))
+            if "logprobs" in res:
+                generation_info["logprobs"] = res["logprobs"]
+            gen = ChatGeneration(
+                message=message,
+                generation_info=generation_info,
+            )
+            generations.append(gen)
+        token_usage = response.get("usage", {})
+        llm_output = {
+            "token_usage": token_usage,
+            "model_name": self.model_name,
+            "system_fingerprint": response.get("system_fingerprint", ""),
+        }
+        return ChatResult(generations=generations, llm_output=llm_output)
+
+    async def _astream(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> AsyncIterator[ChatGenerationChunk]:
+        message_dicts, params = self._create_message_dicts(messages, stop)
+        params = {**params, **kwargs, "stream": True}
+
+        default_chunk_class = AIMessageChunk
+        async for chunk in await self.async_client.create(
+            messages=message_dicts, **params
+        ):
+            if not isinstance(chunk, dict):
+                chunk = chunk.dict()
+            if len(chunk["choices"]) == 0:
+                continue
+            choice = chunk["choices"][0]
+            chunk = _convert_delta_to_message_chunk(
+                choice["delta"], default_chunk_class
+            )
+            finish_reason = choice.get("finish_reason")
+            generation_info = (
+                dict(finish_reason=finish_reason) if finish_reason is not None else None
+            )
+            default_chunk_class = chunk.__class__
+            chunk = ChatGenerationChunk(message=chunk, generation_info=generation_info)
+            yield chunk
+            if run_manager:
+                await run_manager.on_llm_new_token(token=chunk.text, chunk=chunk)
+
+    async def _agenerate(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+        stream: Optional[bool] = None,
+        **kwargs: Any,
+    ) -> ChatResult:
+        should_stream = stream if stream is not None else self.streaming
+        if should_stream:
+            stream_iter = self._astream(
+                messages, stop=stop, run_manager=run_manager, **kwargs
+            )
+            return await agenerate_from_stream(stream_iter)
+
+        message_dicts, params = self._create_message_dicts(messages, stop)
+        params = {
+            **params,
+            **({"stream": stream} if stream is not None else {}),
+            **kwargs,
+        }
+        response = await self.async_client.create(messages=message_dicts, **params)
+        return self._create_chat_result(response)
+
+    @property
+    def _identifying_params(self) -> Dict[str, Any]:
+        """Get the identifying parameters."""
+        return {"model_name": self.model_name, **self._default_params}
+
+    def _get_invocation_params(
+        self, stop: Optional[List[str]] = None, **kwargs: Any
+    ) -> Dict[str, Any]:
+        """Get the parameters used to invoke the model."""
+        return {
+            "model": self.model_name,
+            **super()._get_invocation_params(stop=stop),
+            **self._default_params,
+            **kwargs,
+        }
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of chat model."""
+        return "openai-chat"
+
+    def _get_encoding_model(self) -> Tuple[str, tiktoken.Encoding]:
+        if self.tiktoken_model_name is not None:
+            model = self.tiktoken_model_name
+        else:
+            model = self.model_name
+            if model == "gpt-3.5-turbo":
+                # gpt-3.5-turbo may change over time.
+                # Returning num tokens assuming gpt-3.5-turbo-0301.
+                model = "gpt-3.5-turbo-0301"
+            elif model == "gpt-4":
+                # gpt-4 may change over time.
+                # Returning num tokens assuming gpt-4-0314.
+                model = "gpt-4-0314"
+        # Returns the number of tokens used by a list of messages.
+        try:
+            encoding = tiktoken.encoding_for_model(model)
+        except KeyError:
+            logger.warning("Warning: model not found. Using cl100k_base encoding.")
+            model = "cl100k_base"
+            encoding = tiktoken.get_encoding(model)
+        return model, encoding
+
+    def get_token_ids(self, text: str) -> List[int]:
+        """Get the tokens present in the text with tiktoken package."""
+        # tiktoken NOT supported for Python 3.7 or below
+        if sys.version_info[1] <= 7:
+            return super().get_token_ids(text)
+        _, encoding_model = self._get_encoding_model()
+        return encoding_model.encode(text)
+
+    def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int:
+        """Calculate num tokens for gpt-3.5-turbo and gpt-4 with tiktoken package.
+
+        Official documentation: https://github.com/openai/openai-cookbook/blob/
+        main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb"""
+        if sys.version_info[1] <= 7:
+            return super().get_num_tokens_from_messages(messages)
+        model, encoding = self._get_encoding_model()
+        if model.startswith("gpt-3.5-turbo-0301"):
+            # every message follows <im_start>{role/name}\n{content}<im_end>\n
+            tokens_per_message = 4
+            # if there's a name, the role is omitted
+            tokens_per_name = -1
+        elif model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4"):
+            tokens_per_message = 3
+            tokens_per_name = 1
+        else:
+            raise NotImplementedError(
+                f"get_num_tokens_from_messages() is not presently implemented "
+                f"for model {model}. See "
+                "https://platform.openai.com/docs/guides/text-generation/managing-tokens"
+                " for information on how messages are converted to tokens."
+            )
+        num_tokens = 0
+        messages_dict = [_convert_message_to_dict(m) for m in messages]
+        for message in messages_dict:
+            num_tokens += tokens_per_message
+            for key, value in message.items():
+                # Cast str(value) in case the message value is not a string
+                # This occurs with function messages
+                num_tokens += len(encoding.encode(str(value)))
+                if key == "name":
+                    num_tokens += tokens_per_name
+        # every reply is primed with <im_start>assistant
+        num_tokens += 3
+        return num_tokens
+
+    def bind_functions(
+        self,
+        functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
+        function_call: Optional[str] = None,
+        **kwargs: Any,
+    ) -> Runnable[LanguageModelInput, BaseMessage]:
+        """Bind functions (and other objects) to this chat model.
+
+        Args:
+            functions: A list of function definitions to bind to this chat model.
+                Can be  a dictionary, pydantic model, or callable. Pydantic
+                models and callables will be automatically converted to
+                their schema dictionary representation.
+            function_call: Which function to require the model to call.
+                Must be the name of the single provided function or
+                "auto" to automatically determine which function to call
+                (if any).
+            kwargs: Any additional parameters to pass to the
+                :class:`~langchain.runnable.Runnable` constructor.
+        """
+
+        formatted_functions = [convert_to_openai_function(fn) for fn in functions]
+        if function_call is not None:
+            if len(formatted_functions) != 1:
+                raise ValueError(
+                    "When specifying `function_call`, you must provide exactly one "
+                    "function."
+                )
+            if formatted_functions[0]["name"] != function_call:
+                raise ValueError(
+                    f"Function call {function_call} was specified, but the only "
+                    f"provided function was {formatted_functions[0]['name']}."
+                )
+            function_call_ = {"name": function_call}
+            kwargs = {**kwargs, "function_call": function_call_}
+        return super().bind(
+            functions=formatted_functions,
+            **kwargs,
+        )
--- a/libs/partners/openai/langchain_openai/embeddings/init.py
+++ b/libs/partners/openai/langchain_openai/embeddings/init.py
@@ -0,0 +1,7 @@
+from langchain_openai.embeddings.azure import AzureOpenAIEmbeddings
+from langchain_openai.embeddings.base import OpenAIEmbeddings
+
+__all__ = [
+    "OpenAIEmbeddings",
+    "AzureOpenAIEmbeddings",
+]
--- a/libs/partners/openai/langchain_openai/embeddings/azure.py
+++ b/libs/partners/openai/langchain_openai/embeddings/azure.py
@@ -0,0 +1,130 @@
+"""Azure OpenAI embeddings wrapper."""
+from __future__ import annotations
+
+import os
+from typing import Callable, Dict, Optional, Union
+
+import openai
+from langchain_core.pydantic_v1 import Field, root_validator
+from langchain_core.utils import get_from_dict_or_env
+
+from langchain_openai.embeddings.base import OpenAIEmbeddings
+
+
+class AzureOpenAIEmbeddings(OpenAIEmbeddings):
+    """`Azure OpenAI` Embeddings API."""
+
+    azure_endpoint: Union[str, None] = None
+    """Your Azure endpoint, including the resource.
+
+        Automatically inferred from env var `AZURE_OPENAI_ENDPOINT` if not provided.
+        
+        Example: `https://example-resource.azure.openai.com/`
+    """
+    deployment: Optional[str] = Field(default=None, alias="azure_deployment")
+    """A model deployment. 
+
+        If given sets the base client URL to include `/deployments/{azure_deployment}`.
+        Note: this means you won't be able to use non-deployment endpoints.
+    """
+    openai_api_key: Union[str, None] = Field(default=None, alias="api_key")
+    """Automatically inferred from env var `AZURE_OPENAI_API_KEY` if not provided."""
+    azure_ad_token: Union[str, None] = None
+    """Your Azure Active Directory token.
+
+        Automatically inferred from env var `AZURE_OPENAI_AD_TOKEN` if not provided.
+
+        For more: 
+        https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id.
+    """  # noqa: E501
+    azure_ad_token_provider: Union[Callable[[], str], None] = None
+    """A function that returns an Azure Active Directory token.
+
+        Will be invoked on every request.
+    """
+    openai_api_version: Optional[str] = Field(default=None, alias="api_version")
+    """Automatically inferred from env var `OPENAI_API_VERSION` if not provided."""
+    validate_base_url: bool = True
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that api key and python package exists in environment."""
+        # Check OPENAI_KEY for backwards compatibility.
+        # TODO: Remove OPENAI_API_KEY support to avoid possible conflict when using
+        # other forms of azure credentials.
+        values["openai_api_key"] = (
+            values["openai_api_key"]
+            or os.getenv("AZURE_OPENAI_API_KEY")
+            or os.getenv("OPENAI_API_KEY")
+        )
+        values["openai_api_base"] = values["openai_api_base"] or os.getenv(
+            "OPENAI_API_BASE"
+        )
+        values["openai_api_version"] = values["openai_api_version"] or os.getenv(
+            "OPENAI_API_VERSION", default="2023-05-15"
+        )
+        values["openai_api_type"] = get_from_dict_or_env(
+            values, "openai_api_type", "OPENAI_API_TYPE", default="azure"
+        )
+        values["openai_organization"] = (
+            values["openai_organization"]
+            or os.getenv("OPENAI_ORG_ID")
+            or os.getenv("OPENAI_ORGANIZATION")
+        )
+        values["openai_proxy"] = get_from_dict_or_env(
+            values,
+            "openai_proxy",
+            "OPENAI_PROXY",
+            default="",
+        )
+        values["azure_endpoint"] = values["azure_endpoint"] or os.getenv(
+            "AZURE_OPENAI_ENDPOINT"
+        )
+        values["azure_ad_token"] = values["azure_ad_token"] or os.getenv(
+            "AZURE_OPENAI_AD_TOKEN"
+        )
+        # Azure OpenAI embedding models allow a maximum of 16 texts
+        # at a time in each batch
+        # See: https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings
+        values["chunk_size"] = min(values["chunk_size"], 16)
+        # For backwards compatibility. Before openai v1, no distinction was made
+        # between azure_endpoint and base_url (openai_api_base).
+        openai_api_base = values["openai_api_base"]
+        if openai_api_base and values["validate_base_url"]:
+            if "/openai" not in openai_api_base:
+                values["openai_api_base"] += "/openai"
+                raise ValueError(
+                    "As of openai>=1.0.0, Azure endpoints should be specified via "
+                    "the `azure_endpoint` param not `openai_api_base` "
+                    "(or alias `base_url`). "
+                )
+            if values["deployment"]:
+                raise ValueError(
+                    "As of openai>=1.0.0, if `deployment` (or alias "
+                    "`azure_deployment`) is specified then "
+                    "`openai_api_base` (or alias `base_url`) should not be. "
+                    "Instead use `deployment` (or alias `azure_deployment`) "
+                    "and `azure_endpoint`."
+                )
+        client_params = {
+            "api_version": values["openai_api_version"],
+            "azure_endpoint": values["azure_endpoint"],
+            "azure_deployment": values["deployment"],
+            "api_key": values["openai_api_key"],
+            "azure_ad_token": values["azure_ad_token"],
+            "azure_ad_token_provider": values["azure_ad_token_provider"],
+            "organization": values["openai_organization"],
+            "base_url": values["openai_api_base"],
+            "timeout": values["request_timeout"],
+            "max_retries": values["max_retries"],
+            "default_headers": values["default_headers"],
+            "default_query": values["default_query"],
+            "http_client": values["http_client"],
+        }
+        values["client"] = openai.AzureOpenAI(**client_params).embeddings
+        values["async_client"] = openai.AsyncAzureOpenAI(**client_params).embeddings
+        return values
+
+    @property
+    def _llm_type(self) -> str:
+        return "azure-openai-chat"
--- a/libs/partners/openai/langchain_openai/embeddings/base.py
+++ b/libs/partners/openai/langchain_openai/embeddings/base.py
@@ -0,0 +1,523 @@
+from __future__ import annotations
+
+import logging
+import os
+import warnings
+from typing import (
+    Any,
+    Dict,
+    Iterable,
+    List,
+    Literal,
+    Mapping,
+    Optional,
+    Sequence,
+    Set,
+    Tuple,
+    Union,
+    cast,
+)
+
+import numpy as np
+import openai
+import tiktoken
+from langchain_core.embeddings import Embeddings
+from langchain_core.pydantic_v1 import BaseModel, Extra, Field, root_validator
+from langchain_core.utils import get_from_dict_or_env, get_pydantic_field_names
+
+logger = logging.getLogger(__name__)
+
+
+class OpenAIEmbeddings(BaseModel, Embeddings):
+    """OpenAI embedding models.
+
+    To use, you should have the
+    environment variable ``OPENAI_API_KEY`` set with your API key or pass it
+    as a named parameter to the constructor.
+
+    Example:
+        .. code-block:: python
+
+            from langchain_community.embeddings import OpenAIEmbeddings
+            openai = OpenAIEmbeddings(openai_api_key="my-api-key")
+
+    In order to use the library with Microsoft Azure endpoints, you need to set
+    the OPENAI_API_TYPE, OPENAI_API_BASE, OPENAI_API_KEY and OPENAI_API_VERSION.
+    The OPENAI_API_TYPE must be set to 'azure' and the others correspond to
+    the properties of your endpoint.
+    In addition, the deployment name must be passed as the model parameter.
+
+    Example:
+        .. code-block:: python
+
+            import os
+
+            os.environ["OPENAI_API_TYPE"] = "azure"
+            os.environ["OPENAI_API_BASE"] = "https://<your-endpoint.openai.azure.com/"
+            os.environ["OPENAI_API_KEY"] = "your AzureOpenAI key"
+            os.environ["OPENAI_API_VERSION"] = "2023-05-15"
+            os.environ["OPENAI_PROXY"] = "http://your-corporate-proxy:8080"
+
+            from langchain_community.embeddings.openai import OpenAIEmbeddings
+            embeddings = OpenAIEmbeddings(
+                deployment="your-embeddings-deployment-name",
+                model="your-embeddings-model-name",
+                openai_api_base="https://your-endpoint.openai.azure.com/",
+                openai_api_type="azure",
+            )
+            text = "This is a test query."
+            query_result = embeddings.embed_query(text)
+
+    """
+
+    client: Any = Field(default=None, exclude=True)  #: :meta private:
+    async_client: Any = Field(default=None, exclude=True)  #: :meta private:
+    model: str = "text-embedding-ada-002"
+    # to support Azure OpenAI Service custom deployment names
+    deployment: Optional[str] = model
+    # TODO: Move to AzureOpenAIEmbeddings.
+    openai_api_version: Optional[str] = Field(default=None, alias="api_version")
+    """Automatically inferred from env var `OPENAI_API_VERSION` if not provided."""
+    # to support Azure OpenAI Service custom endpoints
+    openai_api_base: Optional[str] = Field(default=None, alias="base_url")
+    """Base URL path for API requests, leave blank if not using a proxy or service 
+        emulator."""
+    # to support Azure OpenAI Service custom endpoints
+    openai_api_type: Optional[str] = None
+    # to support explicit proxy for OpenAI
+    openai_proxy: Optional[str] = None
+    embedding_ctx_length: int = 8191
+    """The maximum number of tokens to embed at once."""
+    openai_api_key: Optional[str] = Field(default=None, alias="api_key")
+    """Automatically inferred from env var `OPENAI_API_KEY` if not provided."""
+    openai_organization: Optional[str] = Field(default=None, alias="organization")
+    """Automatically inferred from env var `OPENAI_ORG_ID` if not provided."""
+    allowed_special: Union[Literal["all"], Set[str]] = set()
+    disallowed_special: Union[Literal["all"], Set[str], Sequence[str]] = "all"
+    chunk_size: int = 1000
+    """Maximum number of texts to embed in each batch"""
+    max_retries: int = 2
+    """Maximum number of retries to make when generating."""
+    request_timeout: Optional[Union[float, Tuple[float, float], Any]] = Field(
+        default=None, alias="timeout"
+    )
+    """Timeout for requests to OpenAI completion API. Can be float, httpx.Timeout or 
+        None."""
+    headers: Any = None
+    tiktoken_enabled: bool = True
+    """Set this to False for non-OpenAI implementations of the embeddings API, e.g.
+    the `--extensions openai` extension for `text-generation-webui`"""
+    tiktoken_model_name: Optional[str] = None
+    """The model name to pass to tiktoken when using this class. 
+    Tiktoken is used to count the number of tokens in documents to constrain 
+    them to be under a certain limit. By default, when set to None, this will 
+    be the same as the embedding model name. However, there are some cases 
+    where you may want to use this Embedding class with a model name not 
+    supported by tiktoken. This can include when using Azure embeddings or 
+    when using one of the many model providers that expose an OpenAI-like 
+    API but with different models. In those cases, in order to avoid erroring 
+    when tiktoken is called, you can specify a model name to use here."""
+    show_progress_bar: bool = False
+    """Whether to show a progress bar when embedding."""
+    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
+    """Holds any model parameters valid for `create` call not explicitly specified."""
+    skip_empty: bool = False
+    """Whether to skip empty strings when embedding or raise an error.
+    Defaults to not skipping."""
+    default_headers: Union[Mapping[str, str], None] = None
+    default_query: Union[Mapping[str, object], None] = None
+    # Configure a custom httpx client. See the
+    # [httpx documentation](https://www.python-httpx.org/api/#client) for more details.
+    retry_min_seconds: int = 4
+    """Min number of seconds to wait between retries"""
+    retry_max_seconds: int = 20
+    """Max number of seconds to wait between retries"""
+    http_client: Union[Any, None] = None
+    """Optional httpx.Client."""
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        extra = Extra.forbid
+        allow_population_by_field_name = True
+
+    @root_validator(pre=True)
+    def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+        """Build extra kwargs from additional params that were passed in."""
+        all_required_field_names = get_pydantic_field_names(cls)
+        extra = values.get("model_kwargs", {})
+        for field_name in list(values):
+            if field_name in extra:
+                raise ValueError(f"Found {field_name} supplied twice.")
+            if field_name not in all_required_field_names:
+                warnings.warn(
+                    f"""WARNING! {field_name} is not default parameter.
+                    {field_name} was transferred to model_kwargs.
+                    Please confirm that {field_name} is what you intended."""
+                )
+                extra[field_name] = values.pop(field_name)
+
+        invalid_model_kwargs = all_required_field_names.intersection(extra.keys())
+        if invalid_model_kwargs:
+            raise ValueError(
+                f"Parameters {invalid_model_kwargs} should be specified explicitly. "
+                f"Instead they were passed in as part of `model_kwargs` parameter."
+            )
+
+        values["model_kwargs"] = extra
+        return values
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that api key and python package exists in environment."""
+        values["openai_api_key"] = get_from_dict_or_env(
+            values, "openai_api_key", "OPENAI_API_KEY"
+        )
+        values["openai_api_base"] = values["openai_api_base"] or os.getenv(
+            "OPENAI_API_BASE"
+        )
+        values["openai_api_type"] = get_from_dict_or_env(
+            values,
+            "openai_api_type",
+            "OPENAI_API_TYPE",
+            default="",
+        )
+        values["openai_proxy"] = get_from_dict_or_env(
+            values,
+            "openai_proxy",
+            "OPENAI_PROXY",
+            default="",
+        )
+        if values["openai_api_type"] in ("azure", "azure_ad", "azuread"):
+            default_api_version = "2023-05-15"
+            # Azure OpenAI embedding models allow a maximum of 16 texts
+            # at a time in each batch
+            # See: https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings
+            values["chunk_size"] = min(values["chunk_size"], 16)
+        else:
+            default_api_version = ""
+        values["openai_api_version"] = get_from_dict_or_env(
+            values,
+            "openai_api_version",
+            "OPENAI_API_VERSION",
+            default=default_api_version,
+        )
+        # Check OPENAI_ORGANIZATION for backwards compatibility.
+        values["openai_organization"] = (
+            values["openai_organization"]
+            or os.getenv("OPENAI_ORG_ID")
+            or os.getenv("OPENAI_ORGANIZATION")
+        )
+        if values["openai_api_type"] in ("azure", "azure_ad", "azuread"):
+            raise ValueError(
+                "If you are using Azure, "
+                "please use the `AzureOpenAIEmbeddings` class."
+            )
+        client_params = {
+            "api_key": values["openai_api_key"],
+            "organization": values["openai_organization"],
+            "base_url": values["openai_api_base"],
+            "timeout": values["request_timeout"],
+            "max_retries": values["max_retries"],
+            "default_headers": values["default_headers"],
+            "default_query": values["default_query"],
+            "http_client": values["http_client"],
+        }
+        if not values.get("client"):
+            values["client"] = openai.OpenAI(**client_params).embeddings
+        if not values.get("async_client"):
+            values["async_client"] = openai.AsyncOpenAI(**client_params).embeddings
+        return values
+
+    @property
+    def _invocation_params(self) -> Dict[str, Any]:
+        return {"model": self.model, **self.model_kwargs}
+
+    # please refer to
+    # https://github.com/openai/openai-cookbook/blob/main/examples/Embedding_long_inputs.ipynb
+    def _get_len_safe_embeddings(
+        self, texts: List[str], *, engine: str, chunk_size: Optional[int] = None
+    ) -> List[List[float]]:
+        """
+        Generate length-safe embeddings for a list of texts.
+
+        This method handles tokenization and embedding generation, respecting the
+        set embedding context length and chunk size. It supports both tiktoken
+        and HuggingFace tokenizer based on the tiktoken_enabled flag.
+
+        Args:
+            texts (List[str]): A list of texts to embed.
+            engine (str): The engine or model to use for embeddings.
+            chunk_size (Optional[int]): The size of chunks for processing embeddings.
+
+        Returns:
+            List[List[float]]: A list of embeddings for each input text.
+        """
+
+        tokens = []
+        indices = []
+        model_name = self.tiktoken_model_name or self.model
+        _chunk_size = chunk_size or self.chunk_size
+
+        # If tiktoken flag set to False
+        if not self.tiktoken_enabled:
+            try:
+                from transformers import AutoTokenizer  # noqa: F401
+            except ImportError:
+                raise ValueError(
+                    "Could not import transformers python package. "
+                    "This is needed in order to for OpenAIEmbeddings without "
+                    "`tiktoken`. Please install it with `pip install transformers`. "
+                )
+
+            tokenizer = AutoTokenizer.from_pretrained(
+                pretrained_model_name_or_path=model_name
+            )
+            for i, text in enumerate(texts):
+                # Tokenize the text using HuggingFace transformers
+                tokenized = tokenizer.encode(text, add_special_tokens=False)
+
+                # Split tokens into chunks respecting the embedding_ctx_length
+                for j in range(0, len(tokenized), self.embedding_ctx_length):
+                    token_chunk = tokenized[j : j + self.embedding_ctx_length]
+
+                    # Convert token IDs back to a string
+                    chunk_text = tokenizer.decode(token_chunk)
+                    tokens.append(chunk_text)
+                    indices.append(i)
+        else:
+            try:
+                encoding = tiktoken.encoding_for_model(model_name)
+            except KeyError:
+                logger.warning("Warning: model not found. Using cl100k_base encoding.")
+                model = "cl100k_base"
+                encoding = tiktoken.get_encoding(model)
+            for i, text in enumerate(texts):
+                if self.model.endswith("001"):
+                    # See: https://github.com/openai/openai-python/
+                    #      issues/418#issuecomment-1525939500
+                    # replace newlines, which can negatively affect performance.
+                    text = text.replace("\n", " ")
+
+                token = encoding.encode(
+                    text=text,
+                    allowed_special=self.allowed_special,
+                    disallowed_special=self.disallowed_special,
+                )
+
+                # Split tokens into chunks respecting the embedding_ctx_length
+                for j in range(0, len(token), self.embedding_ctx_length):
+                    tokens.append(token[j : j + self.embedding_ctx_length])
+                    indices.append(i)
+
+        if self.show_progress_bar:
+            try:
+                from tqdm.auto import tqdm
+
+                _iter: Iterable = tqdm(range(0, len(tokens), _chunk_size))
+            except ImportError:
+                _iter = range(0, len(tokens), _chunk_size)
+        else:
+            _iter = range(0, len(tokens), _chunk_size)
+
+        batched_embeddings: List[List[float]] = []
+        for i in _iter:
+            response = self.client.create(
+                input=tokens[i : i + _chunk_size], **self._invocation_params
+            )
+            if not isinstance(response, dict):
+                response = response.dict()
+            batched_embeddings.extend(r["embedding"] for r in response["data"])
+
+        results: List[List[List[float]]] = [[] for _ in range(len(texts))]
+        num_tokens_in_batch: List[List[int]] = [[] for _ in range(len(texts))]
+        for i in range(len(indices)):
+            if self.skip_empty and len(batched_embeddings[i]) == 1:
+                continue
+            results[indices[i]].append(batched_embeddings[i])
+            num_tokens_in_batch[indices[i]].append(len(tokens[i]))
+
+        embeddings: List[List[float]] = [[] for _ in range(len(texts))]
+        for i in range(len(texts)):
+            _result = results[i]
+            if len(_result) == 0:
+                average_embedded = self.client.create(
+                    input="", **self._invocation_params
+                )
+                if not isinstance(average_embedded, dict):
+                    average_embedded = average_embedded.dict()
+                average = average_embedded["data"][0]["embedding"]
+            else:
+                average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
+            embeddings[i] = (average / np.linalg.norm(average)).tolist()
+
+        return embeddings
+
+    # please refer to
+    # https://github.com/openai/openai-cookbook/blob/main/examples/Embedding_long_inputs.ipynb
+    async def _aget_len_safe_embeddings(
+        self, texts: List[str], *, engine: str, chunk_size: Optional[int] = None
+    ) -> List[List[float]]:
+        """
+        Asynchronously generate length-safe embeddings for a list of texts.
+
+        This method handles tokenization and asynchronous embedding generation,
+        respecting the set embedding context length and chunk size. It supports both
+        `tiktoken` and HuggingFace `tokenizer` based on the tiktoken_enabled flag.
+
+        Args:
+            texts (List[str]): A list of texts to embed.
+            engine (str): The engine or model to use for embeddings.
+            chunk_size (Optional[int]): The size of chunks for processing embeddings.
+
+        Returns:
+            List[List[float]]: A list of embeddings for each input text.
+        """
+
+        tokens = []
+        indices = []
+        model_name = self.tiktoken_model_name or self.model
+        _chunk_size = chunk_size or self.chunk_size
+
+        # If tiktoken flag set to False
+        if not self.tiktoken_enabled:
+            try:
+                from transformers import AutoTokenizer
+            except ImportError:
+                raise ValueError(
+                    "Could not import transformers python package. "
+                    "This is needed in order to for OpenAIEmbeddings without "
+                    " `tiktoken`. Please install it with `pip install transformers`."
+                )
+
+            tokenizer = AutoTokenizer.from_pretrained(
+                pretrained_model_name_or_path=model_name
+            )
+            for i, text in enumerate(texts):
+                # Tokenize the text using HuggingFace transformers
+                tokenized = tokenizer.encode(text, add_special_tokens=False)
+
+                # Split tokens into chunks respecting the embedding_ctx_length
+                for j in range(0, len(tokenized), self.embedding_ctx_length):
+                    token_chunk = tokenized[j : j + self.embedding_ctx_length]
+
+                    # Convert token IDs back to a string
+                    chunk_text = tokenizer.decode(token_chunk)
+                    tokens.append(chunk_text)
+                    indices.append(i)
+        else:
+            try:
+                encoding = tiktoken.encoding_for_model(model_name)
+            except KeyError:
+                logger.warning("Warning: model not found. Using cl100k_base encoding.")
+                model = "cl100k_base"
+                encoding = tiktoken.get_encoding(model)
+            for i, text in enumerate(texts):
+                if self.model.endswith("001"):
+                    # See: https://github.com/openai/openai-python/
+                    #      issues/418#issuecomment-1525939500
+                    # replace newlines, which can negatively affect performance.
+                    text = text.replace("\n", " ")
+
+                token = encoding.encode(
+                    text=text,
+                    allowed_special=self.allowed_special,
+                    disallowed_special=self.disallowed_special,
+                )
+
+                # Split tokens into chunks respecting the embedding_ctx_length
+                for j in range(0, len(token), self.embedding_ctx_length):
+                    tokens.append(token[j : j + self.embedding_ctx_length])
+                    indices.append(i)
+
+        batched_embeddings: List[List[float]] = []
+        _chunk_size = chunk_size or self.chunk_size
+        for i in range(0, len(tokens), _chunk_size):
+            response = await self.async_client.create(
+                input=tokens[i : i + _chunk_size], **self._invocation_params
+            )
+
+            if not isinstance(response, dict):
+                response = response.dict()
+            batched_embeddings.extend(r["embedding"] for r in response["data"])
+
+        results: List[List[List[float]]] = [[] for _ in range(len(texts))]
+        num_tokens_in_batch: List[List[int]] = [[] for _ in range(len(texts))]
+        for i in range(len(indices)):
+            results[indices[i]].append(batched_embeddings[i])
+            num_tokens_in_batch[indices[i]].append(len(tokens[i]))
+
+        embeddings: List[List[float]] = [[] for _ in range(len(texts))]
+        for i in range(len(texts)):
+            _result = results[i]
+            if len(_result) == 0:
+                average_embedded = await self.async_client.create(
+                    input="", **self._invocation_params
+                )
+                if not isinstance(average_embedded, dict):
+                    average_embedded = average_embedded.dict()
+                average = average_embedded["data"][0]["embedding"]
+            else:
+                average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
+            embeddings[i] = (average / np.linalg.norm(average)).tolist()
+
+        return embeddings
+
+    def embed_documents(
+        self, texts: List[str], chunk_size: Optional[int] = 0
+    ) -> List[List[float]]:
+        """Call out to OpenAI's embedding endpoint for embedding search docs.
+
+        Args:
+            texts: The list of texts to embed.
+            chunk_size: The chunk size of embeddings. If None, will use the chunk size
+                specified by the class.
+
+        Returns:
+            List of embeddings, one for each text.
+        """
+        # NOTE: to keep things simple, we assume the list may contain texts longer
+        #       than the maximum context and use length-safe embedding function.
+        engine = cast(str, self.deployment)
+        return self._get_len_safe_embeddings(texts, engine=engine)
+
+    async def aembed_documents(
+        self, texts: List[str], chunk_size: Optional[int] = 0
+    ) -> List[List[float]]:
+        """Call out to OpenAI's embedding endpoint async for embedding search docs.
+
+        Args:
+            texts: The list of texts to embed.
+            chunk_size: The chunk size of embeddings. If None, will use the chunk size
+                specified by the class.
+
+        Returns:
+            List of embeddings, one for each text.
+        """
+        # NOTE: to keep things simple, we assume the list may contain texts longer
+        #       than the maximum context and use length-safe embedding function.
+        engine = cast(str, self.deployment)
+        return await self._aget_len_safe_embeddings(texts, engine=engine)
+
+    def embed_query(self, text: str) -> List[float]:
+        """Call out to OpenAI's embedding endpoint for embedding query text.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            Embedding for the text.
+        """
+        return self.embed_documents([text])[0]
+
+    async def aembed_query(self, text: str) -> List[float]:
+        """Call out to OpenAI's embedding endpoint async for embedding query text.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            Embedding for the text.
+        """
+        embeddings = await self.aembed_documents([text])
+        return embeddings[0]
--- a/libs/partners/openai/langchain_openai/llms/init.py
+++ b/libs/partners/openai/langchain_openai/llms/init.py
@@ -0,0 +1,7 @@
+from langchain_openai.llms.azure import AzureOpenAI
+from langchain_openai.llms.base import OpenAI
+
+__all__ = [
+    "OpenAI",
+    "AzureOpenAI",
+]
--- a/libs/partners/openai/langchain_openai/llms/azure.py
+++ b/libs/partners/openai/langchain_openai/llms/azure.py
@@ -0,0 +1,190 @@
+from __future__ import annotations
+
+import logging
+import os
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Mapping,
+    Union,
+)
+
+import openai
+from langchain_core.pydantic_v1 import Field, root_validator
+from langchain_core.utils import get_from_dict_or_env
+
+from langchain_openai.llms.base import BaseOpenAI
+
+logger = logging.getLogger(__name__)
+
+
+class AzureOpenAI(BaseOpenAI):
+    """Azure-specific OpenAI large language models.
+
+    To use, set the environment variable ``AZURE_OPENAI_API_KEY`` with your API
+    key (``OPENAI_API_KEY`` is still read as a fallback for backwards
+    compatibility). Azure Active Directory credentials can be passed via
+    ``azure_ad_token`` or ``azure_ad_token_provider``.
+
+    Any parameters that are valid to be passed to the openai.create call can be passed
+    in, even if not explicitly saved on this class.
+
+    Example:
+        .. code-block:: python
+
+            from langchain_openai import AzureOpenAI
+            openai = AzureOpenAI(model_name="gpt-3.5-turbo-instruct")
+    """
+
+    azure_endpoint: Union[str, None] = None
+    """Your Azure endpoint, including the resource.
+
+        Automatically inferred from env var `AZURE_OPENAI_ENDPOINT` if not provided.
+
+        Example: `https://example-resource.azure.openai.com/`
+    """
+    deployment_name: Union[str, None] = Field(default=None, alias="azure_deployment")
+    """A model deployment.
+
+        If given sets the base client URL to include `/deployments/{azure_deployment}`.
+        Note: this means you won't be able to use non-deployment endpoints.
+    """
+    openai_api_version: str = Field(default="", alias="api_version")
+    """Automatically inferred from env var `OPENAI_API_VERSION` if not provided."""
+    openai_api_key: Union[str, None] = Field(default=None, alias="api_key")
+    """Automatically inferred from env var `AZURE_OPENAI_API_KEY` if not provided."""
+    azure_ad_token: Union[str, None] = None
+    """Your Azure Active Directory token.
+
+        Automatically inferred from env var `AZURE_OPENAI_AD_TOKEN` if not provided.
+
+        For more:
+        https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id.
+    """  # noqa: E501
+    azure_ad_token_provider: Union[Callable[[], str], None] = None
+    """A function that returns an Azure Active Directory token.
+
+        Will be invoked on every request.
+    """
+    openai_api_type: str = ""
+    """Legacy, for openai<1.0.0 support."""
+    validate_base_url: bool = True
+    """For backwards compatibility. If legacy val openai_api_base is passed in,
+        check whether it looks like a base_url or an azure_endpoint and reject
+        ambiguous combinations.
+    """
+
+    @classmethod
+    def get_lc_namespace(cls) -> List[str]:
+        """Get the namespace of the langchain object."""
+        return ["langchain", "llms", "openai"]
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate settings, resolve env-var fallbacks and build the Azure clients.
+
+        Args:
+            values: The field values collected by pydantic for this instance.
+
+        Returns:
+            The same mapping with credentials/endpoints filled in from the
+            environment and ``client`` / ``async_client`` set to the sync and
+            async Azure completions clients.
+
+        Raises:
+            ValueError: If ``n`` < 1, if streaming is combined with ``n`` > 1 or
+                ``best_of`` > 1, or if ``openai_api_base`` is used in a way that
+                is not valid for openai>=1.0.0 (see the checks below).
+        """
+        if values["n"] < 1:
+            raise ValueError("n must be at least 1.")
+        if values["streaming"] and values["n"] > 1:
+            raise ValueError("Cannot stream results when n > 1.")
+        if values["streaming"] and values["best_of"] > 1:
+            raise ValueError("Cannot stream results when best_of > 1.")
+
+        # Check OPENAI_KEY for backwards compatibility.
+        # TODO: Remove OPENAI_API_KEY support to avoid possible conflict when using
+        # other forms of azure credentials.
+        values["openai_api_key"] = (
+            values["openai_api_key"]
+            or os.getenv("AZURE_OPENAI_API_KEY")
+            or os.getenv("OPENAI_API_KEY")
+        )
+
+        values["azure_endpoint"] = values["azure_endpoint"] or os.getenv(
+            "AZURE_OPENAI_ENDPOINT"
+        )
+        values["azure_ad_token"] = values["azure_ad_token"] or os.getenv(
+            "AZURE_OPENAI_AD_TOKEN"
+        )
+        values["openai_api_base"] = values["openai_api_base"] or os.getenv(
+            "OPENAI_API_BASE"
+        )
+        values["openai_proxy"] = get_from_dict_or_env(
+            values,
+            "openai_proxy",
+            "OPENAI_PROXY",
+            default="",
+        )
+        values["openai_organization"] = (
+            values["openai_organization"]
+            or os.getenv("OPENAI_ORG_ID")
+            or os.getenv("OPENAI_ORGANIZATION")
+        )
+        values["openai_api_version"] = values["openai_api_version"] or os.getenv(
+            "OPENAI_API_VERSION"
+        )
+        values["openai_api_type"] = get_from_dict_or_env(
+            values, "openai_api_type", "OPENAI_API_TYPE", default="azure"
+        )
+        # For backwards compatibility. Before openai v1, no distinction was made
+        # between azure_endpoint and base_url (openai_api_base).
+        openai_api_base = values["openai_api_base"]
+        if openai_api_base and values["validate_base_url"]:
+            # Both checks fail hard, so there is no point in patching `values`
+            # before raising: the caller never sees the mutated mapping.
+            if "/openai" not in openai_api_base:
+                raise ValueError(
+                    "As of openai>=1.0.0, Azure endpoints should be specified via "
+                    "the `azure_endpoint` param not `openai_api_base` "
+                    "(or alias `base_url`)."
+                )
+            if values["deployment_name"]:
+                raise ValueError(
+                    "As of openai>=1.0.0, if `deployment_name` (or alias "
+                    "`azure_deployment`) is specified then "
+                    "`openai_api_base` (or alias `base_url`) should not be. "
+                    "Instead use `deployment_name` (or alias `azure_deployment`) "
+                    "and `azure_endpoint`."
+                )
+        client_params = {
+            "api_version": values["openai_api_version"],
+            "azure_endpoint": values["azure_endpoint"],
+            "azure_deployment": values["deployment_name"],
+            "api_key": values["openai_api_key"],
+            "azure_ad_token": values["azure_ad_token"],
+            "azure_ad_token_provider": values["azure_ad_token_provider"],
+            "organization": values["openai_organization"],
+            "base_url": values["openai_api_base"],
+            "timeout": values["request_timeout"],
+            "max_retries": values["max_retries"],
+            "default_headers": values["default_headers"],
+            "default_query": values["default_query"],
+            "http_client": values["http_client"],
+        }
+        values["client"] = openai.AzureOpenAI(**client_params).completions
+        values["async_client"] = openai.AsyncAzureOpenAI(**client_params).completions
+
+        return values
+
+    @property
+    def _identifying_params(self) -> Mapping[str, Any]:
+        """Identifying parameters: the deployment name plus the parent's values."""
+        return {
+            "deployment_name": self.deployment_name,
+            **super()._identifying_params,
+        }
+
+    @property
+    def _invocation_params(self) -> Dict[str, Any]:
+        """Invocation parameters, with ``model`` defaulting to the deployment name."""
+        openai_params = {"model": self.deployment_name}
+        # Parent params are spread last, so a "model" key there takes precedence.
+        return {**openai_params, **super()._invocation_params}
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of llm."""
+        return "azure"
+
+    @property
+    def lc_attributes(self) -> Dict[str, Any]:
+        """Extra attributes (API type and version) exposed via ``lc_attributes``."""
+        return {
+            "openai_api_type": self.openai_api_type,
+            "openai_api_version": self.openai_api_version,
+        }
--- a/libs/partners/openai/langchain_openai/llms/base.py
+++ b/libs/partners/openai/langchain_openai/llms/base.py
@@ -0,0 +1,611 @@
+from __future__ import annotations
+
+import logging
+import os
+import sys
+from typing import (
+    AbstractSet,
+    Any,
+    AsyncIterator,
+    Collection,
+    Dict,
+    Iterator,
+    List,
+    Literal,
+    Mapping,
+    Optional,
+    Set,
+    Tuple,
+    Union,
+)
+
+import openai
+import tiktoken
+from langchain_core.callbacks import (
+    AsyncCallbackManagerForLLMRun,
+    CallbackManagerForLLMRun,
+)
+from langchain_core.language_models.llms import BaseLLM
+from langchain_core.outputs import Generation, GenerationChunk, LLMResult
+from langchain_core.pydantic_v1 import Field, root_validator
+from langchain_core.utils import get_from_dict_or_env, get_pydantic_field_names
+from langchain_core.utils.utils import build_extra_kwargs
+
+logger = logging.getLogger(__name__)
+
+
+def _update_token_usage(
+    keys: Set[str], response: Dict[str, Any], token_usage: Dict[str, Any]
+) -> None:
+    """Accumulate the usage counters of one response into ``token_usage``.
+
+    Only keys present both in ``keys`` and in ``response["usage"]`` are
+    considered. A counter seen for the first time is stored as-is; afterwards
+    it is added to the running total. ``token_usage`` is mutated in place.
+    """
+    usage = response["usage"]
+    for name in keys.intersection(usage):
+        if name in token_usage:
+            token_usage[name] += usage[name]
+        else:
+            token_usage[name] = usage[name]
+
+
+def _stream_response_to_generation_chunk(
+    stream_response: Dict[str, Any],
+) -> GenerationChunk:
+    """Build a ``GenerationChunk`` from one streamed response payload.
+
+    An empty ``choices`` list yields a chunk with empty text. Otherwise only
+    the first choice is used: its ``text`` becomes the chunk text and its
+    ``finish_reason`` / ``logprobs`` (``None`` when absent) go into
+    ``generation_info``.
+    """
+    choices = stream_response["choices"]
+    if not choices:
+        return GenerationChunk(text="")
+    first = choices[0]
+    return GenerationChunk(
+        text=first["text"],
+        generation_info={
+            "finish_reason": first.get("finish_reason", None),
+            "logprobs": first.get("logprobs", None),
+        },
+    )
+
+
+class BaseOpenAI(BaseLLM):
+    """Base OpenAI large language model class."""
+
+    @property
+    def lc_secrets(self) -> Dict[str, str]:
+        """Map the secret field ``openai_api_key`` to its env var name."""
+        return dict(openai_api_key="OPENAI_API_KEY")
+
+    @property
+    def lc_attributes(self) -> Dict[str, Any]:
+        """Collect the optional connection settings that are actually set.
+
+        Returns:
+            A dict holding ``openai_api_base``, ``openai_organization`` and
+            ``openai_proxy`` for every one of them that has a truthy value.
+        """
+        candidates = {
+            "openai_api_base": self.openai_api_base,
+            "openai_organization": self.openai_organization,
+            "openai_proxy": self.openai_proxy,
+        }
+        return {key: value for key, value in candidates.items() if value}
+
+    client: Any = Field(default=None, exclude=True)  #: :meta private:
+    async_client: Any = Field(default=None, exclude=True)  #: :meta private:
+    model_name: str = Field(default="gpt-3.5-turbo-instruct", alias="model")
+    """Model name to use."""
+    temperature: float = 0.7
+    """What sampling temperature to use."""
+    max_tokens: int = 256
+    """The maximum number of tokens to generate in the completion.
+    -1 returns as many tokens as possible given the prompt and
+    the models maximal context size."""
+    top_p: float = 1
+    """Total probability mass of tokens to consider at each step."""
+    frequency_penalty: float = 0
+    """Penalizes repeated tokens according to frequency."""
+    presence_penalty: float = 0
+    """Penalizes repeated tokens."""
+    n: int = 1
+    """How many completions to generate for each prompt."""
+    best_of: int = 1
+    """Generates best_of completions server-side and returns the "best"."""
+    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
+    """Holds any model parameters valid for `create` call not explicitly specified."""
+    # When updating this to use a SecretStr
+    # Check for classes that derive from this class (as some of them
+    # may assume openai_api_key is a str)
+    openai_api_key: Optional[str] = Field(default=None, alias="api_key")
+    """Automatically inferred from env var `OPENAI_API_KEY` if not provided."""
+    openai_api_base: Optional[str] = Field(default=None, alias="base_url")
+    """Base URL path for API requests, leave blank if not using a proxy or service 
+        emulator."""
+    openai_organization: Optional[str] = Field(default=None, alias="organization")
+    """Automatically inferred from env var `OPENAI_ORG_ID` if not provided."""
+    # to support explicit proxy for OpenAI
+    openai_proxy: Optional[str] = None
+    batch_size: int = 20
+    """Batch size to use when passing multiple documents to generate."""
+    request_timeout: Union[float, Tuple[float, float], Any, None] = Field(
+        default=None, alias="timeout"
+    )
+    """Timeout for requests to OpenAI completion API. Can be float, httpx.Timeout or 
+        None."""
+    logit_bias: Optional[Dict[str, float]] = Field(default_factory=dict)
+    """Adjust the probability of specific tokens being generated."""
+    max_retries: int = 2
+    """Maximum number of retries to make when generating."""
+    streaming: bool = False
+    """Whether to stream the results or not."""
+    allowed_special: Union[Literal["all"], AbstractSet[str]] = set()
+    """Set of special tokens that are allowed."""
+    disallowed_special: Union[Literal["all"], Collection[str]] = "all"
+    """Set of special tokens that are not allowed."""
+    tiktoken_model_name: Optional[str] = None
+    """The model name to pass to tiktoken when using this class.
+    Tiktoken is used to tokenize text, e.g. to count the tokens in a prompt.
+    By default, when set to None, this will be the same as `model_name`.
+    However, there are some cases where you may want to use this class with
+    a model name not supported by tiktoken. This can include when using Azure
+    deployments or when using one of the many model providers that expose an
+    OpenAI-like API but with different models. In those cases, in order to
+    avoid erroring when tiktoken is called, you can specify a model name to
+    use here."""
+    default_headers: Union[Mapping[str, str], None] = None
+    default_query: Union[Mapping[str, object], None] = None
+    # Configure a custom httpx client. See the
+    # [httpx documentation](https://www.python-httpx.org/api/#client) for more details.
+    http_client: Union[Any, None] = None
+    """Optional httpx.Client."""
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        # Accept values keyed by the declared field name (e.g. ``model_name``)
+        # in addition to the field's alias (e.g. ``model``).
+        allow_population_by_field_name = True
+
+    @root_validator(pre=True)
+    def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+        """Build extra kwargs from additional params that were passed in.
+
+        Runs before field validation: whatever ``build_extra_kwargs`` returns
+        for the current ``model_kwargs``, the raw input values and the class's
+        field names is stored back under ``values["model_kwargs"]``.
+        """
+        declared_fields = get_pydantic_field_names(cls)
+        values["model_kwargs"] = build_extra_kwargs(
+            values.get("model_kwargs", {}), values, declared_fields
+        )
+        return values
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate settings, resolve env-var fallbacks and build the clients.
+
+        Args:
+            values: The field values collected by pydantic for this instance.
+
+        Returns:
+            The same mapping with key/base URL/proxy/organization resolved and
+            ``client`` / ``async_client`` populated unless already provided.
+
+        Raises:
+            ValueError: If ``n`` < 1, or streaming is requested together with
+                ``n`` > 1 or ``best_of`` > 1.
+        """
+        if values["n"] < 1:
+            raise ValueError("n must be at least 1.")
+        if values["streaming"]:
+            if values["n"] > 1:
+                raise ValueError("Cannot stream results when n > 1.")
+            if values["best_of"] > 1:
+                raise ValueError("Cannot stream results when best_of > 1.")
+
+        values["openai_api_key"] = get_from_dict_or_env(
+            values, "openai_api_key", "OPENAI_API_KEY"
+        )
+
+        base_url = values["openai_api_base"]
+        if not base_url:
+            base_url = os.getenv("OPENAI_API_BASE")
+        values["openai_api_base"] = base_url
+
+        values["openai_proxy"] = get_from_dict_or_env(
+            values, "openai_proxy", "OPENAI_PROXY", default=""
+        )
+
+        # Explicit value first, then OPENAI_ORG_ID, then OPENAI_ORGANIZATION.
+        organization = values["openai_organization"]
+        if not organization:
+            organization = os.getenv("OPENAI_ORG_ID") or os.getenv(
+                "OPENAI_ORGANIZATION"
+            )
+        values["openai_organization"] = organization
+
+        shared_kwargs = dict(
+            api_key=values["openai_api_key"],
+            organization=values["openai_organization"],
+            base_url=values["openai_api_base"],
+            timeout=values["request_timeout"],
+            max_retries=values["max_retries"],
+            default_headers=values["default_headers"],
+            default_query=values["default_query"],
+            http_client=values["http_client"],
+        )
+        # Only build clients the caller did not inject.
+        if not values.get("client"):
+            values["client"] = openai.OpenAI(**shared_kwargs).completions
+        if not values.get("async_client"):
+            values["async_client"] = openai.AsyncOpenAI(**shared_kwargs).completions
+
+        return values
+
+    @property
+    def _default_params(self) -> Dict[str, Any]:
+        """Get the default parameters for calling OpenAI API.
+
+        Sampling settings are always included; ``max_tokens`` is added when it
+        is not ``None`` and ``best_of`` only when it exceeds 1. Entries from
+        ``model_kwargs`` are applied last and override same-named keys.
+        """
+        params: Dict[str, Any] = dict(
+            temperature=self.temperature,
+            top_p=self.top_p,
+            frequency_penalty=self.frequency_penalty,
+            presence_penalty=self.presence_penalty,
+            n=self.n,
+            logit_bias=self.logit_bias,
+        )
+
+        if self.max_tokens is not None:
+            params["max_tokens"] = self.max_tokens
+
+        # Azure gpt-35-turbo doesn't support best_of
+        # don't specify best_of if it is 1
+        if self.best_of > 1:
+            params["best_of"] = self.best_of
+
+        params.update(self.model_kwargs)
+        return params
+
+    def _stream(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> Iterator[GenerationChunk]:
+        """Stream completion chunks for a single prompt.
+
+        Each streamed response is converted to a ``GenerationChunk`` and
+        yielded; afterwards, if a run manager is given, it is notified of the
+        new token (with the chunk's logprobs when available).
+        """
+        request = {**self._invocation_params, **kwargs, "stream": True}
+        self.get_sub_prompts(request, [prompt], stop)  # this mutates request
+        for event in self.client.create(prompt=prompt, **request):
+            payload = event if isinstance(event, dict) else event.dict()
+            chunk = _stream_response_to_generation_chunk(payload)
+            yield chunk
+            if not run_manager:
+                continue
+            info = chunk.generation_info
+            run_manager.on_llm_new_token(
+                chunk.text,
+                chunk=chunk,
+                verbose=self.verbose,
+                logprobs=info["logprobs"] if info else None,
+            )
+
+    async def _astream(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> AsyncIterator[GenerationChunk]:
+        """Asynchronously stream completion chunks for a single prompt.
+
+        Each streamed response is converted to a ``GenerationChunk`` and
+        yielded; afterwards, if a run manager is given, it is awaited with the
+        new token (and the chunk's logprobs when available).
+        """
+        request = {**self._invocation_params, **kwargs, "stream": True}
+        self.get_sub_prompts(request, [prompt], stop)  # this mutates request
+        response_stream = await self.async_client.create(prompt=prompt, **request)
+        async for event in response_stream:
+            payload = event if isinstance(event, dict) else event.dict()
+            chunk = _stream_response_to_generation_chunk(payload)
+            yield chunk
+            if not run_manager:
+                continue
+            info = chunk.generation_info
+            await run_manager.on_llm_new_token(
+                chunk.text,
+                chunk=chunk,
+                verbose=self.verbose,
+                logprobs=info["logprobs"] if info else None,
+            )
+
+    def _generate(
+        self,
+        prompts: List[str],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> LLMResult:
+        """Call out to OpenAI's endpoint with k unique prompts.
+
+        Prompts are split into batches by ``get_sub_prompts``. In streaming
+        mode each batch must hold exactly one prompt and its chunks are merged
+        into a single choice; otherwise the batch is sent in one request and
+        its choices, token usage and first non-empty system fingerprint are
+        collected.
+
+        Args:
+            prompts: The prompts to pass into the model.
+            stop: Optional list of stop words to use when generating.
+
+        Returns:
+            The full LLM output.
+
+        Raises:
+            ValueError: If streaming is enabled and a batch has several prompts.
+
+        Example:
+            .. code-block:: python
+
+                response = openai.generate(["Tell me a joke."])
+        """
+        # TODO: write a unit test for this
+        params = {**self._invocation_params, **kwargs}
+        batches = self.get_sub_prompts(params, prompts, stop)
+        choices = []
+        token_usage: Dict[str, int] = {}
+        # Usage counters to aggregate across responses.
+        usage_keys = {"completion_tokens", "prompt_tokens", "total_tokens"}
+        system_fingerprint: Optional[str] = None
+        for batch in batches:
+            if not self.streaming:
+                response = self.client.create(prompt=batch, **params)
+                if not isinstance(response, dict):
+                    # V1 client returns the response in an PyDantic object instead of
+                    # dict. For the transition period, we deep convert it to dict.
+                    response = response.dict()
+
+                choices.extend(response["choices"])
+                _update_token_usage(usage_keys, response, token_usage)
+                if not system_fingerprint:
+                    system_fingerprint = response.get("system_fingerprint")
+                continue
+
+            if len(batch) > 1:
+                raise ValueError("Cannot stream results with multiple prompts.")
+
+            generation: Optional[GenerationChunk] = None
+            for chunk in self._stream(batch[0], stop, run_manager, **kwargs):
+                if generation is not None:
+                    generation += chunk
+                else:
+                    generation = chunk
+            assert generation is not None
+            info = generation.generation_info
+            choices.append(
+                {
+                    "text": generation.text,
+                    "finish_reason": info.get("finish_reason") if info else None,
+                    "logprobs": info.get("logprobs") if info else None,
+                }
+            )
+        return self.create_llm_result(
+            choices,
+            prompts,
+            params,
+            token_usage,
+            system_fingerprint=system_fingerprint,
+        )
+
+    async def _agenerate(
+        self,
+        prompts: List[str],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> LLMResult:
+        """Call out to OpenAI's endpoint async with k unique prompts.
+
+        Prompts are split into batches by ``get_sub_prompts``. In streaming
+        mode each batch must hold exactly one prompt and its chunks are merged
+        into a single choice; otherwise the batch is sent in one request and
+        its choices, token usage and first non-empty system fingerprint are
+        collected.
+
+        Args:
+            prompts: The prompts to pass into the model.
+            stop: Optional list of stop words to use when generating.
+            run_manager: Optional async callback manager forwarded to
+                ``_astream`` in streaming mode.
+
+        Returns:
+            The full LLM output.
+
+        Raises:
+            ValueError: If streaming is enabled and a batch has several prompts.
+        """
+        params = self._invocation_params
+        params = {**params, **kwargs}
+        sub_prompts = self.get_sub_prompts(params, prompts, stop)
+        choices = []
+        token_usage: Dict[str, int] = {}
+        # Get the token usage from the response.
+        # Includes prompt, completion, and total tokens used.
+        _keys = {"completion_tokens", "prompt_tokens", "total_tokens"}
+        system_fingerprint: Optional[str] = None
+        for _prompts in sub_prompts:
+            if self.streaming:
+                if len(_prompts) > 1:
+                    raise ValueError("Cannot stream results with multiple prompts.")
+
+                generation: Optional[GenerationChunk] = None
+                async for chunk in self._astream(
+                    _prompts[0], stop, run_manager, **kwargs
+                ):
+                    if generation is None:
+                        generation = chunk
+                    else:
+                        generation += chunk
+                assert generation is not None
+                choices.append(
+                    {
+                        "text": generation.text,
+                        "finish_reason": generation.generation_info.get("finish_reason")
+                        if generation.generation_info
+                        else None,
+                        "logprobs": generation.generation_info.get("logprobs")
+                        if generation.generation_info
+                        else None,
+                    }
+                )
+            else:
+                response = await self.async_client.create(prompt=_prompts, **params)
+                if not isinstance(response, dict):
+                    # V1 client returns the response in an PyDantic object instead of
+                    # dict. For the transition period, we deep convert it to dict.
+                    response = response.dict()
+                choices.extend(response["choices"])
+                _update_token_usage(_keys, response, token_usage)
+                # Mirror the sync path: keep the first fingerprint reported, so
+                # the value passed to create_llm_result is not always None.
+                if not system_fingerprint:
+                    system_fingerprint = response.get("system_fingerprint")
+        return self.create_llm_result(
+            choices,
+            prompts,
+            params,
+            token_usage,
+            system_fingerprint=system_fingerprint,
+        )
+
+    def get_sub_prompts(
+        self,
+        params: Dict[str, Any],
+        prompts: List[str],
+        stop: Optional[List[str]] = None,
+    ) -> List[List[str]]:
+        """Get the sub prompts for llm call.
+
+        Mutates ``params``: stores ``stop`` when given and replaces a
+        ``max_tokens`` of -1 with the budget computed for the single prompt.
+
+        Args:
+            params: Invocation parameters; updated in place.
+            prompts: The prompts to split.
+            stop: Optional stop words to record in ``params``.
+
+        Returns:
+            The prompts chunked into lists of at most ``batch_size`` items.
+
+        Raises:
+            ValueError: If ``stop`` is given while ``params`` already has one,
+                or ``max_tokens`` is -1 with more or fewer than one prompt.
+        """
+        if stop is not None:
+            if "stop" in params:
+                raise ValueError("`stop` found in both the input and default params.")
+            params["stop"] = stop
+        if params["max_tokens"] == -1:
+            if len(prompts) != 1:
+                raise ValueError(
+                    "max_tokens set to -1 not supported for multiple inputs."
+                )
+            params["max_tokens"] = self.max_tokens_for_prompt(prompts[0])
+        step = self.batch_size
+        return [
+            prompts[start : start + step] for start in range(0, len(prompts), step)
+        ]
+
+    def create_llm_result(
+        self,
+        choices: Any,
+        prompts: List[str],
+        params: Dict[str, Any],
+        token_usage: Dict[str, int],
+        *,
+        system_fingerprint: Optional[str] = None,
+    ) -> LLMResult:
+        """Create the LLMResult from the choices and prompts.
+
+        ``choices`` is read as a flat sequence holding ``n`` consecutive
+        entries per prompt, where ``n`` comes from ``params`` (falling back to
+        ``self.n``). The output carries the token usage, the model name and,
+        when truthy, the system fingerprint.
+        """
+        n = params.get("n", self.n)
+        generations = [
+            [
+                Generation(
+                    text=choice["text"],
+                    generation_info={
+                        "finish_reason": choice.get("finish_reason"),
+                        "logprobs": choice.get("logprobs"),
+                    },
+                )
+                for choice in choices[index * n : (index + 1) * n]
+            ]
+            for index in range(len(prompts))
+        ]
+        llm_output = {"token_usage": token_usage, "model_name": self.model_name}
+        if system_fingerprint:
+            llm_output["system_fingerprint"] = system_fingerprint
+        return LLMResult(generations=generations, llm_output=llm_output)
+
+    @property
+    def _invocation_params(self) -> Dict[str, Any]:
+        """Get the parameters used to invoke the model.
+
+        The base implementation returns ``_default_params`` unchanged.
+        """
+        return self._default_params
+
+    @property
+    def _identifying_params(self) -> Mapping[str, Any]:
+        """Get the identifying parameters.
+
+        Returns:
+            ``model_name`` followed by everything in ``_default_params``.
+        """
+        return {"model_name": self.model_name, **self._default_params}
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of llm (the constant string ``"openai"``)."""
+        return "openai"
+
+    def get_token_ids(self, text: str) -> List[int]:
+        """Get the token IDs using the tiktoken package.
+
+        The encoding is looked up for ``tiktoken_model_name`` when set,
+        otherwise for ``model_name``; unknown models fall back to the
+        ``cl100k_base`` encoding with a warning.
+
+        Args:
+            text: The text to tokenize.
+
+        Returns:
+            The token IDs of ``text``.
+        """
+        # tiktoken NOT supported for Python < 3.8
+        if sys.version_info < (3, 8):
+            # Must return token IDs (a list), not a token count.
+            return super().get_token_ids(text)
+
+        model_name = self.tiktoken_model_name or self.model_name
+        try:
+            enc = tiktoken.encoding_for_model(model_name)
+        except KeyError:
+            logger.warning("Warning: model not found. Using cl100k_base encoding.")
+            enc = tiktoken.get_encoding("cl100k_base")
+
+        return enc.encode(
+            text,
+            allowed_special=self.allowed_special,
+            disallowed_special=self.disallowed_special,
+        )
+
+    @staticmethod
+    def modelname_to_contextsize(modelname: str) -> int:
+        """Calculate the maximum number of tokens possible to generate for a model.
+
+        Fine-tuned model names are reduced to their base model before the
+        lookup, both in the legacy ``<base>:ft-...`` form and in the
+        ``ft:<base>:...`` form.
+
+        Args:
+            modelname: The modelname we want to know the context size for.
+
+        Returns:
+            The maximum context size
+
+        Raises:
+            ValueError: If the (base) model name is not in the known table.
+
+        Example:
+            .. code-block:: python
+
+                max_tokens = openai.modelname_to_contextsize("gpt-3.5-turbo-instruct")
+        """
+        model_token_mapping = {
+            "gpt-4": 8192,
+            "gpt-4-0314": 8192,
+            "gpt-4-0613": 8192,
+            "gpt-4-32k": 32768,
+            "gpt-4-32k-0314": 32768,
+            "gpt-4-32k-0613": 32768,
+            "gpt-3.5-turbo": 4096,
+            "gpt-3.5-turbo-0301": 4096,
+            "gpt-3.5-turbo-0613": 4096,
+            "gpt-3.5-turbo-16k": 16385,
+            "gpt-3.5-turbo-16k-0613": 16385,
+            "gpt-3.5-turbo-instruct": 4096,
+            "text-ada-001": 2049,
+            "ada": 2049,
+            "text-babbage-001": 2040,
+            "babbage": 2049,
+            "text-curie-001": 2049,
+            "curie": 2049,
+            "davinci": 2049,
+            "text-davinci-003": 4097,
+            "text-davinci-002": 4097,
+            "code-davinci-002": 8001,
+            "code-davinci-001": 8001,
+            "code-cushman-002": 2048,
+            "code-cushman-001": 2048,
+        }
+
+        # handling finetuned models
+        if modelname.startswith("ft:"):
+            # Newer naming scheme: "ft:<base-model>:<org>:<suffix>:<id>".
+            modelname = modelname.split(":")[1]
+        elif "ft-" in modelname:
+            # Legacy naming scheme: "<base-model>:ft-<org>-<timestamp>".
+            modelname = modelname.split(":")[0]
+
+        context_size = model_token_mapping.get(modelname, None)
+
+        if context_size is None:
+            raise ValueError(
+                f"Unknown model: {modelname}. Please provide a valid OpenAI model name. "
+                "Known models are: " + ", ".join(model_token_mapping)
+            )
+
+        return context_size
+
+    @property
+    def max_context_size(self) -> int:
+        """Get max context size for this model.
+
+        Looks up ``self.model_name`` via ``modelname_to_contextsize``, so the
+        ``ValueError`` that lookup raises for unknown models surfaces here.
+        """
+        return self.modelname_to_contextsize(self.model_name)
+
+    def max_tokens_for_prompt(self, prompt: str) -> int:
+        """Calculate the maximum number of tokens possible to generate for a prompt.
+
+        This is the model's maximum context size minus the number of tokens
+        the prompt itself occupies.
+
+        Args:
+            prompt: The prompt to pass into the model.
+
+        Returns:
+            The maximum number of tokens to generate for a prompt.
+
+        Example:
+            .. code-block:: python
+
+                max_tokens = openai.max_tokens_for_prompt("Tell me a joke.")
+        """
+        used = self.get_num_tokens(prompt)
+        budget = self.max_context_size
+        return budget - used
+
+
+class OpenAI(BaseOpenAI):
+    """OpenAI large language models.
+
+    To use, you should have the ``openai`` python package installed, and the
+    environment variable ``OPENAI_API_KEY`` set with your API key.
+
+    Any parameters that are valid to be passed to the openai.create call can be passed
+    in, even if not explicitly saved on this class.
+
+    Example:
+        .. code-block:: python
+
+            from langchain_openai import OpenAI
+            openai = OpenAI(model_name="gpt-3.5-turbo-instruct")
+    """
+
+    @classmethod
+    def get_lc_namespace(cls) -> List[str]:
+        """Get the namespace of the langchain object."""
+        return ["langchain", "llms", "openai"]
+
+    @classmethod
+    def is_lc_serializable(cls) -> bool:
+        """Return whether this model can be serialized by Langchain."""
+        return True
+
+    @property
+    def _invocation_params(self) -> Dict[str, Any]:
+        """Invocation parameters, with ``model`` defaulting to ``model_name``."""
+        # Parent params are spread last, so a "model" key there takes precedence.
+        return {**{"model": self.model_name}, **super()._invocation_params}
--- a/libs/partners/openai/langchain_openai/py.typed
+++ b/libs/partners/openai/langchain_openai/py.typed