mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-01 11:02:37 +00:00
Takeoff integration (#9045)
## Description

This PR adds the Titan Takeoff Server to the available LLMs in LangChain.

Titan Takeoff is an inference server created by [TitanML](https://www.titanml.co/) that allows you to deploy large language models locally on your hardware in a single command. Most generative model architectures are included, such as Falcon, Llama 2, GPT2, T5 and many more.

Read more about Titan Takeoff here:

- [Blog](https://medium.com/@TitanML/introducing-titan-takeoff-6c30e55a8e1e)
- [Docs](https://docs.titanml.co/docs/titan-takeoff/getting-started)

#### Testing

As Titan Takeoff runs locally on port 8000 by default, no network access is needed. Responses are mocked for testing.

- [x] Make Lint
- [x] Make Format
- [x] Make Test

#### Dependencies

No new dependencies are introduced. However, users will need to install the titan-iris package in their local environment and start the Titan Takeoff inference server in order to use the Titan Takeoff integration.

Thanks for your help and please let me know if you have any questions.

cc: @hwchase17 @baskaryan
This commit is contained in:
committed by
GitHub
parent
3bdc273ab3
commit
8d351bfc20
@@ -76,6 +76,7 @@ from langchain.llms.self_hosted_hugging_face import SelfHostedHuggingFaceLLM
|
||||
from langchain.llms.stochasticai import StochasticAI
|
||||
from langchain.llms.symblai_nebula import Nebula
|
||||
from langchain.llms.textgen import TextGen
|
||||
from langchain.llms.titan_takeoff import TitanTakeoff
|
||||
from langchain.llms.tongyi import Tongyi
|
||||
from langchain.llms.vertexai import VertexAI
|
||||
from langchain.llms.vllm import VLLM
|
||||
@@ -142,6 +143,7 @@ __all__ = [
|
||||
"SelfHostedHuggingFaceLLM",
|
||||
"SelfHostedPipeline",
|
||||
"StochasticAI",
|
||||
"TitanTakeoff",
|
||||
"Tongyi",
|
||||
"VertexAI",
|
||||
"VLLM",
|
||||
@@ -203,6 +205,7 @@ type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
|
||||
"self_hosted_hugging_face": SelfHostedHuggingFaceLLM,
|
||||
"stochasticai": StochasticAI,
|
||||
"tongyi": Tongyi,
|
||||
"titan_takeoff": TitanTakeoff,
|
||||
"vertexai": VertexAI,
|
||||
"openllm": OpenLLM,
|
||||
"openllm_client": OpenLLM,
|
||||
|
157
libs/langchain/langchain/llms/titan_takeoff.py
Normal file
157
libs/langchain/langchain/llms/titan_takeoff.py
Normal file
@@ -0,0 +1,157 @@
|
||||
from typing import Any, Iterator, List, Mapping, Optional
|
||||
|
||||
import requests
|
||||
from requests.exceptions import ConnectionError
|
||||
|
||||
from langchain.callbacks.manager import CallbackManagerForLLMRun
|
||||
from langchain.llms.base import LLM
|
||||
from langchain.llms.utils import enforce_stop_tokens
|
||||
from langchain.schema.output import GenerationChunk
|
||||
|
||||
|
||||
class TitanTakeoff(LLM):
    """Wrapper around the Titan Takeoff inference server HTTP API.

    Requests are POSTed to ``http://localhost:{port}/generate`` (or
    ``/generate_stream`` when ``streaming`` is enabled). The sampling fields
    declared below are sent in the JSON body of every request.

    Example:
        .. code-block:: python

            from langchain.llms.titan_takeoff import TitanTakeoff

            model = TitanTakeoff(port=8000)
            response = model("What is the capital of the United Kingdom?")
    """

    port: int = 8000
    """Specifies the port to use for the Titan Takeoff API. Default = 8000."""

    generate_max_length: int = 128
    """Maximum generation length. Default = 128."""

    sampling_topk: int = 1
    """Sample predictions from the top K most probable candidates. Default = 1."""

    sampling_topp: float = 1.0
    """Sample from predictions whose cumulative probability exceeds this value.
    Default = 1.0.
    """

    sampling_temperature: float = 1.0
    """Sample with randomness. Bigger temperatures are associated with
    more randomness and 'creativity'. Default = 1.0.
    """

    repetition_penalty: float = 1.0
    """Penalise the generation of tokens that have been generated before.
    Set to > 1 to penalize. Default = 1 (no penalty).
    """

    no_repeat_ngram_size: int = 0
    """Prevent repetitions of ngrams of this size. Default = 0 (turned off)."""

    streaming: bool = False
    """Whether to stream the output. Default = False."""

    @property
    def _default_params(self) -> Mapping[str, Any]:
        """Get the default parameters for calling Titan Takeoff Server."""
        return {
            "generate_max_length": self.generate_max_length,
            "sampling_topk": self.sampling_topk,
            "sampling_topp": self.sampling_topp,
            "sampling_temperature": self.sampling_temperature,
            "repetition_penalty": self.repetition_penalty,
            "no_repeat_ngram_size": self.no_repeat_ngram_size,
        }

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "titan_takeoff"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call out to Titan Takeoff generate endpoint.

        When ``streaming`` is enabled the text is assembled from the chunks
        produced by ``_stream``; otherwise a single POST is made to the
        ``/generate`` endpoint and the ``"message"`` field of the JSON reply
        is used.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager: Optional callback manager, forwarded to ``_stream``
                when streaming.

        Returns:
            The string generated by the model, truncated with
            ``enforce_stop_tokens`` when stop words are given.

        Raises:
            ConnectionError: If the request fails with a
                ``requests`` ``ConnectionError``.
            ValueError: If the JSON reply has no ``"message"`` field.

        Example:
            .. code-block:: python

                prompt = "What is the capital of the United Kingdom?"
                response = model(prompt)

        """
        try:
            if self.streaming:
                # The generator is consumed inside the ``try`` so that
                # connection failures raised while streaming are translated
                # the same way as for the blocking request below.
                text = "".join(
                    chunk.text
                    for chunk in self._stream(
                        prompt=prompt,
                        stop=stop,
                        run_manager=run_manager,
                    )
                )
            else:
                url = f"http://localhost:{self.port}/generate"
                params = {"text": prompt, **self._default_params}

                response = requests.post(url, json=params)
                response.raise_for_status()
                response.encoding = "utf-8"

                # Parse the body once instead of once per access.
                payload = response.json()
                if "message" not in payload:
                    raise ValueError("Something went wrong.")
                text = payload["message"]
        except ConnectionError as err:
            # Adjacent literals (not a backslash continuation) keep source
            # indentation out of the message; ``from err`` keeps the cause.
            raise ConnectionError(
                "Could not connect to Titan Takeoff server. "
                "Please make sure that the server is running."
            ) from err

        # Applied to both the streamed and the blocking result. An empty
        # list carries no stop words, so it is treated like ``None``.
        if stop:
            text = enforce_stop_tokens(text, stop)
        return text

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        """Call out to Titan Takeoff stream endpoint.

        Args:
            prompt: The prompt to pass into the model.
            stop: Accepted for interface compatibility; it is not applied
                to the individual chunks yielded here.
            run_manager: Optional callback manager; ``on_llm_new_token`` is
                invoked for every non-empty chunk.

        Yields:
            A ``GenerationChunk`` for each non-empty piece of decoded text
            read from the response.

        Raises:
            requests.HTTPError: If the server replies with an error status.

        Example:
            .. code-block:: python

                prompt = "What is the capital of the United Kingdom?"
                for chunk in model._stream(prompt):
                    print(chunk.text, end="")

        """
        url = f"http://localhost:{self.port}/generate_stream"
        params = {"text": prompt, **self._default_params}

        # The context manager closes the streamed response even when the
        # consumer stops iterating before the body is exhausted.
        with requests.post(url, json=params, stream=True) as response:
            # Without this an HTTP error body would be yielded as if it
            # were generated text.
            response.raise_for_status()
            response.encoding = "utf-8"
            for text in response.iter_content(chunk_size=1, decode_unicode=True):
                if text:
                    chunk = GenerationChunk(text=text)
                    yield chunk
                    if run_manager:
                        run_manager.on_llm_new_token(token=chunk.text)

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {"port": self.port, **self._default_params}
|
@@ -0,0 +1,18 @@
|
||||
"""Test Titan Takeoff wrapper."""
|
||||
|
||||
|
||||
import responses
|
||||
|
||||
from langchain.llms.titan_takeoff import TitanTakeoff
|
||||
|
||||
|
||||
@responses.activate
def test_titan_takeoff_call() -> None:
    """Test valid call to Titan Takeoff."""
    url = "http://localhost:8000/generate"
    expected = "2 + 2 is 4"
    # Mock the server's reply so the test needs no running Takeoff instance.
    responses.add(responses.POST, url, json={"message": expected}, status=200)

    llm = TitanTakeoff()
    output = llm("What is 2 + 2?")

    assert isinstance(output, str)
    # The mocked payload fixes the exact text, so pin it, not just the type.
    assert output == expected
|
Reference in New Issue
Block a user