Mirror of https://github.com/hwchase17/langchain.git (synced 2025-08-19 09:30:15 +00:00)
feat: ctransformers support async chain (#6859)
- Description: Adding an async method (`_acall`) for CTransformers.
- Issue: I've found it impossible without this code to run websockets inside a FastAPI microservice together with a CTransformers model.
- Tag maintainer: Not necessary yet; I don't like to mention anyone directly.
- Twitter handle: @_semoal
Parent: d2cf0d16b3
Commit: 21a353e9c2
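For context, here is a minimal sketch of the kind of FastAPI websocket service the description refers to, streaming tokens to the client as the model generates them. The endpoint path, the `WebsocketTokenHandler` class, and the wiring are illustrative assumptions and not part of this commit; only `CTransformers`, `AsyncCallbackHandler`, and `agenerate` come from LangChain itself.

```python
# Hypothetical FastAPI websocket service (not part of this commit): streams
# CTransformers tokens to the client as they are generated via the async path.
from typing import Any

from fastapi import FastAPI, WebSocket
from langchain.callbacks.base import AsyncCallbackHandler
from langchain.llms import CTransformers

app = FastAPI()


class WebsocketTokenHandler(AsyncCallbackHandler):
    """Forward every new token to the connected websocket client."""

    def __init__(self, websocket: WebSocket) -> None:
        self.websocket = websocket

    async def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        await self.websocket.send_text(token)


@app.websocket("/generate")  # endpoint name is an assumption
async def generate(websocket: WebSocket) -> None:
    await websocket.accept()
    prompt = await websocket.receive_text()
    llm = CTransformers(
        model="marella/gpt-2-ggml",  # model id borrowed from the test below
        callbacks=[WebsocketTokenHandler(websocket)],
    )
    # agenerate dispatches to the new _acall, which awaits the async callback
    # for every token, so tokens can be pushed over the socket as they arrive.
    result = await llm.agenerate([prompt])
    await websocket.send_text(result.generations[0][0].text)
```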
langchain/llms/ctransformers.py

```diff
@@ -1,9 +1,13 @@
 """Wrapper around the C Transformers library."""
-from typing import Any, Dict, Optional, Sequence
+from functools import partial
+from typing import Any, Dict, List, Optional, Sequence
 
 from pydantic import root_validator
 
-from langchain.callbacks.manager import CallbackManagerForLLMRun
+from langchain.callbacks.manager import (
+    AsyncCallbackManagerForLLMRun,
+    CallbackManagerForLLMRun,
+)
 from langchain.llms.base import LLM
 
 
@@ -103,3 +107,36 @@ class CTransformers(LLM):
             text.append(chunk)
             _run_manager.on_llm_new_token(chunk, verbose=self.verbose)
         return "".join(text)
+
+    async def _acall(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> str:
+        """Asynchronous Call out to CTransformers generate method.
+        Very helpful when streaming (like with websockets!)
+
+        Args:
+            prompt: The prompt to pass into the model.
+            stop: A list of strings to stop generation when encountered.
+
+        Returns:
+            The string generated by the model.
+
+        Example:
+            .. code-block:: python
+                response = llm("Once upon a time, ")
+        """
+        text_callback = None
+        if run_manager:
+            text_callback = partial(run_manager.on_llm_new_token, verbose=self.verbose)
+
+        text = ""
+        for token in self.client(prompt, stop=stop, stream=True):
+            if text_callback:
+                await text_callback(token)
+            text += token
+
+        return text
```
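Outside a web framework, the new coroutine can also be driven directly from asyncio. A minimal sketch, assuming the model id used by the test below and borrowing the prompt from the docstring example:

```python
# Standalone sketch (assumed model id and prompt; not part of this commit).
import asyncio

from langchain.llms import CTransformers


async def main() -> None:
    llm = CTransformers(model="marella/gpt-2-ggml")
    # _acall is the coroutine added above; the public entry point
    # llm.agenerate([...]) routes to it as well.
    text = await llm._acall(prompt="Once upon a time, ")
    print(text)


asyncio.run(main())
```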
tests/integration_tests/llms/test_ctransformers.py

```diff
@@ -1,4 +1,5 @@
 """Test C Transformers wrapper."""
+import pytest
 
 from langchain.llms import CTransformers
 from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler
@@ -19,3 +20,20 @@ def test_ctransformers_call() -> None:
     assert isinstance(output, str)
     assert len(output) > 1
     assert 0 < callback_handler.llm_streams <= config["max_new_tokens"]
+
+
+@pytest.mark.asyncio
+async def test_ctransformers_async_inference() -> None:
+    config = {"max_new_tokens": 5}
+    callback_handler = FakeCallbackHandler()
+
+    llm = CTransformers(
+        model="marella/gpt-2-ggml",
+        config=config,
+        callbacks=[callback_handler],
+    )
+
+    output = await llm._acall(prompt="Say foo:")
+    assert isinstance(output, str)
+    assert len(output) > 1
+    assert 0 < callback_handler.llm_streams <= config["max_new_tokens"]
```
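One usage note on the test: `@pytest.mark.asyncio` requires the pytest-asyncio plugin to be installed, otherwise the coroutine test will not be executed. Awaiting the private `_acall` directly keeps the assertion on `callback_handler.llm_streams` tied to exactly the code path added above, at the cost of bypassing the public `agenerate` API.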