mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-16 06:53:16 +00:00
Add C Transformers for GGML Models (#5218)
# Add C Transformers for GGML Models

I created Python bindings for the GGML models: https://github.com/marella/ctransformers

Currently it supports GPT-2, GPT-J, GPT-NeoX, LLaMA, MPT, etc. See [Supported Models](https://github.com/marella/ctransformers#supported-models).

It provides a unified interface for all models:

```python
from langchain.llms import CTransformers

llm = CTransformers(model='/path/to/ggml-gpt-2.bin', model_type='gpt2')

print(llm('AI is going to'))
```

It can be used with models hosted on the Hugging Face Hub:

```py
llm = CTransformers(model='marella/gpt-2-ggml')
```

It supports streaming:

```py
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

llm = CTransformers(model='marella/gpt-2-ggml', callbacks=[StreamingStdOutCallbackHandler()])
```

Please see [README](https://github.com/marella/ctransformers#readme) for more details.

---------

Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
This commit is contained in:
21
tests/integration_tests/llms/test_ctransformers.py
Normal file
21
tests/integration_tests/llms/test_ctransformers.py
Normal file
@@ -0,0 +1,21 @@
|
||||
"""Test C Transformers wrapper."""
|
||||
|
||||
from langchain.llms import CTransformers
|
||||
from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler
|
||||
|
||||
|
||||
def test_ctransformers_call() -> None:
    """Check that a CTransformers call returns text and fires stream callbacks.

    Generation is capped through the ``max_new_tokens`` config entry. After the
    call, the fake handler's ``llm_streams`` value must be positive and at most
    that cap.
    """
    handler = FakeCallbackHandler()
    generation_config = {"max_new_tokens": 5}
    model = CTransformers(
        model="marella/gpt-2-ggml",
        config=generation_config,
        callbacks=[handler],
    )

    completion = model("Say foo:")

    # The wrapper must hand back a string longer than one character.
    assert isinstance(completion, str)
    assert len(completion) > 1

    # Read the counter once, then bound it on both sides.
    streamed = handler.llm_streams
    assert streamed > 0
    assert streamed <= generation_config["max_new_tokens"]
|
Reference in New Issue
Block a user