Mirror of https://github.com/hwchase17/langchain.git, synced 2025-09-08 14:31:55 +00:00
community[minor]: Adds Llamafile as an LLM (#17431)
* **Description:** Adds a simple LLM implementation for interacting with [llamafile](https://github.com/Mozilla-Ocho/llamafile)-based models.
* **Dependencies:** N/A
* **Issue:** N/A

**Detail**

[llamafile](https://github.com/Mozilla-Ocho/llamafile) lets you run LLMs locally from a single file on most computers without installing any dependencies. To use the llamafile LLM implementation, the user needs to:

1. Download a llamafile, e.g. https://huggingface.co/jartine/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile?download=true
2. Make the file executable.
3. Run the llamafile in "server mode". (All llamafiles come packaged with a lightweight server; by default, the server listens at `http://localhost:8080`.)

```bash
wget https://url/of/model.llamafile
chmod +x model.llamafile
./model.llamafile --server --nobrowser
```

Now, the user can invoke the LLM via the LangChain client:

```python
from langchain_community.llms.llamafile import Llamafile

llm = Llamafile()
llm.invoke("Tell me a joke.")
```
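Streaming is supported as well via the `streaming` flag. A minimal sketch based on the integration tests below; the prompt and print loop are illustrative, and a llamafile server is assumed to be running at the default address:

```python
from langchain_community.llms.llamafile import Llamafile

# Assumes a llamafile server is listening at http://localhost:8080.
llm = Llamafile(streaming=True)

# stream() yields the completion token by token as plain strings.
for token in llm.stream("Tell me about Roman dodecahedrons."):
    print(token, end="", flush=True)
```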
@@ -0,0 +1,46 @@
import os
from typing import Generator

import pytest
import requests
from requests.exceptions import ConnectionError, HTTPError

from langchain_community.llms.llamafile import Llamafile

LLAMAFILE_SERVER_BASE_URL = os.getenv(
    "LLAMAFILE_SERVER_BASE_URL", "http://localhost:8080"
)


def _ping_llamafile_server() -> bool:
    try:
        response = requests.get(LLAMAFILE_SERVER_BASE_URL)
        response.raise_for_status()
    except (ConnectionError, HTTPError):
        return False

    return True


@pytest.mark.skipif(
    not _ping_llamafile_server(),
    reason=f"unable to find llamafile server at {LLAMAFILE_SERVER_BASE_URL}, "
    f"please start one and re-run this test",
)
def test_llamafile_call() -> None:
    llm = Llamafile()
    output = llm.invoke("Say foo:")
    assert isinstance(output, str)


@pytest.mark.skipif(
    not _ping_llamafile_server(),
    reason=f"unable to find llamafile server at {LLAMAFILE_SERVER_BASE_URL}, "
    f"please start one and re-run this test",
)
def test_llamafile_streaming() -> None:
    llm = Llamafile(streaming=True)
    generator = llm.stream("Tell me about Roman dodecahedrons.")
    assert isinstance(generator, Generator)
    for token in generator:
        assert isinstance(token, str)