DB-GPT/dbgpt/model/llm_out/llama_cpp_llm.py

from typing import Dict

import torch


@torch.inference_mode()
def generate_stream(model, tokenizer, params: Dict, device: str, context_len: int):
    # Only LlamaCppModel is supported: generation is delegated to the model's
    # own streaming implementation. tokenizer and device are unused here.
    return model.generate_streaming(params=params, context_len=context_len)
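

if __name__ == "__main__":
    # Minimal usage sketch of the wrapper above. _StubLlamaCppModel is a
    # hypothetical stand-in for DB-GPT's LlamaCppModel: all the wrapper needs
    # is an object exposing generate_streaming(params=..., context_len=...).
    # The real model class, parameter names, and chunk format may differ.
    class _StubLlamaCppModel:
        def generate_streaming(self, params: Dict, context_len: int):
            prompt = params.get("prompt", "")
            for i in range(3):
                yield f"{prompt} ... chunk {i}"

    demo_params = {"prompt": "Hello", "temperature": 0.7, "max_new_tokens": 64}
    # tokenizer and device are accepted but ignored by this wrapper.
    for chunk in generate_stream(
        _StubLlamaCppModel(),
        tokenizer=None,
        params=demo_params,
        device="cpu",
        context_len=4096,
    ):
        print(chunk)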