mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-26 08:33:49 +00:00
fix RAG with quantized embeddings notebook (#24422)
1. Fix HuggingfacePipeline import error to newer partner package 2. Switch to IPEXModelForCausalLM for performance There are no dependency changes since optimum intel is also needed for QuantizedBiEncoderEmbeddings --------- Co-authored-by: ccurme <chester.curme@gmail.com>
This commit is contained in:
parent
40c02cedaf
commit
37b89fb7fc
@ -370,13 +370,14 @@
|
||||
],
|
||||
"source": [
|
||||
"import torch\n",
|
||||
"from langchain.llms.huggingface_pipeline import HuggingFacePipeline\n",
|
||||
"from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline\n",
|
||||
"from langchain_huggingface.llms import HuggingFacePipeline\n",
|
||||
"from optimum.intel.ipex import IPEXModelForCausalLM\n",
|
||||
"from transformers import AutoTokenizer, pipeline\n",
|
||||
"\n",
|
||||
"model_id = \"Intel/neural-chat-7b-v3-3\"\n",
|
||||
"tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
|
||||
"model = AutoModelForCausalLM.from_pretrained(\n",
|
||||
" model_id, device_map=\"auto\", torch_dtype=torch.bfloat16\n",
|
||||
"model = IPEXModelForCausalLM.from_pretrained(\n",
|
||||
" model_id, torch_dtype=torch.bfloat16, export=True\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"pipe = pipeline(\"text-generation\", model=model, tokenizer=tokenizer, max_new_tokens=100)\n",
|
||||
@ -581,7 +582,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.18"
|
||||
"version": "3.10.14"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
Loading…
Reference in New Issue
Block a user