mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-02 03:15:11 +00:00
community: Add save_model function for openvino reranker and embedding (#19896)
This commit is contained in:
parent
9c1d7f2405
commit
2d6d796040
@ -18,7 +18,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"jupyter": {
|
"jupyter": {
|
||||||
@ -28,42 +28,7 @@
|
|||||||
"is_executing": true
|
"is_executing": true
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
|
|
||||||
"To disable this warning, you can either:\n",
|
|
||||||
"\t- Avoid using `tokenizers` before the fork if possible\n",
|
|
||||||
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
|
|
||||||
"To disable this warning, you can either:\n",
|
|
||||||
"\t- Avoid using `tokenizers` before the fork if possible\n",
|
|
||||||
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"%pip install --upgrade-strategy eager \"optimum[openvino,nncf]\" --quiet\n",
|
"%pip install --upgrade-strategy eager \"optimum[openvino,nncf]\" --quiet\n",
|
||||||
"%pip install --upgrade --quiet faiss-cpu"
|
"%pip install --upgrade --quiet faiss-cpu"
|
||||||
@ -404,46 +369,23 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"jupyter": {
|
"jupyter": {
|
||||||
"outputs_hidden": false
|
"outputs_hidden": false
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Framework not specified. Using pt to export the model.\n",
|
|
||||||
"Using the export variant default. Available variants are:\n",
|
|
||||||
" - default: The default ONNX variant.\n",
|
|
||||||
"Using framework PyTorch: 2.2.1+cu121\n",
|
|
||||||
"Overriding 1 configuration item(s)\n",
|
|
||||||
"\t- use_cache -> False\n",
|
|
||||||
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4193: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
|
|
||||||
" warnings.warn(\n",
|
|
||||||
"Compiling the model to CPU ...\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"[0, 16, 18, 6]\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"from langchain.retrievers import ContextualCompressionRetriever\n",
|
"from langchain.retrievers import ContextualCompressionRetriever\n",
|
||||||
"from langchain_community.document_compressors.openvino_rerank import OpenVINOReranker\n",
|
"from langchain_community.document_compressors.openvino_rerank import OpenVINOReranker\n",
|
||||||
"\n",
|
"\n",
|
||||||
"model_name = \"BAAI/bge-reranker-large\"\n",
|
"model_name = \"BAAI/bge-reranker-large\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"compressor = OpenVINOReranker(model_name_or_path=model_name)\n",
|
"ov_compressor = OpenVINOReranker(model_name_or_path=model_name, top_n=4)\n",
|
||||||
"compression_retriever = ContextualCompressionRetriever(\n",
|
"compression_retriever = ContextualCompressionRetriever(\n",
|
||||||
" base_compressor=compressor, base_retriever=retriever\n",
|
" base_compressor=ov_compressor, base_retriever=retriever\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"compressed_docs = compression_retriever.get_relevant_documents(\n",
|
"compressed_docs = compression_retriever.get_relevant_documents(\n",
|
||||||
@ -461,7 +403,7 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"After reranking, the top 3 documents are different from the top 3 documents retrieved by the base retriever."
|
"After reranking, the top 4 documents are different from the top 4 documents retrieved by the base retriever."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -532,37 +474,13 @@
|
|||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": 5,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Framework not specified. Using pt to export the model.\n",
|
|
||||||
"Using the export variant default. Available variants are:\n",
|
|
||||||
" - default: The default ONNX variant.\n",
|
|
||||||
"Using framework PyTorch: 2.2.1+cu121\n",
|
|
||||||
"Overriding 1 configuration item(s)\n",
|
|
||||||
"\t- use_cache -> False\n",
|
|
||||||
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4193: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
|
|
||||||
" warnings.warn(\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"from pathlib import Path\n",
|
"from pathlib import Path\n",
|
||||||
"\n",
|
"\n",
|
||||||
"ov_model_dir = \"bge-reranker-large-ov\"\n",
|
"ov_model_dir = \"bge-reranker-large-ov\"\n",
|
||||||
"if not Path(ov_model_dir).exists():\n",
|
"if not Path(ov_model_dir).exists():\n",
|
||||||
" from optimum.intel.openvino import OVModelForSequenceClassification\n",
|
" ov_compressor.save_model(ov_model_dir)"
|
||||||
" from transformers import AutoTokenizer\n",
|
|
||||||
"\n",
|
|
||||||
" ov_model = OVModelForSequenceClassification.from_pretrained(\n",
|
|
||||||
" model_name, compile=False, export=True\n",
|
|
||||||
" )\n",
|
|
||||||
" tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
|
|
||||||
" ov_model.half()\n",
|
|
||||||
" ov_model.save_pretrained(ov_model_dir)\n",
|
|
||||||
" tokenizer.save_pretrained(ov_model_dir)"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -579,7 +497,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"compressor = OpenVINOReranker(model_name_or_path=ov_model_dir)"
|
"ov_compressor = OpenVINOReranker(model_name_or_path=ov_model_dir)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -41,41 +41,10 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": null,
|
||||||
"id": "ff9be586",
|
"id": "ff9be586",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
|
||||||
" from .autonotebook import tqdm as notebook_tqdm\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/utils/import_utils.py:519: FutureWarning: `is_torch_tpu_available` is deprecated and will be removed in 4.41.0. Please use the `is_torch_xla_available` instead.\n",
|
|
||||||
" warnings.warn(\n",
|
|
||||||
"Framework not specified. Using pt to export the model.\n",
|
|
||||||
"Using the export variant default. Available variants are:\n",
|
|
||||||
" - default: The default ONNX variant.\n",
|
|
||||||
"Using framework PyTorch: 2.2.1+cu121\n",
|
|
||||||
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
|
|
||||||
" warnings.warn(\n",
|
|
||||||
"Compiling the model to CPU ...\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"model_name = \"sentence-transformers/all-mpnet-base-v2\"\n",
|
"model_name = \"sentence-transformers/all-mpnet-base-v2\"\n",
|
||||||
"model_kwargs = {\"device\": \"CPU\"}\n",
|
"model_kwargs = {\"device\": \"CPU\"}\n",
|
||||||
@ -131,7 +100,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 7,
|
"execution_count": 6,
|
||||||
"id": "bb5e74c0",
|
"id": "bb5e74c0",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@ -150,7 +119,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 7,
|
||||||
"id": "a6544a65",
|
"id": "a6544a65",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@ -159,24 +128,23 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"ov_model_dir = \"all-mpnet-base-v2-ov\"\n",
|
"ov_model_dir = \"all-mpnet-base-v2-ov\"\n",
|
||||||
"if not Path(ov_model_dir).exists():\n",
|
"if not Path(ov_model_dir).exists():\n",
|
||||||
" from optimum.intel.openvino import OVModelForFeatureExtraction\n",
|
" ov_embeddings.save_model(ov_model_dir)"
|
||||||
" from transformers import AutoTokenizer\n",
|
|
||||||
"\n",
|
|
||||||
" ov_model = OVModelForFeatureExtraction.from_pretrained(\n",
|
|
||||||
" model_name, compile=False, export=True\n",
|
|
||||||
" )\n",
|
|
||||||
" tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
|
|
||||||
" ov_model.half()\n",
|
|
||||||
" ov_model.save_pretrained(ov_model_dir)\n",
|
|
||||||
" tokenizer.save_pretrained(ov_model_dir)"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 8,
|
||||||
"id": "162004c4",
|
"id": "162004c4",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Compiling the model to CPU ...\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"ov_embeddings = OpenVINOEmbeddings(\n",
|
"ov_embeddings = OpenVINOEmbeddings(\n",
|
||||||
" model_name_or_path=ov_model_dir,\n",
|
" model_name_or_path=ov_model_dir,\n",
|
||||||
@ -196,43 +164,10 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": null,
|
||||||
"id": "66f5c6ba-1446-43e1-b012-800d17cef300",
|
"id": "66f5c6ba-1446-43e1-b012-800d17cef300",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
|
||||||
" from .autonotebook import tqdm as notebook_tqdm\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/utils/import_utils.py:519: FutureWarning: `is_torch_tpu_available` is deprecated and will be removed in 4.41.0. Please use the `is_torch_xla_available` instead.\n",
|
|
||||||
" warnings.warn(\n",
|
|
||||||
"Framework not specified. Using pt to export the model.\n",
|
|
||||||
"Using the export variant default. Available variants are:\n",
|
|
||||||
" - default: The default ONNX variant.\n",
|
|
||||||
"Using framework PyTorch: 2.2.1+cu121\n",
|
|
||||||
"Overriding 1 configuration item(s)\n",
|
|
||||||
"\t- use_cache -> False\n",
|
|
||||||
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
|
|
||||||
" warnings.warn(\n",
|
|
||||||
"Compiling the model to CPU ...\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"from langchain_community.embeddings import OpenVINOBgeEmbeddings\n",
|
"from langchain_community.embeddings import OpenVINOBgeEmbeddings\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
@ -155,3 +155,12 @@ class OpenVINOReranker(BaseDocumentCompressor):
|
|||||||
)
|
)
|
||||||
final_results.append(doc)
|
final_results.append(doc)
|
||||||
return final_results
|
return final_results
|
||||||
|
|
||||||
|
def save_model(
|
||||||
|
self,
|
||||||
|
model_path: str,
|
||||||
|
) -> bool:
|
||||||
|
self.ov_model.half()
|
||||||
|
self.ov_model.save_pretrained(model_path)
|
||||||
|
self.tokenizer.save_pretrained(model_path)
|
||||||
|
return True
|
||||||
|
@ -276,6 +276,15 @@ class OpenVINOEmbeddings(BaseModel, Embeddings):
|
|||||||
"""
|
"""
|
||||||
return self.embed_documents([text])[0]
|
return self.embed_documents([text])[0]
|
||||||
|
|
||||||
|
def save_model(
|
||||||
|
self,
|
||||||
|
model_path: str,
|
||||||
|
) -> bool:
|
||||||
|
self.ov_model.half()
|
||||||
|
self.ov_model.save_pretrained(model_path)
|
||||||
|
self.tokenizer.save_pretrained(model_path)
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
class OpenVINOBgeEmbeddings(OpenVINOEmbeddings):
|
class OpenVINOBgeEmbeddings(OpenVINOEmbeddings):
|
||||||
"""OpenVINO BGE embedding models.
|
"""OpenVINO BGE embedding models.
|
||||||
@ -285,7 +294,7 @@ class OpenVINOBgeEmbeddings(OpenVINOEmbeddings):
|
|||||||
|
|
||||||
from langchain_community.embeddings import OpenVINOBgeEmbeddings
|
from langchain_community.embeddings import OpenVINOBgeEmbeddings
|
||||||
|
|
||||||
model_name_or_path = "BAAI/bge-large-en"
|
model_name = "BAAI/bge-large-en"
|
||||||
model_kwargs = {'device': 'CPU'}
|
model_kwargs = {'device': 'CPU'}
|
||||||
encode_kwargs = {'normalize_embeddings': True}
|
encode_kwargs = {'normalize_embeddings': True}
|
||||||
ov = OpenVINOBgeEmbeddings(
|
ov = OpenVINOBgeEmbeddings(
|
||||||
@ -295,14 +304,6 @@ class OpenVINOBgeEmbeddings(OpenVINOEmbeddings):
|
|||||||
)
|
)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
model_name_or_path: str
|
|
||||||
"""HuggingFace model id."""
|
|
||||||
model_kwargs: Dict[str, Any] = Field(default_factory=dict)
|
|
||||||
"""Keyword arguments to pass to the model."""
|
|
||||||
encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
|
|
||||||
"""Keyword arguments to pass when calling the `encode` method of the model."""
|
|
||||||
show_progress: bool = False
|
|
||||||
"""Whether to show a progress bar."""
|
|
||||||
query_instruction: str = DEFAULT_QUERY_BGE_INSTRUCTION_EN
|
query_instruction: str = DEFAULT_QUERY_BGE_INSTRUCTION_EN
|
||||||
"""Instruction to use for embedding query."""
|
"""Instruction to use for embedding query."""
|
||||||
embed_instruction: str = ""
|
embed_instruction: str = ""
|
||||||
|
Loading…
Reference in New Issue
Block a user