community: Add save_model function for openvino reranker and embedding (#19896)

This commit is contained in:
Ethan Yang
2024-04-18 22:20:33 +08:00
committed by GitHub
parent 9c1d7f2405
commit 2d6d796040
4 changed files with 46 additions and 183 deletions

View File

@@ -18,7 +18,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {
"collapsed": false,
"jupyter": {
@@ -28,42 +28,7 @@
"is_executing": true
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"outputs": [],
"source": [
"%pip install --upgrade-strategy eager \"optimum[openvino,nncf]\" --quiet\n",
"%pip install --upgrade --quiet faiss-cpu"
@@ -404,46 +369,23 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Framework not specified. Using pt to export the model.\n",
"Using the export variant default. Available variants are:\n",
" - default: The default ONNX variant.\n",
"Using framework PyTorch: 2.2.1+cu121\n",
"Overriding 1 configuration item(s)\n",
"\t- use_cache -> False\n",
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4193: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
" warnings.warn(\n",
"Compiling the model to CPU ...\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0, 16, 18, 6]\n"
]
}
],
"outputs": [],
"source": [
"from langchain.retrievers import ContextualCompressionRetriever\n",
"from langchain_community.document_compressors.openvino_rerank import OpenVINOReranker\n",
"\n",
"model_name = \"BAAI/bge-reranker-large\"\n",
"\n",
"compressor = OpenVINOReranker(model_name_or_path=model_name)\n",
"ov_compressor = OpenVINOReranker(model_name_or_path=model_name, top_n=4)\n",
"compression_retriever = ContextualCompressionRetriever(\n",
" base_compressor=compressor, base_retriever=retriever\n",
" base_compressor=ov_compressor, base_retriever=retriever\n",
")\n",
"\n",
"compressed_docs = compression_retriever.get_relevant_documents(\n",
@@ -461,7 +403,7 @@
}
},
"source": [
"After reranking, the top 3 documents are different from the top 3 documents retrieved by the base retriever."
"After reranking, the top 4 documents are different from the top 4 documents retrieved by the base retriever."
]
},
{
@@ -532,37 +474,13 @@
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Framework not specified. Using pt to export the model.\n",
"Using the export variant default. Available variants are:\n",
" - default: The default ONNX variant.\n",
"Using framework PyTorch: 2.2.1+cu121\n",
"Overriding 1 configuration item(s)\n",
"\t- use_cache -> False\n",
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4193: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
" warnings.warn(\n"
]
}
],
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"ov_model_dir = \"bge-reranker-large-ov\"\n",
"if not Path(ov_model_dir).exists():\n",
" from optimum.intel.openvino import OVModelForSequenceClassification\n",
" from transformers import AutoTokenizer\n",
"\n",
" ov_model = OVModelForSequenceClassification.from_pretrained(\n",
" model_name, compile=False, export=True\n",
" )\n",
" tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
" ov_model.half()\n",
" ov_model.save_pretrained(ov_model_dir)\n",
" tokenizer.save_pretrained(ov_model_dir)"
" ov_compressor.save_model(ov_model_dir)"
]
},
{
@@ -579,7 +497,7 @@
}
],
"source": [
"compressor = OpenVINOReranker(model_name_or_path=ov_model_dir)"
"ov_compressor = OpenVINOReranker(model_name_or_path=ov_model_dir)"
]
},
{

View File

@@ -41,41 +41,10 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "ff9be586",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/utils/import_utils.py:519: FutureWarning: `is_torch_tpu_available` is deprecated and will be removed in 4.41.0. Please use the `is_torch_xla_available` instead.\n",
" warnings.warn(\n",
"Framework not specified. Using pt to export the model.\n",
"Using the export variant default. Available variants are:\n",
" - default: The default ONNX variant.\n",
"Using framework PyTorch: 2.2.1+cu121\n",
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
" warnings.warn(\n",
"Compiling the model to CPU ...\n"
]
}
],
"outputs": [],
"source": [
"model_name = \"sentence-transformers/all-mpnet-base-v2\"\n",
"model_kwargs = {\"device\": \"CPU\"}\n",
@@ -131,7 +100,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"id": "bb5e74c0",
"metadata": {},
"outputs": [],
@@ -150,7 +119,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"id": "a6544a65",
"metadata": {},
"outputs": [],
@@ -159,24 +128,23 @@
"\n",
"ov_model_dir = \"all-mpnet-base-v2-ov\"\n",
"if not Path(ov_model_dir).exists():\n",
" from optimum.intel.openvino import OVModelForFeatureExtraction\n",
" from transformers import AutoTokenizer\n",
"\n",
" ov_model = OVModelForFeatureExtraction.from_pretrained(\n",
" model_name, compile=False, export=True\n",
" )\n",
" tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
" ov_model.half()\n",
" ov_model.save_pretrained(ov_model_dir)\n",
" tokenizer.save_pretrained(ov_model_dir)"
" ov_embeddings.save_model(ov_model_dir)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"id": "162004c4",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Compiling the model to CPU ...\n"
]
}
],
"source": [
"ov_embeddings = OpenVINOEmbeddings(\n",
" model_name_or_path=ov_model_dir,\n",
@@ -196,43 +164,10 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "66f5c6ba-1446-43e1-b012-800d17cef300",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/utils/import_utils.py:519: FutureWarning: `is_torch_tpu_available` is deprecated and will be removed in 4.41.0. Please use the `is_torch_xla_available` instead.\n",
" warnings.warn(\n",
"Framework not specified. Using pt to export the model.\n",
"Using the export variant default. Available variants are:\n",
" - default: The default ONNX variant.\n",
"Using framework PyTorch: 2.2.1+cu121\n",
"Overriding 1 configuration item(s)\n",
"\t- use_cache -> False\n",
"/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
" warnings.warn(\n",
"Compiling the model to CPU ...\n"
]
}
],
"outputs": [],
"source": [
"from langchain_community.embeddings import OpenVINOBgeEmbeddings\n",
"\n",