community: Add `save_model` function for OpenVINO reranker and embeddings (#19896)

2025-09-15 14:36:54 +00:00 · 2024-04-18 22:20:33 +08:00
parent 9c1d7f2405
commit 2d6d796040
4 changed files with 46 additions and 183 deletions
--- a/docs/docs/integrations/document_transformers/openvino_rerank.ipynb
+++ b/docs/docs/integrations/document_transformers/openvino_rerank.ipynb
@@ -18,7 +18,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
@@ -28,42 +28,7 @@
     "is_executing": true
    }
   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
-      "To disable this warning, you can either:\n",
-      "\t- Avoid using `tokenizers` before the fork if possible\n",
-      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Note: you may need to restart the kernel to use updated packages.\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
-      "To disable this warning, you can either:\n",
-      "\t- Avoid using `tokenizers` before the fork if possible\n",
-      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Note: you may need to restart the kernel to use updated packages.\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "%pip install --upgrade-strategy eager \"optimum[openvino,nncf]\" --quiet\n",
    "%pip install --upgrade --quiet  faiss-cpu"
@@ -404,46 +369,23 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Framework not specified. Using pt to export the model.\n",
-      "Using the export variant default. Available variants are:\n",
-      "    - default: The default ONNX variant.\n",
-      "Using framework PyTorch: 2.2.1+cu121\n",
-      "Overriding 1 configuration item(s)\n",
-      "\t- use_cache -> False\n",
-      "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4193: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
-      "  warnings.warn(\n",
-      "Compiling the model to CPU ...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[0, 16, 18, 6]\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "from langchain.retrievers import ContextualCompressionRetriever\n",
    "from langchain_community.document_compressors.openvino_rerank import OpenVINOReranker\n",
    "\n",
    "model_name = \"BAAI/bge-reranker-large\"\n",
    "\n",
-    "compressor = OpenVINOReranker(model_name_or_path=model_name)\n",
+    "ov_compressor = OpenVINOReranker(model_name_or_path=model_name, top_n=4)\n",
    "compression_retriever = ContextualCompressionRetriever(\n",
-    "    base_compressor=compressor, base_retriever=retriever\n",
+    "    base_compressor=ov_compressor, base_retriever=retriever\n",
    ")\n",
    "\n",
    "compressed_docs = compression_retriever.get_relevant_documents(\n",
@@ -461,7 +403,7 @@
    }
   },
   "source": [
-    "After reranking, the top 3 documents are different from the top 3 documents retrieved by the base retriever."
+    "After reranking, the top 4 documents are different from the top 4 documents retrieved by the base retriever."
   ]
  },
  {
@@ -532,37 +474,13 @@
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Framework not specified. Using pt to export the model.\n",
-      "Using the export variant default. Available variants are:\n",
-      "    - default: The default ONNX variant.\n",
-      "Using framework PyTorch: 2.2.1+cu121\n",
-      "Overriding 1 configuration item(s)\n",
-      "\t- use_cache -> False\n",
-      "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4193: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
-      "  warnings.warn(\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "ov_model_dir = \"bge-reranker-large-ov\"\n",
    "if not Path(ov_model_dir).exists():\n",
-    "    from optimum.intel.openvino import OVModelForSequenceClassification\n",
-    "    from transformers import AutoTokenizer\n",
-    "\n",
-    "    ov_model = OVModelForSequenceClassification.from_pretrained(\n",
-    "        model_name, compile=False, export=True\n",
-    "    )\n",
-    "    tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
-    "    ov_model.half()\n",
-    "    ov_model.save_pretrained(ov_model_dir)\n",
-    "    tokenizer.save_pretrained(ov_model_dir)"
+    "    ov_compressor.save_model(ov_model_dir)"
   ]
  },
  {
@@ -579,7 +497,7 @@
    }
   ],
   "source": [
-    "compressor = OpenVINOReranker(model_name_or_path=ov_model_dir)"
+    "ov_compressor = OpenVINOReranker(model_name_or_path=ov_model_dir)"
   ]
  },
  {
--- a/docs/docs/integrations/text_embedding/openvino.ipynb
+++ b/docs/docs/integrations/text_embedding/openvino.ipynb
@@ -41,41 +41,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
   "id": "ff9be586",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/utils/import_utils.py:519: FutureWarning: `is_torch_tpu_available` is deprecated and will be removed in 4.41.0. Please use the `is_torch_xla_available` instead.\n",
-      "  warnings.warn(\n",
-      "Framework not specified. Using pt to export the model.\n",
-      "Using the export variant default. Available variants are:\n",
-      "    - default: The default ONNX variant.\n",
-      "Using framework PyTorch: 2.2.1+cu121\n",
-      "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
-      "  warnings.warn(\n",
-      "Compiling the model to CPU ...\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "model_name = \"sentence-transformers/all-mpnet-base-v2\"\n",
    "model_kwargs = {\"device\": \"CPU\"}\n",
@@ -131,7 +100,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
   "id": "bb5e74c0",
   "metadata": {},
   "outputs": [],
@@ -150,7 +119,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
   "id": "a6544a65",
   "metadata": {},
   "outputs": [],
@@ -159,24 +128,23 @@
    "\n",
    "ov_model_dir = \"all-mpnet-base-v2-ov\"\n",
    "if not Path(ov_model_dir).exists():\n",
-    "    from optimum.intel.openvino import OVModelForFeatureExtraction\n",
-    "    from transformers import AutoTokenizer\n",
-    "\n",
-    "    ov_model = OVModelForFeatureExtraction.from_pretrained(\n",
-    "        model_name, compile=False, export=True\n",
-    "    )\n",
-    "    tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
-    "    ov_model.half()\n",
-    "    ov_model.save_pretrained(ov_model_dir)\n",
-    "    tokenizer.save_pretrained(ov_model_dir)"
+    "    ov_embeddings.save_model(ov_model_dir)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
   "id": "162004c4",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Compiling the model to CPU ...\n"
+     ]
+    }
+   ],
   "source": [
    "ov_embeddings = OpenVINOEmbeddings(\n",
    "    model_name_or_path=ov_model_dir,\n",
@@ -196,43 +164,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
   "id": "66f5c6ba-1446-43e1-b012-800d17cef300",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/utils/import_utils.py:519: FutureWarning: `is_torch_tpu_available` is deprecated and will be removed in 4.41.0. Please use the `is_torch_xla_available` instead.\n",
-      "  warnings.warn(\n",
-      "Framework not specified. Using pt to export the model.\n",
-      "Using the export variant default. Available variants are:\n",
-      "    - default: The default ONNX variant.\n",
-      "Using framework PyTorch: 2.2.1+cu121\n",
-      "Overriding 1 configuration item(s)\n",
-      "\t- use_cache -> False\n",
-      "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
-      "  warnings.warn(\n",
-      "Compiling the model to CPU ...\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "from langchain_community.embeddings import OpenVINOBgeEmbeddings\n",
    "\n",