community: Add save_model function for openvino reranker and embedding (#19896)

2025-08-23 11:32:10 +00:00 · 2024-04-18 22:20:33 +08:00 · 2024-04-18 22:20:33 +08:00 · 2d6d796040
commit 2d6d796040
parent 9c1d7f2405
4 changed files with 46 additions and 183 deletions
--- a/docs/docs/integrations/document_transformers/openvino_rerank.ipynb
+++ b/docs/docs/integrations/document_transformers/openvino_rerank.ipynb
@ -18,7 +18,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
@ -28,42 +28,7 @@
     "is_executing": true
    }
   },
-   "outputs": [
+   "outputs": [],
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
      "To disable this warning, you can either:\n",
      "\t- Avoid using `tokenizers` before the fork if possible\n",
      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
      "To disable this warning, you can either:\n",
      "\t- Avoid using `tokenizers` before the fork if possible\n",
      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "%pip install --upgrade-strategy eager \"optimum[openvino,nncf]\" --quiet\n",
    "%pip install --upgrade --quiet  faiss-cpu"
@ -404,46 +369,23 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
-   "outputs": [
+   "outputs": [],
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Framework not specified. Using pt to export the model.\n",
      "Using the export variant default. Available variants are:\n",
      "    - default: The default ONNX variant.\n",
      "Using framework PyTorch: 2.2.1+cu121\n",
      "Overriding 1 configuration item(s)\n",
      "\t- use_cache -> False\n",
      "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4193: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
      "  warnings.warn(\n",
      "Compiling the model to CPU ...\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0, 16, 18, 6]\n"
     ]
    }
   ],
   "source": [
    "from langchain.retrievers import ContextualCompressionRetriever\n",
    "from langchain_community.document_compressors.openvino_rerank import OpenVINOReranker\n",
    "\n",
    "model_name = \"BAAI/bge-reranker-large\"\n",
    "\n",
-    "compressor = OpenVINOReranker(model_name_or_path=model_name)\n",
+    "ov_compressor = OpenVINOReranker(model_name_or_path=model_name, top_n=4)\n",
    "compression_retriever = ContextualCompressionRetriever(\n",
-    "    base_compressor=compressor, base_retriever=retriever\n",
+    "    base_compressor=ov_compressor, base_retriever=retriever\n",
    ")\n",
    "\n",
    "compressed_docs = compression_retriever.get_relevant_documents(\n",
@ -461,7 +403,7 @@
    }
   },
   "source": [
-    "After reranking, the top 3 documents are different from the top 3 documents retrieved by the base retriever."
+    "After reranking, the top 4 documents are different from the top 4 documents retrieved by the base retriever."
   ]
  },
  {
@ -532,37 +474,13 @@
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
-   "outputs": [
+   "outputs": [],
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Framework not specified. Using pt to export the model.\n",
      "Using the export variant default. Available variants are:\n",
      "    - default: The default ONNX variant.\n",
      "Using framework PyTorch: 2.2.1+cu121\n",
      "Overriding 1 configuration item(s)\n",
      "\t- use_cache -> False\n",
      "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4193: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "ov_model_dir = \"bge-reranker-large-ov\"\n",
    "if not Path(ov_model_dir).exists():\n",
-    "    from optimum.intel.openvino import OVModelForSequenceClassification\n",
+    "    ov_compressor.save_model(ov_model_dir)"
    "    from transformers import AutoTokenizer\n",
    "\n",
    "    ov_model = OVModelForSequenceClassification.from_pretrained(\n",
    "        model_name, compile=False, export=True\n",
    "    )\n",
    "    tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
    "    ov_model.half()\n",
    "    ov_model.save_pretrained(ov_model_dir)\n",
    "    tokenizer.save_pretrained(ov_model_dir)"
   ]
  },
  {
@ -579,7 +497,7 @@
    }
   ],
   "source": [
-    "compressor = OpenVINOReranker(model_name_or_path=ov_model_dir)"
+    "ov_compressor = OpenVINOReranker(model_name_or_path=ov_model_dir)"
   ]
  },
  {
--- a/docs/docs/integrations/text_embedding/openvino.ipynb
+++ b/docs/docs/integrations/text_embedding/openvino.ipynb
@ -41,41 +41,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
   "id": "ff9be586",
   "metadata": {},
-   "outputs": [
+   "outputs": [],
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/utils/import_utils.py:519: FutureWarning: `is_torch_tpu_available` is deprecated and will be removed in 4.41.0. Please use the `is_torch_xla_available` instead.\n",
      "  warnings.warn(\n",
      "Framework not specified. Using pt to export the model.\n",
      "Using the export variant default. Available variants are:\n",
      "    - default: The default ONNX variant.\n",
      "Using framework PyTorch: 2.2.1+cu121\n",
      "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
      "  warnings.warn(\n",
      "Compiling the model to CPU ...\n"
     ]
    }
   ],
   "source": [
    "model_name = \"sentence-transformers/all-mpnet-base-v2\"\n",
    "model_kwargs = {\"device\": \"CPU\"}\n",
@ -131,7 +100,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
   "id": "bb5e74c0",
   "metadata": {},
   "outputs": [],
@ -150,7 +119,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
   "id": "a6544a65",
   "metadata": {},
   "outputs": [],
@ -159,24 +128,23 @@
    "\n",
    "ov_model_dir = \"all-mpnet-base-v2-ov\"\n",
    "if not Path(ov_model_dir).exists():\n",
-    "    from optimum.intel.openvino import OVModelForFeatureExtraction\n",
+    "    ov_embeddings.save_model(ov_model_dir)"
    "    from transformers import AutoTokenizer\n",
    "\n",
    "    ov_model = OVModelForFeatureExtraction.from_pretrained(\n",
    "        model_name, compile=False, export=True\n",
    "    )\n",
    "    tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
    "    ov_model.half()\n",
    "    ov_model.save_pretrained(ov_model_dir)\n",
    "    tokenizer.save_pretrained(ov_model_dir)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
   "id": "162004c4",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Compiling the model to CPU ...\n"
     ]
    }
   ],
   "source": [
    "ov_embeddings = OpenVINOEmbeddings(\n",
    "    model_name_or_path=ov_model_dir,\n",
@ -196,43 +164,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
   "id": "66f5c6ba-1446-43e1-b012-800d17cef300",
   "metadata": {},
-   "outputs": [
+   "outputs": [],
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/utils/import_utils.py:519: FutureWarning: `is_torch_tpu_available` is deprecated and will be removed in 4.41.0. Please use the `is_torch_xla_available` instead.\n",
      "  warnings.warn(\n",
      "Framework not specified. Using pt to export the model.\n",
      "Using the export variant default. Available variants are:\n",
      "    - default: The default ONNX variant.\n",
      "Using framework PyTorch: 2.2.1+cu121\n",
      "Overriding 1 configuration item(s)\n",
      "\t- use_cache -> False\n",
      "/home/ethan/intel/langchain_test/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead\n",
      "  warnings.warn(\n",
      "Compiling the model to CPU ...\n"
     ]
    }
   ],
   "source": [
    "from langchain_community.embeddings import OpenVINOBgeEmbeddings\n",
    "\n",
--- a/libs/community/langchain_community/document_compressors/openvino_rerank.py
+++ b/libs/community/langchain_community/document_compressors/openvino_rerank.py
@ -155,3 +155,12 @@ class OpenVINOReranker(BaseDocumentCompressor):
            )
            final_results.append(doc)
        return final_results
    def save_model(
        self,
        model_path: str,
    ) -> bool:
        self.ov_model.half()
        self.ov_model.save_pretrained(model_path)
        self.tokenizer.save_pretrained(model_path)
        return True
--- a/libs/community/langchain_community/embeddings/openvino.py
+++ b/libs/community/langchain_community/embeddings/openvino.py
@ -276,6 +276,15 @@ class OpenVINOEmbeddings(BaseModel, Embeddings):
        """
        return self.embed_documents([text])[0]
    def save_model(
        self,
        model_path: str,
    ) -> bool:
        self.ov_model.half()
        self.ov_model.save_pretrained(model_path)
        self.tokenizer.save_pretrained(model_path)
        return True
 class OpenVINOBgeEmbeddings(OpenVINOEmbeddings):
    """OpenVNO BGE embedding models.
@ -285,7 +294,7 @@ class OpenVINOBgeEmbeddings(OpenVINOEmbeddings):
            from langchain_community.embeddings import OpenVINOBgeEmbeddings
-            model_name_or_path = "BAAI/bge-large-en"
+            model_name = "BAAI/bge-large-en"
            model_kwargs = {'device': 'CPU'}
            encode_kwargs = {'normalize_embeddings': True}
            ov = OpenVINOBgeEmbeddings(
@ -295,14 +304,6 @@ class OpenVINOBgeEmbeddings(OpenVINOEmbeddings):
            )
    """
    model_name_or_path: str
    """HuggingFace model id."""
    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Keyword arguments to pass to the model."""
    encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Keyword arguments to pass when calling the `encode` method of the model."""
    show_progress: bool = False
    """Whether to show a progress bar."""
    query_instruction: str = DEFAULT_QUERY_BGE_INSTRUCTION_EN
    """Instruction to use for embedding query."""
    embed_instruction: str = ""