mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-02 13:08:57 +00:00
HF Injection Identifier Refactor
This commit is contained in:
parent
2c656e457c
commit
0f81b3dd2f
@ -1,21 +1,28 @@
|
|||||||
"""Tool for the identification of prompt injection attacks."""
|
"""Tool for the identification of prompt injection attacks."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
from enum import Enum
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from langchain.pydantic_v1 import Field
|
||||||
from langchain.tools.base import BaseTool
|
from langchain.tools.base import BaseTool
|
||||||
from transformers import Pipeline, pipeline
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from transformers import Pipeline
|
||||||
|
|
||||||
|
|
||||||
class PromptInjectionModelOutput(str, Enum):
    """Labels produced by the prompt-injection classification model."""

    LEGIT = "LEGIT"
    INJECTION = "INJECTION"


def _model_default_factory() -> Pipeline:
    """Build the default injection-detection pipeline.

    Imported lazily so that ``transformers`` is only required when the tool
    is actually instantiated without an explicit ``model``.

    Returns:
        A Hugging Face text-classification pipeline backed by
        ``deepset/deberta-v3-base-injection``.

    Raises:
        ImportError: If the ``transformers`` package is not installed.
    """
    try:
        from transformers import pipeline as hf_pipeline
    except ImportError as e:
        raise ImportError(
            "Cannot import transformers, please install with "
            "`pip install transformers`."
        ) from e
    return hf_pipeline(
        "text-classification", model="deepset/deberta-v3-base-injection"
    )
|
||||||
|
|
||||||
|
|
||||||
class HuggingFaceInjectionIdentifier(BaseTool):
    """Tool that uses deberta-v3-base-injection to detect prompt injection attacks."""

    # Stable tool identifier surfaced to the agent.
    name: str = "hugging_face_injection_identifier"
    description: str = (
        "Useful for when you need to ensure that prompt is free of injection attacks. "
        "Input should be any message from the user."
    )
    # Classification pipeline; built lazily so importing this module does not
    # require `transformers` unless the tool is actually used.
    model: Pipeline = Field(default_factory=_model_default_factory)

    def _classify_user_input(self, query: str) -> bool:
        """Return True when *query* looks safe, False when it looks like an injection."""
        predictions = self.model(query)
        # The highest-scoring label decides; max() keeps the first of any ties,
        # matching a descending stable sort followed by indexing element 0.
        top = max(predictions, key=lambda pred: pred["score"])
        return top["label"] != PromptInjectionModelOutput.INJECTION

    def _run(self, query: str) -> str:
        """Use the tool.

        Returns the query unchanged when it is classified as safe.

        Raises:
            ValueError: If the query is classified as a prompt injection attack.
        """
        if self._classify_user_input(query):
            return query
        raise ValueError("Prompt injection attack detected")
|
Loading…
Reference in New Issue
Block a user