Compare commits


1 Commit

Author               SHA1         Message       Date
William Fu-Hinthorn  f780d90ed2   Json Schema   2023-10-04 14:51:54 -07:00


@@ -4,6 +4,7 @@ from typing import Any, Callable, Optional, Union, cast
 from langchain.evaluation.schema import StringEvaluator
 from langchain.output_parsers.json import parse_json_markdown
+import json
 class JsonValidityEvaluator(StringEvaluator):
@@ -51,7 +52,7 @@ class JsonValidityEvaluator(StringEvaluator):
         prediction: str,
         input: Optional[str] = None,
         reference: Optional[str] = None,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> dict:
         """Evaluate the prediction string.
@@ -131,7 +132,7 @@ class JsonEqualityEvaluator(StringEvaluator):
         prediction: str,
         input: Optional[str] = None,
         reference: Optional[str] = None,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> dict:
         """Evaluate the prediction string.
@@ -151,3 +152,92 @@ class JsonEqualityEvaluator(StringEvaluator):
             parsed = sorted(parsed, key=lambda x: str(x))
             label = sorted(label, key=lambda x: str(x))
         return {"score": self.operator(parsed, label)}
+
+
+class JsonSchemaEvaluator(StringEvaluator):
+    """Evaluates whether the prediction conforms to a given JSON schema.
+
+    This evaluator checks if the prediction, when parsed as JSON, conforms to a
+    specified JSON schema. It does not require an input string, but does require
+    a reference string which should be the JSON schema.
+
+    Attributes:
+        requires_input (bool): Whether this evaluator requires an
+            input string. Always False.
+        requires_reference (bool): Whether this evaluator requires
+            a reference string. Always True.
+        evaluation_name (str): The name of the evaluation metric.
+            Always "json_schema".
+
+    Examples:
+        >>> evaluator = JsonSchemaEvaluator()
+        >>> schema = {
+        ...     "type": "object",
+        ...     "properties": {
+        ...         "name": {"type": "string"},
+        ...         "age": {"type": "integer"}
+        ...     },
+        ...     "required": ["name", "age"]
+        ... }
+        >>> evaluator.evaluate_strings('{"name": "John", "age": 30}', reference=schema)
+        {'score': 1}
+        >>> evaluator.evaluate_strings('{"name": "John", "age": "30"}', reference=schema)
+        {'score': 0, 'reasoning': '30 is not of type \'integer\''}
+    """
+
+    def __init__(self, **kwargs: Any) -> None:
+        super().__init__()
+
+    @property
+    def requires_input(self) -> bool:
+        return False
+
+    @property
+    def requires_reference(self) -> bool:
+        return True
+
+    @property
+    def evaluation_name(self) -> str:
+        return "json_schema"
+
+    def _evaluate_strings(
+        self,
+        prediction: str,
+        input: Optional[str] = None,
+        reference: Optional[str] = None,
+        **kwargs: Any,
+    ) -> dict:
+        """Evaluate the prediction string.
+
+        Args:
+            prediction (str): The prediction string to evaluate.
+            input (str, optional): Not used in this evaluator.
+            reference (str): The JSON schema to validate against.
+
+        Returns:
+            dict: A dictionary containing the evaluation score. The score is 1 if
+                the prediction conforms to the schema, and 0 otherwise.
+                If the prediction does not conform to the schema, the dictionary
+                also contains a "reasoning" field with the error message.
+        """
+        try:
+            import jsonschema
+        except ImportError:
+            raise ImportError(
+                "The jsonschema package is required for the JsonSchemaEvaluator. "
+                "You can install it with `pip install jsonschema`."
+            )
+        if isinstance(reference, str):
+            schema_json = parse_json_markdown(reference)
+        else:
+            schema_json = reference
+        try:
+            prediction_json = parse_json_markdown(prediction)
+            # Validate the prediction against the schema
+            jsonschema.validate(instance=prediction_json, schema=schema_json)
+            return {"score": 1}
+        except jsonschema.exceptions.ValidationError as e:
+            return {"score": 0, "reasoning": str(e)}
+        except json.JSONDecodeError as e:
+            return {"score": 0, "reasoning": f"JSON Decode Error: {str(e)}"}