mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-03 11:47:49 +00:00
Wfh/json schema evaluation (#12389)
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
This commit is contained in:
parent
afcc12d99e
commit
922d7910ef
@ -221,7 +221,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 8,
|
||||
"id": "7a8f3ec5-1cde-4b0e-80cd-ac0ac290d375",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -261,10 +261,101 @@
|
||||
"print(result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6b15d18e-9b97-434f-905c-70acd4c35aea",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## JsonSchemaEvaluator\n",
|
||||
"\n",
|
||||
"The `JsonSchemaEvaluator` validates a JSON prediction against a provided JSON schema. If the prediction conforms to the schema, it returns a score of True (indicating no errors). Otherwise, it returns a score of False (indicating an error).\n",
|
||||
"\n",
|
||||
"### Overview:\n",
|
||||
"- **Requires Input?**: No\n",
|
||||
"- **Requires Reference?**: Yes (A JSON schema)\n",
|
||||
"- **Score**: True (No errors) or False (Error occurred)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "85afcf33-d2f4-406e-9d8f-15dc0a4772f2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'score': True}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.evaluation import JsonSchemaEvaluator\n",
|
||||
"\n",
|
||||
"evaluator = JsonSchemaEvaluator()\n",
|
||||
"# Equivalently\n",
|
||||
"# evaluator = load_evaluator(\"json_schema_validation\")\n",
|
||||
"\n",
|
||||
"result = evaluator.evaluate_strings(\n",
|
||||
" prediction='{\"name\": \"John\", \"age\": 30}',\n",
|
||||
" reference={\n",
|
||||
" \"type\": \"object\",\n",
|
||||
" \"properties\": {\"name\": {\"type\": \"string\"}, \"age\": {\"type\": \"integer\"}},\n",
|
||||
" },\n",
|
||||
")\n",
|
||||
"print(result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "bb5b89f6-0c87-4335-9091-55fd67a0565f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'score': True}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = evaluator.evaluate_strings(\n",
|
||||
" prediction='{\"name\": \"John\", \"age\": 30}',\n",
|
||||
" reference='{\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"age\": {\"type\": \"integer\"}}}',\n",
|
||||
")\n",
|
||||
"print(result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "ff914d24-36bc-482a-a9ba-259cd0dd2a52",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'score': False, 'reasoning': \"<ValidationError: '30 is less than the minimum of 66'>\"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = evaluator.evaluate_strings(\n",
|
||||
" prediction='{\"name\": \"John\", \"age\": 30}',\n",
|
||||
" reference='{\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"},'\n",
|
||||
" '\"age\": {\"type\": \"integer\", \"minimum\": 66}}}',\n",
|
||||
")\n",
|
||||
"print(result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "85afcf33-d2f4-406e-9d8f-15dc0a4772f2",
|
||||
"id": "b073f12d-4603-481c-8081-fab1af6bfcfe",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
|
@ -74,6 +74,7 @@ from langchain.evaluation.parsing.base import (
|
||||
JsonValidityEvaluator,
|
||||
)
|
||||
from langchain.evaluation.parsing.json_distance import JsonEditDistanceEvaluator
|
||||
from langchain.evaluation.parsing.json_schema import JsonSchemaEvaluator
|
||||
from langchain.evaluation.qa import ContextQAEvalChain, CotQAEvalChain, QAEvalChain
|
||||
from langchain.evaluation.regex_match.base import RegexMatchStringEvaluator
|
||||
from langchain.evaluation.schema import (
|
||||
@ -122,4 +123,5 @@ __all__ = [
|
||||
"JsonValidityEvaluator",
|
||||
"JsonEqualityEvaluator",
|
||||
"JsonEditDistanceEvaluator",
|
||||
"JsonSchemaEvaluator",
|
||||
]
|
||||
|
@ -20,6 +20,7 @@ from langchain.evaluation.parsing.base import (
|
||||
JsonValidityEvaluator,
|
||||
)
|
||||
from langchain.evaluation.parsing.json_distance import JsonEditDistanceEvaluator
|
||||
from langchain.evaluation.parsing.json_schema import JsonSchemaEvaluator
|
||||
from langchain.evaluation.qa import ContextQAEvalChain, CotQAEvalChain, QAEvalChain
|
||||
from langchain.evaluation.regex_match.base import RegexMatchStringEvaluator
|
||||
from langchain.evaluation.schema import EvaluatorType, LLMEvalChain, StringEvaluator
|
||||
@ -88,6 +89,7 @@ _EVALUATOR_MAP: Dict[
|
||||
EvaluatorType.JSON_VALIDITY: JsonValidityEvaluator,
|
||||
EvaluatorType.JSON_EQUALITY: JsonEqualityEvaluator,
|
||||
EvaluatorType.JSON_EDIT_DISTANCE: JsonEditDistanceEvaluator,
|
||||
EvaluatorType.JSON_SCHEMA_VALIDATION: JsonSchemaEvaluator,
|
||||
EvaluatorType.REGEX_MATCH: RegexMatchStringEvaluator,
|
||||
EvaluatorType.EXACT_MATCH: ExactMatchStringEvaluator,
|
||||
}
|
||||
|
@ -51,7 +51,7 @@ class JsonValidityEvaluator(StringEvaluator):
|
||||
prediction: str,
|
||||
input: Optional[str] = None,
|
||||
reference: Optional[str] = None,
|
||||
**kwargs: Any
|
||||
**kwargs: Any,
|
||||
) -> dict:
|
||||
"""Evaluate the prediction string.
|
||||
|
||||
@ -134,7 +134,7 @@ class JsonEqualityEvaluator(StringEvaluator):
|
||||
prediction: str,
|
||||
input: Optional[str] = None,
|
||||
reference: Optional[str] = None,
|
||||
**kwargs: Any
|
||||
**kwargs: Any,
|
||||
) -> dict:
|
||||
"""Evaluate the prediction string.
|
||||
|
||||
|
@ -38,7 +38,7 @@ class JsonEditDistanceEvaluator(StringEvaluator):
|
||||
self,
|
||||
string_distance: Optional[Callable[[str, str], float]] = None,
|
||||
canonicalize: Optional[Callable[[Any], Any]] = None,
|
||||
**kwargs: Any
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
super().__init__()
|
||||
if string_distance is not None:
|
||||
@ -58,7 +58,9 @@ class JsonEditDistanceEvaluator(StringEvaluator):
|
||||
self._canonicalize = canonicalize
|
||||
else:
|
||||
self._canonicalize = lambda x: json.dumps(
|
||||
x, separators=(",", ":"), sort_keys=True # eliminate whitespace
|
||||
x,
|
||||
separators=(",", ":"),
|
||||
sort_keys=True, # eliminate whitespace
|
||||
)
|
||||
|
||||
@property
|
||||
@ -83,7 +85,7 @@ class JsonEditDistanceEvaluator(StringEvaluator):
|
||||
prediction: str,
|
||||
input: Optional[str] = None,
|
||||
reference: Optional[str] = None,
|
||||
**kwargs: Any
|
||||
**kwargs: Any,
|
||||
) -> dict:
|
||||
parsed = self._canonicalize(self._parse_json(prediction))
|
||||
label = self._canonicalize(self._parse_json(reference))
|
||||
|
95
libs/langchain/langchain/evaluation/parsing/json_schema.py
Normal file
95
libs/langchain/langchain/evaluation/parsing/json_schema.py
Normal file
@ -0,0 +1,95 @@
|
||||
from typing import Any, Union
|
||||
|
||||
from langchain.evaluation.schema import StringEvaluator
|
||||
from langchain.output_parsers.json import parse_json_markdown
|
||||
|
||||
|
||||
class JsonSchemaEvaluator(StringEvaluator):
    """An evaluator that validates a JSON prediction against a JSON schema reference.

    This evaluator checks if a given JSON prediction conforms to the provided JSON schema.
    If the prediction is valid, the score is True (no errors). Otherwise, the score is False (error occurred).

    Attributes:
        requires_input (bool): Whether the evaluator requires input.
        requires_reference (bool): Whether the evaluator requires reference.
        evaluation_name (str): The name of the evaluation.

    Examples:
        evaluator = JsonSchemaEvaluator()
        result = evaluator.evaluate_strings(
            prediction='{"name": "John", "age": 30}',
            reference={
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "age": {"type": "integer"}
                }
            }
        )
        assert result["score"] is True

    """  # noqa: E501

    def __init__(self, **kwargs: Any) -> None:
        """Initializes the JsonSchemaEvaluator.

        Args:
            **kwargs: Additional keyword arguments. They are accepted but not
                used by this initializer.

        Raises:
            ImportError: If the jsonschema package is not installed.
        """
        super().__init__()
        # Fail at construction time, not at first evaluation, when the
        # optional dependency is missing.
        try:
            import jsonschema  # noqa: F401
        except ImportError as e:
            # Chain the original error so the underlying import failure
            # stays visible in the traceback.
            raise ImportError(
                "The JsonSchemaEvaluator requires the jsonschema package."
                " Please install it with `pip install jsonschema`."
            ) from e

    @property
    def requires_input(self) -> bool:
        """Returns whether the evaluator requires input."""
        return False

    @property
    def requires_reference(self) -> bool:
        """Returns whether the evaluator requires reference."""
        return True

    @property
    def evaluation_name(self) -> str:
        """Returns the name of the evaluation."""
        return "json_schema_validation"

    def _parse_json(self, node: Any) -> Union[dict, list, None, float, bool, int, str]:
        """Normalize a prediction or schema into a plain Python value.

        Args:
            node: A string, an object exposing a callable ``schema``
                attribute (e.g. a Pydantic model), or any other value.

        Returns:
            The result of ``parse_json_markdown`` for strings, the result of
            calling ``node.schema()`` for objects that expose it, and
            ``node`` itself otherwise.
        """
        if isinstance(node, str):
            return parse_json_markdown(node)
        schema_getter = getattr(node, "schema", None)
        if callable(schema_getter):
            # Pydantic model
            return schema_getter()
        return node

    def _validate(self, prediction: Any, schema: Any) -> dict:
        """Validate ``prediction`` against ``schema`` using jsonschema.

        Args:
            prediction: The already-parsed instance to check.
            schema: The already-parsed JSON schema.

        Returns:
            ``{"score": True}`` when validation succeeds, otherwise
            ``{"score": False, "reasoning": ...}`` where the reasoning is the
            ``repr`` of the ``ValidationError``.
        """
        # Imported lazily because jsonschema is an optional dependency.
        from jsonschema import ValidationError, validate

        try:
            validate(instance=prediction, schema=schema)
        except ValidationError as e:
            return {"score": False, "reasoning": repr(e)}
        return {"score": True}

    def _evaluate_strings(
        self,
        prediction: Union[str, Any],
        input: Union[str, Any] = None,
        reference: Union[str, Any] = None,
        **kwargs: Any,
    ) -> dict:
        """Score a prediction by validating it against the reference schema.

        Args:
            prediction: The JSON prediction, as a string or an object.
            input: Not used by this evaluator.
            reference: The JSON schema, as a string or an object.
            **kwargs: Not used by this evaluator.

        Returns:
            The dictionary produced by ``_validate``: a boolean ``score``
            and, on failure, a ``reasoning`` string.
        """
        parsed_prediction = self._parse_json(prediction)
        schema = self._parse_json(reference)
        return self._validate(parsed_prediction, schema)
|
@ -6,7 +6,7 @@ import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from enum import Enum
|
||||
from functools import partial
|
||||
from typing import Any, Optional, Sequence, Tuple
|
||||
from typing import Any, Optional, Sequence, Tuple, Union
|
||||
from warnings import warn
|
||||
|
||||
from langchain.chains.base import Chain
|
||||
@ -66,6 +66,8 @@ class EvaluatorType(str, Enum):
|
||||
"""Check if a prediction is equal to a reference JSON."""
|
||||
JSON_EDIT_DISTANCE = "json_edit_distance"
|
||||
"""Compute the edit distance between two JSON strings after canonicalization."""
|
||||
JSON_SCHEMA_VALIDATION = "json_schema_validation"
|
||||
"""Check if a prediction is valid JSON according to a JSON schema."""
|
||||
|
||||
|
||||
class LLMEvalChain(Chain):
|
||||
@ -144,9 +146,9 @@ class StringEvaluator(_EvalArgsMixin, ABC):
|
||||
def _evaluate_strings(
|
||||
self,
|
||||
*,
|
||||
prediction: str,
|
||||
reference: Optional[str] = None,
|
||||
input: Optional[str] = None,
|
||||
prediction: Union[str, Any],
|
||||
reference: Optional[Union[str, Any]] = None,
|
||||
input: Optional[Union[str, Any]] = None,
|
||||
**kwargs: Any,
|
||||
) -> dict:
|
||||
"""Evaluate Chain or LLM output, based on optional input and label.
|
||||
@ -167,9 +169,9 @@ class StringEvaluator(_EvalArgsMixin, ABC):
|
||||
async def _aevaluate_strings(
|
||||
self,
|
||||
*,
|
||||
prediction: str,
|
||||
reference: Optional[str] = None,
|
||||
input: Optional[str] = None,
|
||||
prediction: Union[str, Any],
|
||||
reference: Optional[Union[str, Any]] = None,
|
||||
input: Optional[Union[str, Any]] = None,
|
||||
**kwargs: Any,
|
||||
) -> dict:
|
||||
"""Asynchronously evaluate Chain or LLM output, based on optional input and label.
|
||||
|
30
libs/langchain/poetry.lock
generated
30
libs/langchain/poetry.lock
generated
@ -3790,7 +3790,6 @@ optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*"
|
||||
files = [
|
||||
{file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"},
|
||||
{file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -4598,16 +4597,6 @@ files = [
|
||||
{file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"},
|
||||
{file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"},
|
||||
{file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"},
|
||||
{file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"},
|
||||
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"},
|
||||
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"},
|
||||
@ -7728,7 +7717,6 @@ files = [
|
||||
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
|
||||
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
|
||||
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
|
||||
{file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
|
||||
{file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
|
||||
{file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
|
||||
@ -7736,15 +7724,8 @@ files = [
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
|
||||
{file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
|
||||
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
|
||||
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
|
||||
@ -7761,7 +7742,6 @@ files = [
|
||||
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
|
||||
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
|
||||
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
|
||||
{file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
|
||||
{file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
|
||||
{file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
|
||||
{file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
|
||||
@ -7769,7 +7749,6 @@ files = [
|
||||
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
|
||||
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
|
||||
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
|
||||
{file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
|
||||
{file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
|
||||
{file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
|
||||
{file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
|
||||
@ -8733,11 +8712,6 @@ files = [
|
||||
{file = "scikit_learn-1.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f66eddfda9d45dd6cadcd706b65669ce1df84b8549875691b1f403730bdef217"},
|
||||
{file = "scikit_learn-1.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6448c37741145b241eeac617028ba6ec2119e1339b1385c9720dae31367f2be"},
|
||||
{file = "scikit_learn-1.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:c413c2c850241998168bbb3bd1bb59ff03b1195a53864f0b80ab092071af6028"},
|
||||
{file = "scikit_learn-1.3.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ef540e09873e31569bc8b02c8a9f745ee04d8e1263255a15c9969f6f5caa627f"},
|
||||
{file = "scikit_learn-1.3.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:9147a3a4df4d401e618713880be023e36109c85d8569b3bf5377e6cd3fecdeac"},
|
||||
{file = "scikit_learn-1.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2cd3634695ad192bf71645702b3df498bd1e246fc2d529effdb45a06ab028b4"},
|
||||
{file = "scikit_learn-1.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c275a06c5190c5ce00af0acbb61c06374087949f643ef32d355ece12c4db043"},
|
||||
{file = "scikit_learn-1.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:0e1aa8f206d0de814b81b41d60c1ce31f7f2c7354597af38fae46d9c47c45122"},
|
||||
{file = "scikit_learn-1.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:52b77cc08bd555969ec5150788ed50276f5ef83abb72e6f469c5b91a0009bbca"},
|
||||
{file = "scikit_learn-1.3.1-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:a683394bc3f80b7c312c27f9b14ebea7766b1f0a34faf1a2e9158d80e860ec26"},
|
||||
{file = "scikit_learn-1.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a15d964d9eb181c79c190d3dbc2fff7338786bf017e9039571418a1d53dab236"},
|
||||
@ -11041,7 +11015,7 @@ cli = ["typer"]
|
||||
cohere = ["cohere"]
|
||||
docarray = ["docarray"]
|
||||
embeddings = ["sentence-transformers"]
|
||||
extended-testing = ["aiosqlite", "amazon-textract-caller", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "dashvector", "esprima", "faiss-cpu", "feedparser", "geopandas", "gitpython", "google-cloud-documentai", "gql", "html2text", "jinja2", "jq", "lxml", "markdownify", "motor", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "openai", "openai", "openapi-pydantic", "pandas", "pdfminer-six", "pgvector", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict"]
|
||||
extended-testing = ["aiosqlite", "amazon-textract-caller", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "dashvector", "esprima", "faiss-cpu", "feedparser", "geopandas", "gitpython", "google-cloud-documentai", "gql", "html2text", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "openai", "openai", "openapi-pydantic", "pandas", "pdfminer-six", "pgvector", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict"]
|
||||
javascript = ["esprima"]
|
||||
llms = ["clarifai", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "openlm", "torch", "transformers"]
|
||||
openai = ["openai", "tiktoken"]
|
||||
@ -11051,4 +11025,4 @@ text-helpers = ["chardet"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.8.1,<4.0"
|
||||
content-hash = "6bf06e81190f228675452f1a7581614898c983d27f2d56ae9ddd92119c114b03"
|
||||
content-hash = "19dcb9abd0bda24034e36b571e7ac04d432f47281a80fdc5d4a9810add60966b"
|
||||
|
@ -114,6 +114,7 @@ cassio = {version = "^0.1.0", optional = true}
|
||||
rdflib = {version = "^6.3.2", optional = true}
|
||||
sympy = {version = "^1.12", optional = true}
|
||||
rapidfuzz = {version = "^3.1.1", optional = true}
|
||||
jsonschema = {version = ">1", optional = true}
|
||||
langsmith = "~0.0.52"
|
||||
rank-bm25 = {version = "^0.2.2", optional = true}
|
||||
amadeus = {version = ">=8.1.0", optional = true}
|
||||
@ -350,6 +351,7 @@ extended_testing = [
|
||||
"openai",
|
||||
"sympy",
|
||||
"rapidfuzz",
|
||||
"jsonschema",
|
||||
"openai",
|
||||
"rank-bm25",
|
||||
"geopandas",
|
||||
|
@ -0,0 +1,77 @@
|
||||
import pytest
|
||||
|
||||
from langchain.evaluation.parsing.json_schema import JsonSchemaEvaluator
|
||||
|
||||
|
||||
@pytest.fixture
def json_schema_evaluator() -> JsonSchemaEvaluator:
    """Provide a newly constructed ``JsonSchemaEvaluator`` to the tests."""
    evaluator = JsonSchemaEvaluator()
    return evaluator
|
||||
|
||||
|
||||
@pytest.mark.requires("jsonschema")
def test_json_schema_evaluator_requires_input(
    json_schema_evaluator: JsonSchemaEvaluator,
) -> None:
    """The evaluator reports that it does not require an input."""
    needs_input = json_schema_evaluator.requires_input
    assert needs_input is False
|
||||
|
||||
|
||||
@pytest.mark.requires("jsonschema")
def test_json_schema_evaluator_requires_reference(
    json_schema_evaluator: JsonSchemaEvaluator,
) -> None:
    """The evaluator reports that it requires a reference (the schema)."""
    needs_reference = json_schema_evaluator.requires_reference
    assert needs_reference is True
|
||||
|
||||
|
||||
@pytest.mark.requires("jsonschema")
def test_json_schema_evaluator_evaluation_name(
    json_schema_evaluator: JsonSchemaEvaluator,
) -> None:
    """The evaluator exposes the expected evaluation name."""
    name = json_schema_evaluator.evaluation_name
    assert name == "json_schema_validation"
|
||||
|
||||
|
||||
@pytest.mark.requires("jsonschema")
def test_json_schema_evaluator_valid_prediction(
    json_schema_evaluator: JsonSchemaEvaluator,
) -> None:
    """A prediction that conforms to the schema scores True."""
    schema = {
        "type": "object",
        "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
    }
    outcome = json_schema_evaluator._evaluate_strings(
        prediction='{"name": "John", "age": 30}',
        reference=schema,
    )
    assert outcome["score"] is True
|
||||
|
||||
|
||||
@pytest.mark.requires("jsonschema")
def test_json_schema_evaluator_invalid_prediction(
    json_schema_evaluator: JsonSchemaEvaluator,
) -> None:
    """A prediction with a wrongly typed field scores False with reasoning."""
    schema = {
        "type": "object",
        "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
    }
    # "age" is given as a string although the schema demands an integer.
    outcome = json_schema_evaluator._evaluate_strings(
        prediction='{"name": "John", "age": "30"}',
        reference=schema,
    )
    assert outcome["score"] is False
    assert "reasoning" in outcome
|
||||
|
||||
|
||||
@pytest.mark.requires("jsonschema")
def test_json_schema_evaluator_missing_property(
    json_schema_evaluator: JsonSchemaEvaluator,
) -> None:
    """A prediction lacking a required property scores False with reasoning."""
    schema = {
        "type": "object",
        "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
        "required": ["name", "age"],
    }
    # The required "age" property is absent from the prediction.
    outcome = json_schema_evaluator._evaluate_strings(
        prediction='{"name": "John"}',
        reference=schema,
    )
    assert outcome["score"] is False
    assert "reasoning" in outcome
|
Loading…
Reference in New Issue
Block a user