Add progress bar + runner fixes (#10348)

- Add progress bar to eval runs
- Use thread pool for concurrency
- Update some error messages
- Friendlier project name
- Print out quantiles of the final stats

Closes LS-902
2025-08-20 09:57:32 +00:00 · 2023-09-08 07:45:28 -07:00 · 2023-09-08 07:45:28 -07:00 · 46e9abdc75
commit 46e9abdc75
parent 0672533b3e
8 changed files with 1098 additions and 515 deletions
--- a/libs/langchain/langchain/callbacks/tracers/evaluation.py
+++ b/libs/langchain/langchain/callbacks/tracers/evaluation.py
@ -2,29 +2,20 @@
 from __future__ import annotations

 import logging
-from concurrent.futures import Future, ThreadPoolExecutor, wait
+from concurrent.futures import Future, ThreadPoolExecutor
 from typing import Any, Dict, List, Optional, Sequence, Set, Union
 from uuid import UUID

 import langsmith
 from langsmith import schemas as langsmith_schemas

-from langchain.callbacks.manager import tracing_v2_enabled
+from langchain.callbacks import manager
+from langchain.callbacks.tracers import langchain as langchain_tracer
 from langchain.callbacks.tracers.base import BaseTracer
-from langchain.callbacks.tracers.langchain import _get_client
 from langchain.callbacks.tracers.schemas import Run

 logger = logging.getLogger(__name__)

-_TRACERS: List[EvaluatorCallbackHandler] = []
-
-
-def wait_for_all_evaluators() -> None:
-    """Wait for all tracers to finish."""
-    global _TRACERS
-    for tracer in _TRACERS:
-        tracer.wait_for_futures()
-

 class EvaluatorCallbackHandler(BaseTracer):
    """A tracer that runs a run evaluator whenever a run is persisted.
@ -79,17 +70,13 @@ class EvaluatorCallbackHandler(BaseTracer):
        self.example_id = (
            UUID(example_id) if isinstance(example_id, str) else example_id
        )
-        self.client = client or _get_client()
+        self.client = client or langchain_tracer.get_client()
        self.evaluators = evaluators
-        self.executor = ThreadPoolExecutor(
-            max_workers=max(max_workers or len(evaluators), 1)
-        )
+        self.max_workers = max_workers or len(evaluators)
        self.futures: Set[Future] = set()
        self.skip_unfinished = skip_unfinished
        self.project_name = project_name
        self.logged_feedback: Dict[str, List[langsmith_schemas.Feedback]] = {}
-        global _TRACERS
-        _TRACERS.append(self)

    def _evaluate_in_project(self, run: Run, evaluator: langsmith.RunEvaluator) -> None:
        """Evaluate the run in the project.
@ -105,7 +92,7 @@ class EvaluatorCallbackHandler(BaseTracer):
        try:
            if self.project_name is None:
                feedback = self.client.evaluate_run(run, evaluator)
-            with tracing_v2_enabled(
+            with manager.tracing_v2_enabled(
                project_name=self.project_name, tags=["eval"], client=self.client
            ):
                feedback = self.client.evaluate_run(run, evaluator)
@ -133,14 +120,15 @@ class EvaluatorCallbackHandler(BaseTracer):
            return
        run_ = run.copy()
        run_.reference_example_id = self.example_id
-        for evaluator in self.evaluators:
-            self.futures.add(
-                self.executor.submit(self._evaluate_in_project, run_, evaluator)
-            )
-
-    def wait_for_futures(self) -> None:
-        """Wait for all futures to complete."""
-        futures = list(self.futures)
-        wait(futures)
-        for future in futures:
-            self.futures.remove(future)
+        if self.max_workers > 0:
+            with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+                list(
+                    executor.map(
+                        self._evaluate_in_project,
+                        [run_ for _ in range(len(self.evaluators))],
+                        self.evaluators,
+                    )
+                )
+        else:
+            for evaluator in self.evaluators:
+                self._evaluate_in_project(run_, evaluator)
--- a/libs/langchain/langchain/callbacks/tracers/langchain.py
+++ b/libs/langchain/langchain/callbacks/tracers/langchain.py
@ -42,7 +42,7 @@ def wait_for_all_tracers() -> None:
            tracer.wait_for_futures()


-def _get_client() -> Client:
+def get_client() -> Client:
    """Get the client."""
    global _CLIENT
    if _CLIENT is None:
@ -83,7 +83,7 @@ class LangChainTracer(BaseTracer):
                _EXECUTORS.append(self.executor)
        else:
            self.executor = None
-        self.client = client or _get_client()
+        self.client = client or get_client()
        self._futures: Set[Future] = set()
        self.tags = tags or []
        global _TRACERS
--- a/libs/langchain/langchain/smith/evaluation/name_generation.py
+++ b/libs/langchain/langchain/smith/evaluation/name_generation.py
@ -0,0 +1,729 @@
+import random
+
+# Pool of adjectives; random_name() draws one uniformly at random and places
+# it right after the prefix in the generated name.
+adjectives = [
+    "abandoned",
+    "aching",
+    "advanced",
+    "ample",
+    "artistic",
+    "back",
+    "best",
+    "bold",
+    "brief",
+    "clear",
+    "cold",
+    "complicated",
+    "cooked",
+    "crazy",
+    "crushing",
+    "damp",
+    "dear",
+    "definite",
+    "dependable",
+    "diligent",
+    "drab",
+    "earnest",
+    "elderly",
+    "enchanted",
+    "essential",
+    "excellent",
+    "extraneous",
+    "fixed",
+    "flowery",
+    "formal",
+    "fresh",
+    "frosty",
+    "giving",
+    "glossy",
+    "healthy",
+    "helpful",
+    "impressionable",
+    "kind",
+    "large",
+    "left",
+    "long",
+    "loyal",
+    "mealy",
+    "memorable",
+    "monthly",
+    "new",
+    "notable",
+    "only",
+    "ordinary",
+    "passionate",
+    "perfect",
+    "pertinent",
+    "proper",
+    "puzzled",
+    "reflecting",
+    "respectful",
+    "roasted",
+    "scholarly",
+    "shiny",
+    "slight",
+    "sparkling",
+    "spotless",
+    "stupendous",
+    "sunny",
+    "tart",
+    "terrific",
+    "timely",
+    "unique",
+    "upbeat",
+    "vacant",
+    "virtual",
+    "warm",
+    "weary",
+    "whispered",
+    "worthwhile",
+    "yellow",
+]
+
+# Pool of nouns; random_name() draws one uniformly at random and places it
+# after the adjective in the generated name.
+nouns = [
+    "account",
+    "acknowledgment",
+    "address",
+    "advertising",
+    "airplane",
+    "animal",
+    "appointment",
+    "arrival",
+    "artist",
+    "attachment",
+    "attitude",
+    "availability",
+    "backpack",
+    "bag",
+    "balance",
+    "bass",
+    "bean",
+    "beauty",
+    "bibliography",
+    "bill",
+    "bite",
+    "blossom",
+    "boat",
+    "book",
+    "box",
+    "boy",
+    "bread",
+    "bridge",
+    "broccoli",
+    "building",
+    "butter",
+    "button",
+    "cabbage",
+    "cake",
+    "camera",
+    "camp",
+    "candle",
+    "candy",
+    "canvas",
+    "car",
+    "card",
+    "carrot",
+    "cart",
+    "case",
+    "cat",
+    "chain",
+    "chair",
+    "chalk",
+    "chance",
+    "change",
+    "channel",
+    "character",
+    "charge",
+    "charm",
+    "chart",
+    "check",
+    "cheek",
+    "cheese",
+    "chef",
+    "cherry",
+    "chicken",
+    "child",
+    "church",
+    "circle",
+    "class",
+    "clay",
+    "click",
+    "clock",
+    "cloth",
+    "cloud",
+    "clove",
+    "club",
+    "coach",
+    "coal",
+    "coast",
+    "coat",
+    "cod",
+    "coffee",
+    "collar",
+    "color",
+    "comb",
+    "comfort",
+    "comic",
+    "committee",
+    "community",
+    "company",
+    "comparison",
+    "competition",
+    "condition",
+    "connection",
+    "control",
+    "cook",
+    "copper",
+    "copy",
+    "corn",
+    "cough",
+    "country",
+    "cover",
+    "crate",
+    "crayon",
+    "cream",
+    "creator",
+    "crew",
+    "crown",
+    "current",
+    "curtain",
+    "curve",
+    "cushion",
+    "dad",
+    "daughter",
+    "day",
+    "death",
+    "debt",
+    "decision",
+    "deer",
+    "degree",
+    "design",
+    "desire",
+    "desk",
+    "detail",
+    "development",
+    "digestion",
+    "dime",
+    "dinner",
+    "direction",
+    "dirt",
+    "discovery",
+    "discussion",
+    "disease",
+    "disgust",
+    "distance",
+    "distribution",
+    "division",
+    "doctor",
+    "dog",
+    "door",
+    "drain",
+    "drawer",
+    "dress",
+    "drink",
+    "driving",
+    "dust",
+    "ear",
+    "earth",
+    "edge",
+    "education",
+    "effect",
+    "egg",
+    "end",
+    "energy",
+    "engine",
+    "error",
+    "event",
+    "example",
+    "exchange",
+    "existence",
+    "expansion",
+    "experience",
+    "expert",
+    "eye",
+    "face",
+    "fact",
+    "fall",
+    "family",
+    "farm",
+    "father",
+    "fear",
+    "feeling",
+    "field",
+    "finger",
+    "fire",
+    "fish",
+    "flag",
+    "flight",
+    "floor",
+    "flower",
+    "fold",
+    "food",
+    "football",
+    "force",
+    "form",
+    "frame",
+    "friend",
+    "frog",
+    "fruit",
+    "fuel",
+    "furniture",
+    "game",
+    "garden",
+    "gate",
+    "girl",
+    "glass",
+    "glove",
+    "goat",
+    "gold",
+    "government",
+    "grade",
+    "grain",
+    "grass",
+    "green",
+    "grip",
+    "group",
+    "growth",
+    "guide",
+    "guitar",
+    "hair",
+    "hall",
+    "hand",
+    "harbor",
+    "harmony",
+    "hat",
+    "head",
+    "health",
+    "heart",
+    "heat",
+    "hill",
+    "history",
+    "hobbies",
+    "hole",
+    "hope",
+    "horn",
+    "horse",
+    "hospital",
+    "hour",
+    "house",
+    "humor",
+    "idea",
+    "impulse",
+    "income",
+    "increase",
+    "industry",
+    "ink",
+    "insect",
+    "instrument",
+    "insurance",
+    "interest",
+    "invention",
+    "iron",
+    "island",
+    "jelly",
+    "jet",
+    "jewel",
+    "join",
+    "judge",
+    "juice",
+    "jump",
+    "kettle",
+    "key",
+    "kick",
+    "kiss",
+    "kitten",
+    "knee",
+    "knife",
+    "knowledge",
+    "land",
+    "language",
+    "laugh",
+    "law",
+    "lead",
+    "learning",
+    "leather",
+    "leg",
+    "lettuce",
+    "level",
+    "library",
+    "lift",
+    "light",
+    "limit",
+    "line",
+    "linen",
+    "lip",
+    "liquid",
+    "list",
+    "look",
+    "loss",
+    "love",
+    "lunch",
+    "machine",
+    "man",
+    "manager",
+    "map",
+    "marble",
+    "mark",
+    "market",
+    "mass",
+    "match",
+    "meal",
+    "measure",
+    "meat",
+    "meeting",
+    "memory",
+    "metal",
+    "middle",
+    "milk",
+    "mind",
+    "mine",
+    "minute",
+    "mist",
+    "mitten",
+    "mom",
+    "money",
+    "monkey",
+    "month",
+    "moon",
+    "morning",
+    "mother",
+    "motion",
+    "mountain",
+    "mouth",
+    "muscle",
+    "music",
+    "nail",
+    "name",
+    "nation",
+    "neck",
+    "need",
+    "news",
+    "night",
+    "noise",
+    "note",
+    "number",
+    "nut",
+    "observation",
+    "offer",
+    "oil",
+    "operation",
+    "opinion",
+    "orange",
+    "order",
+    "organization",
+    "ornament",
+    "oven",
+    "page",
+    "pail",
+    "pain",
+    "paint",
+    "pan",
+    "pancake",
+    "paper",
+    "parcel",
+    "parent",
+    "part",
+    "passenger",
+    "paste",
+    "payment",
+    "peace",
+    "pear",
+    "pen",
+    "pencil",
+    "person",
+    "pest",
+    "pet",
+    "picture",
+    "pie",
+    "pin",
+    "pipe",
+    "pizza",
+    "place",
+    "plane",
+    "plant",
+    "plastic",
+    "plate",
+    "play",
+    "pleasure",
+    "plot",
+    "plough",
+    "pocket",
+    "point",
+    "poison",
+    "police",
+    "pollution",
+    "popcorn",
+    "porter",
+    "position",
+    "pot",
+    "potato",
+    "powder",
+    "power",
+    "price",
+    "print",
+    "process",
+    "produce",
+    "product",
+    "profit",
+    "property",
+    "prose",
+    "protest",
+    "pull",
+    "pump",
+    "punishment",
+    "purpose",
+    "push",
+    "quarter",
+    "question",
+    "quiet",
+    "quill",
+    "quilt",
+    "quince",
+    "rabbit",
+    "rail",
+    "rain",
+    "range",
+    "rat",
+    "rate",
+    "ray",
+    "reaction",
+    "reading",
+    "reason",
+    "record",
+    "regret",
+    "relation",
+    "religion",
+    "representative",
+    "request",
+    "respect",
+    "rest",
+    "reward",
+    "rhythm",
+    "rice",
+    "river",
+    "road",
+    "roll",
+    "room",
+    "root",
+    "rose",
+    "route",
+    "rub",
+    "rule",
+    "run",
+    "sack",
+    "sail",
+    "salt",
+    "sand",
+    "scale",
+    "scarecrow",
+    "scarf",
+    "scene",
+    "scent",
+    "school",
+    "science",
+    "scissors",
+    "screw",
+    "sea",
+    "seat",
+    "secretary",
+    "seed",
+    "selection",
+    "self",
+    "sense",
+    "servant",
+    "shade",
+    "shake",
+    "shame",
+    "shape",
+    "sheep",
+    "sheet",
+    "shelf",
+    "ship",
+    "shirt",
+    "shock",
+    "shoe",
+    "shop",
+    "show",
+    "side",
+    "sign",
+    "silk",
+    "sink",
+    "sister",
+    "size",
+    "sky",
+    "slave",
+    "sleep",
+    "smash",
+    "smell",
+    "smile",
+    "smoke",
+    "snail",
+    "snake",
+    "sneeze",
+    "snow",
+    "soap",
+    "society",
+    "sock",
+    "soda",
+    "sofa",
+    "son",
+    "song",
+    "sort",
+    "sound",
+    "soup",
+    "space",
+    "spark",
+    "speed",
+    "sponge",
+    "spoon",
+    "spray",
+    "spring",
+    "spy",
+    "square",
+    "stamp",
+    "star",
+    "start",
+    "statement",
+    "station",
+    "steam",
+    "steel",
+    "stem",
+    "step",
+    "stew",
+    "stick",
+    "stitch",
+    "stocking",
+    "stomach",
+    "stone",
+    "stop",
+    "store",
+    "story",
+    "stove",
+    "stranger",
+    "straw",
+    "stream",
+    "street",
+    "stretch",
+    "string",
+    "structure",
+    "substance",
+    "sugar",
+    "suggestion",
+    "suit",
+    "summer",
+    "sun",
+    "support",
+    "surprise",
+    "sweater",
+    "swim",
+    "system",
+    "table",
+    "tail",
+    "talk",
+    "tank",
+    "taste",
+    "tax",
+    "tea",
+    "teaching",
+    "team",
+    "tendency",
+    "test",
+    "texture",
+    "theory",
+    "thing",
+    "thought",
+    "thread",
+    "throat",
+    "thumb",
+    "thunder",
+    "ticket",
+    "time",
+    "tin",
+    "title",
+    "toad",
+    "toe",
+    "tooth",
+    "toothpaste",
+    "touch",
+    "town",
+    "toy",
+    "trade",
+    "train",
+    "transport",
+    "tray",
+    "treatment",
+    "tree",
+    "trick",
+    "trip",
+    "trouble",
+    "trousers",
+    "truck",
+    "tub",
+    "turkey",
+    "turn",
+    "twist",
+    "umbrella",
+    "uncle",
+    "underwear",
+    "unit",
+    "use",
+    "vacation",
+    "value",
+    "van",
+    "vase",
+    "vegetable",
+    "veil",
+    "vein",
+    "verse",
+    "vessel",
+    "view",
+    "visitor",
+    "voice",
+    "volcano",
+    "walk",
+    "wall",
+    "war",
+    "wash",
+    "waste",
+    "watch",
+    "water",
+    "wave",
+    "wax",
+    "way",
+    "wealth",
+    "weather",
+    "week",
+    "weight",
+    "wheel",
+    "whip",
+    "whistle",
+    "window",
+    "wine",
+    "wing",
+    "winter",
+    "wire",
+    "wish",
+    "woman",
+    "wood",
+    "wool",
+    "word",
+    "work",
+    "worm",
+    "wound",
+    "wrist",
+    "writer",
+    "yard",
+    "yoke",
+    "zebra",
+    "zinc",
+    "zipper",
+    "zone",
+]
+
+
+def random_name(prefix: str = "test") -> str:
+    """Build a random, human-readable name.
+
+    Args:
+        prefix: Leading component of the name. Defaults to "test".
+
+    Returns:
+        A string of the form ``<prefix>-<adjective>-<noun>-<number>``, where
+        the adjective and noun are drawn from the module-level word lists and
+        the number is an integer between 1 and 100 inclusive.
+    """
+    # Draw in the same order as before (adjective, noun, number) so a seeded
+    # random generator yields the same name.
+    picked_adjective = random.choice(adjectives)
+    picked_noun = random.choice(nouns)
+    suffix = random.randint(1, 100)
+    return "-".join([f"{prefix}", picked_adjective, picked_noun, f"{suffix}"])
--- a/libs/langchain/langchain/smith/evaluation/progress.py
+++ b/libs/langchain/langchain/smith/evaluation/progress.py
@ -0,0 +1,82 @@
+"""A simple progress bar for the console."""
+import threading
+from typing import Any, Dict, Optional, Sequence
+from uuid import UUID
+
+from langchain.callbacks import base as base_callbacks
+from langchain.schema.document import Document
+from langchain.schema.output import LLMResult
+
+
+class ProgressBarCallback(base_callbacks.BaseCallbackHandler):
+    """A simple progress bar for the console.
+
+    The bar advances by one each time a chain, retriever, LLM, or tool run
+    without a parent run (``parent_run_id is None``) ends. Increments are
+    guarded by a lock so concurrent callbacks do not lose updates.
+    """
+
+    def __init__(self, total: int, ncols: int = 50, **kwargs: Any):
+        """Initialize the progress bar and draw it in its initial state.
+
+        Args:
+            total: int, the total number of items to be processed.
+            ncols: int, the character width of the progress bar.
+            **kwargs: Ignored.
+        """
+        self.total = total
+        self.ncols = ncols
+        self.counter = 0
+        # Serializes counter updates and redraws triggered from worker threads.
+        self.lock = threading.Lock()
+        self._print_bar()
+
+    def increment(self) -> None:
+        """Increment the counter and update the progress bar."""
+        with self.lock:
+            self.counter += 1
+            self._print_bar()
+
+    def _print_bar(self) -> None:
+        """Print the progress bar to the console, rewriting the current line."""
+        # total == 0 (nothing to process) used to raise ZeroDivisionError here,
+        # and therefore in __init__. The clamp keeps the arrow within ncols
+        # even if more runs finish than `total` announced.
+        progress = min(self.counter / self.total, 1.0) if self.total > 0 else 0.0
+        arrow = "-" * (int(round(progress * self.ncols)) - 1) + ">"
+        spaces = " " * (self.ncols - len(arrow))
+        # No newline is written, so flush explicitly; otherwise a buffered
+        # stdout may hold the bar back instead of showing it right away.
+        print(f"\r[{arrow + spaces}] {self.counter}/{self.total}", end="", flush=True)
+
+    def on_chain_end(
+        self,
+        outputs: Dict[str, Any],
+        *,
+        run_id: UUID,
+        parent_run_id: Optional[UUID] = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Advance the bar when a chain run with no parent run ends."""
+        if parent_run_id is None:
+            self.increment()
+
+    def on_retriever_end(
+        self,
+        documents: Sequence[Document],
+        *,
+        run_id: UUID,
+        parent_run_id: Optional[UUID] = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Advance the bar when a retriever run with no parent run ends."""
+        if parent_run_id is None:
+            self.increment()
+
+    def on_llm_end(
+        self,
+        response: LLMResult,
+        *,
+        run_id: UUID,
+        parent_run_id: Optional[UUID] = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Advance the bar when an LLM run with no parent run ends."""
+        if parent_run_id is None:
+            self.increment()
+
+    def on_tool_end(
+        self,
+        output: str,
+        *,
+        run_id: UUID,
+        parent_run_id: Optional[UUID] = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Advance the bar when a tool run with no parent run ends."""
+        if parent_run_id is None:
+            self.increment()
--- a/libs/langchain/langchain/smith/evaluation/runner_utils.py
+++ b/libs/langchain/langchain/smith/evaluation/runner_utils.py
--- a/libs/langchain/langchain/smith/evaluation/string_run_evaluator.py
+++ b/libs/langchain/langchain/smith/evaluation/string_run_evaluator.py
@ -148,13 +148,27 @@ class ChainStringRunMapper(StringRunMapper):
    def map(self, run: Run) -> Dict[str, str]:
        """Maps the Run to a dictionary."""
        if not run.outputs:
-            raise ValueError(f"Run {run.id} has no outputs to evaluate.")
-        if self.input_key is not None and self.input_key not in run.inputs:
-            raise ValueError(f"Run {run.id} does not have input key {self.input_key}.")
-        elif self.prediction_key is not None and self.prediction_key not in run.outputs:
            raise ValueError(
-                f"Run {run.id} does not have prediction key {self.prediction_key}."
+                f"Run with ID {run.id} lacks outputs required for evaluation."
+                " Ensure the Run has valid outputs."
            )
+        if self.input_key is not None and self.input_key not in run.inputs:
+            raise ValueError(
+                f"Run with ID {run.id} is missing the expected input key"
+                f" '{self.input_key}'.\nAvailable input keys in this Run"
+                f"  are: {run.inputs.keys()}.\nAdjust the evaluator's"
+                f" input_key or ensure your input data includes key"
+                f" '{self.input_key}'."
+            )
+        elif self.prediction_key is not None and self.prediction_key not in run.outputs:
+            available_keys = ", ".join(run.outputs.keys())
+            raise ValueError(
+                f"Run with ID {run.id} doesn't have the expected prediction key"
+                f" '{self.prediction_key}'. Available prediction keys in this Run are:"
+                f" {available_keys}. Adjust the evaluator's prediction_key or"
+                " ensure the Run object's outputs the expected key."
+            )
+
        else:
            input_ = self._get_key(run.inputs, self.input_key, "input")
            prediction = self._get_key(run.outputs, self.prediction_key, "prediction")
--- a/libs/langchain/tests/integration_tests/smith/evaluation/test_runner_utils.py
+++ b/libs/langchain/tests/integration_tests/smith/evaluation/test_runner_utils.py
@ -5,7 +5,6 @@ import pytest
 from langsmith import Client as Client
 from langsmith.schemas import DataType

-from langchain.callbacks.tracers.evaluation import wait_for_all_evaluators
 from langchain.chains.llm import LLMChain
 from langchain.chat_models import ChatOpenAI
 from langchain.evaluation import EvaluatorType
@ -22,7 +21,6 @@ def _check_all_feedback_passed(_project_name: str, client: Client) -> None:
    # chain or llm passes for the feedback provided.
    runs = list(client.list_runs(project_name=_project_name, execution_order=1))
    assert len(runs) == 4
-    wait_for_all_evaluators()
    feedback = list(client.list_feedback(run_ids=[run.id for run in runs]))
    assert len(feedback) == 8
    assert all([f.score == 1 for f in feedback])
--- a/libs/langchain/tests/unit_tests/smith/evaluation/test_runner_utils.py
+++ b/libs/langchain/tests/unit_tests/smith/evaluation/test_runner_utils.py
@ -181,11 +181,15 @@ def test_run_llm_or_chain_with_input_mapper() -> None:
        assert "the wrong input" in inputs
        return {"the right input": inputs["the wrong input"]}

-    result = _run_llm_or_chain(example, lambda: mock_chain, input_mapper=input_mapper)
+    result = _run_llm_or_chain(
+        example,
+        {"callbacks": [], "tags": []},
+        llm_or_chain_factory=lambda: mock_chain,
+        input_mapper=input_mapper,
+    )
    assert result == {"output": "2", "the right input": "1"}
    bad_result = _run_llm_or_chain(
-        example,
-        lambda: mock_chain,
+        example, {"callbacks": [], "tags": []}, llm_or_chain_factory=lambda: mock_chain
    )
    assert "Error" in bad_result

@ -195,7 +199,12 @@ def test_run_llm_or_chain_with_input_mapper() -> None:
        return "the right input"

    mock_llm = FakeLLM(queries={"the right input": "somenumber"})
-    llm_result = _run_llm_or_chain(example, mock_llm, input_mapper=llm_input_mapper)
+    llm_result = _run_llm_or_chain(
+        example,
+        {"callbacks": [], "tags": []},
+        llm_or_chain_factory=mock_llm,
+        input_mapper=llm_input_mapper,
+    )
    assert isinstance(llm_result, str)
    assert llm_result == "somenumber"

@ -324,10 +333,14 @@ async def test_arun_on_dataset(monkeypatch: pytest.MonkeyPatch) -> None:
        )

        expected = {
-            uuid_: {
-                "output": {"result": f"Result for example {uuid.UUID(uuid_)}"},
+            str(example.id): {
+                "output": {
+                    "result": f"Result for example {uuid.UUID(str(example.id))}"
+                },
+                "input": {"input": example.inputs["input"]},
+                "reference": {"output": example.outputs["output"]},
                "feedback": [],
            }
-            for uuid_ in uuids
+            for example in examples
        }
        assert results["results"] == expected