Harrison/official pre release (#8106)

2025-09-23 11:30:37 +00:00 · 2023-07-21 18:44:32 -07:00
parent 95bcf68802
commit aa0e69bc98
65 changed files with 210 additions and 602 deletions
--- a/libs/experimental/langchain_experimental/cpal/README.md
+++ b/libs/experimental/langchain_experimental/cpal/README.md
@@ -0,0 +1,4 @@
+# Causal program-aided language (CPAL) chain
+
+
+see https://github.com/hwchase17/langchain/pull/6255
--- a/libs/experimental/langchain_experimental/cpal/init.py
+++ b/libs/experimental/langchain_experimental/cpal/init.py
--- a/libs/experimental/langchain_experimental/cpal/base.py
+++ b/libs/experimental/langchain_experimental/cpal/base.py
@@ -0,0 +1,271 @@
+"""
+CPAL Chain and its subchains
+"""
+from __future__ import annotations
+
+import json
+from typing import Any, ClassVar, Dict, List, Optional, Type
+
+import pydantic
+from langchain.base_language import BaseLanguageModel
+from langchain.callbacks.manager import CallbackManagerForChainRun
+from langchain.chains.base import Chain
+from langchain.chains.llm import LLMChain
+from langchain.output_parsers import PydanticOutputParser
+from langchain.prompts.prompt import PromptTemplate
+
+from langchain_experimental.cpal.constants import Constant
+from langchain_experimental.cpal.models import (
+    CausalModel,
+    InterventionModel,
+    NarrativeModel,
+    QueryModel,
+    StoryModel,
+)
+from langchain_experimental.cpal.templates.univariate.causal import (
+    template as causal_template,
+)
+from langchain_experimental.cpal.templates.univariate.intervention import (
+    template as intervention_template,
+)
+from langchain_experimental.cpal.templates.univariate.narrative import (
+    template as narrative_template,
+)
+from langchain_experimental.cpal.templates.univariate.query import (
+    template as query_template,
+)
+
+
+class _BaseStoryElementChain(Chain):
+    """Base chain that turns one piece of narrative text into a pydantic object.
+
+    Subclasses set ``pydantic_model`` (the type to parse into) and ``template``
+    (the prompt text). The wrapped ``chain`` produces a completion, which is
+    parsed by the ``PydanticOutputParser`` returned from ``parser()``.
+    """
+
+    chain: LLMChain
+    input_key: str = Constant.narrative_input.value  #: :meta private:
+    output_key: str = Constant.chain_answer.value  #: :meta private:
+    pydantic_model: ClassVar[
+        Optional[Type[pydantic.BaseModel]]
+    ] = None  #: :meta private:
+    template: ClassVar[Optional[str]] = None  #: :meta private:
+
+    @classmethod
+    def parser(cls) -> PydanticOutputParser:
+        """Parse LLM output into a pydantic object.
+
+        Raises:
+            NotImplementedError: if the class did not set ``pydantic_model``.
+        """
+        if cls.pydantic_model is None:
+            raise NotImplementedError(
+                f"pydantic_model not implemented for {cls.__name__}"
+            )
+        return PydanticOutputParser(pydantic_object=cls.pydantic_model)
+
+    @property
+    def input_keys(self) -> List[str]:
+        """Return the input keys.
+
+        :meta private:
+        """
+        return [self.input_key]
+
+    @property
+    def output_keys(self) -> List[str]:
+        """Return the output keys.
+
+        :meta private:
+        """
+        return [self.output_key]
+
+    @classmethod
+    def from_univariate_prompt(
+        cls,
+        llm: BaseLanguageModel,
+        **kwargs: Any,
+    ) -> Any:
+        """Build the chain around an ``LLMChain`` that uses the class prompt.
+
+        Args:
+            llm: language model that completes the prompt.
+            **kwargs: forwarded to the constructor. An optional ``template``
+                entry overrides the class-level prompt template.
+        """
+        # `template` only configures the prompt; it is not a field of the chain,
+        # so remove it from kwargs before they reach the constructor.
+        template = kwargs.pop("template", cls.template)
+        prompt = PromptTemplate(
+            input_variables=[Constant.narrative_input.value],
+            template=template,
+            partial_variables={
+                "format_instructions": cls.parser().get_format_instructions()
+            },
+        )
+        return cls(chain=LLMChain(llm=llm, prompt=prompt), **kwargs)
+
+    def _call(
+        self,
+        inputs: Dict[str, Any],
+        run_manager: Optional[CallbackManagerForChainRun] = None,
+    ) -> Dict[str, Any]:
+        """Run the LLM on the input text and parse the completion.
+
+        Returns:
+            A dict holding the parsed pydantic object under the ``chain_data``
+            key and ``None`` under ``output_key`` (story element chains have no
+            natural language answer of their own).
+        """
+        completion = self.chain.run(inputs[self.input_key])
+        pydantic_data = self.__class__.parser().parse(completion)
+        return {
+            Constant.chain_data.value: pydantic_data,
+            # use the configured key so the result agrees with `output_keys`
+            self.output_key: None,
+        }
+
+
+class NarrativeChain(_BaseStoryElementChain):
+    """Split a narrative into its story elements.
+
+    The elements are:
+
+    - causal model
+    - query
+    - intervention
+    """
+
+    template: ClassVar[str] = narrative_template
+    pydantic_model: ClassVar[Type[pydantic.BaseModel]] = NarrativeModel
+
+
+class CausalChain(_BaseStoryElementChain):
+    """Turn the causal part of the narrative into a stack of operations."""
+
+    template: ClassVar[str] = causal_template
+    pydantic_model: ClassVar[Type[pydantic.BaseModel]] = CausalModel
+
+
+class InterventionChain(_BaseStoryElementChain):
+    """Parse the hypothetical conditions that are applied to the causal model."""
+
+    template: ClassVar[str] = intervention_template
+    pydantic_model: ClassVar[Type[pydantic.BaseModel]] = InterventionModel
+
+
+class QueryChain(_BaseStoryElementChain):
+    """Translate the outcome question into an SQL query on the outcome table."""
+
+    template: ClassVar[str] = query_template  # TODO: incl. table schema
+    pydantic_model: ClassVar[Type[pydantic.BaseModel]] = QueryModel
+
+
+class CPALChain(_BaseStoryElementChain):
+    """Causal program-aided language (CPAL) chain.
+
+    Decomposes a narrative with ``narrative_chain``, parses the three parts with
+    the causal, intervention and query chains, and combines the results in a
+    ``StoryModel``. The number read from the story's query result is returned
+    under ``output_key``.
+    """
+
+    llm: BaseLanguageModel
+    narrative_chain: Optional[NarrativeChain] = None
+    causal_chain: Optional[CausalChain] = None
+    intervention_chain: Optional[InterventionChain] = None
+    query_chain: Optional[QueryChain] = None
+    # story built by the latest `_call`; stays None until the chain has been run
+    # TODO: change name ?
+    _story: Optional[StoryModel] = pydantic.PrivateAttr(default=None)
+
+    @classmethod
+    def from_univariate_prompt(
+        cls,
+        llm: BaseLanguageModel,
+        **kwargs: Any,
+    ) -> CPALChain:
+        """Create the chain together with all four component chains.
+
+        Args:
+            llm: language model shared by this chain and its component chains.
+            **kwargs: forwarded to the constructor.
+        """
+        return cls(
+            llm=llm,
+            chain=LLMChain(
+                llm=llm,
+                prompt=PromptTemplate(
+                    input_variables=["question", "query_result"],
+                    template=(
+                        "Summarize this answer '{query_result}' to this "
+                        "question '{question}'? "
+                    ),
+                ),
+            ),
+            narrative_chain=NarrativeChain.from_univariate_prompt(llm=llm),
+            causal_chain=CausalChain.from_univariate_prompt(llm=llm),
+            intervention_chain=InterventionChain.from_univariate_prompt(llm=llm),
+            query_chain=QueryChain.from_univariate_prompt(llm=llm),
+            **kwargs,
+        )
+
+    def _call(
+        self,
+        inputs: Dict[str, Any],
+        run_manager: Optional[CallbackManagerForChainRun] = None,
+        **kwargs: Any,
+    ) -> Dict[str, Any]:
+        """Answer the narrative question by building and querying a story.
+
+        Args:
+            inputs: chain inputs; the narrative is read from the
+                ``narrative_input`` key.
+            run_manager: receives the outcome table and the query as text.
+            **kwargs: extra entries that are merged into the returned dict.
+
+        Returns:
+            A dict with the ``StoryModel`` under the ``chain_data`` key and the
+            numeric query result under ``output_key``.
+
+        Raises:
+            ValueError: if the query could not be executed or returned no rows.
+        """
+        # instantiate component chains
+        if self.narrative_chain is None:
+            self.narrative_chain = NarrativeChain.from_univariate_prompt(llm=self.llm)
+        if self.causal_chain is None:
+            self.causal_chain = CausalChain.from_univariate_prompt(llm=self.llm)
+        if self.intervention_chain is None:
+            self.intervention_chain = InterventionChain.from_univariate_prompt(
+                llm=self.llm
+            )
+        if self.query_chain is None:
+            self.query_chain = QueryChain.from_univariate_prompt(llm=self.llm)
+
+        # decompose narrative into three causal story elements
+        narrative = self.narrative_chain(inputs[Constant.narrative_input.value])[
+            Constant.chain_data.value
+        ]
+
+        story = StoryModel(
+            causal_operations=self.causal_chain(narrative.story_plot)[
+                Constant.chain_data.value
+            ],
+            intervention=self.intervention_chain(narrative.story_hypothetical)[
+                Constant.chain_data.value
+            ],
+            query=self.query_chain(narrative.story_outcome_question)[
+                Constant.chain_data.value
+            ],
+        )
+        self._story = story
+
+        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
+        _run_manager.on_text(
+            "story outcome data" + "\n" + story._outcome_table.to_string(),
+            color="green",
+            end="\n\n",
+            verbose=self.verbose,
+        )
+        _run_manager.on_text(
+            "query data" + "\n" + json.dumps(story.query.dict(), indent=4),
+            color="blue",
+            end="\n\n",
+            verbose=self.verbose,
+        )
+
+        result_table = story.query._result_table
+        if isinstance(result_table, str):
+            # StoryModel stores the error text instead of a table when the SQL
+            # query raised; a str has no `.empty`, so report the failure here
+            raise ValueError(
+                "unanswerable, query could not be executed\n"
+                "\n"
+                "error:\n"
+                f"{result_table}\n"
+                "query:\n"
+                f"{story.query.dict()}"
+            )
+        if result_table.empty:
+            # prevent piping bad data into subsequent chains
+            raise ValueError(
+                (
+                    "unanswerable, query and outcome are incoherent\n"
+                    "\n"
+                    "outcome:\n"
+                    f"{story._outcome_table}\n"
+                    "query:\n"
+                    f"{story.query.dict()}"
+                )
+            )
+
+        # the answer is the last column of the first result row
+        query_result = float(result_table.values[0][-1])
+        # TODO: when demanded by composable chains, run `self.chain` with
+        # `question=story.query.question` and `query_result=query_result` and
+        # return {"query_result": ..., "human_report": ...} instead of the float
+        return {
+            Constant.chain_data.value: story,
+            self.output_key: query_result,
+            **kwargs,
+        }
+
+    def draw(self, **kwargs: Any) -> None:
+        """
+        CPAL chain can draw its resulting DAG.
+
+        Usage in a jupyter notebook:
+
+            >>> from IPython.display import SVG
+            >>> cpal_chain.draw(path="graph.svg")
+            >>> SVG('graph.svg')
+
+        Raises:
+            ValueError: if the chain has not been run yet, so there is no story.
+        """
+        if self._story is None:
+            raise ValueError("no story to draw, run the chain before calling draw()")
+        self._story._networkx_wrapper.draw_graphviz(**kwargs)
--- a/libs/experimental/langchain_experimental/cpal/constants.py
+++ b/libs/experimental/langchain_experimental/cpal/constants.py
@@ -0,0 +1,7 @@
+from enum import Enum
+
+
+class Constant(Enum):
+    """String keys shared by the CPAL chains and models."""
+
+    narrative_input = "narrative_input"  # chain input key, also a field alias
+    chain_answer = "chain_answer"  # natural language answer
+    chain_data = "chain_data"  # pydantic instance
--- a/libs/experimental/langchain_experimental/cpal/models.py
+++ b/libs/experimental/langchain_experimental/cpal/models.py
@@ -0,0 +1,245 @@
+from __future__ import annotations  # allows pydantic model to reference itself
+
+import re
+from typing import Any, Optional, Union
+
+import duckdb
+import pandas as pd
+from langchain.graphs.networkx_graph import NetworkxEntityGraph
+from pydantic import BaseModel, Field, PrivateAttr, root_validator, validator
+
+from langchain_experimental.cpal.constants import Constant
+
+
+class NarrativeModel(BaseModel):
+    """
+    Represent the narrative input as three story elements.
+    """
+
+    story_outcome_question: str
+    story_hypothetical: str
+    story_plot: str  # causal stack of operations
+
+    @validator("*", pre=True)
+    def empty_str_to_none(cls, v: str) -> Union[str, None]:
+        """Map an empty string to None; pass any other value through unchanged."""
+        return None if v == "" else v
+
+
+class EntityModel(BaseModel):
+    name: str = Field(description="entity name")
+    code: str = Field(description="entity actions")
+    value: float = Field(description="entity initial value")
+    depends_on: list[str] = Field(default=[], description="ancestor entities")
+
+    # TODO: generalize to multivariate math
+    # TODO: acyclic graph
+
+    class Config:
+        validate_assignment = True
+
+    @validator("name")
+    def lower_case_name(cls, v: str) -> str:
+        """Normalise the entity name to lower case."""
+        return v.lower()
+
+
+# Parsed form of the story plot: the attribute being calculated plus the
+# entities that carry it.
+# NOTE(review): this model is handed to PydanticOutputParser by CausalChain;
+# presumably its schema (field order, descriptions, any class docstring) ends
+# up in the prompt's format instructions - confirm before editing those.
+class CausalModel(BaseModel):
+    attribute: str = Field(description="name of the attribute to be calculated")
+    entities: list[EntityModel] = Field(description="entities in the story")
+
+    # TODO: root validate each `entity.depends_on` using system's entity names
+
+
+class EntitySettingModel(BaseModel):
+    """
+    Initial conditions for an entity
+
+    {"name": "bud", "attribute": "pet_count", "value": 12}
+    """
+
+    name: str = Field(description="name of the entity")
+    attribute: str = Field(description="name of the attribute to be calculated")
+    value: float = Field(description="entity's attribute value (calculated)")
+
+    @validator("name")
+    def lower_case_transform(cls, v: str) -> str:
+        """Normalise the entity name to lower case."""
+        return v.lower()
+
+
+class SystemSettingModel(BaseModel):
+    """
+    Initial global conditions for the system.
+
+    {"parameter": "interest_rate", "value": .05}
+    """
+
+    parameter: str  # name of the global parameter, e.g. "interest_rate"
+    value: float  # value assigned to that parameter
+
+
+class InterventionModel(BaseModel):
+    """
+    aka initial conditions
+
+    >>> intervention.dict()
+    {
+        entity_settings: [
+            {"name": "bud", "attribute": "pet_count", "value": 12},
+            {"name": "pat", "attribute": "pet_count", "value": 0},
+        ],
+        system_settings: None,
+    }
+    """
+
+    entity_settings: list[EntitySettingModel]
+    system_settings: Optional[list[SystemSettingModel]] = None
+
+    # NOTE(review): despite its name this validator does not lower-case anything;
+    # it only rejects system settings, which are not supported yet.
+    @validator("system_settings")
+    def lower_case_name(cls, v: Optional[list[SystemSettingModel]]) -> None:
+        """Reject any system settings.
+
+        Raises:
+            NotImplementedError: if ``system_settings`` is not None.
+        """
+        if v is not None:
+            raise NotImplementedError("system_setting is not implemented yet")
+        return v
+
+
+class QueryModel(BaseModel):
+    """translate a question about the story outcome into a programmatic expression"""
+
+    question: str = Field(alias=Constant.narrative_input.value)  # input
+    expression: str  # output, part of llm completion
+    llm_error_msg: str  # output, part of llm completion
+    # result of the executed query: StoryModel._run_query stores a DataFrame on
+    # success and the error text (str) when running the query raised
+    _result_table: Union[pd.DataFrame, str] = PrivateAttr()
+
+
+# NOTE(review): nothing visible in this module assigns `_result_table` on this
+# model or instantiates it; presumably it mirrors QueryModel - confirm usage.
+class ResultModel(BaseModel):
+    question: str = Field(alias=Constant.narrative_input.value)  # input
+    _result_table: str = PrivateAttr()  # result of the executed query
+
+
+class StoryModel(BaseModel):
+    """Combine causal model, intervention and query into one computed story.
+
+    Constructing the model runs ``_compute``: the intervention is applied to the
+    entities, the entity code is executed in dependency order, the entity
+    values are collected in ``_outcome_table`` and the query is run against it.
+    """
+
+    causal_operations: Any = Field(required=True)
+    intervention: Any = Field(required=True)
+    query: Any = Field(required=True)
+    _outcome_table: pd.DataFrame = PrivateAttr(default=None)
+    _networkx_wrapper: Any = PrivateAttr(default=None)
+
+    def __init__(self, **kwargs: Any):
+        super().__init__(**kwargs)
+        self._compute()
+
+        # TODO: when langchain adopts pydantic.v2 replace w/ `__post_init__`
+        # misses hints github.com/pydantic/pydantic/issues/1729#issuecomment-1300576214
+
+    @root_validator
+    def check_intervention_is_valid(cls, values: dict) -> dict:
+        """Check that every intervention targets an entity of the causal model.
+
+        Raises:
+            ValueError: if an entity setting names an unknown entity.
+        """
+        valid_names = [e.name for e in values["causal_operations"].entities]
+        for setting in values["intervention"].entity_settings:
+            if setting.name not in valid_names:
+                error_msg = f"""
+                    Hypothetical question has an invalid entity name.
+                    `{setting.name}` not in `{valid_names}`
+                """
+                raise ValueError(error_msg)
+        return values
+
+    def _block_back_door_paths(self) -> None:
+        """Detach intervened entities from their ancestors and their code."""
+        # stop intervention entities from depending on others
+        intervention_entities = [
+            entity_setting.name for entity_setting in self.intervention.entity_settings
+        ]
+        for entity in self.causal_operations.entities:
+            if entity.name in intervention_entities:
+                entity.depends_on = []
+                entity.code = "pass"
+
+    def _set_initial_conditions(self) -> None:
+        """Copy each intervention value onto the entity with the same name."""
+        for entity_setting in self.intervention.entity_settings:
+            for entity in self.causal_operations.entities:
+                if entity.name == entity_setting.name:
+                    entity.value = entity_setting.value
+
+    def _make_graph(self) -> None:
+        """Build the dependency graph and keep only entities that appear in it."""
+        self._networkx_wrapper = NetworkxEntityGraph()
+        for entity in self.causal_operations.entities:
+            for parent_name in entity.depends_on:
+                self._networkx_wrapper._graph.add_edge(
+                    parent_name, entity.name, relation=entity.code
+                )
+
+        # sort once instead of once per entity; a set gives cheap membership tests
+        connected_names = set(self._networkx_wrapper.get_topological_sort())
+
+        # TODO: is it correct to drop entities with no impact on the outcome (?)
+        self.causal_operations.entities = [
+            entity
+            for entity in self.causal_operations.entities
+            if entity.name in connected_names
+        ]
+
+    def _sort_entities(self) -> None:
+        """Order the entities so that ancestors come before their dependants."""
+        # order the sequence of causal actions
+        sorted_nodes = self._networkx_wrapper.get_topological_sort()
+        self.causal_operations.entities.sort(key=lambda x: sorted_nodes.index(x.name))
+
+    def _forward_propagate(self) -> None:
+        """Execute each entity's code and collect the entities in a table."""
+        entity_scope = {
+            entity.name: entity for entity in self.causal_operations.entities
+        }
+        # remember which names are entities: executed code may add helper
+        # variables to the scope, and those have no `.dict()`
+        entity_names = list(entity_scope)
+        for entity in self.causal_operations.entities:
+            if entity.code == "pass":
+                continue
+            # SECURITY: `entity.code` comes from an LLM completion and is run as
+            # Python with this module's globals; only use with trusted input.
+            # gist.github.com/dean0x7d/df5ce97e4a1a05be4d56d1378726ff92
+            exec(entity.code, globals(), entity_scope)
+        row_values = [entity_scope[name].dict() for name in entity_names]
+        self._outcome_table = pd.DataFrame(row_values)
+
+    def _run_query(self) -> None:
+        """Run the query expression and store the outcome on the query.
+
+        On success ``query._result_table`` holds the resulting DataFrame; if
+        running the query raises, it holds the error text instead.
+
+        Raises:
+            ValueError: if the LLM reported an error for the question.
+        """
+
+        def humanize_sql_error_msg(error: str) -> str:
+            pattern = r"column\s+(.*?)\s+not found"
+            col_match = re.search(pattern, error)
+            if col_match:
+                return (
+                    "SQL error: "
+                    + col_match.group(1)
+                    + " is not an attribute in your story!"
+                )
+            else:
+                return str(error)
+
+        if self.query.llm_error_msg == "":
+            try:
+                # the prompt examples query a table named `df`; presumably duckdb
+                # finds this local by name, so keep it although it looks unused
+                df = self._outcome_table  # noqa
+                query_result = duckdb.sql(self.query.expression).df()
+                self.query._result_table = query_result
+            except duckdb.BinderException as e:
+                self.query._result_table = humanize_sql_error_msg(str(e))
+            except Exception as e:
+                self.query._result_table = str(e)
+        else:
+            msg = "LLM maybe failed to translate question to SQL query."
+            raise ValueError(
+                {
+                    "question": self.query.question,
+                    "llm_error_msg": self.query.llm_error_msg,
+                    "msg": msg,
+                }
+            )
+
+    def _compute(self) -> None:
+        """Run all computation steps; each step relies on the previous ones."""
+        self._block_back_door_paths()
+        self._set_initial_conditions()
+        self._make_graph()
+        self._sort_entities()
+        self._forward_propagate()
+        self._run_query()
+
+    def print_debug_report(self) -> None:
+        """Pretty-print the outcome table, the query and the query result."""
+        report = {
+            "outcome": self._outcome_table,
+            "query": self.query.dict(),
+            "result": self.query._result_table,
+        }
+        from pprint import pprint
+
+        pprint(report)
--- a/libs/experimental/langchain_experimental/cpal/templates/init.py
+++ b/libs/experimental/langchain_experimental/cpal/templates/init.py
--- a/libs/experimental/langchain_experimental/cpal/templates/univariate/init.py
+++ b/libs/experimental/langchain_experimental/cpal/templates/univariate/init.py
--- a/libs/experimental/langchain_experimental/cpal/templates/univariate/causal.py
+++ b/libs/experimental/langchain_experimental/cpal/templates/univariate/causal.py
@@ -0,0 +1,113 @@
+# flake8: noqa E501
+
+# fmt: off
+template = (
+    """
+Transform the math story plot into a JSON object. Don't guess at any of the parts.
+
+{format_instructions}
+
+
+
+Story: Boris has seven times the number of pets as Marcia. Jan has three times the number of pets as Marcia. Marcia has two more pets than Cindy.
+
+
+
+# JSON:
+
+
+
+{{
+    "attribute": "pet_count",
+    "entities": [
+        {{
+            "name": "cindy",
+            "value": 0,
+            "depends_on": [],
+            "code": "pass"
+        }},
+        {{
+            "name": "marcia",
+            "value": 0,
+            "depends_on": ["cindy"],
+            "code": "marcia.value = cindy.value + 2"
+        }},
+        {{
+            "name": "boris",
+            "value": 0,
+            "depends_on": ["marcia"],
+            "code": "boris.value = marcia.value * 7"
+        }},
+        {{
+            "name": "jan",
+            "value": 0,
+            "depends_on": ["marcia"],
+            "code": "jan.value = marcia.value * 3"
+        }}
+    ]
+}}
+
+
+
+
+Story: Boris gives 20 percent of his money to Marcia. Marcia gives 10
+percent of her money to Cindy. Cindy gives 5 percent of her money to Jan.
+
+
+
+
+# JSON:
+
+
+
+{{
+    "attribute": "money",
+    "entities": [
+        {{
+            "name": "boris",
+            "value": 0,
+            "depends_on": [],
+            "code": "pass"
+        }},
+        {{
+            "name": "marcia",
+            "value": 0,
+            "depends_on": ["boris"],
+            "code": "
+                marcia.value = boris.value * 0.2
+                boris.value = boris.value * 0.8
+            "
+        }},
+        {{
+            "name": "cindy",
+            "value": 0,
+            "depends_on": ["marcia"],
+            "code": "
+                cindy.value = marcia.value * 0.1
+                marcia.value = marcia.value * 0.9
+            "
+        }},
+        {{
+            "name": "jan",
+            "value": 0,
+            "depends_on": ["cindy"],
+            "code": "
+                jan.value = cindy.value * 0.05
+                cindy.value = cindy.value * 0.95
+            "
+        }}
+    ]
+}}
+
+
+
+
+Story: {narrative_input}
+
+
+
+# JSON:
+""".strip()
+    + "\n"
+)
+# fmt: on
--- a/libs/experimental/langchain_experimental/cpal/templates/univariate/intervention.py
+++ b/libs/experimental/langchain_experimental/cpal/templates/univariate/intervention.py
@@ -0,0 +1,59 @@
+# flake8: noqa E501
+
+# fmt: off
+template = (
+    """
+Transform the hypothetical whatif statement into JSON. Don't guess at any of the parts. Write NONE if you are unsure.
+
+{format_instructions}
+
+
+
+statement: if cindy's pet count was 4
+
+
+
+
+# JSON:
+
+
+
+{{
+    "entity_settings" : [
+        {{ "name": "cindy", "attribute": "pet_count", "value": "4" }}
+    ]
+}}
+
+
+
+
+
+statement: Let's say boris has ten dollars and Bill has 20 dollars.
+
+
+
+
+# JSON:
+
+
+{{
+    "entity_settings" : [
+        {{ "name": "boris", "attribute": "dollars", "value": "10" }},
+        {{ "name": "bill", "attribute": "dollars", "value": "20" }}
+    ]
+}}
+
+
+
+
+
+Statement: {narrative_input}
+
+
+
+
+# JSON:
+""".strip()
+    + "\n\n\n"
+)
+# fmt: on
--- a/libs/experimental/langchain_experimental/cpal/templates/univariate/narrative.py
+++ b/libs/experimental/langchain_experimental/cpal/templates/univariate/narrative.py
@@ -0,0 +1,79 @@
+# flake8: noqa E501
+
+
+# fmt: off
+template = (
+    """
+Split the given text into three parts: the question, the story_hypothetical, and the logic. Don't guess at any of the parts. Write NONE if you are unsure.
+
+{format_instructions}
+
+
+
+Q: Boris has seven times the number of pets as Marcia. Jan has three times the number of pets as Marcia. Marcia has two more pets than Cindy. If Cindy has four pets, how many total pets do the three have?
+
+
+
+# JSON
+
+
+
+{{
+    "story_outcome_question": "how many total pets do the three have?",
+    "story_hypothetical": "If Cindy has four pets",
+    "story_plot": "Boris has seven times the number of pets as Marcia. Jan has three times the number of pets as Marcia. Marcia has two more pets than Cindy."
+}}
+
+
+
+Q: boris gives ten percent of his money to marcia. marcia gives ten
+percent of her money to andy. If boris has 100 dollars, how much money
+will andy have?
+
+
+
+# JSON
+
+
+
+{{
+    "story_outcome_question": "how much money will andy have?",
+    "story_hypothetical": "If boris has 100 dollars",
+    "story_plot": "boris gives ten percent of his money to marcia. marcia gives ten percent of her money to andy."
+}}
+
+
+
+
+Q: boris gives ten percent of his candy to marcia. marcia gives ten
+percent of her candy to andy. If boris has 100 pounds of candy and marcia has
+200 pounds of candy, then how many pounds of candy will andy have?
+
+
+
+
+
+# JSON
+
+
+
+
+{{
+    "story_outcome_question": "how many pounds of candy will andy have?",
+    "story_hypothetical": "If boris has 100 pounds of candy and marcia has 200 pounds of candy",
+    "story_plot": "boris gives ten percent of his candy to marcia. marcia gives ten percent of her candy to andy."
+}}
+
+
+
+
+
+Q: {narrative_input}
+
+
+
+# JSON
+""".strip()
+    + "\n\n\n"
+)
+# fmt: on
--- a/libs/experimental/langchain_experimental/cpal/templates/univariate/query.py
+++ b/libs/experimental/langchain_experimental/cpal/templates/univariate/query.py
@@ -0,0 +1,270 @@
+# flake8: noqa E501
+
+
+# fmt: off
+template = (
+    """
+Transform the narrative_input into an SQL expression. If you are
+unsure, then do not guess, instead add a llm_error_msg that explains why you are unsure.
+
+
+{format_instructions}
+
+
+narrative_input: how much money will boris have?
+
+
+# JSON:
+
+    {{
+        "narrative_input": "how much money will boris have?",
+        "llm_error_msg": "",
+        "expression": "SELECT name, value FROM df WHERE name = 'boris'"
+    }}
+
+
+
+narrative_input: How much money does ted have?
+
+
+
+# JSON:
+
+    {{
+        "narrative_input": "How much money does ted have?",
+        "llm_error_msg": "",
+        "expression": "SELECT name, value FROM df WHERE name = 'ted'"
+    }}
+
+
+
+narrative_input: what is the sum of pet count for all the people?
+
+
+
+# JSON:
+
+    {{
+        "narrative_input": "what is the sum of pet count for all the people?",
+        "llm_error_msg": "",
+        "expression": "SELECT SUM(value) FROM df"
+    }}
+
+
+
+
+narrative_input: what's the average of the pet counts for all the people?
+
+
+
+# JSON:
+
+    {{
+        "narrative_input": "what's the average of the pet counts for all the people?",
+        "llm_error_msg": "",
+        "expression": "SELECT AVG(value) FROM df"
+    }}
+
+
+
+
+narrative_input: what's the maximum of the pet counts for all the people?
+
+
+
+# JSON:
+
+    {{
+        "narrative_input": "what's the maximum of the pet counts for all the people?",
+        "llm_error_msg": "",
+        "expression": "SELECT MAX(value) FROM df"
+    }}
+
+
+
+
+narrative_input: what's the minimum of the pet counts for all the people?
+
+
+
+# JSON:
+
+    {{
+        "narrative_input": "what's the minimum of the pet counts for all the people?",
+        "llm_error_msg": "",
+        "expression": "SELECT MIN(value) FROM df"
+    }}
+
+
+
+
+narrative_input: what's the number of people with pet counts greater than 10?
+
+
+
+# JSON:
+
+    {{
+        "narrative_input": "what's the number of people with pet counts greater than 10?",
+        "llm_error_msg": "",
+        "expression": "SELECT COUNT(*) FROM df WHERE value > 10"
+    }}
+
+
+
+
+narrative_input: what's the pet count for boris?
+
+
+
+# JSON:
+
+    {{
+        "narrative_input": "what's the pet count for boris?",
+        "llm_error_msg": "",
+        "expression": "SELECT name, value FROM df WHERE name = 'boris'"
+    }}
+
+
+
+
+narrative_input: what's the pet count for cindy and marcia?
+
+
+
+# JSON:
+
+    {{
+        "narrative_input": "what's the pet count for cindy and marcia?",
+        "llm_error_msg": "",
+        "expression": "SELECT name, value FROM df WHERE name IN ('cindy', 'marcia')"
+    }}
+
+
+
+
+narrative_input: what's the total pet count for cindy and marcia?
+
+
+
+# JSON:
+
+    {{
+        "narrative_input": "what's the total pet count for cindy and marcia?",
+        "llm_error_msg": "",
+        "expression": "SELECT SUM(value) FROM df WHERE name IN ('cindy', 'marcia')"
+    }}
+
+
+
+
+narrative_input: what's the total pet count for TED?
+
+
+
+# JSON:
+
+    {{
+        "narrative_input": "what's the total pet count for TED?",
+        "llm_error_msg": "",
+        "expression": "SELECT SUM(value) FROM df WHERE name = 'TED'"
+    }}
+
+
+
+
+
+narrative_input: what's the total dollar count for TED and cindy?
+
+
+
+# JSON:
+
+    {{
+        "narrative_input": "what's the total dollar count for TED and cindy?",
+        "llm_error_msg": "",
+        "expression": "SELECT SUM(value) FROM df WHERE name IN ('TED', 'cindy')"
+    }}
+
+
+
+
+narrative_input: what's the total pet count for TED and cindy?
+
+
+
+
+# JSON:
+
+    {{
+        "narrative_input": "what's the total pet count for TED and cindy?",
+        "llm_error_msg": "",
+        "expression": "SELECT SUM(value) FROM df WHERE name IN ('TED', 'cindy')"
+    }}
+
+
+
+
+narrative_input: what's the best for TED and cindy?
+
+
+
+
+# JSON:
+
+    {{
+        "narrative_input": "what's the best for TED and cindy?",
+        "llm_error_msg": "ambiguous narrative_input, not sure what 'best' means",
+        "expression": ""
+    }}
+
+
+
+
+narrative_input: what's the value?
+
+
+
+
+# JSON:
+
+    {{
+        "narrative_input": "what's the value?",
+        "llm_error_msg": "ambiguous narrative_input, not sure what entity is being asked about",
+        "expression": ""
+    }}
+
+
+
+
+
+
+narrative_input: how many total pets do the three have?
+
+
+
+
+
+# JSON:
+
+    {{
+        "narrative_input": "how many total pets do the three have?",
+        "llm_error_msg": "",
+        "expression": "SELECT SUM(value) FROM df"
+    }}
+
+
+
+
+
+
+narrative_input: {narrative_input}
+
+
+
+
+# JSON:
+""".strip()
+    + "\n\n\n"
+)
+# fmt: on