Harrison/official pre release (#8106)

This commit is contained in:
Harrison Chase
2023-07-21 18:44:32 -07:00
committed by GitHub
parent 95bcf68802
commit aa0e69bc98
65 changed files with 210 additions and 602 deletions

View File

@@ -0,0 +1,4 @@
# Causal program-aided language (CPAL) chain
see https://github.com/hwchase17/langchain/pull/6255

View File

@@ -0,0 +1,271 @@
"""
CPAL Chain and its subchains
"""
from __future__ import annotations
import json
from typing import Any, ClassVar, Dict, List, Optional, Type
import pydantic
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.manager import CallbackManagerForChainRun
from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts.prompt import PromptTemplate
from langchain_experimental.cpal.constants import Constant
from langchain_experimental.cpal.models import (
CausalModel,
InterventionModel,
NarrativeModel,
QueryModel,
StoryModel,
)
from langchain_experimental.cpal.templates.univariate.causal import (
template as causal_template,
)
from langchain_experimental.cpal.templates.univariate.intervention import (
template as intervention_template,
)
from langchain_experimental.cpal.templates.univariate.narrative import (
template as narrative_template,
)
from langchain_experimental.cpal.templates.univariate.query import (
template as query_template,
)
class _BaseStoryElementChain(Chain):
chain: LLMChain
input_key: str = Constant.narrative_input.value #: :meta private:
output_key: str = Constant.chain_answer.value #: :meta private:
pydantic_model: ClassVar[
Optional[Type[pydantic.BaseModel]]
] = None #: :meta private:
template: ClassVar[Optional[str]] = None #: :meta private:
@classmethod
def parser(cls) -> PydanticOutputParser:
"""Parse LLM output into a pydantic object."""
if cls.pydantic_model is None:
raise NotImplementedError(
f"pydantic_model not implemented for {cls.__name__}"
)
return PydanticOutputParser(pydantic_object=cls.pydantic_model)
@property
def input_keys(self) -> List[str]:
"""Return the input keys.
:meta private:
"""
return [self.input_key]
@property
def output_keys(self) -> List[str]:
"""Return the output keys.
:meta private:
"""
_output_keys = [self.output_key]
return _output_keys
@classmethod
def from_univariate_prompt(
cls,
llm: BaseLanguageModel,
**kwargs: Any,
) -> Any:
return cls(
chain=LLMChain(
llm=llm,
prompt=PromptTemplate(
input_variables=[Constant.narrative_input.value],
template=kwargs.get("template", cls.template),
partial_variables={
"format_instructions": cls.parser().get_format_instructions()
},
),
),
**kwargs,
)
def _call(
self,
inputs: Dict[str, Any],
run_manager: Optional[CallbackManagerForChainRun] = None,
) -> Dict[str, Any]:
completion = self.chain.run(inputs[self.input_key])
pydantic_data = self.__class__.parser().parse(completion)
return {
Constant.chain_data.value: pydantic_data,
Constant.chain_answer.value: None,
}
class NarrativeChain(_BaseStoryElementChain):
"""Decompose the narrative into its story elements
- causal model
- query
- intervention
"""
pydantic_model: ClassVar[Type[pydantic.BaseModel]] = NarrativeModel
template: ClassVar[str] = narrative_template
class CausalChain(_BaseStoryElementChain):
"""Translate the causal narrative into a stack of operations."""
pydantic_model: ClassVar[Type[pydantic.BaseModel]] = CausalModel
template: ClassVar[str] = causal_template
class InterventionChain(_BaseStoryElementChain):
"""Set the hypothetical conditions for the causal model."""
pydantic_model: ClassVar[Type[pydantic.BaseModel]] = InterventionModel
template: ClassVar[str] = intervention_template
class QueryChain(_BaseStoryElementChain):
"""Query the outcome table using SQL."""
pydantic_model: ClassVar[Type[pydantic.BaseModel]] = QueryModel
template: ClassVar[str] = query_template # TODO: incl. table schema
class CPALChain(_BaseStoryElementChain):
llm: BaseLanguageModel
narrative_chain: Optional[NarrativeChain] = None
causal_chain: Optional[CausalChain] = None
intervention_chain: Optional[InterventionChain] = None
query_chain: Optional[QueryChain] = None
_story: StoryModel = pydantic.PrivateAttr(default=None) # TODO: change name ?
@classmethod
def from_univariate_prompt(
cls,
llm: BaseLanguageModel,
**kwargs: Any,
) -> CPALChain:
"""instantiation depends on component chains"""
return cls(
llm=llm,
chain=LLMChain(
llm=llm,
prompt=PromptTemplate(
input_variables=["question", "query_result"],
template=(
"Summarize this answer '{query_result}' to this "
"question '{question}'? "
),
),
),
narrative_chain=NarrativeChain.from_univariate_prompt(llm=llm),
causal_chain=CausalChain.from_univariate_prompt(llm=llm),
intervention_chain=InterventionChain.from_univariate_prompt(llm=llm),
query_chain=QueryChain.from_univariate_prompt(llm=llm),
**kwargs,
)
def _call(
self,
inputs: Dict[str, Any],
run_manager: Optional[CallbackManagerForChainRun] = None,
**kwargs: Any,
) -> Dict[str, Any]:
# instantiate component chains
if self.narrative_chain is None:
self.narrative_chain = NarrativeChain.from_univariate_prompt(llm=self.llm)
if self.causal_chain is None:
self.causal_chain = CausalChain.from_univariate_prompt(llm=self.llm)
if self.intervention_chain is None:
self.intervention_chain = InterventionChain.from_univariate_prompt(
llm=self.llm
)
if self.query_chain is None:
self.query_chain = QueryChain.from_univariate_prompt(llm=self.llm)
# decompose narrative into three causal story elements
narrative = self.narrative_chain(inputs[Constant.narrative_input.value])[
Constant.chain_data.value
]
story = StoryModel(
causal_operations=self.causal_chain(narrative.story_plot)[
Constant.chain_data.value
],
intervention=self.intervention_chain(narrative.story_hypothetical)[
Constant.chain_data.value
],
query=self.query_chain(narrative.story_outcome_question)[
Constant.chain_data.value
],
)
self._story = story
def pretty_print_str(title: str, d: str) -> str:
return title + "\n" + d
_run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
_run_manager.on_text(
pretty_print_str("story outcome data", story._outcome_table.to_string()),
color="green",
end="\n\n",
verbose=self.verbose,
)
def pretty_print_dict(title: str, d: dict) -> str:
return title + "\n" + json.dumps(d, indent=4)
_run_manager.on_text(
pretty_print_dict("query data", story.query.dict()),
color="blue",
end="\n\n",
verbose=self.verbose,
)
if story.query._result_table.empty:
# prevent piping bad data into subsequent chains
raise ValueError(
(
"unanswerable, query and outcome are incoherent\n"
"\n"
"outcome:\n"
f"{story._outcome_table}\n"
"query:\n"
f"{story.query.dict()}"
)
)
else:
query_result = float(story.query._result_table.values[0][-1])
if False:
"""TODO: add this back in when demanded by composable chains"""
reporting_chain = self.chain
human_report = reporting_chain.run(
question=story.query.question, query_result=query_result
)
query_result = {
"query_result": query_result,
"human_report": human_report,
}
output = {
Constant.chain_data.value: story,
self.output_key: query_result,
**kwargs,
}
return output
def draw(self, **kwargs: Any) -> None:
"""
CPAL chain can draw its resulting DAG.
Usage in a jupyter notebook:
>>> from IPython.display import SVG
>>> cpal_chain.draw(path="graph.svg")
>>> SVG('graph.svg')
"""
self._story._networkx_wrapper.draw_graphviz(**kwargs)

View File

@@ -0,0 +1,7 @@
from enum import Enum
class Constant(Enum):
narrative_input = "narrative_input"
chain_answer = "chain_answer" # natural language answer
chain_data = "chain_data" # pydantic instance

View File

@@ -0,0 +1,245 @@
from __future__ import annotations # allows pydantic model to reference itself
import re
from typing import Any, Optional, Union
import duckdb
import pandas as pd
from langchain.graphs.networkx_graph import NetworkxEntityGraph
from pydantic import BaseModel, Field, PrivateAttr, root_validator, validator
from langchain_experimental.cpal.constants import Constant
class NarrativeModel(BaseModel):
"""
Represent the narrative input as three story elements.
"""
story_outcome_question: str
story_hypothetical: str
story_plot: str # causal stack of operations
@validator("*", pre=True)
def empty_str_to_none(cls, v: str) -> Union[str, None]:
"""Empty strings are not allowed"""
if v == "":
return None
return v
class EntityModel(BaseModel):
name: str = Field(description="entity name")
code: str = Field(description="entity actions")
value: float = Field(description="entity initial value")
depends_on: list[str] = Field(default=[], description="ancestor entities")
# TODO: generalize to multivariate math
# TODO: acyclic graph
class Config:
validate_assignment = True
@validator("name")
def lower_case_name(cls, v: str) -> str:
v = v.lower()
return v
class CausalModel(BaseModel):
attribute: str = Field(description="name of the attribute to be calculated")
entities: list[EntityModel] = Field(description="entities in the story")
# TODO: root validate each `entity.depends_on` using system's entity names
class EntitySettingModel(BaseModel):
"""
Initial conditions for an entity
{"name": "bud", "attribute": "pet_count", "value": 12}
"""
name: str = Field(description="name of the entity")
attribute: str = Field(description="name of the attribute to be calculated")
value: float = Field(description="entity's attribute value (calculated)")
@validator("name")
def lower_case_transform(cls, v: str) -> str:
v = v.lower()
return v
class SystemSettingModel(BaseModel):
"""
Initial global conditions for the system.
{"parameter": "interest_rate", "value": .05}
"""
parameter: str
value: float
class InterventionModel(BaseModel):
"""
aka initial conditions
>>> intervention.dict()
{
entity_settings: [
{"name": "bud", "attribute": "pet_count", "value": 12},
{"name": "pat", "attribute": "pet_count", "value": 0},
],
system_settings: None,
}
"""
entity_settings: list[EntitySettingModel]
system_settings: Optional[list[SystemSettingModel]] = None
@validator("system_settings")
def lower_case_name(cls, v: str) -> Union[str, None]:
if v is not None:
raise NotImplementedError("system_setting is not implemented yet")
return v
class QueryModel(BaseModel):
"""translate a question about the story outcome into a programmatic expression"""
question: str = Field(alias=Constant.narrative_input.value) # input
expression: str # output, part of llm completion
llm_error_msg: str # output, part of llm completion
_result_table: str = PrivateAttr() # result of the executed query
class ResultModel(BaseModel):
question: str = Field(alias=Constant.narrative_input.value) # input
_result_table: str = PrivateAttr() # result of the executed query
class StoryModel(BaseModel):
causal_operations: Any = Field(required=True)
intervention: Any = Field(required=True)
query: Any = Field(required=True)
_outcome_table: pd.DataFrame = PrivateAttr(default=None)
_networkx_wrapper: Any = PrivateAttr(default=None)
def __init__(self, **kwargs: Any):
super().__init__(**kwargs)
self._compute()
# TODO: when langchain adopts pydantic.v2 replace w/ `__post_init__`
# misses hints github.com/pydantic/pydantic/issues/1729#issuecomment-1300576214
@root_validator
def check_intervention_is_valid(cls, values: dict) -> dict:
valid_names = [e.name for e in values["causal_operations"].entities]
for setting in values["intervention"].entity_settings:
if setting.name not in valid_names:
error_msg = f"""
Hypothetical question has an invalid entity name.
`{setting.name}` not in `{valid_names}`
"""
raise ValueError(error_msg)
return values
def _block_back_door_paths(self) -> None:
# stop intervention entities from depending on others
intervention_entities = [
entity_setting.name for entity_setting in self.intervention.entity_settings
]
for entity in self.causal_operations.entities:
if entity.name in intervention_entities:
entity.depends_on = []
entity.code = "pass"
def _set_initial_conditions(self) -> None:
for entity_setting in self.intervention.entity_settings:
for entity in self.causal_operations.entities:
if entity.name == entity_setting.name:
entity.value = entity_setting.value
def _make_graph(self) -> None:
self._networkx_wrapper = NetworkxEntityGraph()
for entity in self.causal_operations.entities:
for parent_name in entity.depends_on:
self._networkx_wrapper._graph.add_edge(
parent_name, entity.name, relation=entity.code
)
# TODO: is it correct to drop entities with no impact on the outcome (?)
self.causal_operations.entities = [
entity
for entity in self.causal_operations.entities
if entity.name in self._networkx_wrapper.get_topological_sort()
]
def _sort_entities(self) -> None:
# order the sequence of causal actions
sorted_nodes = self._networkx_wrapper.get_topological_sort()
self.causal_operations.entities.sort(key=lambda x: sorted_nodes.index(x.name))
def _forward_propagate(self) -> None:
entity_scope = {
entity.name: entity for entity in self.causal_operations.entities
}
for entity in self.causal_operations.entities:
if entity.code == "pass":
continue
else:
# gist.github.com/dean0x7d/df5ce97e4a1a05be4d56d1378726ff92
exec(entity.code, globals(), entity_scope)
row_values = [entity.dict() for entity in entity_scope.values()]
self._outcome_table = pd.DataFrame(row_values)
def _run_query(self) -> None:
def humanize_sql_error_msg(error: str) -> str:
pattern = r"column\s+(.*?)\s+not found"
col_match = re.search(pattern, error)
if col_match:
return (
"SQL error: "
+ col_match.group(1)
+ " is not an attribute in your story!"
)
else:
return str(error)
if self.query.llm_error_msg == "":
try:
df = self._outcome_table # noqa
query_result = duckdb.sql(self.query.expression).df()
self.query._result_table = query_result
except duckdb.BinderException as e:
self.query._result_table = humanize_sql_error_msg(str(e))
except Exception as e:
self.query._result_table = str(e)
else:
msg = "LLM maybe failed to translate question to SQL query."
raise ValueError(
{
"question": self.query.question,
"llm_error_msg": self.query.llm_error_msg,
"msg": msg,
}
)
def _compute(self) -> Any:
self._block_back_door_paths()
self._set_initial_conditions()
self._make_graph()
self._sort_entities()
self._forward_propagate()
self._run_query()
def print_debug_report(self) -> None:
report = {
"outcome": self._outcome_table,
"query": self.query.dict(),
"result": self.query._result_table,
}
from pprint import pprint
pprint(report)

View File

@@ -0,0 +1,113 @@
# flake8: noqa E501
# fmt: off
template = (
"""
Transform the math story plot into a JSON object. Don't guess at any of the parts.
{format_instructions}
Story: Boris has seven times the number of pets as Marcia. Jan has three times the number of pets as Marcia. Marcia has two more pets than Cindy.
# JSON:
{{
"attribute": "pet_count",
"entities": [
{{
"name": "cindy",
"value": 0,
"depends_on": [],
"code": "pass"
}},
{{
"name": "marcia",
"value": 0,
"depends_on": ["cindy"],
"code": "marcia.value = cindy.value + 2"
}},
{{
"name": "boris",
"value": 0,
"depends_on": ["marcia"],
"code": "boris.value = marcia.value * 7"
}},
{{
"name": "jan",
"value": 0,
"depends_on": ["marcia"],
"code": "jan.value = marcia.value * 3"
}}
]
}}
Story: Boris gives 20 percent of his money to Marcia. Marcia gives 10
percent of her money to Cindy. Cindy gives 5 percent of her money to Jan.
# JSON:
{{
"attribute": "money",
"entities": [
{{
"name": "boris",
"value": 0,
"depends_on": [],
"code": "pass"
}},
{{
"name": "marcia",
"value": 0,
"depends_on": ["boris"],
"code": "
marcia.value = boris.value * 0.2
boris.value = boris.value * 0.8
"
}},
{{
"name": "cindy",
"value": 0,
"depends_on": ["marcia"],
"code": "
cindy.value = marcia.value * 0.1
marcia.value = marcia.value * 0.9
"
}},
{{
"name": "jan",
"value": 0,
"depends_on": ["cindy"],
"code": "
jan.value = cindy.value * 0.05
cindy.value = cindy.value * 0.9
"
}}
]
}}
Story: {narrative_input}
# JSON:
""".strip()
+ "\n"
)
# fmt: on

View File

@@ -0,0 +1,59 @@
# flake8: noqa E501
# fmt: off
template = (
"""
Transform the hypothetical whatif statement into JSON. Don't guess at any of the parts. Write NONE if you are unsure.
{format_instructions}
statement: if cindy's pet count was 4
# JSON:
{{
"entity_settings" : [
{{ "name": "cindy", "attribute": "pet_count", "value": "4" }}
]
}}
statement: Let's say boris has ten dollars and Bill has 20 dollars.
# JSON:
{{
"entity_settings" : [
{{ "name": "boris", "attribute": "dollars", "value": "10" }},
{{ "name": "bill", "attribute": "dollars", "value": "20" }}
]
}}
Statement: {narrative_input}
# JSON:
""".strip()
+ "\n\n\n"
)
# fmt: on

View File

@@ -0,0 +1,79 @@
# flake8: noqa E501
# fmt: off
template = (
"""
Split the given text into three parts: the question, the story_hypothetical, and the logic. Don't guess at any of the parts. Write NONE if you are unsure.
{format_instructions}
Q: Boris has seven times the number of pets as Marcia. Jan has three times the number of pets as Marcia. Marcia has two more pets than Cindy. If Cindy has four pets, how many total pets do the three have?
# JSON
{{
"story_outcome_question": "how many total pets do the three have?",
"story_hypothetical": "If Cindy has four pets",
"story_plot": "Boris has seven times the number of pets as Marcia. Jan has three times the number of pets as Marcia. Marcia has two more pets than Cindy."
}}
Q: boris gives ten percent of his money to marcia. marcia gives ten
percent of her money to andy. If boris has 100 dollars, how much money
will andy have?
# JSON
{{
"story_outcome_question": "how much money will andy have?",
"story_hypothetical": "If boris has 100 dollars"
"story_plot": "boris gives ten percent of his money to marcia. marcia gives ten percent of her money to andy."
}}
Q: boris gives ten percent of his candy to marcia. marcia gives ten
percent of her candy to andy. If boris has 100 pounds of candy and marcia has
200 pounds of candy, then how many pounds of candy will andy have?
# JSON
{{
"story_outcome_question": "how many pounds of candy will andy have?",
"story_hypothetical": "If boris has 100 pounds of candy and marcia has 200 pounds of candy"
"story_plot": "boris gives ten percent of his candy to marcia. marcia gives ten percent of her candy to andy."
}}
Q: {narrative_input}
# JSON
""".strip()
+ "\n\n\n"
)
# fmt: on

View File

@@ -0,0 +1,270 @@
# flake8: noqa E501
# fmt: off
template = (
"""
Transform the narrative_input into an SQL expression. If you are
unsure, then do not guess, instead add a llm_error_msg that explains why you are unsure.
{format_instructions}
narrative_input: how much money will boris have?
# JSON:
{{
"narrative_input": "how much money will boris have?",
"llm_error_msg": "",
"expression": "SELECT name, value FROM df WHERE name = 'boris'"
}}
narrative_input: How much money does ted have?
# JSON:
{{
"narrative_input": "How much money does ted have?",
"llm_error_msg": "",
"expression": "SELECT name, value FROM df WHERE name = 'ted'"
}}
narrative_input: what is the sum of pet count for all the people?
# JSON:
{{
"narrative_input": "what is the sum of pet count for all the people?",
"llm_error_msg": "",
"expression": "SELECT SUM(value) FROM df"
}}
narrative_input: what's the average of the pet counts for all the people?
# JSON:
{{
"narrative_input": "what's the average of the pet counts for all the people?",
"llm_error_msg": "",
"expression": "SELECT AVG(value) FROM df"
}}
narrative_input: what's the maximum of the pet counts for all the people?
# JSON:
{{
"narrative_input": "what's the maximum of the pet counts for all the people?",
"llm_error_msg": "",
"expression": "SELECT MAX(value) FROM df"
}}
narrative_input: what's the minimum of the pet counts for all the people?
# JSON:
{{
"narrative_input": "what's the minimum of the pet counts for all the people?",
"llm_error_msg": "",
"expression": "SELECT MIN(value) FROM df"
}}
narrative_input: what's the number of people with pet counts greater than 10?
# JSON:
{{
"narrative_input": "what's the number of people with pet counts greater than 10?",
"llm_error_msg": "",
"expression": "SELECT COUNT(*) FROM df WHERE value > 10"
}}
narrative_input: what's the pet count for boris?
# JSON:
{{
"narrative_input": "what's the pet count for boris?",
"llm_error_msg": "",
"expression": "SELECT name, value FROM df WHERE name = 'boris'"
}}
narrative_input: what's the pet count for cindy and marcia?
# JSON:
{{
"narrative_input": "what's the pet count for cindy and marcia?",
"llm_error_msg": "",
"expression": "SELECT name, value FROM df WHERE name IN ('cindy', 'marcia')"
}}
narrative_input: what's the total pet count for cindy and marcia?
# JSON:
{{
"narrative_input": "what's the total pet count for cindy and marcia?",
"llm_error_msg": "",
"expression": "SELECT SUM(value) FROM df WHERE name IN ('cindy', 'marcia')"
}}
narrative_input: what's the total pet count for TED?
# JSON:
{{
"narrative_input": "what's the total pet count for TED?",
"llm_error_msg": "",
"expression": "SELECT SUM(value) FROM df WHERE name = 'TED'"
}}
narrative_input: what's the total dollar count for TED and cindy?
# JSON:
{{
"narrative_input": "what's the total dollar count for TED and cindy?",
"llm_error_msg": "",
"expression": "SELECT SUM(value) FROM df WHERE name IN ('TED', 'cindy')"
}}
narrative_input: what's the total pet count for TED and cindy?
# JSON:
{{
"narrative_input": "what's the total pet count for TED and cindy?",
"llm_error_msg": "",
"expression": "SELECT SUM(value) FROM df WHERE name IN ('TED', 'cindy')"
}}
narrative_input: what's the best for TED and cindy?
# JSON:
{{
"narrative_input": "what's the best for TED and cindy?",
"llm_error_msg": "ambiguous narrative_input, not sure what 'best' means",
"expression": ""
}}
narrative_input: what's the value?
# JSON:
{{
"narrative_input": "what's the value?",
"llm_error_msg": "ambiguous narrative_input, not sure what entity is being asked about",
"expression": ""
}}
narrative_input: how many total pets do the three have?
# JSON:
{{
"narrative_input": "how many total pets do the three have?",
"llm_error_msg": "",
"expression": "SELECT SUM(value) FROM df"
}}
narrative_input: {narrative_input}
# JSON:
""".strip()
+ "\n\n\n"
)
# fmt: on