mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-27 00:48:45 +00:00
Merge pull request #6 from VowpalWabbit/cb_defaults
cb defaults and some fixes
This commit is contained in:
commit
5fb781dfde
2
.github/workflows/langchain_ci.yml
vendored
2
.github/workflows/langchain_ci.yml
vendored
@ -60,7 +60,7 @@ jobs:
|
|||||||
- "3.8"
|
- "3.8"
|
||||||
- "3.9"
|
- "3.9"
|
||||||
- "3.10"
|
- "3.10"
|
||||||
- "3.11"
|
# - "3.11"
|
||||||
name: Python ${{ matrix.python-version }} extended tests
|
name: Python ${{ matrix.python-version }} extended tests
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
|
@ -227,15 +227,17 @@ class Embedder(Generic[TEvent], ABC):
|
|||||||
...
|
...
|
||||||
|
|
||||||
|
|
||||||
class SelectionScorer(ABC, BaseModel):
|
class SelectionScorer(Generic[TEvent], ABC, BaseModel):
|
||||||
"""Abstract method to grade the chosen selection or the response of the llm"""
|
"""Abstract method to grade the chosen selection or the response of the llm"""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def score_response(self, inputs: Dict[str, Any], llm_response: str) -> float:
|
def score_response(
|
||||||
|
self, inputs: Dict[str, Any], llm_response: str, event: TEvent
|
||||||
|
) -> float:
|
||||||
...
|
...
|
||||||
|
|
||||||
|
|
||||||
class AutoSelectionScorer(SelectionScorer, BaseModel):
|
class AutoSelectionScorer(SelectionScorer[Event], BaseModel):
|
||||||
llm_chain: LLMChain
|
llm_chain: LLMChain
|
||||||
prompt: Union[BasePromptTemplate, None] = None
|
prompt: Union[BasePromptTemplate, None] = None
|
||||||
scoring_criteria_template_str: Optional[str] = None
|
scoring_criteria_template_str: Optional[str] = None
|
||||||
@ -254,7 +256,7 @@ class AutoSelectionScorer(SelectionScorer, BaseModel):
|
|||||||
def get_default_prompt() -> ChatPromptTemplate:
|
def get_default_prompt() -> ChatPromptTemplate:
|
||||||
human_template = 'Given this based_on "{rl_chain_selected_based_on}" \
|
human_template = 'Given this based_on "{rl_chain_selected_based_on}" \
|
||||||
as the most important attribute, rank how good or bad this text is: \
|
as the most important attribute, rank how good or bad this text is: \
|
||||||
"{llm_response}".'
|
"{rl_chain_selected}".'
|
||||||
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
|
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
|
||||||
default_system_prompt = AutoSelectionScorer.get_default_system_prompt()
|
default_system_prompt = AutoSelectionScorer.get_default_system_prompt()
|
||||||
chat_prompt = ChatPromptTemplate.from_messages(
|
chat_prompt = ChatPromptTemplate.from_messages(
|
||||||
@ -281,7 +283,9 @@ class AutoSelectionScorer(SelectionScorer, BaseModel):
|
|||||||
values["llm_chain"] = LLMChain(llm=llm, prompt=prompt)
|
values["llm_chain"] = LLMChain(llm=llm, prompt=prompt)
|
||||||
return values
|
return values
|
||||||
|
|
||||||
def score_response(self, inputs: Dict[str, Any], llm_response: str) -> float:
|
def score_response(
|
||||||
|
self, inputs: Dict[str, Any], llm_response: str, event: Event
|
||||||
|
) -> float:
|
||||||
ranking = self.llm_chain.predict(llm_response=llm_response, **inputs)
|
ranking = self.llm_chain.predict(llm_response=llm_response, **inputs)
|
||||||
ranking = ranking.strip()
|
ranking = ranking.strip()
|
||||||
try:
|
try:
|
||||||
@ -304,7 +308,7 @@ class RLChain(Chain, Generic[TEvent]):
|
|||||||
- prompt (BasePromptTemplate): The template for the base prompt.
|
- prompt (BasePromptTemplate): The template for the base prompt.
|
||||||
- selection_scorer (Union[SelectionScorer, None]): Scorer for the selection. Can be set to None.
|
- selection_scorer (Union[SelectionScorer, None]): Scorer for the selection. Can be set to None.
|
||||||
- policy (Optional[Policy]): The policy used by the chain to learn to populate a dynamic prompt.
|
- policy (Optional[Policy]): The policy used by the chain to learn to populate a dynamic prompt.
|
||||||
- auto_embed (bool): Determines if embedding should be automatic. Default is True.
|
- auto_embed (bool): Determines if embedding should be automatic. Default is False.
|
||||||
- metrics (Optional[MetricsTracker]): Tracker for metrics, can be set to None.
|
- metrics (Optional[MetricsTracker]): Tracker for metrics, can be set to None.
|
||||||
|
|
||||||
Initialization Attributes:
|
Initialization Attributes:
|
||||||
@ -338,7 +342,7 @@ class RLChain(Chain, Generic[TEvent]):
|
|||||||
prompt: BasePromptTemplate
|
prompt: BasePromptTemplate
|
||||||
selection_scorer: Union[SelectionScorer, None]
|
selection_scorer: Union[SelectionScorer, None]
|
||||||
active_policy: Policy = _NoOpPolicy()
|
active_policy: Policy = _NoOpPolicy()
|
||||||
auto_embed: bool = True
|
auto_embed: bool = False
|
||||||
selected_input_key = "rl_chain_selected"
|
selected_input_key = "rl_chain_selected"
|
||||||
selected_based_on_input_key = "rl_chain_selected_based_on"
|
selected_based_on_input_key = "rl_chain_selected_based_on"
|
||||||
metrics: Optional[MetricsTracker] = None
|
metrics: Optional[MetricsTracker] = None
|
||||||
@ -492,7 +496,7 @@ class RLChain(Chain, Generic[TEvent]):
|
|||||||
try:
|
try:
|
||||||
if self.selection_scorer:
|
if self.selection_scorer:
|
||||||
score = self.selection_scorer.score_response(
|
score = self.selection_scorer.score_response(
|
||||||
inputs=next_chain_inputs, llm_response=output
|
inputs=next_chain_inputs, llm_response=output, event=event
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.info(
|
logger.info(
|
||||||
@ -553,7 +557,7 @@ def embed_string_type(
|
|||||||
|
|
||||||
def embed_dict_type(item: Dict, model: Any) -> Dict[str, Any]:
|
def embed_dict_type(item: Dict, model: Any) -> Dict[str, Any]:
|
||||||
"""Helper function to embed a dictionary item."""
|
"""Helper function to embed a dictionary item."""
|
||||||
inner_dict: Dict[str, Any] = {}
|
inner_dict: Dict = {}
|
||||||
for ns, embed_item in item.items():
|
for ns, embed_item in item.items():
|
||||||
if isinstance(embed_item, list):
|
if isinstance(embed_item, list):
|
||||||
inner_dict[ns] = []
|
inner_dict[ns] = []
|
||||||
@ -568,10 +572,17 @@ def embed_dict_type(item: Dict, model: Any) -> Dict[str, Any]:
|
|||||||
def embed_list_type(
|
def embed_list_type(
|
||||||
item: list, model: Any, namespace: Optional[str] = None
|
item: list, model: Any, namespace: Optional[str] = None
|
||||||
) -> List[Dict[str, Union[str, List[str]]]]:
|
) -> List[Dict[str, Union[str, List[str]]]]:
|
||||||
ret_list: List[Dict[str, Union[str, List[str]]]] = []
|
ret_list: List = []
|
||||||
for embed_item in item:
|
for embed_item in item:
|
||||||
if isinstance(embed_item, dict):
|
if isinstance(embed_item, dict):
|
||||||
ret_list.append(embed_dict_type(embed_item, model))
|
ret_list.append(embed_dict_type(embed_item, model))
|
||||||
|
elif isinstance(embed_item, list):
|
||||||
|
item_embedding = embed_list_type(embed_item, model, namespace)
|
||||||
|
# Get the first key from the first dictionary
|
||||||
|
first_key = next(iter(item_embedding[0]))
|
||||||
|
# Group the values under that key
|
||||||
|
grouping = {first_key: [item[first_key] for item in item_embedding]}
|
||||||
|
ret_list.append(grouping)
|
||||||
else:
|
else:
|
||||||
ret_list.append(embed_string_type(embed_item, model, namespace))
|
ret_list.append(embed_string_type(embed_item, model, namespace))
|
||||||
return ret_list
|
return ret_list
|
||||||
|
@ -161,7 +161,7 @@ class PickBest(base.RLChain[PickBestEvent]):
|
|||||||
"--quiet",
|
"--quiet",
|
||||||
"--interactions=::",
|
"--interactions=::",
|
||||||
"--coin",
|
"--coin",
|
||||||
"--epsilon=0.2",
|
"--squarecb",
|
||||||
]
|
]
|
||||||
else:
|
else:
|
||||||
if "--cb_explore_adf" not in vw_cmd:
|
if "--cb_explore_adf" not in vw_cmd:
|
||||||
|
@ -140,7 +140,12 @@ def test_user_defined_scorer() -> None:
|
|||||||
llm, PROMPT = setup()
|
llm, PROMPT = setup()
|
||||||
|
|
||||||
class CustomSelectionScorer(rl_chain.SelectionScorer):
|
class CustomSelectionScorer(rl_chain.SelectionScorer):
|
||||||
def score_response(self, inputs: Dict[str, Any], llm_response: str) -> float:
|
def score_response(
|
||||||
|
self,
|
||||||
|
inputs: Dict[str, Any],
|
||||||
|
llm_response: str,
|
||||||
|
event: pick_best_chain.PickBestEvent,
|
||||||
|
) -> float:
|
||||||
score = 200
|
score = 200
|
||||||
return score
|
return score
|
||||||
|
|
||||||
@ -161,11 +166,11 @@ def test_user_defined_scorer() -> None:
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
|
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
|
||||||
def test_default_embeddings() -> None:
|
def test_auto_embeddings_on() -> None:
|
||||||
llm, PROMPT = setup()
|
llm, PROMPT = setup()
|
||||||
feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder())
|
feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder())
|
||||||
chain = pick_best_chain.PickBest.from_llm(
|
chain = pick_best_chain.PickBest.from_llm(
|
||||||
llm=llm, prompt=PROMPT, feature_embedder=feature_embedder
|
llm=llm, prompt=PROMPT, feature_embedder=feature_embedder, auto_embed=True
|
||||||
)
|
)
|
||||||
|
|
||||||
str1 = "0"
|
str1 = "0"
|
||||||
@ -194,6 +199,32 @@ def test_default_embeddings() -> None:
|
|||||||
assert vw_str == expected
|
assert vw_str == expected
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
|
||||||
|
def test_default_auto_embedder_is_off() -> None:
|
||||||
|
llm, PROMPT = setup()
|
||||||
|
feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder())
|
||||||
|
chain = pick_best_chain.PickBest.from_llm(
|
||||||
|
llm=llm, prompt=PROMPT, feature_embedder=feature_embedder
|
||||||
|
)
|
||||||
|
|
||||||
|
str1 = "0"
|
||||||
|
str2 = "1"
|
||||||
|
str3 = "2"
|
||||||
|
ctx_str_1 = "context1"
|
||||||
|
|
||||||
|
expected = f"""shared |User {ctx_str_1} \n|action {str1} \n|action {str2} \n|action {str3} """ # noqa
|
||||||
|
|
||||||
|
actions = [str1, str2, str3]
|
||||||
|
|
||||||
|
response = chain.run(
|
||||||
|
User=pick_best_chain.base.BasedOn(ctx_str_1),
|
||||||
|
action=pick_best_chain.base.ToSelectFrom(actions),
|
||||||
|
)
|
||||||
|
selection_metadata = response["selection_metadata"]
|
||||||
|
vw_str = feature_embedder.format(selection_metadata)
|
||||||
|
assert vw_str == expected
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
|
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
|
||||||
def test_default_embeddings_off() -> None:
|
def test_default_embeddings_off() -> None:
|
||||||
llm, PROMPT = setup()
|
llm, PROMPT = setup()
|
||||||
@ -225,7 +256,7 @@ def test_default_embeddings_mixed_w_explicit_user_embeddings() -> None:
|
|||||||
llm, PROMPT = setup()
|
llm, PROMPT = setup()
|
||||||
feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder())
|
feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder())
|
||||||
chain = pick_best_chain.PickBest.from_llm(
|
chain = pick_best_chain.PickBest.from_llm(
|
||||||
llm=llm, prompt=PROMPT, feature_embedder=feature_embedder
|
llm=llm, prompt=PROMPT, feature_embedder=feature_embedder, auto_embed=True
|
||||||
)
|
)
|
||||||
|
|
||||||
str1 = "0"
|
str1 = "0"
|
||||||
|
Loading…
Reference in New Issue
Block a user