From b162f1c8e1e2823fc17242563bb469107651bdb3 Mon Sep 17 00:00:00 2001 From: olgavrou Date: Mon, 4 Sep 2023 05:50:15 -0400 Subject: [PATCH 1/7] dot product of encodings as default auto_embed --- .../langchain/chains/rl_chain/base.py | 7 +- .../chains/rl_chain/pick_best_chain.py | 133 ++++++++++++++++-- .../rl_chain/test_pick_best_chain_call.py | 40 +++--- .../rl_chain/test_pick_best_text_embedder.py | 30 ++-- 4 files changed, 163 insertions(+), 47 deletions(-) diff --git a/libs/langchain/langchain/chains/rl_chain/base.py b/libs/langchain/langchain/chains/rl_chain/base.py index 6e01bb5063a..66ead42e710 100644 --- a/libs/langchain/langchain/chains/rl_chain/base.py +++ b/libs/langchain/langchain/chains/rl_chain/base.py @@ -229,6 +229,9 @@ class VwPolicy(Policy): class Embedder(Generic[TEvent], ABC): + def __init__(self, *args: Any, **kwargs: Any): + pass + @abstractmethod def format(self, event: TEvent) -> str: ... @@ -498,8 +501,8 @@ class RLChain(Chain, Generic[TEvent]): ) -> Dict[str, Any]: _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager() - if self.auto_embed: - inputs = prepare_inputs_for_autoembed(inputs=inputs) + # if self.auto_embed: + # inputs = prepare_inputs_for_autoembed(inputs=inputs) event: TEvent = self._call_before_predict(inputs=inputs) prediction = self.active_policy.predict(event=event) diff --git a/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py b/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py index 04218d29348..5ed32c4cadc 100644 --- a/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py +++ b/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py @@ -53,21 +53,25 @@ class PickBestFeatureEmbedder(base.Embedder[PickBestEvent]): model name (Any, optional): The type of embeddings to be used for feature representation. Defaults to BERT SentenceTransformer. 
""" # noqa E501 - def __init__(self, model: Optional[Any] = None, *args: Any, **kwargs: Any): + def __init__( + self, auto_embed: bool, model: Optional[Any] = None, *args: Any, **kwargs: Any + ): super().__init__(*args, **kwargs) if model is None: from sentence_transformers import SentenceTransformer - model = SentenceTransformer("bert-base-nli-mean-tokens") + model = SentenceTransformer("all-mpnet-base-v2") + # model = SentenceTransformer("all-MiniLM-L6-v2") self.model = model + self.auto_embed = auto_embed - def format(self, event: PickBestEvent) -> str: - """ - Converts the `BasedOn` and `ToSelectFrom` into a format that can be used by VW - """ + @staticmethod + def _str(embedding): + return " ".join([f"{i}:{e}" for i, e in enumerate(embedding)]) + def get_label(self, event: PickBestEvent) -> tuple: cost = None if event.selected: chosen_action = event.selected.index @@ -77,7 +81,11 @@ class PickBestFeatureEmbedder(base.Embedder[PickBestEvent]): else None ) prob = event.selected.probability + return chosen_action, cost, prob + else: + return None, None, None + def get_context_and_action_embeddings(self, event: PickBestEvent) -> tuple: context_emb = base.embed(event.based_on, self.model) if event.based_on else None to_select_from_var_name, to_select_from = next( iter(event.to_select_from.items()), (None, None) @@ -97,6 +105,97 @@ class PickBestFeatureEmbedder(base.Embedder[PickBestEvent]): raise ValueError( "Context and to_select_from must be provided in the inputs dictionary" ) + return context_emb, action_embs + + def get_indexed_dot_product(self, context_emb: List, action_embs: List) -> Dict: + import numpy as np + + unique_contexts = set() + for context_item in context_emb: + for ns, ee in context_item.items(): + if isinstance(ee, list): + for ea in ee: + unique_contexts.add(f"{ns}={ea}") + else: + unique_contexts.add(f"{ns}={ee}") + + encoded_contexts = self.model.encode(list(unique_contexts)) + context_embeddings = dict(zip(unique_contexts, encoded_contexts)) + + unique_actions = set() + for action in action_embs: + for ns, e in action.items(): + if isinstance(e, list): + for ea in e: + unique_actions.add(f"{ns}={ea}") + else: + unique_actions.add(f"{ns}={e}") + + encoded_actions = self.model.encode(list(unique_actions)) + action_embeddings = dict(zip(unique_actions, encoded_actions)) + + action_matrix = np.stack([v for k, v in action_embeddings.items()]) + context_matrix = np.stack([v for k, v in context_embeddings.items()]) + dot_product_matrix = np.dot(context_matrix, action_matrix.T) + + indexed_dot_product = {} + + for i, context_key in enumerate(context_embeddings.keys()): + indexed_dot_product[context_key] = {} + for j, action_key in enumerate(action_embeddings.keys()): + indexed_dot_product[context_key][action_key] = dot_product_matrix[i, j] + + return indexed_dot_product + + def format_auto_embed_on(self, event: PickBestEvent) -> str: + chosen_action, cost, prob = self.get_label(event) + context_emb, action_embs = self.get_context_and_action_embeddings(event) + indexed_dot_product = self.get_indexed_dot_product(context_emb, action_embs) + + action_lines = [] + for i, action in enumerate(action_embs): + line_parts = [] + dot_prods = [] + if cost is not None and chosen_action == i: + line_parts.append(f"{chosen_action}:{cost}:{prob}") + for ns, action in action.items(): + line_parts.append(f"|{ns}") + elements = action if isinstance(action, list) else [action] + nsa = [] + for elem in elements: + line_parts.append(f"{elem}") + ns_a = f"{ns}={elem}" + nsa.append(ns_a) + for 
k,v in indexed_dot_product.items(): + dot_prods.append(v[ns_a]) + nsa = " ".join(nsa) + line_parts.append(f"|# {nsa}") + + line_parts.append(f"|embedding {self._str(dot_prods)}") + action_lines.append(" ".join(line_parts)) + + shared = [] + for item in context_emb: + for ns, context in item.items(): + shared.append(f"|{ns}") + elements = context if isinstance(context, list) else [context] + nsc = [] + for elem in elements: + shared.append(f"{elem}") + nsc.append(f"{ns}={elem}") + nsc = " ".join(nsc) + shared.append(f"|@ {nsc}") + + r = "shared " + " ".join(shared) + "\n" + "\n".join(action_lines) + print(r) + return r + + def format_auto_embed_off(self, event: PickBestEvent) -> str: + """ + Converts the `BasedOn` and `ToSelectFrom` into a format that can be used by VW + """ + chosen_action, cost, prob = self.get_label(event) + context_emb, action_embs = self.get_context_and_action_embeddings(event) example_string = "" example_string += "shared " @@ -120,6 +219,12 @@ class PickBestFeatureEmbedder(base.Embedder[PickBestEvent]): # Strip the last newline return example_string[:-1] + def format(self, event: PickBestEvent) -> str: + if self.auto_embed: + return self.format_auto_embed_on(event) + else: + return self.format_auto_embed_off(event) + class PickBest(base.RLChain[PickBestEvent]): """ @@ -154,12 +259,20 @@ class PickBest(base.RLChain[PickBestEvent]): *args: Any, **kwargs: Any, ): + auto_embed = kwargs.get("auto_embed", False) + vw_cmd = kwargs.get("vw_cmd", []) if not vw_cmd: - vw_cmd = [ + interactions = ["--interactions=::"] + if auto_embed: + interactions = [ + "--interactions=@#", + "--ignore_linear=@", + "--ignore_linear=#", + "--noconstant", + ] + vw_cmd = interactions + [ "--cb_explore_adf", - "--quiet", - "--interactions=::", "--coin", "--squarecb", ] @@ -172,7 +285,7 @@ class PickBest(base.RLChain[PickBestEvent]): feature_embedder = kwargs.get("feature_embedder", None) if not feature_embedder: - feature_embedder = PickBestFeatureEmbedder() + feature_embedder = PickBestFeatureEmbedder(auto_embed=auto_embed) kwargs["feature_embedder"] = feature_embedder super().__init__(*args, **kwargs) diff --git a/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_chain_call.py b/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_chain_call.py index 7bfa5ad5506..3678523a04a 100644 --- a/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_chain_call.py +++ b/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_chain_call.py @@ -26,7 +26,7 @@ def test_multiple_ToSelectFrom_throws() -> None: chain = pick_best_chain.PickBest.from_llm( llm=llm, prompt=PROMPT, - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), ) actions = ["0", "1", "2"] with pytest.raises(ValueError): @@ -43,7 +43,7 @@ def test_missing_basedOn_from_throws() -> None: chain = pick_best_chain.PickBest.from_llm( llm=llm, prompt=PROMPT, - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), ) actions = ["0", "1", "2"] with pytest.raises(ValueError): @@ -56,7 +56,7 @@ def test_ToSelectFrom_not_a_list_throws() -> None: chain = pick_best_chain.PickBest.from_llm( llm=llm, prompt=PROMPT, - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, 
model=MockEncoder()), ) actions = {"actions": ["0", "1", "2"]} with pytest.raises(ValueError): @@ -75,7 +75,7 @@ def test_update_with_delayed_score_with_auto_validator_throws() -> None: llm=llm, prompt=PROMPT, selection_scorer=rl_chain.AutoSelectionScorer(llm=auto_val_llm), - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), ) actions = ["0", "1", "2"] response = chain.run( @@ -98,7 +98,7 @@ def test_update_with_delayed_score_force() -> None: llm=llm, prompt=PROMPT, selection_scorer=rl_chain.AutoSelectionScorer(llm=auto_val_llm), - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), ) actions = ["0", "1", "2"] response = chain.run( @@ -121,7 +121,7 @@ def test_update_with_delayed_score() -> None: llm=llm, prompt=PROMPT, selection_scorer=None, - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), ) actions = ["0", "1", "2"] response = chain.run( @@ -153,7 +153,7 @@ def test_user_defined_scorer() -> None: llm=llm, prompt=PROMPT, selection_scorer=CustomSelectionScorer(), - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), ) actions = ["0", "1", "2"] response = chain.run( @@ -166,11 +166,11 @@ def test_user_defined_scorer() -> None: @pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers") -def test_auto_embeddings_on() -> None: +def test_everything_embedded() -> None: llm, PROMPT = setup() - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) chain = pick_best_chain.PickBest.from_llm( - llm=llm, prompt=PROMPT, feature_embedder=feature_embedder, auto_embed=True + llm=llm, prompt=PROMPT, feature_embedder=feature_embedder, auto_embed=False ) str1 = "0" @@ -189,8 +189,8 @@ def test_auto_embeddings_on() -> None: actions = [str1, str2, str3] response = chain.run( - User=rl_chain.BasedOn(ctx_str_1), - action=rl_chain.ToSelectFrom(actions), + User=rl_chain.EmbedAndKeep(rl_chain.BasedOn(ctx_str_1)), + action=rl_chain.EmbedAndKeep(rl_chain.ToSelectFrom(actions)), ) selection_metadata = response["selection_metadata"] vw_str = feature_embedder.format(selection_metadata) @@ -200,7 +200,7 @@ def test_auto_embeddings_on() -> None: @pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers") def test_default_auto_embedder_is_off() -> None: llm, PROMPT = setup() - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) chain = pick_best_chain.PickBest.from_llm( llm=llm, prompt=PROMPT, feature_embedder=feature_embedder ) @@ -226,7 +226,7 @@ def test_default_auto_embedder_is_off() -> None: @pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers") def test_default_embeddings_off() -> None: llm, PROMPT = setup() - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) chain = pick_best_chain.PickBest.from_llm( 
llm=llm, prompt=PROMPT, feature_embedder=feature_embedder, auto_embed=False ) @@ -252,7 +252,7 @@ def test_default_embeddings_off() -> None: @pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers") def test_default_embeddings_mixed_w_explicit_user_embeddings() -> None: llm, PROMPT = setup() - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=True, model=MockEncoder()) chain = pick_best_chain.PickBest.from_llm( llm=llm, prompt=PROMPT, feature_embedder=feature_embedder, auto_embed=True ) @@ -291,7 +291,7 @@ def test_default_no_scorer_specified() -> None: chain = pick_best_chain.PickBest.from_llm( llm=chain_llm, prompt=PROMPT, - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), ) response = chain.run( User=rl_chain.BasedOn("Context"), @@ -310,7 +310,7 @@ def test_explicitly_no_scorer() -> None: llm=llm, prompt=PROMPT, selection_scorer=None, - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), ) response = chain.run( User=rl_chain.BasedOn("Context"), @@ -330,7 +330,7 @@ def test_auto_scorer_with_user_defined_llm() -> None: llm=llm, prompt=PROMPT, selection_scorer=rl_chain.AutoSelectionScorer(llm=scorer_llm), - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), ) response = chain.run( User=rl_chain.BasedOn("Context"), @@ -348,7 +348,7 @@ def test_calling_chain_w_reserved_inputs_throws() -> None: chain = pick_best_chain.PickBest.from_llm( llm=llm, prompt=PROMPT, - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), ) with pytest.raises(ValueError): chain.run( @@ -371,7 +371,7 @@ def test_activate_and_deactivate_scorer() -> None: llm=llm, prompt=PROMPT, selection_scorer=pick_best_chain.base.AutoSelectionScorer(llm=scorer_llm), - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), ) response = chain.run( User=pick_best_chain.base.BasedOn("Context"), diff --git a/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_text_embedder.py b/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_text_embedder.py index 8683e3b0e54..734dae8d259 100644 --- a/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_text_embedder.py +++ b/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_text_embedder.py @@ -9,7 +9,7 @@ encoded_keyword = "[encoded]" @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_missing_context_throws() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) named_action = {"action": ["0", "1", "2"]} event = pick_best_chain.PickBestEvent( inputs={}, to_select_from=named_action, based_on={} @@ -20,7 +20,7 @@ def test_pickbest_textembedder_missing_context_throws() -> None: @pytest.mark.requires("vowpal_wabbit_next") def 
test_pickbest_textembedder_missing_actions_throws() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) event = pick_best_chain.PickBestEvent( inputs={}, to_select_from={}, based_on={"context": "context"} ) @@ -30,7 +30,7 @@ def test_pickbest_textembedder_missing_actions_throws() -> None: @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_no_label_no_emb() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) named_actions = {"action1": ["0", "1", "2"]} expected = """shared |context context \n|action1 0 \n|action1 1 \n|action1 2 """ event = pick_best_chain.PickBestEvent( @@ -42,7 +42,7 @@ def test_pickbest_textembedder_no_label_no_emb() -> None: @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_w_label_no_score_no_emb() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) named_actions = {"action1": ["0", "1", "2"]} expected = """shared |context context \n|action1 0 \n|action1 1 \n|action1 2 """ selected = pick_best_chain.PickBestSelected(index=0, probability=1.0) @@ -58,7 +58,7 @@ def test_pickbest_textembedder_w_label_no_score_no_emb() -> None: @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_w_full_label_no_emb() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) named_actions = {"action1": ["0", "1", "2"]} expected = ( """shared |context context \n0:-0.0:1.0 |action1 0 \n|action1 1 \n|action1 2 """ @@ -76,7 +76,7 @@ def test_pickbest_textembedder_w_full_label_no_emb() -> None: @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_w_full_label_w_emb() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) str1 = "0" str2 = "1" str3 = "2" @@ -100,7 +100,7 @@ def test_pickbest_textembedder_w_full_label_w_emb() -> None: @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_w_full_label_w_embed_and_keep() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) str1 = "0" str2 = "1" str3 = "2" @@ -124,7 +124,7 @@ def test_pickbest_textembedder_w_full_label_w_embed_and_keep() -> None: @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_more_namespaces_no_label_no_emb() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) named_actions = {"action1": [{"a": "0", "b": "0"}, "1", "2"]} context = {"context1": "context1", "context2": "context2"} expected = """shared |context1 context1 |context2 context2 \n|a 0 |b 0 \n|action1 1 \n|action1 2 """ # noqa: E501 @@ -137,7 +137,7 @@ def test_pickbest_textembedder_more_namespaces_no_label_no_emb() -> None: @pytest.mark.requires("vowpal_wabbit_next") def 
test_pickbest_textembedder_more_namespaces_w_label_no_emb() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) named_actions = {"action1": [{"a": "0", "b": "0"}, "1", "2"]} context = {"context1": "context1", "context2": "context2"} expected = """shared |context1 context1 |context2 context2 \n|a 0 |b 0 \n|action1 1 \n|action1 2 """ # noqa: E501 @@ -151,7 +151,7 @@ def test_pickbest_textembedder_more_namespaces_w_label_no_emb() -> None: @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_more_namespaces_w_full_label_no_emb() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) named_actions = {"action1": [{"a": "0", "b": "0"}, "1", "2"]} context = {"context1": "context1", "context2": "context2"} expected = """shared |context1 context1 |context2 context2 \n0:-0.0:1.0 |a 0 |b 0 \n|action1 1 \n|action1 2 """ # noqa: E501 @@ -165,7 +165,7 @@ def test_pickbest_textembedder_more_namespaces_w_full_label_no_emb() -> None: @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_more_namespaces_w_full_label_w_full_emb() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) str1 = "0" str2 = "1" @@ -198,7 +198,7 @@ def test_pickbest_textembedder_more_namespaces_w_full_label_w_full_emb() -> None def test_pickbest_textembedder_more_namespaces_w_full_label_w_full_embed_and_keep() -> ( None ): - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) str1 = "0" str2 = "1" @@ -231,7 +231,7 @@ def test_pickbest_textembedder_more_namespaces_w_full_label_w_full_embed_and_kee @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_more_namespaces_w_full_label_w_partial_emb() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) str1 = "0" str2 = "1" @@ -263,7 +263,7 @@ def test_pickbest_textembedder_more_namespaces_w_full_label_w_partial_emb() -> N @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_more_namespaces_w_full_label_w_partial_emakeep() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) str1 = "0" str2 = "1" @@ -298,7 +298,7 @@ def test_pickbest_textembedder_more_namespaces_w_full_label_w_partial_emakeep() @pytest.mark.requires("vowpal_wabbit_next") def test_raw_features_underscored() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) str1 = "this is a long string" str1_underscored = str1.replace(" ", "_") encoded_str1 = rl_chain.stringify_embedding(list(encoded_keyword + str1)) From ca163f0ee698e3cf7216cf597904c5a855717585 Mon Sep 17 00:00:00 2001 From: olgavrou Date: Mon, 4 Sep 2023 07:10:44 -0400 Subject: [PATCH 2/7] fixes and tests --- .../langchain/chains/rl_chain/base.py | 
20 +--- .../chains/rl_chain/pick_best_chain.py | 48 ++++---- .../rl_chain/test_pick_best_chain_call.py | 111 +++++++++++++----- .../rl_chain/test_pick_best_text_embedder.py | 60 +++++++--- .../unit_tests/chains/rl_chain/test_utils.py | 12 ++ 5 files changed, 168 insertions(+), 83 deletions(-) diff --git a/libs/langchain/langchain/chains/rl_chain/base.py b/libs/langchain/langchain/chains/rl_chain/base.py index 66ead42e710..c69b21dd1f6 100644 --- a/libs/langchain/langchain/chains/rl_chain/base.py +++ b/libs/langchain/langchain/chains/rl_chain/base.py @@ -118,8 +118,7 @@ def get_based_on_and_to_select_from(inputs: Dict[str, Any]) -> Tuple[Dict, Dict] if not to_select_from: raise ValueError( - "No variables using 'ToSelectFrom' found in the inputs. \ - Please include at least one variable containing a list to select from." + "No variables using 'ToSelectFrom' found in the inputs. Please include at least one variable containing a list to select from." # noqa: E501 ) based_on = { @@ -303,9 +302,7 @@ class AutoSelectionScorer(SelectionScorer[Event], BaseModel): return resp except Exception as e: raise RuntimeError( - f"The auto selection scorer did not manage to score the response, \ - there is always the option to try again or tweak the reward prompt.\ - Error: {e}" + f"The auto selection scorer did not manage to score the response, there is always the option to try again or tweak the reward prompt. Error: {e}" # noqa: E501 ) @@ -426,8 +423,7 @@ class RLChain(Chain, Generic[TEvent]): """ # noqa: E501 if self._can_use_selection_scorer() and not force_score: raise RuntimeError( - "The selection scorer is set, and force_score was not set to True. \ - Please set force_score=True to use this function." + "The selection scorer is set, and force_score was not set to True. Please set force_score=True to use this function." # noqa: E501 ) if self.metrics: self.metrics.on_feedback(score) @@ -461,9 +457,7 @@ class RLChain(Chain, Generic[TEvent]): or self.selected_based_on_input_key in inputs.keys() ): raise ValueError( - f"The rl chain does not accept '{self.selected_input_key}' \ - or '{self.selected_based_on_input_key}' as input keys, \ - they are reserved for internal use during auto reward." + f"The rl chain does not accept '{self.selected_input_key}' or '{self.selected_based_on_input_key}' as input keys, they are reserved for internal use during auto reward." # noqa: E501 ) def _can_use_selection_scorer(self) -> bool: @@ -501,9 +495,6 @@ class RLChain(Chain, Generic[TEvent]): ) -> Dict[str, Any]: _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager() - # if self.auto_embed: - # inputs = prepare_inputs_for_autoembed(inputs=inputs) - event: TEvent = self._call_before_predict(inputs=inputs) prediction = self.active_policy.predict(event=event) if self.metrics: @@ -576,8 +567,7 @@ def embed_string_type( if namespace is None: raise ValueError( - "The default namespace must be \ - provided when embedding a string or _Embed object." + "The default namespace must be provided when embedding a string or _Embed object." 
# noqa: E501 ) return {namespace: keep_str + encoded} diff --git a/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py b/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py index 5ed32c4cadc..e3e93b138e1 100644 --- a/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py +++ b/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py @@ -118,7 +118,7 @@ class PickBestFeatureEmbedder(base.Embedder[PickBestEvent]): unique_contexts.add(f"{ns}={ea}") else: unique_contexts.add(f"{ns}={ee}") - + encoded_contexts = self.model.encode(list(unique_contexts)) context_embeddings = dict(zip(unique_contexts, encoded_contexts)) @@ -144,9 +144,9 @@ class PickBestFeatureEmbedder(base.Embedder[PickBestEvent]): indexed_dot_product[context_key] = {} for j, action_key in enumerate(action_embeddings.keys()): indexed_dot_product[context_key][action_key] = dot_product_matrix[i, j] - + return indexed_dot_product - + def format_auto_embed_on(self, event: PickBestEvent) -> str: chosen_action, cost, prob = self.get_label(event) context_emb, action_embs = self.get_context_and_action_embeddings(event) @@ -166,12 +166,12 @@ class PickBestFeatureEmbedder(base.Embedder[PickBestEvent]): line_parts.append(f"{elem}") ns_a = f"{ns}={elem}" nsa.append(ns_a) - for k,v in indexed_dot_product.items(): + for k, v in indexed_dot_product.items(): dot_prods.append(v[ns_a]) nsa = " ".join(nsa) line_parts.append(f"|# {nsa}") - line_parts.append(f"|embedding {self._str(dot_prods)}") + line_parts.append(f"|dotprod {self._str(dot_prods)}") action_lines.append(" ".join(line_parts)) shared = [] @@ -186,9 +186,7 @@ class PickBestFeatureEmbedder(base.Embedder[PickBestEvent]): nsc = " ".join(nsc) shared.append(f"|@ {nsc}") - r = "shared " + " ".join(shared) + "\n" + "\n".join(action_lines) - print(r) - return r + return "shared " + " ".join(shared) + "\n" + "\n".join(action_lines) def format_auto_embed_off(self, event: PickBestEvent) -> str: """ @@ -262,29 +260,35 @@ class PickBest(base.RLChain[PickBestEvent]): auto_embed = kwargs.get("auto_embed", False) vw_cmd = kwargs.get("vw_cmd", []) - if not vw_cmd: + if vw_cmd: + if "--cb_explore_adf" not in vw_cmd: + raise ValueError( + "If vw_cmd is specified, it must include --cb_explore_adf" + ) + else: interactions = ["--interactions=::"] if auto_embed: interactions = [ "--interactions=@#", "--ignore_linear=@", "--ignore_linear=#", - "--noconstant", ] vw_cmd = interactions + [ "--cb_explore_adf", "--coin", "--squarecb", + "--quiet", ] - else: - if "--cb_explore_adf" not in vw_cmd: - raise ValueError( - "If vw_cmd is specified, it must include --cb_explore_adf" - ) + kwargs["vw_cmd"] = vw_cmd feature_embedder = kwargs.get("feature_embedder", None) - if not feature_embedder: + if feature_embedder: + if "auto_embed" in kwargs: + logger.warning( + "auto_embed will take no effect when explicit feature_embedder is provided" # noqa E501 + ) + else: feature_embedder = PickBestFeatureEmbedder(auto_embed=auto_embed) kwargs["feature_embedder"] = feature_embedder @@ -294,23 +298,17 @@ class PickBest(base.RLChain[PickBestEvent]): context, actions = base.get_based_on_and_to_select_from(inputs=inputs) if not actions: raise ValueError( - "No variables using 'ToSelectFrom' found in the inputs. \ - Please include at least one variable containing \ - a list to select from." + "No variables using 'ToSelectFrom' found in the inputs. Please include at least one variable containing a list to select from." 
# noqa E501 ) if len(list(actions.values())) > 1: raise ValueError( - "Only one variable using 'ToSelectFrom' can be provided in the inputs \ - for the PickBest chain. Please provide only one variable \ - containing a list to select from." + "Only one variable using 'ToSelectFrom' can be provided in the inputs for the PickBest chain. Please provide only one variable containing a list to select from." # noqa E501 ) if not context: raise ValueError( - "No variables using 'BasedOn' found in the inputs. \ - Please include at least one variable containing information \ - to base the selected of ToSelectFrom on." + "No variables using 'BasedOn' found in the inputs. Please include at least one variable containing information to base the selected of ToSelectFrom on." # noqa E501 ) event = PickBestEvent(inputs=inputs, to_select_from=actions, based_on=context) diff --git a/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_chain_call.py b/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_chain_call.py index 3678523a04a..7eb7ca2aeac 100644 --- a/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_chain_call.py +++ b/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_chain_call.py @@ -1,7 +1,7 @@ from typing import Any, Dict import pytest -from test_utils import MockEncoder +from test_utils import MockEncoder, MockEncoderReturnsList import langchain.chains.rl_chain.base as rl_chain import langchain.chains.rl_chain.pick_best_chain as pick_best_chain @@ -26,7 +26,9 @@ def test_multiple_ToSelectFrom_throws() -> None: chain = pick_best_chain.PickBest.from_llm( llm=llm, prompt=PROMPT, - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ), ) actions = ["0", "1", "2"] with pytest.raises(ValueError): @@ -43,7 +45,9 @@ def test_missing_basedOn_from_throws() -> None: chain = pick_best_chain.PickBest.from_llm( llm=llm, prompt=PROMPT, - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ), ) actions = ["0", "1", "2"] with pytest.raises(ValueError): @@ -56,7 +60,9 @@ def test_ToSelectFrom_not_a_list_throws() -> None: chain = pick_best_chain.PickBest.from_llm( llm=llm, prompt=PROMPT, - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ), ) actions = {"actions": ["0", "1", "2"]} with pytest.raises(ValueError): @@ -75,7 +81,9 @@ def test_update_with_delayed_score_with_auto_validator_throws() -> None: llm=llm, prompt=PROMPT, selection_scorer=rl_chain.AutoSelectionScorer(llm=auto_val_llm), - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ), ) actions = ["0", "1", "2"] response = chain.run( @@ -98,7 +106,9 @@ def test_update_with_delayed_score_force() -> None: llm=llm, prompt=PROMPT, selection_scorer=rl_chain.AutoSelectionScorer(llm=auto_val_llm), - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ), ) actions = ["0", "1", "2"] response = 
chain.run( @@ -121,7 +131,9 @@ def test_update_with_delayed_score() -> None: llm=llm, prompt=PROMPT, selection_scorer=None, - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ), ) actions = ["0", "1", "2"] response = chain.run( @@ -153,7 +165,9 @@ def test_user_defined_scorer() -> None: llm=llm, prompt=PROMPT, selection_scorer=CustomSelectionScorer(), - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ), ) actions = ["0", "1", "2"] response = chain.run( @@ -168,7 +182,9 @@ def test_user_defined_scorer() -> None: @pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers") def test_everything_embedded() -> None: llm, PROMPT = setup() - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ) chain = pick_best_chain.PickBest.from_llm( llm=llm, prompt=PROMPT, feature_embedder=feature_embedder, auto_embed=False ) @@ -200,7 +216,9 @@ def test_everything_embedded() -> None: @pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers") def test_default_auto_embedder_is_off() -> None: llm, PROMPT = setup() - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ) chain = pick_best_chain.PickBest.from_llm( llm=llm, prompt=PROMPT, feature_embedder=feature_embedder ) @@ -224,9 +242,11 @@ def test_default_auto_embedder_is_off() -> None: @pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers") -def test_default_embeddings_off() -> None: +def test_default_w_embeddings_off() -> None: llm, PROMPT = setup() - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ) chain = pick_best_chain.PickBest.from_llm( llm=llm, prompt=PROMPT, feature_embedder=feature_embedder, auto_embed=False ) @@ -250,29 +270,54 @@ def test_default_embeddings_off() -> None: @pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers") -def test_default_embeddings_mixed_w_explicit_user_embeddings() -> None: +def test_default_w_embeddings_on() -> None: llm, PROMPT = setup() - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=True, model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder( + auto_embed=True, model=MockEncoderReturnsList() + ) chain = pick_best_chain.PickBest.from_llm( llm=llm, prompt=PROMPT, feature_embedder=feature_embedder, auto_embed=True ) str1 = "0" str2 = "1" - str3 = "2" - encoded_str1 = rl_chain.stringify_embedding(list(encoded_keyword + str1)) - encoded_str2 = rl_chain.stringify_embedding(list(encoded_keyword + str2)) - encoded_str3 = rl_chain.stringify_embedding(list(encoded_keyword + str3)) + ctx_str_1 = "context1" + dot_prod = "dotprod 0:5.0" # dot prod of [1.0, 2.0] and [1.0, 2.0] + expected = f"""shared |User {ctx_str_1} |@ User={ctx_str_1}\n|action {str1} |# action={str1} |{dot_prod}\n|action {str2} |# action={str2} |{dot_prod}""" # noqa + + actions = [str1, str2] + + response = chain.run( + 
User=rl_chain.BasedOn(ctx_str_1), + action=rl_chain.ToSelectFrom(actions), + ) + selection_metadata = response["selection_metadata"] + vw_str = feature_embedder.format(selection_metadata) + assert vw_str == expected + + +@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers") +def test_default_embeddings_mixed_w_explicit_user_embeddings() -> None: + llm, PROMPT = setup() + feature_embedder = pick_best_chain.PickBestFeatureEmbedder( + auto_embed=True, model=MockEncoderReturnsList() + ) + chain = pick_best_chain.PickBest.from_llm( + llm=llm, prompt=PROMPT, feature_embedder=feature_embedder, auto_embed=True + ) + + str1 = "0" + str2 = "1" + encoded_str2 = rl_chain.stringify_embedding([1.0, 2.0]) ctx_str_1 = "context1" ctx_str_2 = "context2" + encoded_ctx_str_1 = rl_chain.stringify_embedding([1.0, 2.0]) + dot_prod = "dotprod 0:5.0 1:5.0" # dot prod of [1.0, 2.0] and [1.0, 2.0] - encoded_ctx_str_1 = rl_chain.stringify_embedding(list(encoded_keyword + ctx_str_1)) - encoded_ctx_str_2 = rl_chain.stringify_embedding(list(encoded_keyword + ctx_str_2)) + expected = f"""shared |User {encoded_ctx_str_1} |@ User={encoded_ctx_str_1} |User2 {ctx_str_2} |@ User2={ctx_str_2}\n|action {str1} |# action={str1} |{dot_prod}\n|action {encoded_str2} |# action={encoded_str2} |{dot_prod}""" # noqa - expected = f"""shared |User {encoded_ctx_str_1} |User2 {ctx_str_2 + " " + encoded_ctx_str_2} \n|action {str1 + " " + encoded_str1} \n|action {str2 + " " + encoded_str2} \n|action {encoded_str3} """ # noqa - - actions = [str1, str2, rl_chain.Embed(str3)] + actions = [str1, rl_chain.Embed(str2)] response = chain.run( User=rl_chain.BasedOn(rl_chain.Embed(ctx_str_1)), @@ -291,7 +336,9 @@ def test_default_no_scorer_specified() -> None: chain = pick_best_chain.PickBest.from_llm( llm=chain_llm, prompt=PROMPT, - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ), ) response = chain.run( User=rl_chain.BasedOn("Context"), @@ -310,7 +357,9 @@ def test_explicitly_no_scorer() -> None: llm=llm, prompt=PROMPT, selection_scorer=None, - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ), ) response = chain.run( User=rl_chain.BasedOn("Context"), @@ -330,7 +379,9 @@ def test_auto_scorer_with_user_defined_llm() -> None: llm=llm, prompt=PROMPT, selection_scorer=rl_chain.AutoSelectionScorer(llm=scorer_llm), - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ), ) response = chain.run( User=rl_chain.BasedOn("Context"), @@ -348,7 +399,9 @@ def test_calling_chain_w_reserved_inputs_throws() -> None: chain = pick_best_chain.PickBest.from_llm( llm=llm, prompt=PROMPT, - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), + feature_embedder=pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ), ) with pytest.raises(ValueError): chain.run( @@ -371,7 +424,9 @@ def test_activate_and_deactivate_scorer() -> None: llm=llm, prompt=PROMPT, selection_scorer=pick_best_chain.base.AutoSelectionScorer(llm=scorer_llm), - feature_embedder=pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()), + 
feature_embedder=pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ), ) response = chain.run( User=pick_best_chain.base.BasedOn("Context"), diff --git a/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_text_embedder.py b/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_text_embedder.py index 734dae8d259..1fdbdff6444 100644 --- a/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_text_embedder.py +++ b/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_text_embedder.py @@ -9,7 +9,9 @@ encoded_keyword = "[encoded]" @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_missing_context_throws() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ) named_action = {"action": ["0", "1", "2"]} event = pick_best_chain.PickBestEvent( inputs={}, to_select_from=named_action, based_on={} @@ -20,7 +22,9 @@ def test_pickbest_textembedder_missing_context_throws() -> None: @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_missing_actions_throws() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ) event = pick_best_chain.PickBestEvent( inputs={}, to_select_from={}, based_on={"context": "context"} ) @@ -30,7 +34,9 @@ def test_pickbest_textembedder_missing_actions_throws() -> None: @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_no_label_no_emb() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ) named_actions = {"action1": ["0", "1", "2"]} expected = """shared |context context \n|action1 0 \n|action1 1 \n|action1 2 """ event = pick_best_chain.PickBestEvent( @@ -42,7 +48,9 @@ def test_pickbest_textembedder_no_label_no_emb() -> None: @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_w_label_no_score_no_emb() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ) named_actions = {"action1": ["0", "1", "2"]} expected = """shared |context context \n|action1 0 \n|action1 1 \n|action1 2 """ selected = pick_best_chain.PickBestSelected(index=0, probability=1.0) @@ -58,7 +66,9 @@ def test_pickbest_textembedder_w_label_no_score_no_emb() -> None: @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_w_full_label_no_emb() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ) named_actions = {"action1": ["0", "1", "2"]} expected = ( """shared |context context \n0:-0.0:1.0 |action1 0 \n|action1 1 \n|action1 2 """ @@ -76,7 +86,9 @@ def test_pickbest_textembedder_w_full_label_no_emb() -> None: @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_w_full_label_w_emb() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) + feature_embedder = 
pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ) str1 = "0" str2 = "1" str3 = "2" @@ -100,7 +112,9 @@ def test_pickbest_textembedder_w_full_label_w_emb() -> None: @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_w_full_label_w_embed_and_keep() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ) str1 = "0" str2 = "1" str3 = "2" @@ -124,7 +138,9 @@ def test_pickbest_textembedder_w_full_label_w_embed_and_keep() -> None: @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_more_namespaces_no_label_no_emb() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ) named_actions = {"action1": [{"a": "0", "b": "0"}, "1", "2"]} context = {"context1": "context1", "context2": "context2"} expected = """shared |context1 context1 |context2 context2 \n|a 0 |b 0 \n|action1 1 \n|action1 2 """ # noqa: E501 @@ -137,7 +153,9 @@ def test_pickbest_textembedder_more_namespaces_no_label_no_emb() -> None: @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_more_namespaces_w_label_no_emb() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ) named_actions = {"action1": [{"a": "0", "b": "0"}, "1", "2"]} context = {"context1": "context1", "context2": "context2"} expected = """shared |context1 context1 |context2 context2 \n|a 0 |b 0 \n|action1 1 \n|action1 2 """ # noqa: E501 @@ -151,7 +169,9 @@ def test_pickbest_textembedder_more_namespaces_w_label_no_emb() -> None: @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_more_namespaces_w_full_label_no_emb() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ) named_actions = {"action1": [{"a": "0", "b": "0"}, "1", "2"]} context = {"context1": "context1", "context2": "context2"} expected = """shared |context1 context1 |context2 context2 \n0:-0.0:1.0 |a 0 |b 0 \n|action1 1 \n|action1 2 """ # noqa: E501 @@ -165,7 +185,9 @@ def test_pickbest_textembedder_more_namespaces_w_full_label_no_emb() -> None: @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_more_namespaces_w_full_label_w_full_emb() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ) str1 = "0" str2 = "1" @@ -198,7 +220,9 @@ def test_pickbest_textembedder_more_namespaces_w_full_label_w_full_emb() -> None def test_pickbest_textembedder_more_namespaces_w_full_label_w_full_embed_and_keep() -> ( None ): - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ) str1 = "0" str2 = "1" @@ -231,7 +255,9 @@ def test_pickbest_textembedder_more_namespaces_w_full_label_w_full_embed_and_kee @pytest.mark.requires("vowpal_wabbit_next") def 
test_pickbest_textembedder_more_namespaces_w_full_label_w_partial_emb() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ) str1 = "0" str2 = "1" @@ -263,7 +289,9 @@ def test_pickbest_textembedder_more_namespaces_w_full_label_w_partial_emb() -> N @pytest.mark.requires("vowpal_wabbit_next") def test_pickbest_textembedder_more_namespaces_w_full_label_w_partial_emakeep() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ) str1 = "0" str2 = "1" @@ -298,7 +326,9 @@ def test_pickbest_textembedder_more_namespaces_w_full_label_w_partial_emakeep() @pytest.mark.requires("vowpal_wabbit_next") def test_raw_features_underscored() -> None: - feature_embedder = pick_best_chain.PickBestFeatureEmbedder(auto_embed=False, model=MockEncoder()) + feature_embedder = pick_best_chain.PickBestFeatureEmbedder( + auto_embed=False, model=MockEncoder() + ) str1 = "this is a long string" str1_underscored = str1.replace(" ", "_") encoded_str1 = rl_chain.stringify_embedding(list(encoded_keyword + str1)) diff --git a/libs/langchain/tests/unit_tests/chains/rl_chain/test_utils.py b/libs/langchain/tests/unit_tests/chains/rl_chain/test_utils.py index 625c37ee000..b2cc90b1bce 100644 --- a/libs/langchain/tests/unit_tests/chains/rl_chain/test_utils.py +++ b/libs/langchain/tests/unit_tests/chains/rl_chain/test_utils.py @@ -1,3 +1,15 @@ +from typing import Any, List + + class MockEncoder: def encode(self, to_encode: str) -> str: return "[encoded]" + to_encode + + +class MockEncoderReturnsList: + def encode(self, to_encode: Any) -> List: + if isinstance(to_encode, str): + return [1.0, 2.0] + elif isinstance(to_encode, List): + return [[1.0, 2.0] for _ in range(len(to_encode))] + raise ValueError("Invalid input type for unit test") From 67dc1a9dd20ab4a6c41498962fa3bd6e5ae1e710 Mon Sep 17 00:00:00 2001 From: olgavrou Date: Mon, 4 Sep 2023 07:36:47 -0400 Subject: [PATCH 3/7] cleanup --- libs/langchain/langchain/chains/rl_chain/__init__.py | 2 ++ libs/langchain/langchain/chains/rl_chain/pick_best_chain.py | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/langchain/langchain/chains/rl_chain/__init__.py b/libs/langchain/langchain/chains/rl_chain/__init__.py index 3a14861bd7f..80242139f5c 100644 --- a/libs/langchain/langchain/chains/rl_chain/__init__.py +++ b/libs/langchain/langchain/chains/rl_chain/__init__.py @@ -15,6 +15,7 @@ from langchain.chains.rl_chain.base import ( from langchain.chains.rl_chain.pick_best_chain import ( PickBest, PickBestEvent, + PickBestFeatureEmbedder, PickBestSelected, ) @@ -37,6 +38,7 @@ __all__ = [ "PickBest", "PickBestEvent", "PickBestSelected", + "PickBestFeatureEmbedder", "Embed", "BasedOn", "ToSelectFrom", diff --git a/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py b/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py index e3e93b138e1..afc7fc1e301 100644 --- a/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py +++ b/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py @@ -60,9 +60,7 @@ class PickBestFeatureEmbedder(base.Embedder[PickBestEvent]): if model is None: from sentence_transformers import SentenceTransformer - model = SentenceTransformer("all-mpnet-base-v2") - # model = SentenceTransformer("all-MiniLM-L6-v2") 
self.model = model self.auto_embed = auto_embed From 4e9aecda9068806c515dc8dddbcd967845e1be65 Mon Sep 17 00:00:00 2001 From: olgavrou Date: Mon, 4 Sep 2023 08:35:29 -0400 Subject: [PATCH 4/7] formatting --- libs/langchain/langchain/chains/rl_chain/pick_best_chain.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py b/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py index afc7fc1e301..1155d03a1b0 100644 --- a/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py +++ b/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py @@ -60,6 +60,7 @@ class PickBestFeatureEmbedder(base.Embedder[PickBestEvent]): if model is None: from sentence_transformers import SentenceTransformer + model = SentenceTransformer("all-mpnet-base-v2") self.model = model From 0f7cde023bc259073d5c79d985492623015047ea Mon Sep 17 00:00:00 2001 From: olgavrou Date: Mon, 4 Sep 2023 08:43:48 -0400 Subject: [PATCH 5/7] fix linting errors --- .../langchain/chains/rl_chain/pick_best_chain.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py b/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py index 1155d03a1b0..34fc3584f5e 100644 --- a/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py +++ b/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py @@ -67,7 +67,7 @@ class PickBestFeatureEmbedder(base.Embedder[PickBestEvent]): self.auto_embed = auto_embed @staticmethod - def _str(embedding): + def _str(embedding: List[float]): return " ".join([f"{i}:{e}" for i, e in enumerate(embedding)]) def get_label(self, event: PickBestEvent) -> tuple: @@ -137,7 +137,7 @@ class PickBestFeatureEmbedder(base.Embedder[PickBestEvent]): context_matrix = np.stack([v for k, v in context_embeddings.items()]) dot_product_matrix = np.dot(context_matrix, action_matrix.T) - indexed_dot_product = {} + indexed_dot_product: Dict[Dict] = {} for i, context_key in enumerate(context_embeddings.keys()): indexed_dot_product[context_key] = {} @@ -167,8 +167,8 @@ class PickBestFeatureEmbedder(base.Embedder[PickBestEvent]): nsa.append(ns_a) for k, v in indexed_dot_product.items(): dot_prods.append(v[ns_a]) - nsa = " ".join(nsa) - line_parts.append(f"|# {nsa}") + nsa_str = " ".join(nsa) + line_parts.append(f"|# {nsa_str}") line_parts.append(f"|dotprod {self._str(dot_prods)}") action_lines.append(" ".join(line_parts)) @@ -182,8 +182,8 @@ class PickBestFeatureEmbedder(base.Embedder[PickBestEvent]): for elem in elements: shared.append(f"{elem}") nsc.append(f"{ns}={elem}") - nsc = " ".join(nsc) - shared.append(f"|@ {nsc}") + nsc_str = " ".join(nsc) + shared.append(f"|@ {nsc_str}") return "shared " + " ".join(shared) + "\n" + "\n".join(action_lines) From e10980d445a0f0eea98d58d773bd884f2abdb8b3 Mon Sep 17 00:00:00 2001 From: olgavrou Date: Mon, 4 Sep 2023 08:56:34 -0400 Subject: [PATCH 6/7] fix linting error --- libs/langchain/langchain/chains/rl_chain/pick_best_chain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py b/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py index 34fc3584f5e..e6a3007a560 100644 --- a/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py +++ b/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py @@ -67,7 +67,7 @@ class PickBestFeatureEmbedder(base.Embedder[PickBestEvent]): self.auto_embed = auto_embed @staticmethod - def _str(embedding: List[float]): + def 
_str(embedding: List[float]) -> str: return " ".join([f"{i}:{e}" for i, e in enumerate(embedding)]) def get_label(self, event: PickBestEvent) -> tuple: From ae5edefdcdd0c93440512a48cc797c51d639989e Mon Sep 17 00:00:00 2001 From: olgavrou Date: Mon, 4 Sep 2023 16:36:29 -0400 Subject: [PATCH 7/7] cleanup --- .../langchain/chains/rl_chain/base.py | 5 ++-- .../chains/rl_chain/pick_best_chain.py | 24 ++++++++++--------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/libs/langchain/langchain/chains/rl_chain/base.py b/libs/langchain/langchain/chains/rl_chain/base.py index c69b21dd1f6..4b5ac572f9c 100644 --- a/libs/langchain/langchain/chains/rl_chain/base.py +++ b/libs/langchain/langchain/chains/rl_chain/base.py @@ -316,7 +316,7 @@ class RLChain(Chain, Generic[TEvent]): - selection_scorer (Union[SelectionScorer, None]): Scorer for the selection. Can be set to None. - policy (Optional[Policy]): The policy used by the chain to learn to populate a dynamic prompt. - auto_embed (bool): Determines if embedding should be automatic. Default is False. - - metrics (Optional[MetricsTracker]): Tracker for metrics, can be set to None. + - metrics (Optional[Union[MetricsTrackerRollingWindow, MetricsTrackerAverage]]): Tracker for metrics, can be set to None. Initialization Attributes: - feature_embedder (Embedder): Embedder used for the `BasedOn` and `ToSelectFrom` inputs. @@ -325,7 +325,8 @@ class RLChain(Chain, Generic[TEvent]): - vw_cmd (List[str], optional): Command line arguments for the VW model. - policy (Type[VwPolicy]): Policy used by the chain. - vw_logs (Optional[Union[str, os.PathLike]]): Path for the VW logs. - - metrics_step (int): Step for the metrics tracker. Default is -1. + - metrics_step (int): Step for the metrics tracker. Default is -1. If set without metrics_window_size, average metrics will be tracked, otherwise rolling window metrics will be tracked. + - metrics_window_size (int): Window size for the metrics tracker. Default is -1. If set, rolling window metrics will be tracked. Notes: The class initializes the VW model using the provided arguments. If `selection_scorer` is not provided, a warning is logged, indicating that no reinforcement learning will occur unless the `update_with_delayed_score` method is called. 
diff --git a/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py b/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py index e6a3007a560..791d12cdb46 100644 --- a/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py +++ b/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py @@ -137,7 +137,7 @@ class PickBestFeatureEmbedder(base.Embedder[PickBestEvent]): context_matrix = np.stack([v for k, v in context_embeddings.items()]) dot_product_matrix = np.dot(context_matrix, action_matrix.T) - indexed_dot_product: Dict[Dict] = {} + indexed_dot_product: Dict = {} for i, context_key in enumerate(context_embeddings.keys()): indexed_dot_product[context_key] = {} @@ -258,6 +258,18 @@ class PickBest(base.RLChain[PickBestEvent]): ): auto_embed = kwargs.get("auto_embed", False) + feature_embedder = kwargs.get("feature_embedder", None) + if feature_embedder: + if "auto_embed" in kwargs: + logger.warning( + "auto_embed will take no effect when explicit feature_embedder is provided" # noqa E501 + ) + # turning auto_embed off for cli setting below + auto_embed = False + else: + feature_embedder = PickBestFeatureEmbedder(auto_embed=auto_embed) + kwargs["feature_embedder"] = feature_embedder + vw_cmd = kwargs.get("vw_cmd", []) if vw_cmd: if "--cb_explore_adf" not in vw_cmd: @@ -281,16 +293,6 @@ class PickBest(base.RLChain[PickBestEvent]): kwargs["vw_cmd"] = vw_cmd - feature_embedder = kwargs.get("feature_embedder", None) - if feature_embedder: - if "auto_embed" in kwargs: - logger.warning( - "auto_embed will take no effect when explicit feature_embedder is provided" # noqa E501 - ) - else: - feature_embedder = PickBestFeatureEmbedder(auto_embed=auto_embed) - kwargs["feature_embedder"] = feature_embedder - super().__init__(*args, **kwargs) def _call_before_predict(self, inputs: Dict[str, Any]) -> PickBestEvent:
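
For reference, a minimal sketch of the dot-product feature computation that auto_embed enables in this series. The encoder below is a stand-in (it mirrors MockEncoderReturnsList from the unit tests, not the real model); the chain itself uses SentenceTransformer("all-mpnet-base-v2") and feeds the resulting dot products into a "|dotprod" namespace while VW is run with --interactions=@# so the @ (context) and # (action) namespaces interact.

    # Sketch only: illustrates how per-(context, action) dot products are built
    # from "namespace=value" feature strings, as in get_indexed_dot_product.
    from typing import Dict, List

    import numpy as np


    class StubEncoder:
        # Stand-in encoder (assumption for illustration): every string maps
        # to the fixed vector [1.0, 2.0], like the test MockEncoderReturnsList.
        def encode(self, texts: List[str]) -> np.ndarray:
            return np.array([[1.0, 2.0] for _ in texts])


    def indexed_dot_product(
        context_features: List[str],
        action_features: List[str],
        model: StubEncoder,
    ) -> Dict[str, Dict[str, float]]:
        # Encode each unique "ns=value" feature once, then take the dot
        # product of every context feature with every action feature.
        ctx_matrix = model.encode(context_features)
        act_matrix = model.encode(action_features)
        dots = np.dot(ctx_matrix, act_matrix.T)
        return {
            c: {a: float(dots[i, j]) for j, a in enumerate(action_features)}
            for i, c in enumerate(context_features)
        }


    if __name__ == "__main__":
        model = StubEncoder()
        dp = indexed_dot_product(["User=context1"], ["action=0", "action=1"], model)
        # Each action line in the VW example then carries its dot products
        # against the context features, e.g. "|action 0 |# action=0 |dotprod 0:5.0".
        print(dp)  # {'User=context1': {'action=0': 5.0, 'action=1': 5.0}}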