diff --git a/.github/workflows/langchain_ci.yml b/.github/workflows/langchain_ci.yml
index 8f1fc5d8744..06d04b2f47a 100644
--- a/.github/workflows/langchain_ci.yml
+++ b/.github/workflows/langchain_ci.yml
@@ -60,7 +60,7 @@ jobs:
           - "3.8"
           - "3.9"
           - "3.10"
-          - "3.11"
+          # - "3.11"
     name: Python ${{ matrix.python-version }} extended tests
     steps:
       - uses: actions/checkout@v3
diff --git a/libs/langchain/langchain/chains/rl_chain/base.py b/libs/langchain/langchain/chains/rl_chain/base.py
index 721b7d35de9..fb4143f4655 100644
--- a/libs/langchain/langchain/chains/rl_chain/base.py
+++ b/libs/langchain/langchain/chains/rl_chain/base.py
@@ -227,15 +227,17 @@ class Embedder(Generic[TEvent], ABC):
         ...
 
 
-class SelectionScorer(ABC, BaseModel):
+class SelectionScorer(Generic[TEvent], ABC, BaseModel):
     """Abstract method to grade the chosen selection or the response of the llm"""
 
     @abstractmethod
-    def score_response(self, inputs: Dict[str, Any], llm_response: str) -> float:
+    def score_response(
+        self, inputs: Dict[str, Any], llm_response: str, event: TEvent
+    ) -> float:
         ...
 
 
-class AutoSelectionScorer(SelectionScorer, BaseModel):
+class AutoSelectionScorer(SelectionScorer[Event], BaseModel):
     llm_chain: LLMChain
     prompt: Union[BasePromptTemplate, None] = None
     scoring_criteria_template_str: Optional[str] = None
@@ -254,7 +256,7 @@ class AutoSelectionScorer(SelectionScorer, BaseModel):
     def get_default_prompt() -> ChatPromptTemplate:
         human_template = 'Given this based_on "{rl_chain_selected_based_on}" \
             as the most important attribute, rank how good or bad this text is: \
-                "{llm_response}".'
+                "{rl_chain_selected}".'
         human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
         default_system_prompt = AutoSelectionScorer.get_default_system_prompt()
         chat_prompt = ChatPromptTemplate.from_messages(
@@ -281,7 +283,9 @@ class AutoSelectionScorer(SelectionScorer, BaseModel):
         values["llm_chain"] = LLMChain(llm=llm, prompt=prompt)
         return values
 
-    def score_response(self, inputs: Dict[str, Any], llm_response: str) -> float:
+    def score_response(
+        self, inputs: Dict[str, Any], llm_response: str, event: Event
+    ) -> float:
         ranking = self.llm_chain.predict(llm_response=llm_response, **inputs)
         ranking = ranking.strip()
         try:
@@ -304,7 +308,7 @@ class RLChain(Chain, Generic[TEvent]):
         - prompt (BasePromptTemplate): The template for the base prompt.
         - selection_scorer (Union[SelectionScorer, None]): Scorer for the selection. Can be set to None.
         - policy (Optional[Policy]): The policy used by the chain to learn to populate a dynamic prompt.
-        - auto_embed (bool): Determines if embedding should be automatic. Default is True.
+        - auto_embed (bool): Determines if embedding should be automatic. Default is False.
         - metrics (Optional[MetricsTracker]): Tracker for metrics, can be set to None.
 
     Initialization Attributes:
@@ -338,7 +342,7 @@ class RLChain(Chain, Generic[TEvent]):
     prompt: BasePromptTemplate
     selection_scorer: Union[SelectionScorer, None]
     active_policy: Policy = _NoOpPolicy()
-    auto_embed: bool = True
+    auto_embed: bool = False
     selected_input_key = "rl_chain_selected"
     selected_based_on_input_key = "rl_chain_selected_based_on"
     metrics: Optional[MetricsTracker] = None
@@ -492,7 +496,7 @@ class RLChain(Chain, Generic[TEvent]):
         try:
             if self.selection_scorer:
                 score = self.selection_scorer.score_response(
-                    inputs=next_chain_inputs, llm_response=output
+                    inputs=next_chain_inputs, llm_response=output, event=event
                 )
         except Exception as e:
             logger.info(
@@ -553,7 +557,7 @@ def embed_string_type(
 
 def embed_dict_type(item: Dict, model: Any) -> Dict[str, Any]:
     """Helper function to embed a dictionary item."""
-    inner_dict: Dict[str, Any] = {}
+    inner_dict: Dict = {}
     for ns, embed_item in item.items():
         if isinstance(embed_item, list):
             inner_dict[ns] = []
@@ -568,10 +572,30 @@ def embed_dict_type(item: Dict, model: Any) -> Dict[str, Any]:
 def embed_list_type(
     item: list, model: Any, namespace: Optional[str] = None
 ) -> List[Dict[str, Union[str, List[str]]]]:
-    ret_list: List[Dict[str, Union[str, List[str]]]] = []
+    """Helper function to embed a list of items.
+
+    Dictionaries go through ``embed_dict_type`` and nested lists are embedded
+    recursively, then merged into one dictionary keyed by the first key of
+    their first embedded element. Anything else goes through
+    ``embed_string_type`` with ``namespace``.
+
+    Raises:
+        ValueError: If a nested list embeds to nothing, since there is no key
+            to group it under.
+    """
+    ret_list: List = []
     for embed_item in item:
         if isinstance(embed_item, dict):
             ret_list.append(embed_dict_type(embed_item, model))
+        elif isinstance(embed_item, list):
+            item_embedding = embed_list_type(embed_item, model, namespace)
+            if not item_embedding:
+                # Fail clearly instead of an IndexError on item_embedding[0].
+                raise ValueError("Cannot embed an empty nested list.")
+            # All grouped values are read under the first element's first key.
+            first_key = next(iter(item_embedding[0]))
+            grouping = {first_key: [entry[first_key] for entry in item_embedding]}
+            ret_list.append(grouping)
         else:
             ret_list.append(embed_string_type(embed_item, model, namespace))
     return ret_list
diff --git a/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py b/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py
index fa7f18f8fb2..04218d29348 100644
--- a/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py
+++ b/libs/langchain/langchain/chains/rl_chain/pick_best_chain.py
@@ -161,7 +161,7 @@ class PickBest(base.RLChain[PickBestEvent]):
                 "--quiet",
                 "--interactions=::",
                 "--coin",
-                "--epsilon=0.2",
+                "--squarecb",
             ]
         else:
             if "--cb_explore_adf" not in vw_cmd:
diff --git a/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_chain_call.py b/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_chain_call.py
index 1b882e932d4..2af08840b5e 100644
--- a/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_chain_call.py
+++ b/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_chain_call.py
@@ -140,7 +140,12 @@ def test_user_defined_scorer() -> None:
     llm, PROMPT = setup()
 
     class CustomSelectionScorer(rl_chain.SelectionScorer):
-        def score_response(self, inputs: Dict[str, Any], llm_response: str) -> float:
+        def score_response(
+            self,
+            inputs: Dict[str, Any],
+            llm_response: str,
+            event: pick_best_chain.PickBestEvent,
+        ) -> float:
             score = 200
             return score
 
@@ -161,11 +166,11 @@ def test_user_defined_scorer() -> None:
 
 
 @pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
-def test_default_embeddings() -> None:
+def test_auto_embeddings_on() -> None:
     llm, PROMPT = setup()
     feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder())
     chain = pick_best_chain.PickBest.from_llm(
-        llm=llm, prompt=PROMPT, feature_embedder=feature_embedder
+        llm=llm, prompt=PROMPT, feature_embedder=feature_embedder, auto_embed=True
     )
 
     str1 = "0"
@@ -194,6 +199,32 @@ def test_default_embeddings() -> None:
     assert vw_str == expected
 
 
+@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
+def test_default_auto_embedder_is_off() -> None:
+    """PickBest built without ``auto_embed`` must keep features as raw text."""
+    llm, PROMPT = setup()
+    feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder())
+    # auto_embed is deliberately not passed: the default is what is under test.
+    chain = pick_best_chain.PickBest.from_llm(
+        llm=llm, prompt=PROMPT, feature_embedder=feature_embedder
+    )
+
+    ctx_str_1 = "context1"
+    str1, str2, str3 = "0", "1", "2"
+
+    # Plain strings are expected: no embedding vectors appear in the features.
+    expected = f"""shared |User {ctx_str_1} \n|action {str1} \n|action {str2} \n|action {str3} """  # noqa
+
+    based_on = pick_best_chain.base.BasedOn(ctx_str_1)
+    to_select_from = pick_best_chain.base.ToSelectFrom([str1, str2, str3])
+    response = chain.run(User=based_on, action=to_select_from)
+
+    # Format the metadata returned by the run and compare it to the
+    # un-embedded feature string.
+    vw_str = feature_embedder.format(response["selection_metadata"])
+    assert vw_str == expected
+
+
 @pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
 def test_default_embeddings_off() -> None:
     llm, PROMPT = setup()
@@ -225,7 +256,7 @@ def test_default_embeddings_mixed_w_explicit_user_embeddings() -> None:
     llm, PROMPT = setup()
     feature_embedder = pick_best_chain.PickBestFeatureEmbedder(model=MockEncoder())
     chain = pick_best_chain.PickBest.from_llm(
-        llm=llm, prompt=PROMPT, feature_embedder=feature_embedder
+        llm=llm, prompt=PROMPT, feature_embedder=feature_embedder, auto_embed=True
     )
 
     str1 = "0"