diff --git a/docs/docs/integrations/llms/watsonxllm.ipynb b/docs/docs/integrations/llms/watsonxllm.ipynb
index dc347a18555..7a2676f5ee5 100644
--- a/docs/docs/integrations/llms/watsonxllm.ipynb
+++ b/docs/docs/integrations/llms/watsonxllm.ipynb
@@ -5,9 +5,9 @@
    "id": "70996d8a",
    "metadata": {},
    "source": [
-    "# WatsonxLLM\n",
+    "# IBM watsonx.ai\n",
     "\n",
-    "[WatsonxLLM](https://ibm.github.io/watson-machine-learning-sdk/fm_extensions.html) is wrapper for IBM [watsonx.ai](https://www.ibm.com/products/watsonx-ai) foundation models.\n",
+    "[WatsonxLLM](https://ibm.github.io/watsonx-ai-python-sdk/fm_extensions.html#langchain) is a wrapper for IBM [watsonx.ai](https://www.ibm.com/products/watsonx-ai) foundation models.\n",
     "This example shows how to communicate with watsonx.ai models using LangChain."
    ]
   },
@@ -16,7 +16,7 @@
    "id": "ea35b2b7",
    "metadata": {},
    "source": [
-    "Install the package [`ibm_watson_machine_learning`](https://ibm.github.io/watson-machine-learning-sdk/install.html)."
+    "Install the package [`ibm-watsonx-ai`](https://ibm.github.io/watsonx-ai-python-sdk/install.html)."
    ]
   },
   {
@@ -26,7 +26,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%pip install ibm_watson_machine_learning"
+    "%pip install ibm-watsonx-ai"
    ]
   },
   {
@@ -60,7 +60,7 @@
    "metadata": {},
    "source": [
     "## Load the model\n",
-    "You might need to adjust model `parameters` for different models or tasks, to do so please refer to [documentation](https://ibm.github.io/watson-machine-learning-sdk/model.html#metanames.GenTextParamsMetaNames)."
+    "You might need to adjust model `parameters` for different models or tasks. For details, refer to the [documentation](https://ibm.github.io/watsonx-ai-python-sdk/fm_model.html#metanames.GenTextParamsMetaNames)."
    ]
   },
   {
@@ -70,7 +70,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams\n",
+    "from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams\n",
     "\n",
     "parameters = {\n",
     "    GenParams.DECODING_METHOD: \"sample\",\n",
@@ -87,7 +87,15 @@
    "id": "2b586538",
    "metadata": {},
    "source": [
-    "Initialize the `WatsonxLLM` class with previous set params."
+    "Initialize the `WatsonxLLM` class with previously set parameters.\n",
+    "\n",
+    "\n",
+    "**Note**:\n",
+    "\n",
+    "- To provide context for the API call, you must add `project_id` or `space_id`. For more information, see the [documentation](https://www.ibm.com/docs/en/watsonx-as-a-service?topic=projects).\n",
+    "- Depending on the region of your provisioned service instance, use one of the URLs described [here](https://ibm.github.io/watsonx-ai-python-sdk/setup_cloud.html#authentication).\n",
+    "\n",
+    "In this example, we’ll use the `project_id` and the Dallas URL."
    ]
   },
   {
@@ -102,7 +110,7 @@
     "watsonx_llm = WatsonxLLM(\n",
     "    model_id=\"google/flan-ul2\",\n",
     "    url=\"https://us-south.ml.cloud.ibm.com\",\n",
-    "    project_id=\"***\",\n",
+    "    project_id=\"PASTE YOUR PROJECT_ID HERE\",\n",
     "    params=parameters,\n",
     ")"
    ]
   },
@@ -112,19 +120,49 @@
    "id": "2202f4e0",
    "metadata": {},
    "source": [
-    "Alternatively you can use Cloud Pak for Data credentials. For details, see [documentation](https://ibm.github.io/watson-machine-learning-sdk/setup_cpd.html).\n",
-    "```\n",
+    "Alternatively, you can use Cloud Pak for Data credentials. For details, see the [documentation](https://ibm.github.io/watsonx-ai-python-sdk/setup_cpd.html)."
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "243ecccb", + "metadata": {}, + "outputs": [], + "source": [ "watsonx_llm = WatsonxLLM(\n", - " model_id='google/flan-ul2',\n", - " url=\"***\",\n", - " username=\"***\",\n", - " password=\"***\",\n", + " model_id=\"google/flan-ul2\",\n", + " url=\"PASTE YOUR URL HERE\",\n", + " username=\"PASTE YOUR USERNAME HERE\",\n", + " password=\"PASTE YOUR PASSWORD HERE\",\n", " instance_id=\"openshift\",\n", " version=\"4.8\",\n", - " project_id='***',\n", - " params=parameters\n", - ")\n", - "``` " + " project_id=\"PASTE YOUR PROJECT_ID HERE\",\n", + " params=parameters,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "96ed13d4", + "metadata": {}, + "source": [ + "Instead of `model_id`, you can also pass the `deployment_id` of the previously tuned model. The entire model tuning workflow is described [here](https://ibm.github.io/watsonx-ai-python-sdk/pt_working_with_class_and_prompt_tuner.html)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08e66c88", + "metadata": {}, + "outputs": [], + "source": [ + "watsonx_llm = WatsonxLLM(\n", + " deployment_id=\"PASTE YOUR DEPLOYMENT_ID HERE\",\n", + " url=\"https://us-south.ml.cloud.ibm.com\",\n", + " project_id=\"PASTE YOUR PROJECT_ID HERE\",\n", + " params=parameters,\n", + ")" ] }, { @@ -187,7 +225,7 @@ "metadata": {}, "source": [ "## Calling the Model Directly\n", - "To obtain completions, you can can the model directly using string prompt." + "To obtain completions, you can call the model directly using a string prompt." ] }, { diff --git a/libs/community/langchain_community/llms/watsonxllm.py b/libs/community/langchain_community/llms/watsonxllm.py index 3e927eb16b2..380628f5ef3 100644 --- a/libs/community/langchain_community/llms/watsonxllm.py +++ b/libs/community/langchain_community/llms/watsonxllm.py @@ -15,7 +15,7 @@ class WatsonxLLM(BaseLLM): """ IBM watsonx.ai large language models. - To use, you should have ``ibm_watson_machine_learning`` python package installed, + To use, you should have ``ibm_watsonx_ai`` python package installed, and the environment variable ``WATSONX_APIKEY`` set with your API key, or pass it as a named parameter to the constructor. @@ -23,7 +23,7 @@ class WatsonxLLM(BaseLLM): Example: .. 
     .. code-block:: python
 
-            from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames
+            from ibm_watsonx_ai.metanames import GenTextParamsMetaNames
             parameters = {
                 GenTextParamsMetaNames.DECODING_METHOD: "sample",
                 GenTextParamsMetaNames.MAX_NEW_TOKENS: 100,
@@ -34,7 +34,7 @@ class WatsonxLLM(BaseLLM):
             }
 
             from langchain_community.llms import WatsonxLLM
-            llm = WatsonxLLM(
+            watsonx_llm = WatsonxLLM(
                 model_id="google/flan-ul2",
                 url="https://us-south.ml.cloud.ibm.com",
                 apikey="*****",
@@ -46,6 +46,9 @@ class WatsonxLLM(BaseLLM):
     model_id: str = ""
     """Type of model to use."""
 
+    deployment_id: str = ""
+    """Type of deployed model to use."""
+
     project_id: str = ""
     """ID of the Watson Studio project."""
 
@@ -159,7 +162,7 @@ class WatsonxLLM(BaseLLM):
             )
 
         try:
-            from ibm_watson_machine_learning.foundation_models import Model
+            from ibm_watsonx_ai.foundation_models import ModelInference
 
             credentials = {
                 "url": values["url"].get_secret_value() if values["url"] else None,
@@ -186,8 +189,9 @@ class WatsonxLLM(BaseLLM):
                 key: value for key, value in credentials.items() if value is not None
             }
 
-            watsonx_model = Model(
+            watsonx_model = ModelInference(
                 model_id=values["model_id"],
+                deployment_id=values["deployment_id"],
                 credentials=credentials_without_none_value,
                 params=values["params"],
                 project_id=values["project_id"],
@@ -198,8 +202,8 @@ class WatsonxLLM(BaseLLM):
 
         except ImportError:
             raise ImportError(
-                "Could not import ibm_watson_machine_learning python package. "
-                "Please install it with `pip install ibm_watson_machine_learning`."
+                "Could not import ibm_watsonx_ai python package. "
+                "Please install it with `pip install ibm_watsonx_ai`."
             )
         return values
 
@@ -208,6 +212,7 @@ class WatsonxLLM(BaseLLM):
         """Get the identifying parameters."""
         return {
             "model_id": self.model_id,
+            "deployment_id": self.deployment_id,
             "params": self.params,
             "project_id": self.project_id,
             "space_id": self.space_id,
@@ -257,9 +262,34 @@ class WatsonxLLM(BaseLLM):
             )
             generations.append([gen])
         final_token_usage = self._extract_token_usage(response)
-        llm_output = {"token_usage": final_token_usage, "model_id": self.model_id}
+        llm_output = {
+            "token_usage": final_token_usage,
+            "model_id": self.model_id,
+            "deployment_id": self.deployment_id,
+        }
         return LLMResult(generations=generations, llm_output=llm_output)
 
+    def _stream_response_to_generation_chunk(
+        self,
+        stream_response: Dict[str, Any],
+    ) -> GenerationChunk:
+        """Convert a stream response to a generation chunk."""
+        if not stream_response["results"]:
+            return GenerationChunk(text="")
+        return GenerationChunk(
+            text=stream_response["results"][0]["generated_text"],
+            generation_info=dict(
+                finish_reason=stream_response["results"][0].get("stop_reason", None),
+                llm_output={
+                    "generated_token_count": stream_response["results"][0].get(
+                        "generated_token_count", None
+                    ),
+                    "model_id": self.model_id,
+                    "deployment_id": self.deployment_id,
+                },
+            ),
+        )
+
     def _call(
         self,
         prompt: str,
@@ -277,7 +307,7 @@ class WatsonxLLM(BaseLLM):
         Example:
         .. code-block:: python
 
-            response = watsonxllm("What is a molecule")
+            response = watsonx_llm("What is a molecule")
         """
         result = self._generate(
             prompts=[prompt], stop=stop, run_manager=run_manager, **kwargs
@@ -302,8 +332,13 @@
         Example:
         .. code-block:: python
 
-            response = watsonxllm.generate(["What is a molecule"])
+            response = watsonx_llm.generate(["What is a molecule"])
         """
+        if stop:
+            if self.params:
+                self.params.update({"stop_sequences": stop})
+            else:
+                self.params = {"stop_sequences": stop}
         should_stream = stream if stream is not None else self.streaming
         if should_stream:
             if len(prompts) > 1:
@@ -320,9 +355,12 @@
             else:
                 generation += chunk
             assert generation is not None
+            if isinstance(generation.generation_info, dict):
+                llm_output = generation.generation_info.pop("llm_output")
+                return LLMResult(generations=[[generation]], llm_output=llm_output)
             return LLMResult(generations=[[generation]])
         else:
-            response = self.watsonx_model.generate(prompt=prompts)
+            response = self.watsonx_model.generate(prompt=prompts, params=self.params)
             return self._create_llm_result(response)
 
     def _stream(
@@ -342,12 +380,20 @@
         Example:
         .. code-block:: python
 
-            response = watsonxllm.stream("What is a molecule")
+            response = watsonx_llm.stream("What is a molecule")
             for chunk in response:
                 print(chunk, end='')
         """
-        for chunk in self.watsonx_model.generate_text_stream(prompt=prompt):
-            if chunk:
-                yield GenerationChunk(text=chunk)
-                if run_manager:
-                    run_manager.on_llm_new_token(chunk)
+        if stop:
+            if self.params:
+                self.params.update({"stop_sequences": stop})
+            else:
+                self.params = {"stop_sequences": stop}
+        for stream_resp in self.watsonx_model.generate_text_stream(
+            prompt=prompt, raw_response=True, params=self.params
+        ):
+            chunk = self._stream_response_to_generation_chunk(stream_resp)
+            yield chunk
+
+            if run_manager:
+                run_manager.on_llm_new_token(chunk.text, chunk=chunk)
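
For review purposes, here is a minimal sketch of how the behavior this diff adds would be exercised end to end. It uses only the constructs that appear in the diff itself (`WatsonxLLM`, `generate`, `stream`, the `GenParams` metanames); the `project_id` value is a placeholder as in the notebook, and the `stop` value is an arbitrary example, so treat this as illustrative rather than a runnable test — it requires a provisioned watsonx.ai instance and `WATSONX_APIKEY` set.

```python
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
from langchain_community.llms import WatsonxLLM

parameters = {
    GenParams.DECODING_METHOD: "sample",
    GenParams.MAX_NEW_TOKENS: 100,
}

watsonx_llm = WatsonxLLM(
    model_id="google/flan-ul2",
    url="https://us-south.ml.cloud.ibm.com",
    project_id="PASTE YOUR PROJECT_ID HERE",  # placeholder, as in the notebook
    params=parameters,
)

# New in this diff: a `stop` argument is folded into params as
# {"stop_sequences": stop} before the request reaches ibm_watsonx_ai.
result = watsonx_llm.generate(["What is a molecule"], stop=["\n\n"])

# llm_output now carries "deployment_id" alongside "model_id" and token usage.
print(result.llm_output)

# Streaming now goes through generate_text_stream(raw_response=True) and
# _stream_response_to_generation_chunk, so each chunk carries generation_info
# (finish_reason, generated_token_count) instead of being a bare string.
for chunk in watsonx_llm.stream("What is a molecule"):
    print(chunk, end="")
```

One design note worth flagging: because `_generate` and `_stream` write the stop sequences into `self.params`, a `stop` value persists on the instance and will apply to later calls that pass no `stop` of their own.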