diff --git a/docs/docs/modules/data_connection/document_transformers/markdown_header_metadata.ipynb b/docs/docs/modules/data_connection/document_transformers/markdown_header_metadata.ipynb
index 9da4bfbf1f6..f2cea000efa 100644
--- a/docs/docs/modules/data_connection/document_transformers/markdown_header_metadata.ipynb
+++ b/docs/docs/modules/data_connection/document_transformers/markdown_header_metadata.ipynb
@@ -117,6 +117,41 @@
     "type(md_header_splits[0])"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "102aad57-7bef-42d3-ab4e-b50d6dc11718",
+   "metadata": {},
+   "source": [
+    "By default, `MarkdownHeaderTextSplitter` strips headers being split on from the output chunk's content. This can be disabled by setting `strip_headers = False`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "9fce45ba-a4be-4a69-ad27-f5ff195c4fd7",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[Document(page_content='# Foo  \\n## Bar  \\nHi this is Jim  \\nHi this is Joe', metadata={'Header 1': 'Foo', 'Header 2': 'Bar'}),\n",
+       " Document(page_content='### Boo  \\nHi this is Lance', metadata={'Header 1': 'Foo', 'Header 2': 'Bar', 'Header 3': 'Boo'}),\n",
+       " Document(page_content='## Baz  \\nHi this is Molly', metadata={'Header 1': 'Foo', 'Header 2': 'Baz'})]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "markdown_splitter = MarkdownHeaderTextSplitter(\n",
+    "    headers_to_split_on=headers_to_split_on, strip_headers=False\n",
+    ")\n",
+    "md_header_splits = markdown_splitter.split_text(markdown_document)\n",
+    "md_header_splits"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "9bd8977a",
@@ -127,7 +162,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "id": "480e0e3a",
    "metadata": {
     "ExecuteTime": {
@@ -139,14 +174,14 @@
     {
      "data": {
       "text/plain": [
-       "[Document(page_content='Markdown[9] is a lightweight markup language for creating formatted text using a plain-text editor. John Gruber created Markdown in 2004 as a markup language that is appealing to human readers in its source code form.[9]', metadata={'Header 1': 'Intro', 'Header 2': 'History'}),\n",
+       "[Document(page_content='# Intro  \\n## History  \\nMarkdown[9] is a lightweight markup language for creating formatted text using a plain-text editor. John Gruber created Markdown in 2004 as a markup language that is appealing to human readers in its source code form.[9]', metadata={'Header 1': 'Intro', 'Header 2': 'History'}),\n",
        " Document(page_content='Markdown is widely used in blogging, instant messaging, online forums, collaborative software, documentation pages, and readme files.', metadata={'Header 1': 'Intro', 'Header 2': 'History'}),\n",
-       " Document(page_content='As Markdown popularity grew rapidly, many Markdown implementations appeared, driven mostly by the need for  \\nadditional features such as tables, footnotes, definition lists,[note 1] and Markdown inside HTML blocks.  \\n#### Standardization', metadata={'Header 1': 'Intro', 'Header 2': 'Rise and divergence'}),\n",
+       " Document(page_content='## Rise and divergence  \\nAs Markdown popularity grew rapidly, many Markdown implementations appeared, driven mostly by the need for  \\nadditional features such as tables, footnotes, definition lists,[note 1] and Markdown inside HTML blocks.', metadata={'Header 1': 'Intro', 'Header 2': 'Rise and divergence'}),\n",
        " Document(page_content='#### Standardization  \\nFrom 2012, a group of people, including Jeff Atwood and John MacFarlane, launched what Atwood characterised as a standardisation effort.', metadata={'Header 1': 'Intro', 'Header 2': 'Rise and divergence'}),\n",
-       " Document(page_content='Implementations of Markdown are available for over a dozen programming languages.', metadata={'Header 1': 'Intro', 'Header 2': 'Implementations'})]"
+       " Document(page_content='## Implementations  \\nImplementations of Markdown are available for over a dozen programming languages.', metadata={'Header 1': 'Intro', 'Header 2': 'Implementations'})]"
       ]
      },
-     "execution_count": 4,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -160,7 +195,9 @@
     "]\n",
     "\n",
     "# MD splits\n",
-    "markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)\n",
+    "markdown_splitter = MarkdownHeaderTextSplitter(\n",
+    "    headers_to_split_on=headers_to_split_on, strip_headers=False\n",
+    ")\n",
     "md_header_splits = markdown_splitter.split_text(markdown_document)\n",
     "\n",
     "# Char-level splits\n",
diff --git a/libs/langchain/langchain/text_splitter.py b/libs/langchain/langchain/text_splitter.py
index be0cb5bdfa6..da65a80dc9f 100644
--- a/libs/langchain/langchain/text_splitter.py
+++ b/libs/langchain/langchain/text_splitter.py
@@ -323,13 +323,17 @@ class MarkdownHeaderTextSplitter:
     """Splitting markdown files based on specified headers."""
 
     def __init__(
-        self, headers_to_split_on: List[Tuple[str, str]], return_each_line: bool = False
+        self,
+        headers_to_split_on: List[Tuple[str, str]],
+        return_each_line: bool = False,
+        strip_headers: bool = True,
     ):
         """Create a new MarkdownHeaderTextSplitter.
 
         Args:
             headers_to_split_on: Headers we want to track
             return_each_line: Return each line w/ associated headers
+            strip_headers: Strip split headers from the content of the chunk
         """
         # Output line-by-line or aggregated into chunks w/ common headers
         self.return_each_line = return_each_line
@@ -338,6 +342,8 @@ class MarkdownHeaderTextSplitter:
         self.headers_to_split_on = sorted(
             headers_to_split_on, key=lambda split: len(split[0]), reverse=True
         )
+        # Whether to strip the headers being split on from each chunk's content
+        self.strip_headers = strip_headers
 
     def aggregate_lines_to_chunks(self, lines: List[LineType]) -> List[Document]:
         """Combine lines with common metadata into chunks
@@ -355,6 +361,23 @@ class MarkdownHeaderTextSplitter:
                 # has the same metadata as the current line,
                 # append the current content to the last lines's content
                 aggregated_chunks[-1]["content"] += "  \n" + line["content"]
+            elif (
+                aggregated_chunks
+                and aggregated_chunks[-1]["metadata"] != line["metadata"]
+                # may be issues if other metadata is present
+                and len(aggregated_chunks[-1]["metadata"]) < len(line["metadata"])
+                and aggregated_chunks[-1]["content"].split("\n")[-1][0] == "#"
+                and not self.strip_headers
+            ):
+                # If the last line in the aggregated list
+                # has different metadata than the current line,
+                # and has a shallower header level than the current line,
+                # and its content ends with a header,
+                # and we are not stripping headers,
+                # append the current content to the last line's content
+                aggregated_chunks[-1]["content"] += "  \n" + line["content"]
+                # and update the last line's metadata
+                aggregated_chunks[-1]["metadata"] = line["metadata"]
             else:
                 # Otherwise, append the current line to the aggregated list
                 aggregated_chunks.append(line)
@@ -451,6 +474,9 @@ class MarkdownHeaderTextSplitter:
                         )
                         current_content.clear()
 
+                    if not self.strip_headers:
+                        current_content.append(stripped_line)
+
                     break
             else:
                 if stripped_line:
diff --git a/libs/langchain/tests/unit_tests/test_text_splitter.py b/libs/langchain/tests/unit_tests/test_text_splitter.py
index 2f9cf2ac600..f099cc7cc2d 100644
--- a/libs/langchain/tests/unit_tests/test_text_splitter.py
+++ b/libs/langchain/tests/unit_tests/test_text_splitter.py
@@ -1035,6 +1035,87 @@ def test_md_header_text_splitter_3() -> None:
     assert output == expected_output
 
 
+def test_md_header_text_splitter_preserve_headers_1() -> None:
+    """Test markdown splitter by header: Preserve Headers."""
+
+    markdown_document = (
+        "# Foo\n\n"
+        "    ## Bat\n\n"
+        "Hi this is Jim\n\n"
+        "Hi Joe\n\n"
+        "## Baz\n\n"
+        "# Bar\n\n"
+        "This is Alice\n\n"
+        "This is Bob"
+    )
+    headers_to_split_on = [
+        ("#", "Header 1"),
+    ]
+    markdown_splitter = MarkdownHeaderTextSplitter(
+        headers_to_split_on=headers_to_split_on,
+        strip_headers=False,
+    )
+    output = markdown_splitter.split_text(markdown_document)
+    expected_output = [
+        Document(
+            page_content="# Foo  \n## Bat  \nHi this is Jim  \nHi Joe  \n## Baz",
+            metadata={"Header 1": "Foo"},
+        ),
+        Document(
+            page_content="# Bar  \nThis is Alice  \nThis is Bob",
+            metadata={"Header 1": "Bar"},
+        ),
+    ]
+    assert output == expected_output
+
+
+def test_md_header_text_splitter_preserve_headers_2() -> None:
+    """Test markdown splitter by header: Preserve Headers."""
+
+    markdown_document = (
+        "# Foo\n\n"
+        "    ## Bar\n\n"
+        "Hi this is Jim\n\n"
+        "Hi this is Joe\n\n"
+        "### Boo \n\n"
+        "Hi this is Lance\n\n"
+        "## Baz\n\n"
+        "Hi this is Molly\n"
+        "    ## Buz\n"
+        "# Bop"
+    )
+    headers_to_split_on = [
+        ("#", "Header 1"),
+        ("##", "Header 2"),
+        ("###", "Header 3"),
+    ]
+    markdown_splitter = MarkdownHeaderTextSplitter(
+        headers_to_split_on=headers_to_split_on,
+        strip_headers=False,
+    )
+    output = markdown_splitter.split_text(markdown_document)
+    expected_output = [
+        Document(
+            page_content="# Foo  \n## Bar  \nHi this is Jim  \nHi this is Joe",
+            metadata={"Header 1": "Foo", "Header 2": "Bar"},
+        ),
+        Document(
+            page_content="### Boo  \nHi this is Lance",
+            metadata={"Header 1": "Foo", "Header 2": "Bar", "Header 3": "Boo"},
+        ),
+        Document(
+            page_content="## Baz  \nHi this is Molly",
+            metadata={"Header 1": "Foo", "Header 2": "Baz"},
+        ),
+        Document(
+            page_content="## Buz",
+            metadata={"Header 1": "Foo", "Header 2": "Buz"},
+        ),
+        Document(page_content="# Bop", metadata={"Header 1": "Bop"}),
+    ]
+    assert output == expected_output
+
+
 @pytest.mark.parametrize("fence", [("```"), ("~~~")])
 def test_md_header_text_splitter_fenced_code_block(fence: str) -> None:
     """Test markdown splitter by header: Fenced code block."""