mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-05 21:12:48 +00:00
Harrison/text splitter (#5417)
adds support for keeping separators around when using recursive text splitter
This commit is contained in:
@@ -42,17 +42,17 @@
|
||||
" \n",
|
||||
"def foo():\n",
|
||||
"\n",
|
||||
"def testing_func():\n",
|
||||
"def testing_func_with_long_name():\n",
|
||||
"\n",
|
||||
"def bar():\n",
|
||||
"\"\"\"\n",
|
||||
"python_splitter = PythonCodeTextSplitter(chunk_size=30, chunk_overlap=0)"
|
||||
"python_splitter = PythonCodeTextSplitter(chunk_size=40, chunk_overlap=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "6cdc55f3",
|
||||
"id": "8cc33770",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -62,15 +62,16 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "8cc33770",
|
||||
"id": "f5f70775",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Foo:\\n\\n def bar():', lookup_str='', metadata={}, lookup_index=0),\n",
|
||||
" Document(page_content='foo():\\n\\ndef testing_func():', lookup_str='', metadata={}, lookup_index=0),\n",
|
||||
" Document(page_content='bar():', lookup_str='', metadata={}, lookup_index=0)]"
|
||||
"[Document(page_content='class Foo:\\n\\n def bar():', metadata={}),\n",
|
||||
" Document(page_content='def foo():', metadata={}),\n",
|
||||
" Document(page_content='def testing_func_with_long_name():', metadata={}),\n",
|
||||
" Document(page_content='def bar():', metadata={})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
@@ -82,33 +83,10 @@
|
||||
"docs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "de625e08-c440-489d-beed-020b6c53bf69",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['Foo:\\n\\n def bar():', 'foo():\\n\\ndef testing_func():', 'bar():']"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"python_splitter.split_text(python_text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "55aadd84-75ca-48ae-9b84-b39c368488ed",
|
||||
"id": "6e096d42",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
@@ -130,7 +108,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.9.1"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
Reference in New Issue
Block a user