mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-12 06:13:36 +00:00
chore: add support for TypeScript code splitting (#11160)
- **Description:** Adds TypeScript as a supported language for `TextSplitter`.

---------

Co-authored-by: Jacob Lee <jacoblee93@gmail.com>
This commit is contained in:
parent
17fcbed92c
commit
b738ccd91e
@ -18,6 +18,7 @@ from langchain.text_splitter import (
|
|||||||
'go',
|
'go',
|
||||||
'java',
|
'java',
|
||||||
'js',
|
'js',
|
||||||
|
'ts',
|
||||||
'php',
|
'php',
|
||||||
'proto',
|
'proto',
|
||||||
'python',
|
'python',
|
||||||
@ -107,6 +108,36 @@ js_docs
|
|||||||
|
|
||||||
</CodeOutputBlock>
|
</CodeOutputBlock>
|
||||||
|
|
||||||
|
## TS
|
||||||
|
Here's an example using the TS text splitter:
|
||||||
|
|
||||||
|
|
||||||
|
```python
|
||||||
|
TS_CODE = """
|
||||||
|
function helloWorld(): void {
|
||||||
|
console.log("Hello, World!");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Call the function
|
||||||
|
helloWorld();
|
||||||
|
"""
|
||||||
|
|
||||||
|
ts_splitter = RecursiveCharacterTextSplitter.from_language(
|
||||||
|
language=Language.TS, chunk_size=60, chunk_overlap=0
|
||||||
|
)
|
||||||
|
ts_docs = ts_splitter.create_documents([TS_CODE])
|
||||||
|
ts_docs
|
||||||
|
```
|
||||||
|
|
||||||
|
<CodeOutputBlock lang="python">
|
||||||
|
|
||||||
|
```
|
||||||
|
[Document(page_content='function helloWorld(): void {\n console.log("Hello, World!");\n}', metadata={}),
|
||||||
|
Document(page_content='// Call the function\nhelloWorld();', metadata={})]
|
||||||
|
```
|
||||||
|
|
||||||
|
</CodeOutputBlock>
|
||||||
|
|
||||||
## Markdown
|
## Markdown
|
||||||
|
|
||||||
Here's an example using the Markdown text splitter:
|
Here's an example using the Markdown text splitter:
|
||||||
|
@ -615,6 +615,7 @@ class Language(str, Enum):
|
|||||||
GO = "go"
|
GO = "go"
|
||||||
JAVA = "java"
|
JAVA = "java"
|
||||||
JS = "js"
|
JS = "js"
|
||||||
|
TS = "ts"
|
||||||
PHP = "php"
|
PHP = "php"
|
||||||
PROTO = "proto"
|
PROTO = "proto"
|
||||||
PYTHON = "python"
|
PYTHON = "python"
|
||||||
@ -782,6 +783,32 @@ class RecursiveCharacterTextSplitter(TextSplitter):
|
|||||||
" ",
|
" ",
|
||||||
"",
|
"",
|
||||||
]
|
]
|
||||||
|
elif language == Language.TS:
|
||||||
|
return [
|
||||||
|
"\nenum ",
|
||||||
|
"\ninterface ",
|
||||||
|
"\nnamespace ",
|
||||||
|
"\ntype ",
|
||||||
|
# Split along class definitions
|
||||||
|
"\nclass ",
|
||||||
|
# Split along function definitions
|
||||||
|
"\nfunction ",
|
||||||
|
"\nconst ",
|
||||||
|
"\nlet ",
|
||||||
|
"\nvar ",
|
||||||
|
# Split along control flow statements
|
||||||
|
"\nif ",
|
||||||
|
"\nfor ",
|
||||||
|
"\nwhile ",
|
||||||
|
"\nswitch ",
|
||||||
|
"\ncase ",
|
||||||
|
"\ndefault ",
|
||||||
|
# Split by the normal type of lines
|
||||||
|
"\n\n",
|
||||||
|
"\n",
|
||||||
|
" ",
|
||||||
|
"",
|
||||||
|
]
|
||||||
elif language == Language.PHP:
|
elif language == Language.PHP:
|
||||||
return [
|
return [
|
||||||
# Split along function definitions
|
# Split along function definitions
|
||||||
|
@ -472,6 +472,33 @@ helloWorld();
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_typescript_code_splitter() -> None:
|
||||||
|
splitter = RecursiveCharacterTextSplitter.from_language(
|
||||||
|
Language.TS, chunk_size=CHUNK_SIZE, chunk_overlap=0
|
||||||
|
)
|
||||||
|
code = """
|
||||||
|
function helloWorld(): void {
|
||||||
|
console.log("Hello, World!");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Call the function
|
||||||
|
helloWorld();
|
||||||
|
"""
|
||||||
|
chunks = splitter.split_text(code)
|
||||||
|
assert chunks == [
|
||||||
|
"function",
|
||||||
|
"helloWorld():",
|
||||||
|
"void {",
|
||||||
|
'console.log("He',
|
||||||
|
"llo,",
|
||||||
|
'World!");',
|
||||||
|
"}",
|
||||||
|
"// Call the",
|
||||||
|
"function",
|
||||||
|
"helloWorld();",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def test_java_code_splitter() -> None:
|
def test_java_code_splitter() -> None:
|
||||||
splitter = RecursiveCharacterTextSplitter.from_language(
|
splitter = RecursiveCharacterTextSplitter.from_language(
|
||||||
Language.JAVA, chunk_size=CHUNK_SIZE, chunk_overlap=0
|
Language.JAVA, chunk_size=CHUNK_SIZE, chunk_overlap=0
|
||||||
|
Loading…
Reference in New Issue
Block a user