mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-24 03:52:10 +00:00
chore: add support for TypeScript code splitting (#11160)
- **Description:** Adds TypeScript as a supported language to `TextSplitter`.

---

Co-authored-by: Jacob Lee <jacoblee93@gmail.com>
This commit is contained in:
@@ -615,6 +615,7 @@ class Language(str, Enum):
|
||||
GO = "go"
|
||||
JAVA = "java"
|
||||
JS = "js"
|
||||
TS = "ts"
|
||||
PHP = "php"
|
||||
PROTO = "proto"
|
||||
PYTHON = "python"
|
||||
@@ -782,6 +783,32 @@ class RecursiveCharacterTextSplitter(TextSplitter):
|
||||
" ",
|
||||
"",
|
||||
]
|
||||
elif language == Language.TS:
|
||||
return [
|
||||
"\nenum ",
|
||||
"\ninterface ",
|
||||
"\nnamespace ",
|
||||
"\ntype ",
|
||||
# Split along class definitions
|
||||
"\nclass ",
|
||||
# Split along function definitions
|
||||
"\nfunction ",
|
||||
"\nconst ",
|
||||
"\nlet ",
|
||||
"\nvar ",
|
||||
# Split along control flow statements
|
||||
"\nif ",
|
||||
"\nfor ",
|
||||
"\nwhile ",
|
||||
"\nswitch ",
|
||||
"\ncase ",
|
||||
"\ndefault ",
|
||||
# Split by the normal type of lines
|
||||
"\n\n",
|
||||
"\n",
|
||||
" ",
|
||||
"",
|
||||
]
|
||||
elif language == Language.PHP:
|
||||
return [
|
||||
# Split along function definitions
|
||||
|
@@ -472,6 +472,33 @@ helloWorld();
|
||||
]
|
||||
|
||||
|
||||
def test_typescript_code_splitter() -> None:
    """Check that a TypeScript snippet is split into the expected chunks.

    Builds a splitter with ``RecursiveCharacterTextSplitter.from_language``
    for ``Language.TS`` using ``chunk_size=CHUNK_SIZE`` and no overlap, splits
    a small TypeScript snippet, and compares the result with the expected
    list of chunks.
    """
    splitter = RecursiveCharacterTextSplitter.from_language(
        Language.TS, chunk_size=CHUNK_SIZE, chunk_overlap=0
    )
    # NOTE(review): the leading whitespace inside this snippet was lost when
    # the diff was extracted; the indentation below is reconstructed. Confirm
    # against the upstream test before relying on exact whitespace.
    code = """
function helloWorld(): void {
  console.log("Hello, World!");
}

// Call the function
helloWorld();
    """
    chunks = splitter.split_text(code)
    assert chunks == [
        "function",
        "helloWorld():",
        "void {",
        'console.log("He',
        "llo,",
        'World!");',
        "}",
        "// Call the",
        "function",
        "helloWorld();",
    ]
|
||||
|
||||
|
||||
def test_java_code_splitter() -> None:
|
||||
splitter = RecursiveCharacterTextSplitter.from_language(
|
||||
Language.JAVA, chunk_size=CHUNK_SIZE, chunk_overlap=0
|
||||
|
Reference in New Issue
Block a user