mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-24 07:35:18 +00:00
text-splitters[minor]: Add lua code splitting (#20421)
- **Description:** Complete support for Lua code in the `langchain.text_splitter` module.
- **Dependencies:** None
- **Twitter handle:** @saberuster

If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, hwchase17.

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
4b6b0a87b6
commit
160bcaeb93
@ -571,6 +571,23 @@ class RecursiveCharacterTextSplitter(TextSplitter):
|
|||||||
" ",
|
" ",
|
||||||
"",
|
"",
|
||||||
]
|
]
|
||||||
|
elif language == Language.LUA:
|
||||||
|
return [
|
||||||
|
# Split along variable and table definitions
|
||||||
|
"\nlocal ",
|
||||||
|
# Split along function definitions
|
||||||
|
"\nfunction ",
|
||||||
|
# Split along control flow statements
|
||||||
|
"\nif ",
|
||||||
|
"\nfor ",
|
||||||
|
"\nwhile ",
|
||||||
|
"\nrepeat ",
|
||||||
|
# Split by the normal type of lines
|
||||||
|
"\n\n",
|
||||||
|
"\n",
|
||||||
|
" ",
|
||||||
|
"",
|
||||||
|
]
|
||||||
elif language == Language.HASKELL:
|
elif language == Language.HASKELL:
|
||||||
return [
|
return [
|
||||||
# Split along function definitions
|
# Split along function definitions
|
||||||
|
@ -1248,6 +1248,53 @@ def test_solidity_code_splitter() -> None:
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_lua_code_splitter() -> None:
|
||||||
|
splitter = RecursiveCharacterTextSplitter.from_language(
|
||||||
|
Language.LUA, chunk_size=CHUNK_SIZE, chunk_overlap=0
|
||||||
|
)
|
||||||
|
code = """
|
||||||
|
local variable = 10
|
||||||
|
|
||||||
|
function add(a, b)
|
||||||
|
return a + b
|
||||||
|
end
|
||||||
|
|
||||||
|
if variable > 5 then
|
||||||
|
for i=1, variable do
|
||||||
|
while i < variable do
|
||||||
|
repeat
|
||||||
|
print(i)
|
||||||
|
i = i + 1
|
||||||
|
until i >= variable
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
"""
|
||||||
|
chunks = splitter.split_text(code)
|
||||||
|
assert chunks == [
|
||||||
|
"local variable",
|
||||||
|
"= 10",
|
||||||
|
"function add(a,",
|
||||||
|
"b)",
|
||||||
|
"return a +",
|
||||||
|
"b",
|
||||||
|
"end",
|
||||||
|
"if variable > 5",
|
||||||
|
"then",
|
||||||
|
"for i=1,",
|
||||||
|
"variable do",
|
||||||
|
"while i",
|
||||||
|
"< variable do",
|
||||||
|
"repeat",
|
||||||
|
"print(i)",
|
||||||
|
"i = i + 1",
|
||||||
|
"until i >=",
|
||||||
|
"variable",
|
||||||
|
"end",
|
||||||
|
"end\nend",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def test_haskell_code_splitter() -> None:
|
def test_haskell_code_splitter() -> None:
|
||||||
splitter = RecursiveCharacterTextSplitter.from_language(
|
splitter = RecursiveCharacterTextSplitter.from_language(
|
||||||
Language.HASKELL, chunk_size=CHUNK_SIZE, chunk_overlap=0
|
Language.HASKELL, chunk_size=CHUNK_SIZE, chunk_overlap=0
|
||||||
|
Loading…
Reference in New Issue
Block a user