mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-02 11:39:18 +00:00
feat(text-splitters): add Visual Basic 6 support (#31173)
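### **Description**

Add Visual Basic 6 support to the text splitters: a new `Language.VISUALBASIC6` member and a matching separator list in `RecursiveCharacterTextSplitter`.

---

### **Issue**

No specific issue addressed.

---

### **Dependencies**

No additional dependencies required.

---------

Co-authored-by: Mason Daugherty <mason@langchain.dev>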
This commit is contained in:
@@ -316,6 +316,7 @@ class Language(str, Enum):
|
||||
HASKELL = "haskell"
|
||||
ELIXIR = "elixir"
|
||||
POWERSHELL = "powershell"
|
||||
VISUALBASIC6 = "visualbasic6"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
|
@@ -734,6 +734,32 @@ class RecursiveCharacterTextSplitter(TextSplitter):
|
||||
" ",
|
||||
"",
|
||||
]
|
||||
if language == Language.VISUALBASIC6:
|
||||
vis = r"(?:Public|Private|Friend|Global|Static)\s+"
|
||||
return [
|
||||
# Split along definitions
|
||||
rf"\n(?!End\s){vis}?Sub\s+",
|
||||
rf"\n(?!End\s){vis}?Function\s+",
|
||||
rf"\n(?!End\s){vis}?Property\s+(?:Get|Let|Set)\s+",
|
||||
rf"\n(?!End\s){vis}?Type\s+",
|
||||
rf"\n(?!End\s){vis}?Enum\s+",
|
||||
# Split along control flow statements
|
||||
r"\n(?!End\s)If\s+",
|
||||
r"\nElseIf\s+",
|
||||
r"\nElse\s+",
|
||||
r"\nSelect\s+Case\s+",
|
||||
r"\nCase\s+",
|
||||
r"\nFor\s+",
|
||||
r"\nDo\s+",
|
||||
r"\nWhile\s+",
|
||||
r"\nWith\s+",
|
||||
# Split by the normal type of lines
|
||||
r"\n\n",
|
||||
r"\n",
|
||||
" ",
|
||||
"",
|
||||
]
|
||||
|
||||
if language in Language._value2member_map_:
|
||||
msg = f"Language {language} is not implemented yet!"
|
||||
raise ValueError(msg)
|
||||
|
@@ -3043,6 +3043,82 @@ $csvContent | ForEach-Object {
|
||||
]
|
||||
|
||||
|
||||
FAKE_VISUALBASIC6_TEXT = """
|
||||
Option Explicit
|
||||
|
||||
Public Function SumTwoIntegers(ByVal a As Integer, ByVal b As Integer) As Integer
|
||||
SumTwoIntegers = a + b
|
||||
End Function
|
||||
|
||||
Public Sub Main()
|
||||
Dim i As Integer
|
||||
Dim limit As Integer
|
||||
|
||||
i = 0
|
||||
limit = 50
|
||||
|
||||
While i < limit
|
||||
i = SumTwoIntegers(i, 1)
|
||||
|
||||
If i = limit \\ 2 Then
|
||||
MsgBox "Halfway there! i = " & i
|
||||
End If
|
||||
Wend
|
||||
|
||||
MsgBox "Done! Final value of i: " & i
|
||||
End Sub
|
||||
"""
|
||||
|
||||
|
||||
def test_visualbasic6_code_splitter() -> None:
    """Check that the Visual Basic 6 sample is split into the expected chunks.

    Builds a ``RecursiveCharacterTextSplitter`` for ``Language.VISUALBASIC6``
    with ``chunk_size=CHUNK_SIZE`` and no overlap, splits
    ``FAKE_VISUALBASIC6_TEXT`` and compares the result, in order, against a
    fixed list of chunks.
    """
    splitter = RecursiveCharacterTextSplitter.from_language(
        Language.VISUALBASIC6,
        chunk_size=CHUNK_SIZE,
        chunk_overlap=0,
    )
    chunks = splitter.split_text(FAKE_VISUALBASIC6_TEXT)

    assert chunks == [
        "Option Explicit",
        "Public Function",
        "SumTwoIntegers(",
        "ByVal",
        "a As Integer,",
        "ByVal b As",
        "Integer) As",
        "Integer",
        "SumTwoIntegers",
        "= a + b",
        "End Function",
        "Public Sub",
        "Main()",
        "Dim i As",
        "Integer",
        "Dim limit",
        "As Integer",
        "i = 0",
        "limit = 50",
        "While i <",
        "limit",
        "i =",
        "SumTwoIntegers(",
        "i,",
        "1)",
        "If i =",
        # The doubled backslash is a Python escape for the single "\" that
        # appears in the sample text.
        "limit \\ 2 Then",
        'MsgBox "Halfway',
        'there! i = " &',
        "i",
        "End If",
        "Wend",
        "MsgBox",
        '"Done! Final',
        'value of i: " &',
        "i",
        "End Sub",
    ]
|
||||
|
||||
|
||||
def custom_iframe_extractor(iframe_tag: Any) -> str:
    """Render an iframe tag as a Markdown-style link built from its ``src``.

    Args:
        iframe_tag: Any object exposing a mapping-style ``get(key, default)``;
            only its ``"src"`` entry is read.

    Returns:
        The string ``[iframe:<src>](<src>)``. When the tag has no ``src``,
        the empty string is substituted, giving ``[iframe:]()``.
    """
    # Fall back to "" so a tag without a src still yields a well-formed link.
    iframe_src = iframe_tag.get("src", "")
    return f"[iframe:{iframe_src}]({iframe_src})"
|
||||
|
Reference in New Issue
Block a user