feat(text-splitters): add Visual Basic 6 support (#31173)

### **Description**

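Add Visual Basic 6 support to the text splitters: a new `Language.VISUALBASIC6` enum member, a matching list of regex separators in `RecursiveCharacterTextSplitter` (procedure/type definitions first, then control-flow keywords, then plain line and whitespace breaks), and a unit test covering the new language.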

---

### **Issue**

No specific issue addressed.

---

### **Dependencies**

No additional dependencies required.

---------

Co-authored-by: Mason Daugherty <mason@langchain.dev>
This commit is contained in:
Fabio Fontana
2025-07-14 15:51:16 +02:00
committed by GitHub
parent 7e146a185b
commit fd168e1c11
4 changed files with 261 additions and 93 deletions

View File

@@ -316,6 +316,7 @@ class Language(str, Enum):
HASKELL = "haskell"
ELIXIR = "elixir"
POWERSHELL = "powershell"
VISUALBASIC6 = "visualbasic6"
@dataclass(frozen=True)

View File

@@ -734,6 +734,32 @@ class RecursiveCharacterTextSplitter(TextSplitter):
" ",
"",
]
if language == Language.VISUALBASIC6:
vis = r"(?:Public|Private|Friend|Global|Static)\s+"
return [
# Split along definitions
rf"\n(?!End\s){vis}?Sub\s+",
rf"\n(?!End\s){vis}?Function\s+",
rf"\n(?!End\s){vis}?Property\s+(?:Get|Let|Set)\s+",
rf"\n(?!End\s){vis}?Type\s+",
rf"\n(?!End\s){vis}?Enum\s+",
# Split along control flow statements
r"\n(?!End\s)If\s+",
r"\nElseIf\s+",
r"\nElse\s+",
r"\nSelect\s+Case\s+",
r"\nCase\s+",
r"\nFor\s+",
r"\nDo\s+",
r"\nWhile\s+",
r"\nWith\s+",
# Split by the normal type of lines
r"\n\n",
r"\n",
" ",
"",
]
if language in Language._value2member_map_:
msg = f"Language {language} is not implemented yet!"
raise ValueError(msg)

View File

@@ -3043,6 +3043,82 @@ $csvContent | ForEach-Object {
]
# Visual Basic 6 sample fed to the splitter in test_visualbasic6_code_splitter.
# This is a regular (non-raw) string, so the doubled backslash below is a
# Python escape that yields a single "\" character in the actual text.
FAKE_VISUALBASIC6_TEXT = """
Option Explicit
Public Function SumTwoIntegers(ByVal a As Integer, ByVal b As Integer) As Integer
SumTwoIntegers = a + b
End Function
Public Sub Main()
Dim i As Integer
Dim limit As Integer
i = 0
limit = 50
While i < limit
i = SumTwoIntegers(i, 1)
If i = limit \\ 2 Then
MsgBox "Halfway there! i = " & i
End If
Wend
MsgBox "Done! Final value of i: " & i
End Sub
"""
def test_visualbasic6_code_splitter() -> None:
    """Split the VB6 sample with zero overlap and compare the exact chunks."""
    vb6_splitter = RecursiveCharacterTextSplitter.from_language(
        Language.VISUALBASIC6,
        chunk_size=CHUNK_SIZE,
        chunk_overlap=0,
    )
    # Expected pieces, in document order, for the configured chunk size.
    expected_chunks = [
        "Option Explicit",
        "Public Function",
        "SumTwoIntegers(",
        "ByVal",
        "a As Integer,",
        "ByVal b As",
        "Integer) As",
        "Integer",
        "SumTwoIntegers",
        "= a + b",
        "End Function",
        "Public Sub",
        "Main()",
        "Dim i As",
        "Integer",
        "Dim limit",
        "As Integer",
        "i = 0",
        "limit = 50",
        "While i <",
        "limit",
        "i =",
        "SumTwoIntegers(",
        "i,",
        "1)",
        "If i =",
        "limit \\ 2 Then",
        'MsgBox "Halfway',
        'there! i = " &',
        "i",
        "End If",
        "Wend",
        "MsgBox",
        '"Done! Final',
        'value of i: " &',
        "i",
        "End Sub",
    ]
    actual_chunks = vb6_splitter.split_text(FAKE_VISUALBASIC6_TEXT)
    assert actual_chunks == expected_chunks
def custom_iframe_extractor(iframe_tag: Any) -> str:
    """Format an iframe tag as ``[iframe:<src>](<src>)``.

    Args:
        iframe_tag: Object exposing ``.get(key, default)``; its ``"src"``
            entry is looked up, with an empty string used when absent.

    Returns:
        The link-style string built from the ``src`` value.
    """
    link_target = iframe_tag.get("src", "")
    return f"[iframe:{link_target}]({link_target})"