mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-16 01:37:59 +00:00
feat(text-splitters): add Visual Basic 6 support (#31173)
### **Description**

Add Visual Basic 6 support.

---

### **Issue**

No specific issue addressed.

---

### **Dependencies**

No additional dependencies required.

---------

Co-authored-by: Mason Daugherty <mason@langchain.dev>
This commit is contained in:
parent
7e146a185b
commit
fd168e1c11
@ -63,7 +63,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 1,
|
||||
"id": "a9e37aa1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -84,7 +84,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 2,
|
||||
"id": "e21a2434",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -114,10 +114,13 @@
|
||||
" 'c',\n",
|
||||
" 'lua',\n",
|
||||
" 'perl',\n",
|
||||
" 'haskell']"
|
||||
" 'haskell',\n",
|
||||
" 'elixir',\n",
|
||||
" 'powershell',\n",
|
||||
" 'visualbasic6']"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -136,7 +139,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 6,
|
||||
"id": "c92fb913",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -146,7 +149,7 @@
|
||||
"['\\nclass ', '\\ndef ', '\\n\\tdef ', '\\n\\n', '\\n', ' ', '']"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -168,18 +171,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 7,
|
||||
"id": "a58512b9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='def hello_world():\\n print(\"Hello, World!\")'),\n",
|
||||
" Document(page_content='# Call the function\\nhello_world()')]"
|
||||
"[Document(metadata={}, page_content='def hello_world():\\n print(\"Hello, World!\")'),\n",
|
||||
" Document(metadata={}, page_content='# Call the function\\nhello_world()')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -210,18 +213,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 8,
|
||||
"id": "7db0d486",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='function helloWorld() {\\n console.log(\"Hello, World!\");\\n}'),\n",
|
||||
" Document(page_content='// Call the function\\nhelloWorld();')]"
|
||||
"[Document(metadata={}, page_content='function helloWorld() {\\n console.log(\"Hello, World!\");\\n}'),\n",
|
||||
" Document(metadata={}, page_content='// Call the function\\nhelloWorld();')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -254,19 +257,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 9,
|
||||
"id": "aee738a4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='function helloWorld(): void {'),\n",
|
||||
" Document(page_content='console.log(\"Hello, World!\");\\n}'),\n",
|
||||
" Document(page_content='// Call the function\\nhelloWorld();')]"
|
||||
"[Document(metadata={}, page_content='function helloWorld(): void {'),\n",
|
||||
" Document(metadata={}, page_content='console.log(\"Hello, World!\");\\n}'),\n",
|
||||
" Document(metadata={}, page_content='// Call the function\\nhelloWorld();')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -300,7 +303,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 10,
|
||||
"id": "ac9295d3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -321,7 +324,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 11,
|
||||
"id": "bfa1771b-d4b0-48f8-a949-5537cd1df0dd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -337,7 +340,7 @@
|
||||
" Document(metadata={}, page_content='are extremely open to contributions.')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -362,7 +365,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 12,
|
||||
"id": "77d1049d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -389,38 +392,38 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 13,
|
||||
"id": "4dbc47e1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='\\\\documentclass{article}\\n\\n\\x08egin{document}\\n\\n\\\\maketitle'),\n",
|
||||
" Document(page_content='\\\\section{Introduction}'),\n",
|
||||
" Document(page_content='Large language models (LLMs) are a type of machine learning'),\n",
|
||||
" Document(page_content='model that can be trained on vast amounts of text data to'),\n",
|
||||
" Document(page_content='generate human-like language. In recent years, LLMs have'),\n",
|
||||
" Document(page_content='made significant advances in a variety of natural language'),\n",
|
||||
" Document(page_content='processing tasks, including language translation, text'),\n",
|
||||
" Document(page_content='generation, and sentiment analysis.'),\n",
|
||||
" Document(page_content='\\\\subsection{History of LLMs}'),\n",
|
||||
" Document(page_content='The earliest LLMs were developed in the 1980s and 1990s,'),\n",
|
||||
" Document(page_content='but they were limited by the amount of data that could be'),\n",
|
||||
" Document(page_content='processed and the computational power available at the'),\n",
|
||||
" Document(page_content='time. In the past decade, however, advances in hardware and'),\n",
|
||||
" Document(page_content='software have made it possible to train LLMs on massive'),\n",
|
||||
" Document(page_content='datasets, leading to significant improvements in'),\n",
|
||||
" Document(page_content='performance.'),\n",
|
||||
" Document(page_content='\\\\subsection{Applications of LLMs}'),\n",
|
||||
" Document(page_content='LLMs have many applications in industry, including'),\n",
|
||||
" Document(page_content='chatbots, content creation, and virtual assistants. They'),\n",
|
||||
" Document(page_content='can also be used in academia for research in linguistics,'),\n",
|
||||
" Document(page_content='psychology, and computational linguistics.'),\n",
|
||||
" Document(page_content='\\\\end{document}')]"
|
||||
"[Document(metadata={}, page_content='\\\\documentclass{article}\\n\\n\\x08egin{document}\\n\\n\\\\maketitle'),\n",
|
||||
" Document(metadata={}, page_content='\\\\section{Introduction}'),\n",
|
||||
" Document(metadata={}, page_content='Large language models (LLMs) are a type of machine learning'),\n",
|
||||
" Document(metadata={}, page_content='model that can be trained on vast amounts of text data to'),\n",
|
||||
" Document(metadata={}, page_content='generate human-like language. In recent years, LLMs have'),\n",
|
||||
" Document(metadata={}, page_content='made significant advances in a variety of natural language'),\n",
|
||||
" Document(metadata={}, page_content='processing tasks, including language translation, text'),\n",
|
||||
" Document(metadata={}, page_content='generation, and sentiment analysis.'),\n",
|
||||
" Document(metadata={}, page_content='\\\\subsection{History of LLMs}'),\n",
|
||||
" Document(metadata={}, page_content='The earliest LLMs were developed in the 1980s and 1990s,'),\n",
|
||||
" Document(metadata={}, page_content='but they were limited by the amount of data that could be'),\n",
|
||||
" Document(metadata={}, page_content='processed and the computational power available at the'),\n",
|
||||
" Document(metadata={}, page_content='time. In the past decade, however, advances in hardware and'),\n",
|
||||
" Document(metadata={}, page_content='software have made it possible to train LLMs on massive'),\n",
|
||||
" Document(metadata={}, page_content='datasets, leading to significant improvements in'),\n",
|
||||
" Document(metadata={}, page_content='performance.'),\n",
|
||||
" Document(metadata={}, page_content='\\\\subsection{Applications of LLMs}'),\n",
|
||||
" Document(metadata={}, page_content='LLMs have many applications in industry, including'),\n",
|
||||
" Document(metadata={}, page_content='chatbots, content creation, and virtual assistants. They'),\n",
|
||||
" Document(metadata={}, page_content='can also be used in academia for research in linguistics,'),\n",
|
||||
" Document(metadata={}, page_content='psychology, and computational linguistics.'),\n",
|
||||
" Document(metadata={}, page_content='\\\\end{document}')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -445,7 +448,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 14,
|
||||
"id": "0fc78794",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -479,29 +482,29 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 15,
|
||||
"id": "e3e3fca1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='<!DOCTYPE html>\\n<html>'),\n",
|
||||
" Document(page_content='<head>\\n <title>🦜️🔗 LangChain</title>'),\n",
|
||||
" Document(page_content='<style>\\n body {\\n font-family: Aria'),\n",
|
||||
" Document(page_content='l, sans-serif;\\n }\\n h1 {'),\n",
|
||||
" Document(page_content='color: darkblue;\\n }\\n </style>\\n </head'),\n",
|
||||
" Document(page_content='>'),\n",
|
||||
" Document(page_content='<body>'),\n",
|
||||
" Document(page_content='<div>\\n <h1>🦜️🔗 LangChain</h1>'),\n",
|
||||
" Document(page_content='<p>⚡ Building applications with LLMs through composability ⚡'),\n",
|
||||
" Document(page_content='</p>\\n </div>'),\n",
|
||||
" Document(page_content='<div>\\n As an open-source project in a rapidly dev'),\n",
|
||||
" Document(page_content='eloping field, we are extremely open to contributions.'),\n",
|
||||
" Document(page_content='</div>\\n </body>\\n</html>')]"
|
||||
"[Document(metadata={}, page_content='<!DOCTYPE html>\\n<html>'),\n",
|
||||
" Document(metadata={}, page_content='<head>\\n <title>🦜️🔗 LangChain</title>'),\n",
|
||||
" Document(metadata={}, page_content='<style>\\n body {\\n font-family: Aria'),\n",
|
||||
" Document(metadata={}, page_content='l, sans-serif;\\n }\\n h1 {'),\n",
|
||||
" Document(metadata={}, page_content='color: darkblue;\\n }\\n </style>\\n </head'),\n",
|
||||
" Document(metadata={}, page_content='>'),\n",
|
||||
" Document(metadata={}, page_content='<body>'),\n",
|
||||
" Document(metadata={}, page_content='<div>\\n <h1>🦜️🔗 LangChain</h1>'),\n",
|
||||
" Document(metadata={}, page_content='<p>⚡ Building applications with LLMs through composability ⚡'),\n",
|
||||
" Document(metadata={}, page_content='</p>\\n </div>'),\n",
|
||||
" Document(metadata={}, page_content='<div>\\n As an open-source project in a rapidly dev'),\n",
|
||||
" Document(metadata={}, page_content='eloping field, we are extremely open to contributions.'),\n",
|
||||
" Document(metadata={}, page_content='</div>\\n </body>\\n</html>')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -525,18 +528,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 16,
|
||||
"id": "49a1df11",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='pragma solidity ^0.8.20;'),\n",
|
||||
" Document(page_content='contract HelloWorld {\\n function add(uint a, uint b) pure public returns(uint) {\\n return a + b;\\n }\\n}')]"
|
||||
"[Document(metadata={}, page_content='pragma solidity ^0.8.20;'),\n",
|
||||
" Document(metadata={}, page_content='contract HelloWorld {\\n function add(uint a, uint b) pure public returns(uint) {\\n return a + b;\\n }\\n}')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -569,21 +572,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 17,
|
||||
"id": "1524ae0f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='using System;'),\n",
|
||||
" Document(page_content='class Program\\n{\\n static void Main()\\n {\\n int age = 30; // Change the age value as needed'),\n",
|
||||
" Document(page_content='// Categorize the age without any console output\\n if (age < 18)\\n {\\n // Age is under 18'),\n",
|
||||
" Document(page_content='}\\n else if (age >= 18 && age < 65)\\n {\\n // Age is an adult\\n }\\n else\\n {'),\n",
|
||||
" Document(page_content='// Age is a senior citizen\\n }\\n }\\n}')]"
|
||||
"[Document(metadata={}, page_content='using System;'),\n",
|
||||
" Document(metadata={}, page_content='class Program\\n{\\n static void Main()\\n {\\n int age = 30; // Change the age value as needed'),\n",
|
||||
" Document(metadata={}, page_content='// Categorize the age without any console output\\n if (age < 18)\\n {\\n // Age is under 18'),\n",
|
||||
" Document(metadata={}, page_content='}\\n else if (age >= 18 && age < 65)\\n {\\n // Age is an adult\\n }\\n else\\n {'),\n",
|
||||
" Document(metadata={}, page_content='// Age is a senior citizen\\n }\\n }\\n}')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -631,20 +634,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 18,
|
||||
"id": "688185b5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='main :: IO ()'),\n",
|
||||
" Document(page_content='main = do\\n putStrLn \"Hello, World!\"\\n-- Some'),\n",
|
||||
" Document(page_content='sample functions\\nadd :: Int -> Int -> Int\\nadd x y'),\n",
|
||||
" Document(page_content='= x + y')]"
|
||||
"[Document(metadata={}, page_content='main :: IO ()'),\n",
|
||||
" Document(metadata={}, page_content='main = do\\n putStrLn \"Hello, World!\"\\n-- Some'),\n",
|
||||
" Document(metadata={}, page_content='sample functions\\nadd :: Int -> Int -> Int\\nadd x y'),\n",
|
||||
" Document(metadata={}, page_content='= x + y')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -676,23 +679,23 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 19,
|
||||
"id": "90c66e7e-87a5-4a81-bece-7949aabf2369",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='<?php\\nnamespace foo;'),\n",
|
||||
" Document(page_content='class Hello {'),\n",
|
||||
" Document(page_content='public function __construct() { }\\n}'),\n",
|
||||
" Document(page_content='function hello() {\\n echo \"Hello World!\";\\n}'),\n",
|
||||
" Document(page_content='interface Human {\\n public function breath();\\n}'),\n",
|
||||
" Document(page_content='trait Foo { }\\nenum Color\\n{\\n case Red;'),\n",
|
||||
" Document(page_content='case Blue;\\n}')]"
|
||||
"[Document(metadata={}, page_content='<?php\\nnamespace foo;'),\n",
|
||||
" Document(metadata={}, page_content='class Hello {'),\n",
|
||||
" Document(metadata={}, page_content='public function __construct() { }\\n}'),\n",
|
||||
" Document(metadata={}, page_content='function hello() {\\n echo \"Hello World!\";\\n}'),\n",
|
||||
" Document(metadata={}, page_content='interface Human {\\n public function breath();\\n}'),\n",
|
||||
" Document(metadata={}, page_content='trait Foo { }\\nenum Color\\n{\\n case Red;'),\n",
|
||||
" Document(metadata={}, page_content='case Blue;\\n}')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -733,10 +736,25 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 20,
|
||||
"id": "7e6893ad",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(metadata={}, page_content='$directoryPath = Get-Location\\n\\n$items = Get-ChildItem -Path $directoryPath'),\n",
|
||||
" Document(metadata={}, page_content='$files = $items | Where-Object { -not $_.PSIsContainer }'),\n",
|
||||
" Document(metadata={}, page_content='$sortedFiles = $files | Sort-Object LastWriteTime'),\n",
|
||||
" Document(metadata={}, page_content='foreach ($file in $sortedFiles) {'),\n",
|
||||
" Document(metadata={}, page_content='Write-Output (\"Name: \" + $file.Name + \" | Last Write Time: \" + $file.LastWriteTime)\\n}')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"POWERSHELL_CODE = \"\"\"\n",
|
||||
"$directoryPath = Get-Location\n",
|
||||
@ -757,11 +775,58 @@
|
||||
"powershell_docs = powershell_splitter.create_documents([POWERSHELL_CODE])\n",
|
||||
"powershell_docs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3ef77730",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Visual Basic 6"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "1dc3c740",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(metadata={}, page_content='Option Explicit'),\n",
|
||||
" Document(metadata={}, page_content='Public Sub HelloWorld()\\n MsgBox \"Hello, World!\"\\nEnd Sub'),\n",
|
||||
" Document(metadata={}, page_content='Private Function Add(a As Integer, b As Integer) As Integer\\n Add = a + b\\nEnd Function')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"VISUALBASIC6_CODE = \"\"\"Option Explicit\n",
|
||||
"\n",
|
||||
"Public Sub HelloWorld()\n",
|
||||
" MsgBox \"Hello, World!\"\n",
|
||||
"End Sub\n",
|
||||
"\n",
|
||||
"Private Function Add(a As Integer, b As Integer) As Integer\n",
|
||||
" Add = a + b\n",
|
||||
"End Function\n",
|
||||
"\"\"\"\n",
|
||||
"visualbasic6_splitter = RecursiveCharacterTextSplitter.from_language(\n",
|
||||
" Language.VISUALBASIC6,\n",
|
||||
" chunk_size=128,\n",
|
||||
" chunk_overlap=0,\n",
|
||||
")\n",
|
||||
"visualbasic6_docs = visualbasic6_splitter.create_documents([VISUALBASIC6_CODE])\n",
|
||||
"visualbasic6_docs"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "langchain",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -775,7 +840,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
"version": "3.10.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -316,6 +316,7 @@ class Language(str, Enum):
|
||||
HASKELL = "haskell"
|
||||
ELIXIR = "elixir"
|
||||
POWERSHELL = "powershell"
|
||||
VISUALBASIC6 = "visualbasic6"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
|
@ -734,6 +734,32 @@ class RecursiveCharacterTextSplitter(TextSplitter):
|
||||
" ",
|
||||
"",
|
||||
]
|
||||
if language == Language.VISUALBASIC6:
|
||||
vis = r"(?:Public|Private|Friend|Global|Static)\s+"
|
||||
return [
|
||||
# Split along definitions
|
||||
rf"\n(?!End\s){vis}?Sub\s+",
|
||||
rf"\n(?!End\s){vis}?Function\s+",
|
||||
rf"\n(?!End\s){vis}?Property\s+(?:Get|Let|Set)\s+",
|
||||
rf"\n(?!End\s){vis}?Type\s+",
|
||||
rf"\n(?!End\s){vis}?Enum\s+",
|
||||
# Split along control flow statements
|
||||
r"\n(?!End\s)If\s+",
|
||||
r"\nElseIf\s+",
|
||||
r"\nElse\s+",
|
||||
r"\nSelect\s+Case\s+",
|
||||
r"\nCase\s+",
|
||||
r"\nFor\s+",
|
||||
r"\nDo\s+",
|
||||
r"\nWhile\s+",
|
||||
r"\nWith\s+",
|
||||
# Split by the normal type of lines
|
||||
r"\n\n",
|
||||
r"\n",
|
||||
" ",
|
||||
"",
|
||||
]
|
||||
|
||||
if language in Language._value2member_map_:
|
||||
msg = f"Language {language} is not implemented yet!"
|
||||
raise ValueError(msg)
|
||||
|
@ -3043,6 +3043,82 @@ $csvContent | ForEach-Object {
|
||||
]
|
||||
|
||||
|
||||
FAKE_VISUALBASIC6_TEXT = """
|
||||
Option Explicit
|
||||
|
||||
Public Function SumTwoIntegers(ByVal a As Integer, ByVal b As Integer) As Integer
|
||||
SumTwoIntegers = a + b
|
||||
End Function
|
||||
|
||||
Public Sub Main()
|
||||
Dim i As Integer
|
||||
Dim limit As Integer
|
||||
|
||||
i = 0
|
||||
limit = 50
|
||||
|
||||
While i < limit
|
||||
i = SumTwoIntegers(i, 1)
|
||||
|
||||
If i = limit \\ 2 Then
|
||||
MsgBox "Halfway there! i = " & i
|
||||
End If
|
||||
Wend
|
||||
|
||||
MsgBox "Done! Final value of i: " & i
|
||||
End Sub
|
||||
"""
|
||||
|
||||
|
||||
def test_visualbasic6_code_splitter() -> None:
    """Split the sample VB6 module and compare against the expected chunks.

    The splitter is built with ``Language.VISUALBASIC6``, the module-level
    ``CHUNK_SIZE`` and no overlap, then applied to ``FAKE_VISUALBASIC6_TEXT``.
    """
    splitter = RecursiveCharacterTextSplitter.from_language(
        Language.VISUALBASIC6,
        chunk_size=CHUNK_SIZE,
        chunk_overlap=0,
    )
    chunks = splitter.split_text(FAKE_VISUALBASIC6_TEXT)

    assert chunks == [
        "Option Explicit",
        "Public Function",
        "SumTwoIntegers(",
        "ByVal",
        "a As Integer,",
        "ByVal b As",
        "Integer) As",
        "Integer",
        "SumTwoIntegers",
        "= a + b",
        "End Function",
        "Public Sub",
        "Main()",
        "Dim i As",
        "Integer",
        "Dim limit",
        "As Integer",
        "i = 0",
        "limit = 50",
        "While i <",
        "limit",
        "i =",
        "SumTwoIntegers(",
        "i,",
        "1)",
        "If i =",
        "limit \\ 2 Then",
        'MsgBox "Halfway',
        'there! i = " &',
        "i",
        "End If",
        "Wend",
        "MsgBox",
        '"Done! Final',
        'value of i: " &',
        "i",
        "End Sub",
    ]
|
||||
|
||||
|
||||
def custom_iframe_extractor(iframe_tag: Any) -> str:
    """Render an iframe tag as a Markdown-style link labelled ``iframe:<src>``.

    Args:
        iframe_tag: Object exposing ``get(key, default)``; its ``"src"`` entry
            is read, falling back to an empty string when it is absent.

    Returns:
        The string ``[iframe:<src>](<src>)``.
    """
    iframe_src = iframe_tag.get("src", "")
    return f"[iframe:{iframe_src}]({iframe_src})"
|
||||
|
Loading…
Reference in New Issue
Block a user