text-splitters[minor]: Added Haskell support in langchain.text_splitter module (#16191)

- **Description:** Added Haskell language support to the
  `langchain.text_splitter` module.
- **Dependencies:** None
- **Twitter handle:** @nisargtr

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
Nisarg Trivedi
2024-03-30 01:47:50 +05:30
committed by GitHub
parent b7344e3347
commit 1252ccce6f
4 changed files with 120 additions and 8 deletions

View File

@@ -22,7 +22,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 4,
"id": "a9e37aa1",
"metadata": {},
"outputs": [],
@@ -35,7 +35,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 5,
"id": "e21a2434",
"metadata": {},
"outputs": [
@@ -61,10 +61,14 @@
" 'html',\n",
" 'sol',\n",
" 'csharp',\n",
" 'cobol']"
" 'cobol',\n",
" 'c',\n",
" 'lua',\n",
" 'perl',\n",
" 'haskell']"
]
},
"execution_count": 2,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -564,13 +568,50 @@
"c_docs"
]
},
{
"cell_type": "markdown",
"id": "af9de667-230e-4c2a-8c5f-122a28515d97",
"metadata": {},
"source": [
"## Haskell\n",
"Here's an example using the Haskell text splitter:"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"id": "688185b5",
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='main :: IO ()'),\n",
" Document(page_content='main = do\\n putStrLn \"Hello, World!\"\\n-- Some'),\n",
" Document(page_content='sample functions\\nadd :: Int -> Int -> Int\\nadd x y'),\n",
" Document(page_content='= x + y')]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"HASKELL_CODE = \"\"\"\n",
"main :: IO ()\n",
"main = do\n",
" putStrLn \"Hello, World!\"\n",
"-- Some sample functions\n",
"add :: Int -> Int -> Int\n",
"add x y = x + y\n",
"\"\"\"\n",
"haskell_splitter = RecursiveCharacterTextSplitter.from_language(\n",
" language=Language.HASKELL, chunk_size=50, chunk_overlap=0\n",
")\n",
"haskell_docs = haskell_splitter.create_documents([HASKELL_CODE])\n",
"haskell_docs"
]
}
],
"metadata": {