langchain : text_splitters Added PowerShell (#24582)

- **Description:** Added PowerShell as a supported language for the text
splitters, including the relevant docs update.
  - **Issue:** None
  - **Dependencies:** None

---------

Co-authored-by: tzitman <tamir.zitman@intel.com>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
Tamir Zitman
2024-07-30 19:13:52 +03:00
committed by GitHub
parent 187ee96f7a
commit b3e1378f2b
4 changed files with 119 additions and 5 deletions

View File

@@ -294,6 +294,7 @@ class Language(str, Enum):
PERL = "perl"
HASKELL = "haskell"
ELIXIR = "elixir"
POWERSHELL = "powershell"
@dataclass(frozen=True)

View File

@@ -659,6 +659,30 @@ class RecursiveCharacterTextSplitter(TextSplitter):
" ",
"",
]
elif language == Language.POWERSHELL:
return [
# Split along function definitions
"\nfunction ",
# Split along parameter declarations
"\nparam ",
# Split along control flow statements
"\nif ",
"\nforeach ",
"\nfor ",
"\nwhile ",
"\nswitch ",
# Split along class definitions (for PowerShell 5.0 and above)
"\nclass ",
# Split along try-catch-finally blocks
"\ntry ",
"\ncatch ",
"\nfinally ",
# Split by normal lines and empty spaces
"\n\n",
"\n",
" ",
"",
]
elif language in Language._value2member_map_:
raise ValueError(f"Language {language} is not implemented yet!")
else: