mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-08 06:23:20 +00:00
community: Add PHP language parser to document_loaders (#19850)
**Description:** Added a PHP language parser to document_loaders. **Issue:** N/A. **Dependencies:** N/A. **Twitter handle:** N/A. --------- Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
@@ -18,6 +18,7 @@ from langchain_community.document_loaders.parsers.language.javascript import (
|
||||
from langchain_community.document_loaders.parsers.language.kotlin import KotlinSegmenter
|
||||
from langchain_community.document_loaders.parsers.language.lua import LuaSegmenter
|
||||
from langchain_community.document_loaders.parsers.language.perl import PerlSegmenter
|
||||
from langchain_community.document_loaders.parsers.language.php import PHPSegmenter
|
||||
from langchain_community.document_loaders.parsers.language.python import PythonSegmenter
|
||||
from langchain_community.document_loaders.parsers.language.ruby import RubySegmenter
|
||||
from langchain_community.document_loaders.parsers.language.rust import RustSegmenter
|
||||
@@ -42,6 +43,7 @@ LANGUAGE_EXTENSIONS: Dict[str, str] = {
|
||||
"pl": "perl",
|
||||
"ts": "ts",
|
||||
"java": "java",
|
||||
"php": "php",
|
||||
}
|
||||
|
||||
LANGUAGE_SEGMENTERS: Dict[str, Any] = {
|
||||
@@ -60,6 +62,7 @@ LANGUAGE_SEGMENTERS: Dict[str, Any] = {
|
||||
"perl": PerlSegmenter,
|
||||
"ts": TypeScriptSegmenter,
|
||||
"java": JavaSegmenter,
|
||||
"php": PHPSegmenter,
|
||||
}
|
||||
|
||||
Language = Literal[
|
||||
|
@@ -0,0 +1,35 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from langchain_community.document_loaders.parsers.language.tree_sitter_segmenter import ( # noqa: E501
|
||||
TreeSitterSegmenter,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from tree_sitter import Language
|
||||
|
||||
|
||||
# Tree-sitter query handed out by ``PHPSegmenter.get_chunk_query``.
# Each alternative captures one kind of PHP definition node: functions,
# classes, interfaces, traits, enums and namespaces.
CHUNK_QUERY = """
[
(function_definition) @function
(class_declaration) @class
(interface_declaration) @interface
(trait_declaration) @trait
(enum_declaration) @enum
(namespace_definition) @namespace
]
""".strip()
|
||||
|
||||
|
||||
class PHPSegmenter(TreeSitterSegmenter):
    """Code segmenter for PHP source, built on ``TreeSitterSegmenter``."""

    def get_language(self) -> "Language":
        """Return the tree-sitter ``Language`` object for PHP.

        ``tree_sitter_languages`` is imported inside the method, so the
        package is only required once this method is actually called.
        """
        from tree_sitter_languages import get_language as load_language

        php_language = load_language("php")
        return php_language

    def get_chunk_query(self) -> str:
        """Return the module-level ``CHUNK_QUERY`` string used for PHP."""
        return CHUNK_QUERY

    def make_line_comment(self, text: str) -> str:
        """Return ``text`` prefixed with ``// `` to form a PHP line comment."""
        line_comment = f"// {text}"
        return line_comment
|
Reference in New Issue
Block a user