community[patch]: doc loaders mypy fixes (#17368)

**Description:** Removed `type: ignore` comments by fixing the underlying
mypy errors in some `document_loaders` modules.
**Issue:** [Remove "type: ignore" comments #17048](https://github.com/langchain-ai/langchain/issues/17048)

---------

Co-authored-by: Robby <h0rv@users.noreply.github.com>
Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
This commit is contained in:
Robby
2024-02-12 19:51:06 -05:00
committed by GitHub
parent 0653aa469a
commit ece4b43a81
11 changed files with 37 additions and 24 deletions

View File

@@ -1,4 +1,4 @@
from typing import Any, List
from typing import Any, List, Tuple
from langchain_community.document_loaders.parsers.language.code_segmenter import (
CodeSegmenter,
@@ -55,15 +55,18 @@ class JavaScriptSegmenter(CodeSegmenter):
tree = esprima.parseScript(self.code, loc=True)
simplified_lines = self.source_lines[:]
indices_to_del: List[Tuple[int, int]] = []
for node in tree.body:
if isinstance(
node,
(esprima.nodes.FunctionDeclaration, esprima.nodes.ClassDeclaration),
):
start = node.loc.start.line - 1
start, end = node.loc.start.line - 1, node.loc.end.line
simplified_lines[start] = f"// Code for: {simplified_lines[start]}"
for line_num in range(start + 1, node.loc.end.line):
simplified_lines[line_num] = None # type: ignore
indices_to_del.append((start + 1, end))
return "\n".join(line for line in simplified_lines if line is not None)
for start, end in reversed(indices_to_del):
del simplified_lines[start + 0 : end]
return "\n".join(line for line in simplified_lines)

View File

@@ -1,5 +1,5 @@
import ast
from typing import Any, List
from typing import Any, List, Tuple
from langchain_community.document_loaders.parsers.language.code_segmenter import (
CodeSegmenter,
@@ -39,13 +39,15 @@ class PythonSegmenter(CodeSegmenter):
tree = ast.parse(self.code)
simplified_lines = self.source_lines[:]
indices_to_del: List[Tuple[int, int]] = []
for node in ast.iter_child_nodes(tree):
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
start = node.lineno - 1
start, end = node.lineno - 1, node.end_lineno
simplified_lines[start] = f"# Code for: {simplified_lines[start]}"
assert isinstance(end, int)
indices_to_del.append((start + 1, end))
assert isinstance(node.end_lineno, int)
for line_num in range(start + 1, node.end_lineno):
simplified_lines[line_num] = None # type: ignore
for start, end in reversed(indices_to_del):
del simplified_lines[start + 0 : end]
return "\n".join(line for line in simplified_lines if line is not None)
return "\n".join(simplified_lines)