diff --git a/libs/community/Makefile b/libs/community/Makefile
index e898a19be1b..4ff7057ab7e 100644
--- a/libs/community/Makefile
+++ b/libs/community/Makefile
@@ -46,7 +46,7 @@ lint_tests: MYPY_CACHE=.mypy_cache_test
 
 lint lint_diff lint_package lint_tests:
 	./scripts/check_pydantic.sh .
-	./scripts/lint_imports.sh
+	./scripts/lint_imports.sh .
 	./scripts/check_pickle.sh .
 	poetry run ruff check .
 	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
diff --git a/libs/community/scripts/lint_imports.sh b/libs/community/scripts/lint_imports.sh
index 97d9c96b031..5f2575f03fb 100755
--- a/libs/community/scripts/lint_imports.sh
+++ b/libs/community/scripts/lint_imports.sh
@@ -1,24 +1,50 @@
 #!/bin/bash
-
+# This script searches for invalid imports in tracked files within a Git repository.
+#
+# Usage: ./scripts/lint_imports.sh /path/to/repository
 set -eu
 
-# Initialize a variable to keep track of errors
-errors=0
+# Check if a path argument is provided
+if [ $# -ne 1 ]; then
+  echo "Usage: $0 /path/to/repository"
+  exit 1
+fi
 
-# make sure not importing from langchain or langchain_experimental
-git --no-pager grep '^from langchain_experimental\.' . && errors=$((errors+1))
+repository_path="$1"
+
+# make sure not importing from langchain_experimental
+# git grep exits non-zero when nothing matches; "|| true" keeps "set -e" from aborting.
+result=$(git -C "$repository_path" grep -En '^import langchain_experimental|^from langchain_experimental' -- '*.py' || true)
+
+# Check if any matching lines were found
+if [ -n "$result" ]; then
+  echo "ERROR: The following lines need to be updated:"
+  echo "$result"
+  echo "langchain_community should not import from langchain_experimental."
+  exit 1
+fi
 
 # make sure no one is importing from the built-in xml library
 # instead defusedxml should be used to avoid getting CVEs.
-# Whether the standary library actually poses a risk to users
-# is very nuanced and dependns on user's environment.
+# Whether the standard library actually poses a risk to users
+# is very nuanced and depends on the user's environment.
 # https://docs.python.org/3/library/xml.etree.elementtree.html
-git --no-pager grep '^from xml\.' . | grep -vE "# OK: user-must-opt-in" && errors=$((errors+1))
-git --no-pager grep '^import xml\.' . | grep -vE "# OK: user-must-opt-in" && errors=$((errors+1))
 
-# Decide on an exit status based on the errors
-if [ "$errors" -gt 0 ]; then
-  exit 1
-else
-  exit 0
+# Match only the "xml" package itself: "import xml", "import xml.<sub>",
+# "from xml import ..." and "from xml.<sub> import ...". The dot is escaped so
+# that unrelated packages whose names merely start with "xml" are not flagged.
+result=$(git -C "$repository_path" grep -En '^(from|import) xml(\.|[[:space:]]|$)' -- '*.py' | grep -vE "# OK: user-must-opt-in" || true)
+
+if [ -n "$result" ]; then
+  echo "ERROR: The following lines need to be updated:"
+  echo "$result"
+  echo "Triggering an error due to usage of the built-in xml library. "
+  echo "Please see https://docs.python.org/3/library/xml.html#xml-vulnerabilities."
+  echo "If this happens, there's likely code that's relying on the standard library "
+  echo "to parse xml somewhere in the code path. "
+  echo "Please update the code to force the user to explicitly opt-in to using the standard library or running the code. "
+  echo "It should be **obvious** without reading the documentation that they are being forced to use the standard library. "
+  echo "After this is done feel free to add a comment to the line with '# OK: user-must-opt-in', after the import. "
+  echo "Lacking a clear opt-in mechanism is likely a security risk, and will result in rejection of the PR."
+  exit 1
 fi