ci[patch]: Update community linter to provide a helpful error message (#24127)

Update community import linter to explain what's wrong
This commit is contained in:
Eugene Yurtsev 2024-07-11 12:22:08 -04:00 committed by GitHub
parent 16e178a8c2
commit 1e7d8ba9a6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 37 additions and 15 deletions

View File

@ -46,7 +46,7 @@ lint_tests: MYPY_CACHE=.mypy_cache_test
lint lint_diff lint_package lint_tests:
./scripts/check_pydantic.sh .
./scripts/lint_imports.sh
./scripts/lint_imports.sh .
./scripts/check_pickle.sh .
poetry run ruff check .
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff

View File

@ -1,24 +1,46 @@
#!/bin/bash
# This script searches for invalid imports in tracked files within a Git repository.
#
# Usage: ./scripts/lint_imports.sh /path/to/repository
set -eu
# Initialize a variable to keep track of errors
errors=0
# Check if a path argument is provided
if [ $# -ne 1 ]; then
echo "Usage: $0 /path/to/repository"
exit 1
fi
# make sure not importing from langchain or langchain_experimental
git --no-pager grep '^from langchain_experimental\.' . && errors=$((errors+1))
repository_path="$1"
# Make sure nothing imports from langchain_experimental.
# The search runs through `git grep` inside "$repository_path", with -E for
# extended regex, -n for line numbers, and the '*.py' pathspec to restrict
# the search to Python files.
# `|| true` keeps the script (which runs under `set -eu`) from aborting when
# the grep command exits non-zero, e.g. because nothing matched.
result=$(git -C "$repository_path" grep -En '^import langchain_experimental|^from langchain_experimental' -- '*.py' || true)
# Any output means at least one offending import line was found.
if [ -n "$result" ]; then
    echo "ERROR: The following lines need to be updated:"
    echo "$result"
    # The rule is a prohibition: the message must say "should not import".
    echo "langchain_community should not import from langchain_experimental."
    exit 1
fi
# make sure no one is importing from the built-in xml library
# instead defusedxml should be used to avoid getting CVEs.
# Whether the standary library actually poses a risk to users
# is very nuanced and dependns on user's environment.
# Whether the standard library actually poses a risk to users
# is very nuanced and depends on the user's environment.
# https://docs.python.org/3/library/xml.etree.elementtree.html
git --no-pager grep '^from xml\.' . | grep -vE "# OK: user-must-opt-in" && errors=$((errors+1))
git --no-pager grep '^import xml\.' . | grep -vE "# OK: user-must-opt-in" && errors=$((errors+1))
# Decide on an exit status based on the errors
if [ "$errors" -gt 0 ]; then
exit 1
else
exit 0
# Flag imports of the built-in xml package unless the line carries the
# explicit "# OK: user-must-opt-in" marker.
# The module name must be followed by a literal '.', whitespace, or end of
# line, so `import xml`, `import xml.etree...`, `import xml as x`,
# `from xml import ...` and `from xml.etree import ...` are all caught, while
# unrelated modules whose names merely start with "xml" (e.g. xmltodict) are
# not.
# `|| true` keeps the script (which runs under `set -eu`) from aborting when
# the pipeline exits non-zero, e.g. because no offending line remains.
result=$(git -C "$repository_path" grep -En '^(from|import) xml([.[:space:]]|$)' | grep -vE "# OK: user-must-opt-in" || true)

# Any output means at least one un-annotated xml import was found.
if [ -n "$result" ]; then
    echo "ERROR: The following lines need to be updated:"
    echo "$result"
    echo "Triggering an error due to usage of the built-in xml library. "
    echo "Please see https://docs.python.org/3/library/xml.html#xml-vulnerabilities."
    echo "If this happens, there's likely code that's relying on the standard library "
    echo "to parse xml somewhere in the code path. "
    echo "Please update the code to force the user to explicitly opt-in to using the standard library or running the code. "
    echo "It should be **obvious** without reading the documentation that they are being forced to use the standard library. "
    echo "After this is done feel free to add a comment to the line with '# OK: user-must-opt-in', after the import. "
    echo "Lacking a clear opt-in mechanism is likely a security risk, and will result in rejection of the PR."
    exit 1
fi