mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-08 12:31:49 +00:00
community[major]: lint for usage of xml library (#22132)
* Lint for usage of standard xml library * Add forced opt-in for quip client * Actual security issue is with underlying QuipClient not LangChain integration (since the client is doing the parsing), but adding enforcement at the LangChain level.
This commit is contained in:
parent
5b5ea2af30
commit
d3db83abe3
@ -1,10 +1,9 @@
|
||||
import logging
|
||||
import re
|
||||
import xml.etree.cElementTree
|
||||
import xml.sax.saxutils
|
||||
import xml.etree.cElementTree # OK: user-must-opt-in
|
||||
from io import BytesIO
|
||||
from typing import List, Optional, Sequence
|
||||
from xml.etree.ElementTree import ElementTree
|
||||
from xml.etree.ElementTree import ElementTree # OK: user-must-opt-in
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
@ -22,14 +21,20 @@ class QuipLoader(BaseLoader):
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, api_url: str, access_token: str, request_timeout: Optional[int] = 60
|
||||
self,
|
||||
api_url: str,
|
||||
access_token: str,
|
||||
request_timeout: Optional[int] = 60,
|
||||
*,
|
||||
allow_dangerous_xml_parsing: bool = False,
|
||||
):
|
||||
"""
|
||||
Args:
|
||||
api_url: https://platform.quip.com
|
||||
access_token: token of access quip API. Please refer:
|
||||
https://quip.com/dev/automation/documentation/current#section/Authentication/Get-Access-to-Quip's-APIs
|
||||
https://quip.com/dev/automation/documentation/current#section/Authentication/Get-Access-to-Quip's-APIs
|
||||
request_timeout: timeout of request, default 60s.
|
||||
allow_dangerous_xml_parsing: Allow dangerous XML parsing, defaults to False
|
||||
"""
|
||||
try:
|
||||
from quip_api.quip import QuipClient
|
||||
@ -42,6 +47,17 @@ class QuipLoader(BaseLoader):
|
||||
access_token=access_token, base_url=api_url, request_timeout=request_timeout
|
||||
)
|
||||
|
||||
if not allow_dangerous_xml_parsing:
|
||||
raise ValueError(
|
||||
"The quip client uses the built-in XML parser which may cause"
|
||||
"security issues when parsing XML data in some cases. "
|
||||
"Please see "
|
||||
"https://docs.python.org/3/library/xml.html#xml-vulnerabilities "
|
||||
"For more information, set `allow_dangerous_xml_parsing` as True "
|
||||
"if you are sure that your distribution of the standard library "
|
||||
"is not vulnerable to XML vulnerabilities."
|
||||
)
|
||||
|
||||
def load(
|
||||
self,
|
||||
folder_ids: Optional[List[str]] = None,
|
||||
|
@ -8,6 +8,14 @@ errors=0
|
||||
# make sure not importing from langchain or langchain_experimental
|
||||
git --no-pager grep '^from langchain_experimental\.' . && errors=$((errors+1))
|
||||
|
||||
# make sure no one is importing from the built-in xml library
|
||||
# instead defusedxml should be used to avoid getting CVEs.
|
||||
# Whether the standard library actually poses a risk to users
|
||||
# is very nuanced and depends on the user's environment.
|
||||
# https://docs.python.org/3/library/xml.etree.elementtree.html
|
||||
git --no-pager grep '^from xml\.' . | grep -vE "# OK: user-must-opt-in" && errors=$((errors+1))
|
||||
git --no-pager grep '^import xml\.' . | grep -vE "# OK: user-must-opt-in" && errors=$((errors+1))
|
||||
|
||||
# Decide on an exit status based on the errors
|
||||
if [ "$errors" -gt 0 ]; then
|
||||
exit 1
|
||||
|
Loading…
Reference in New Issue
Block a user