From b0f9a36008363d3b021e937c40ea29540642a7c0 Mon Sep 17 00:00:00 2001 From: dongpingx Date: Wed, 28 Aug 2024 16:42:02 +0800 Subject: [PATCH] misc: Bandit scan issue for lxml This patch fixes Bandit scan issues B313-B320, which flag XML parsers that are vulnerable to XML attacks when parsing untrusted XML data. It replaces the lxml.etree parsing functions with their equivalents from the defusedxml package. Verified by re-running the Bandit scan, building the configurator, and compiling ACRN. Signed-off-by: dongpingx Tracked-On: #8717 --- misc/config_tools/acpi_gen/asl_gen.py | 8 ++++---- misc/config_tools/acpi_gen/bin_gen.py | 8 ++++---- misc/config_tools/board_config/board_c.py | 20 +++++++++---------- .../board_inspector/board_inspector.py | 3 ++- .../board_inspector/inspectorlib/mmio.py | 4 ++-- .../board_inspector/inspectorlib/validator.py | 4 ++-- .../configurator/pyodide/loadBoard.py | 4 ++-- .../pyodide/populateDefaultValues.py | 2 +- .../configurator/pyodide/updateSchema.py | 3 ++- .../launch_config/launch_cfg_gen.py | 6 +++--- misc/config_tools/library/launch_cfg_lib.py | 4 ++-- .../scenario_config/config_summary.py | 6 +++--- .../scenario_config/lxml_loader.py | 3 ++- .../scenario_config/scenario_cfg_gen.py | 14 ++++++------- .../service_vm_config/serial_config.py | 6 +++--- misc/config_tools/static_allocators/main.py | 7 ++++--- 16 files changed, 53 insertions(+), 49 deletions(-) diff --git a/misc/config_tools/acpi_gen/asl_gen.py b/misc/config_tools/acpi_gen/asl_gen.py index 2f017ff15..c06ef5423 100644 --- a/misc/config_tools/acpi_gen/asl_gen.py +++ b/misc/config_tools/acpi_gen/asl_gen.py @@ -9,7 +9,7 @@ import sys, os, re, argparse, shutil, ctypes from acpi_const import * import board_cfg_lib, acrn_config_utilities import collections -import lxml.etree +from defusedxml.lxml import parse from acrn_config_utilities import get_node sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'board_inspector')) @@ -861,11 +861,11 @@ def main(args): scenario= params['--scenario'] out = 
params['--out'] - board_etree = lxml.etree.parse(board) + board_etree = parse(board) board_root = board_etree.getroot() - scenario_etree = lxml.etree.parse(scenario) + scenario_etree = parse(scenario) scenario_root = scenario_etree.getroot() - allocation_etree = lxml.etree.parse(os.path.join(os.path.dirname(board), "configs", "allocation.xml")) + allocation_etree = parse(os.path.join(os.path.dirname(board), "configs", "allocation.xml")) board_type = board_root.attrib['board'] scenario_name = scenario_root.attrib['scenario'] pcpu_list = board_root.find('CPU_PROCESSOR_INFO').text.strip().split(',') diff --git a/misc/config_tools/acpi_gen/bin_gen.py b/misc/config_tools/acpi_gen/bin_gen.py index 45056a9b7..217c80abd 100644 --- a/misc/config_tools/acpi_gen/bin_gen.py +++ b/misc/config_tools/acpi_gen/bin_gen.py @@ -9,7 +9,7 @@ import logging import subprocess # nosec import os, sys, argparse, re, shutil sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'board_inspector')) -import lxml.etree +from defusedxml.lxml import parse from acpi_const import * import acpiparser.tpm2 import inspectorlib.cdata @@ -251,8 +251,8 @@ def check_iasl(iasl_path, iasl_min_ver): def main(args): - board_etree = lxml.etree.parse(args.board) - scenario_etree = lxml.etree.parse(args.scenario) + board_etree = parse(args.board) + scenario_etree = parse(args.scenario) scenario_name = get_node("//@scenario", scenario_etree) @@ -266,7 +266,7 @@ def main(args): hypervisor_out = args.out DEST_ACPI_BIN_PATH = os.path.join(hypervisor_out, 'acpi') - allocation_etree = lxml.etree.parse(os.path.join(hypervisor_out, 'configs', 'allocation.xml')) + allocation_etree = parse(os.path.join(hypervisor_out, 'configs', 'allocation.xml')) if os.path.isdir(DEST_ACPI_BIN_PATH): shutil.rmtree(DEST_ACPI_BIN_PATH) diff --git a/misc/config_tools/board_config/board_c.py b/misc/config_tools/board_config/board_c.py index c0cfe631f..1971256b8 100644 --- a/misc/config_tools/board_config/board_c.py 
+++ b/misc/config_tools/board_config/board_c.py @@ -7,7 +7,7 @@ import sys import enum import board_cfg_lib import acrn_config_utilities -import lxml.etree +from defusedxml.lxml import parse import os from acrn_config_utilities import get_node @@ -125,7 +125,7 @@ def populate_mba_delay_mask(rdt_res, mba_delay_list, config): idx += 1 def get_rdt_enabled(): - scenario_etree = lxml.etree.parse(acrn_config_utilities.SCENARIO_INFO_FILE) + scenario_etree = parse(acrn_config_utilities.SCENARIO_INFO_FILE) enable = scenario_etree.xpath(f"//RDT_ENABLED/text()") if enable[0] == "y": return "true" @@ -133,7 +133,7 @@ def get_rdt_enabled(): return "false" def get_cdp_enabled(): - scenario_etree = lxml.etree.parse(acrn_config_utilities.SCENARIO_INFO_FILE) + scenario_etree = parse(acrn_config_utilities.SCENARIO_INFO_FILE) enable = scenario_etree.xpath(f"//CDP_ENABLED/text()") if enable[0] == "y": return "true" @@ -154,7 +154,7 @@ def gen_rdt_str(cache, config): err_dic = {} cat_mask_list = {} - board_etree = lxml.etree.parse(acrn_config_utilities.BOARD_INFO_FILE) + board_etree = parse(acrn_config_utilities.BOARD_INFO_FILE) mask_length = get_node(f"./capability[@id='CAT']/capacity_mask_length/text()", cache) clos_number = get_node(f"./capability[@id='CAT']/clos_number/text()", cache) @@ -220,7 +220,7 @@ def gen_rdt_str(cache, config): def get_mask_list(cache_level, cache_id): allocation_dir = os.path.split(acrn_config_utilities.SCENARIO_INFO_FILE)[0] + "/configs/allocation.xml" - allocation_etree = lxml.etree.parse(allocation_dir) + allocation_etree = parse(allocation_dir) if cache_level == "3": clos_list = allocation_etree.xpath(f"//clos_mask[@id = 'l3']/clos/text()") else: @@ -285,9 +285,9 @@ def gen_rdt_res(config): err_dic = {} res_present = [0, 0, 0] - scenario_etree = lxml.etree.parse(acrn_config_utilities.SCENARIO_INFO_FILE) - allocation_etree = lxml.etree.parse(acrn_config_utilities.SCENARIO_INFO_FILE) - board_etree = lxml.etree.parse(acrn_config_utilities.BOARD_INFO_FILE) 
+ scenario_etree = parse(acrn_config_utilities.SCENARIO_INFO_FILE) + allocation_etree = parse(acrn_config_utilities.SCENARIO_INFO_FILE) + board_etree = parse(acrn_config_utilities.BOARD_INFO_FILE) cache_list = board_etree.xpath(f"//cache[capability/@id = 'CAT' or capability/@id = 'MBA']") gen_clos_array(cache_list, config) @@ -410,7 +410,7 @@ def gen_px_cx(config): def gen_pci_hide(config): """Generate hide pci information for this platform""" - scenario_etree = lxml.etree.parse(acrn_config_utilities.SCENARIO_INFO_FILE) + scenario_etree = parse(acrn_config_utilities.SCENARIO_INFO_FILE) hidden_pdev_list = [x.replace('.', ':') for x in scenario_etree.xpath(f"//HIDDEN_PDEV/text()")] if board_cfg_lib.BOARD_NAME in list(board_cfg_lib.KNOWN_HIDDEN_PDEVS_BOARD_DB.keys()) and board_cfg_lib.KNOWN_HIDDEN_PDEVS_BOARD_DB[board_cfg_lib.BOARD_NAME] != 0: @@ -458,7 +458,7 @@ def gen_known_caps_pci_devs(config): def gen_cpufreq_limits(config): allocation_dir = os.path.split(acrn_config_utilities.SCENARIO_INFO_FILE)[0] + "/configs/allocation.xml" - allocation_etree = lxml.etree.parse(allocation_dir) + allocation_etree = parse(allocation_dir) cpu_list = board_cfg_lib.get_processor_info() max_cpu_num = len(cpu_list) diff --git a/misc/config_tools/board_inspector/board_inspector.py b/misc/config_tools/board_inspector/board_inspector.py index 050717641..0ebe8735c 100755 --- a/misc/config_tools/board_inspector/board_inspector.py +++ b/misc/config_tools/board_inspector/board_inspector.py @@ -11,6 +11,7 @@ import logging import tempfile import subprocess # nosec import lxml.etree +from defusedxml.lxml import parse import argparse from tqdm import tqdm from collections import namedtuple @@ -158,7 +159,7 @@ def main(board_name, board_xml, args): env = { "PYTHONPATH": script_dir, "PATH": os.environ["PATH"] } subprocess.run([sys.executable, legacy_parser, args.board_name, "--out", board_xml], check=True, env=env) # ... 
then load the created board XML and append it with additional data by invoking the extractors. - board_etree = lxml.etree.parse(board_xml) + board_etree = parse(board_xml) root_node = board_etree.getroot() # Clear the whitespaces between adjacent children under the root node diff --git a/misc/config_tools/board_inspector/inspectorlib/mmio.py b/misc/config_tools/board_inspector/inspectorlib/mmio.py index eecdd6810..a2a9e5af6 100644 --- a/misc/config_tools/board_inspector/inspectorlib/mmio.py +++ b/misc/config_tools/board_inspector/inspectorlib/mmio.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import argparse -import lxml.etree +from defusedxml.lxml import parse def mmio_regions(etree): ret = [] @@ -21,7 +21,7 @@ if __name__ == "__main__": parser.add_argument("file", help="board XML file") args = parser.parse_args() - etree = lxml.etree.parse(args.file) + etree = parse(args.file) regions = mmio_regions(etree) for region in regions: print("%-4s 0x%08x 0x%08x" % (region[0], region[1], region[2])) diff --git a/misc/config_tools/board_inspector/inspectorlib/validator.py b/misc/config_tools/board_inspector/inspectorlib/validator.py index 6409fe328..934714f56 100644 --- a/misc/config_tools/board_inspector/inspectorlib/validator.py +++ b/misc/config_tools/board_inspector/inspectorlib/validator.py @@ -7,7 +7,7 @@ import sys, os import argparse -import lxml.etree as etree +from defusedxml.lxml import parse import logging import xmlschema @@ -18,7 +18,7 @@ logging_fn = { } def validate_board(xsd_path, board_etree): - schema_etree = etree.parse(xsd_path) + schema_etree = parse(xsd_path) schema_etree.xinclude() schema = xmlschema.XMLSchema11(schema_etree) diff --git a/misc/config_tools/configurator/pyodide/loadBoard.py b/misc/config_tools/configurator/pyodide/loadBoard.py index 6917bf652..89d32f79b 100644 --- a/misc/config_tools/configurator/pyodide/loadBoard.py +++ b/misc/config_tools/configurator/pyodide/loadBoard.py @@ -8,7 +8,7 @@ import os from copy import deepcopy import 
elementpath -import lxml.etree as etree +from defusedxml.lxml import fromstring from bs4 import BeautifulSoup from . import convert_result, nuc11_board, scenario_json_schema, nuc11_board_path @@ -20,7 +20,7 @@ def get_dynamic_scenario(board): :type board: str :param board: board xml text """ - board_xml = etree.fromstring(board) + board_xml = fromstring(board) def get_enum(source, options, option_names, obj_type): elements = [str(x) for x in elementpath.select(source, options) if x] diff --git a/misc/config_tools/configurator/pyodide/populateDefaultValues.py b/misc/config_tools/configurator/pyodide/populateDefaultValues.py index 241a8263d..dfbed3f74 100644 --- a/misc/config_tools/configurator/pyodide/populateDefaultValues.py +++ b/misc/config_tools/configurator/pyodide/populateDefaultValues.py @@ -9,7 +9,7 @@ __package__ = 'configurator.pyodide' from pathlib import Path from tempfile import TemporaryDirectory -from xml.etree.ElementTree import tostring +from defusedxml.ElementTree import tostring from scenario_config.pipeline import PipelineObject, PipelineEngine from scenario_config.xml_loader import XMLLoadStage diff --git a/misc/config_tools/configurator/pyodide/updateSchema.py b/misc/config_tools/configurator/pyodide/updateSchema.py index 4098d3fc2..2a85b74a3 100644 --- a/misc/config_tools/configurator/pyodide/updateSchema.py +++ b/misc/config_tools/configurator/pyodide/updateSchema.py @@ -5,13 +5,14 @@ from .pyodide import convert_result, nuc11_board, nuc11_scenario import re from lxml import etree +from defusedxml.lxml import fromstring class GenerateSchema: def __init__(self, board, scenario): parser = etree.XMLParser(remove_blank_text=True) - self.board_etree = etree.fromstring(board, parser) + self.board_etree = fromstring(board, parser) self.scenario = scenario @property diff --git a/misc/config_tools/launch_config/launch_cfg_gen.py b/misc/config_tools/launch_config/launch_cfg_gen.py index 88c43d948..3069de47b 100755 --- 
a/misc/config_tools/launch_config/launch_cfg_gen.py +++ b/misc/config_tools/launch_config/launch_cfg_gen.py @@ -16,7 +16,7 @@ import argparse import logging -import lxml.etree as etree +from defusedxml.lxml import parse def eval_xpath(element, xpath, default_value=None): @@ -428,8 +428,8 @@ def generate_for_one_vm(board_etree, hv_scenario_etree, vm_scenario_etree, vm_id def main(board_xml, scenario_xml, user_vm_id, out_dir): - board_etree = etree.parse(board_xml) - scenario_etree = etree.parse(scenario_xml) + board_etree = parse(board_xml) + scenario_etree = parse(scenario_xml) service_vm_id = eval_xpath(scenario_etree, "//vm[load_order = 'SERVICE_VM']/@id") service_vm_name = eval_xpath(scenario_etree, "//vm[load_order = 'SERVICE_VM']/name/text()") diff --git a/misc/config_tools/library/launch_cfg_lib.py b/misc/config_tools/library/launch_cfg_lib.py index e396b5eb5..2392548d4 100644 --- a/misc/config_tools/library/launch_cfg_lib.py +++ b/misc/config_tools/library/launch_cfg_lib.py @@ -11,7 +11,7 @@ import acrn_config_utilities import board_cfg_lib import scenario_cfg_lib import lxml -import lxml.etree +from defusedxml.lxml import parse ERR_LIST = {} BOOT_TYPE = ['no', 'ovmf'] @@ -673,7 +673,7 @@ def check_communication_vuart(launch_communication_vuarts, scenario_info): return def check_enable_ptm(launch_enable_ptm, scenario_info): - scenario_etree = lxml.etree.parse(scenario_info) + scenario_etree = parse(scenario_info) enable_ptm_vm_list = scenario_etree.xpath("//vm[PTM = 'y']/@id") for user_vmid, enable_ptm in launch_enable_ptm.items(): key = 'user_vm:id={},enable_ptm'.format(user_vmid) diff --git a/misc/config_tools/scenario_config/config_summary.py b/misc/config_tools/scenario_config/config_summary.py index 7c5619dc2..1a36ba49a 100644 --- a/misc/config_tools/scenario_config/config_summary.py +++ b/misc/config_tools/scenario_config/config_summary.py @@ -10,7 +10,7 @@ import logging import typing import functools import textwrap -from lxml import etree +from 
defusedxml.lxml import parse t_content = typing.Union[str, typing.List[str]] @@ -100,8 +100,8 @@ class GenerateRst: # Class initialization def __init__(self, board_file_name, scenario_file_name, rst_file_name) -> None: - self.board_etree = etree.parse(board_file_name) - self.scenario_etree = etree.parse(scenario_file_name) + self.board_etree = parse(board_file_name) + self.scenario_etree = parse(scenario_file_name) self.file = open(rst_file_name, 'w') self.doc = Doc(self.file) diff --git a/misc/config_tools/scenario_config/lxml_loader.py b/misc/config_tools/scenario_config/lxml_loader.py index 5d151dbde..557150942 100644 --- a/misc/config_tools/scenario_config/lxml_loader.py +++ b/misc/config_tools/scenario_config/lxml_loader.py @@ -5,7 +5,8 @@ # SPDX-License-Identifier: BSD-3-Clause # -from lxml.etree import parse, XMLParser +from lxml.etree import XMLParser +from defusedxml.lxml import parse from pipeline import PipelineStage class LXMLLoadStage(PipelineStage): diff --git a/misc/config_tools/scenario_config/scenario_cfg_gen.py b/misc/config_tools/scenario_config/scenario_cfg_gen.py index ff47fb288..43d7f7224 100755 --- a/misc/config_tools/scenario_config/scenario_cfg_gen.py +++ b/misc/config_tools/scenario_config/scenario_cfg_gen.py @@ -6,7 +6,7 @@ import os import sys import copy -import lxml.etree as etree +from defusedxml.lxml import parse, tostring sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'library')) sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'hv_config')) sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'acpi_gen')) @@ -100,9 +100,9 @@ def validate_scenario_schema(scenario_info): XMLSchema does not process XInclude. Use lxml to expand the schema which is feed to XMLSchema as a string. 
""" - xsd_doc = etree.parse(acrn_config_utilities.SCENARIO_SCHEMA_FILE) + xsd_doc = parse(acrn_config_utilities.SCENARIO_SCHEMA_FILE) xsd_doc.xinclude() - my_schema = xmlschema.XMLSchema11(etree.tostring(xsd_doc, encoding="unicode")) + my_schema = xmlschema.XMLSchema11(tostring(xsd_doc, encoding="unicode")) it = my_schema.iter_errors(scenario_info) for idx, validation_error in enumerate(it, start=1): @@ -124,12 +124,12 @@ def validate_scenario_schema(scenario_info): scenario_cfg_lib.ERR_LIST[key] = element + reason def apply_data_checks(board_info, scenario_info): - xsd_doc = etree.parse(acrn_config_utilities.DATACHECK_SCHEMA_FILE) + xsd_doc = parse(acrn_config_utilities.DATACHECK_SCHEMA_FILE) xsd_doc.xinclude() - datachecks_schema = xmlschema.XMLSchema11(etree.tostring(xsd_doc, encoding="unicode")) + datachecks_schema = xmlschema.XMLSchema11(tostring(xsd_doc, encoding="unicode")) - main_etree = etree.parse(board_info) - scenario_etree = etree.parse(scenario_info) + main_etree = parse(board_info) + scenario_etree = parse(scenario_info) main_etree.getroot().extend(scenario_etree.getroot()[:]) # FIXME: Figure out proper error keys for data check failures error_key = "" diff --git a/misc/config_tools/service_vm_config/serial_config.py b/misc/config_tools/service_vm_config/serial_config.py index cb902f10c..824ecbd20 100644 --- a/misc/config_tools/service_vm_config/serial_config.py +++ b/misc/config_tools/service_vm_config/serial_config.py @@ -8,7 +8,7 @@ import sys, os sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'library')) import argparse -import lxml.etree +from defusedxml.lxml import parse import acrn_config_utilities from acrn_config_utilities import get_node @@ -41,8 +41,8 @@ def main(args): Generate serial configuration file for service VM :param args: command line args """ - scenario_etree = lxml.etree.parse(args.scenario) - allocation_etree = lxml.etree.parse(args.allocation) + scenario_etree = parse(args.scenario) + 
allocation_etree = parse(args.allocation) vuart_target_vmid = {} vm_list = scenario_etree.xpath("//vm[load_order = 'SERVICE_VM']") diff --git a/misc/config_tools/static_allocators/main.py b/misc/config_tools/static_allocators/main.py index b8732cb31..6163f5170 100755 --- a/misc/config_tools/static_allocators/main.py +++ b/misc/config_tools/static_allocators/main.py @@ -7,6 +7,7 @@ import sys, os import lxml.etree +from defusedxml.lxml import parse, fromstring import argparse sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'library')) import acrn_config_utilities @@ -22,9 +23,9 @@ def main(args): scripts_path = os.path.dirname(os.path.realpath(__file__)) current = os.path.basename(__file__) - board_etree = lxml.etree.parse(args.board) - scenario_etree = lxml.etree.parse(args.scenario) - allocation_etree = lxml.etree.ElementTree(element=lxml.etree.fromstring("")) + board_etree = parse(args.board) + scenario_etree = parse(args.scenario) + allocation_etree = lxml.etree.ElementTree(element=fromstring("")) for script in sorted([f for f in os.listdir(scripts_path) if f.endswith(".py") and f != current]): module_name = os.path.splitext(script)[0] module = import_module(f"{module_name}")