config_tools: composing operations around XMLs as pipelines

There is an increasing demand for composing different operations around XML
schemas and/or data in different ways for different purposes. Today we
already have:

  - Validating XML data, which takes XML schemas and data (board and
    scenario) as inputs.
  - Filling in missing nodes in XML data with default values, which takes
    an XML schema and data (scenario only) as inputs.

In the near future we'll extend the operations around XMLs by introducing
XML schema preprocessing and XML data upgrading, adding more possibilities
for constructing larger operations by composing smaller ones.

To minimize code repetition and ease composition, this patch introduces an
infrastructure that abstracts each operation as a pipeline stage. Each
stage defines its own inputs and outputs, and stages can be composed
sequentially into a single, larger operation.
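
As an illustration of the stage abstraction (not part of the patch), here is a
minimal sketch built on the PipelineObject, PipelineStage and PipelineEngine
classes introduced in pipeline.py below; UpperCaseStage and the "text" /
"upper_text" tags are invented for the example.

    from pipeline import PipelineObject, PipelineStage, PipelineEngine

    class UpperCaseStage(PipelineStage):
        uses = {"text"}             # read, but left available to later stages
        provides = {"upper_text"}   # made available to later stages

        def run(self, obj):
            obj.set("upper_text", obj.get("text").upper())

    pipeline = PipelineEngine(["text"])        # data the caller must provide up front
    pipeline.add_stages([UpperCaseStage()])    # inputs are checked as stages are added

    obj = PipelineObject(text = "hello")
    pipeline.run(obj)
    print(obj.get("upper_text"))               # prints "HELLO"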

The existing operations listed above, along with XML file loaders, are then
refactored to provide pipeline stages. The main methods are also refined to
complete their tasks by constructing and invoking pipelines.

Tracked-On: #6690
Signed-off-by: Junjie Mao <junjie.mao@intel.com>
Junjie Mao 2022-02-23 22:05:51 +08:00 committed by acrnsi-robot
parent 4fb6ad247a
commit 0a7910c7f0
5 changed files with 273 additions and 74 deletions

File: default_populator.py

@@ -5,10 +5,13 @@
 # SPDX-License-Identifier: BSD-3-Clause
 #

+import os
 import argparse
-import lxml.etree as etree

 from scenario_transformer import ScenarioTransformer
+from pipeline import PipelineObject, PipelineStage, PipelineEngine

 class DefaultValuePopulator(ScenarioTransformer):
     def add_missing_nodes(self, xsd_element_node, xml_parent_node, new_node_index):
         element_name = xsd_element_node.get("name")
@@ -20,7 +23,7 @@ class DefaultValuePopulator(ScenarioTransformer):
         if self.complex_type_of_element(xsd_element_node) is None and default_value is None:
             return []

-        new_node = etree.Element(element_name)
+        new_node = xml_parent_node.makeelement(element_name, {})
         new_node.text = default_value

         if new_node_index is not None:
@@ -30,15 +33,30 @@ class DefaultValuePopulator(ScenarioTransformer):

         return [new_node]

+class DefaultValuePopulatingStage(PipelineStage):
+    uses = {"schema_etree", "scenario_etree"}
+    provides = {"scenario_etree"}
+
+    def run(self, obj):
+        populator = DefaultValuePopulator(obj.get("schema_etree"))
+        etree = obj.get("scenario_etree")
+        populator.transform(etree)
+        obj.set("scenario_etree", etree)
+
 def main(xsd_file, xml_file, out_file):
-    xsd_etree = etree.parse(xsd_file)
-    xsd_etree.xinclude()
-    populator = DefaultValuePopulator(xsd_etree)
-    xml_etree = etree.parse(xml_file, etree.XMLParser(remove_blank_text=True))
-    populator.transform(xml_etree)
-    xml_etree.write(out_file, pretty_print=True)
+    from xml_loader import XMLLoadStage
+    from lxml_loader import LXMLLoadStage
+
+    pipeline = PipelineEngine(["schema_path", "scenario_path"])
+    pipeline.add_stages([
+        LXMLLoadStage("schema"),
+        XMLLoadStage("scenario"),
+        DefaultValuePopulatingStage(),
+    ])
+
+    obj = PipelineObject(schema_path = xsd_file, scenario_path = xml_file)
+    pipeline.run(obj)
+    obj.get("scenario_etree").write(out_file)

 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Populate a given scenario XML with default values of nonexistent nodes")

File: lxml_loader.py (new file)

@@ -0,0 +1,20 @@
#!/usr/bin/env python3
#
# Copyright (C) 2022 Intel Corporation.
#
# SPDX-License-Identifier: BSD-3-Clause
#

from lxml.etree import parse, XMLParser
from pipeline import PipelineStage

class LXMLLoadStage(PipelineStage):
    def __init__(self, tag):
        self.consumes = f"{tag}_path"
        self.provides = f"{tag}_etree"

    def run(self, obj):
        xml_path = obj.get(self.consumes)
        etree = parse(xml_path, XMLParser(remove_blank_text=True))
        etree.xinclude()
        obj.set(self.provides, etree)
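
A small usage sketch, not part of the patch: the loader stage can also be driven
directly, outside an engine. The path "config.xsd" is illustrative.

    from pipeline import PipelineObject
    from lxml_loader import LXMLLoadStage

    obj = PipelineObject(schema_path = "config.xsd")
    stage = LXMLLoadStage("schema")        # consumes "schema_path", provides "schema_etree"
    stage.run(obj)
    schema_etree = obj.get("schema_etree") # lxml ElementTree with XIncludes resolved

The patch adds two loader stages: this lxml-based one, used for schemas because it
resolves XIncludes and strips ignorable whitespace, and a defusedxml-based one
(xml_loader.py below), presumably preferred for user-supplied board and scenario files.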

File: pipeline.py (new file)

@@ -0,0 +1,72 @@
#!/usr/bin/env python3
#
# Copyright (C) 2022 Intel Corporation.
#
# SPDX-License-Identifier: BSD-3-Clause
#

class PipelineObject:
    def __init__(self, **kwargs):
        self.data = {}
        for k,v in kwargs.items():
            self.set(k, v)

    def set(self, tag, data):
        self.data[tag] = data

    def get(self, tag):
        return self.data[tag]

    def has(self, tag):
        return tag in self.data.keys()

    def consume(self, tag):
        return self.data.pop(tag, None)

    def dump(self):
        print(self.data)

class PipelineStage:
    # The following three class variables define the inputs and outputs of the stage. Each of them can be either a set
    # or a string (which is interpreted as a unit set).
    consumes = set()  # Data consumed by this stage. Consumed data will be unavailable to later stages.
    uses = set()      # Data used but not consumed by this stage.
    provides = set()  # Data provided by this stage.

    def run(self, obj):
        raise NotImplementedError

class PipelineEngine:
    def __init__(self, initial_data = []):
        self.stages = []
        self.initial_data = set(initial_data)
        self.available_data = set(initial_data)

    def add_stage(self, stage):
        consumes = stage.consumes if isinstance(stage.consumes, set) else {stage.consumes}
        uses = stage.uses if isinstance(stage.uses, set) else {stage.uses}
        provides = stage.provides if isinstance(stage.provides, set) else {stage.provides}

        all_uses = consumes.union(uses)
        if not all_uses.issubset(self.available_data):
            raise Exception(f"Data {all_uses - self.available_data} needed by stage {stage.__class__.__name__} but not provided by the pipeline")

        self.stages.append(stage)
        self.available_data = self.available_data.difference(consumes).union(provides)

    def add_stages(self, stages):
        for stage in stages:
            self.add_stage(stage)

    def run(self, obj):
        for tag in self.initial_data:
            if not obj.has(tag):
                raise AttributeError(f"Data {tag} is needed by the pipeline but not provided by the object")

        for stage in self.stages:
            stage.run(obj)
            consumes = stage.consumes if isinstance(stage.consumes, set) else {stage.consumes}
            for tag in consumes:
                obj.consume(tag)
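
A quick sketch, not part of the patch, of the dependency check performed by
PipelineEngine.add_stage; NeedsFooStage and the "foo" / "bar" / "baz" tags are
invented for the example.

    from pipeline import PipelineStage, PipelineEngine

    class NeedsFooStage(PipelineStage):
        uses = {"foo"}
        provides = {"bar"}

        def run(self, obj):
            obj.set("bar", obj.get("foo"))

    engine = PipelineEngine(["baz"])     # the pipeline only starts with "baz"
    try:
        engine.add_stage(NeedsFooStage())
    except Exception as e:
        print(e)   # "foo" is needed by NeedsFooStage but not provided by the pipeline

Because available_data is updated as stages are added, a mis-ordered pipeline is
rejected at construction time rather than failing halfway through a run.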

File: the scenario validator script

@@ -7,8 +7,9 @@

 import sys, os
 import argparse
-import lxml.etree as etree
 import logging
+from copy import copy
+from collections import namedtuple

 try:
     import xmlschema
@@ -18,7 +19,8 @@ except ImportError:
                     "To enable the validation, install the python package by executing: pip3 install xmlschema.")
     sys.exit(0)

-from default_populator import DefaultValuePopulator
+from pipeline import PipelineObject, PipelineStage, PipelineEngine
+from default_populator import DefaultValuePopulatingStage

 def existing_file_type(parser):
     def aux(arg):
@@ -39,106 +41,174 @@ def log_level_type(parser):
            parser.error(f"{arg} is not a valid log level")
     return aux

-def load_schema(xsd_xml, datachecks_xml):
-    global schema, schema_etree, datachecks
-
-    schema_etree = etree.parse(xsd_xml)
-    schema_etree.xinclude()
-    schema = xmlschema.XMLSchema11(etree.tostring(schema_etree, encoding="unicode"))
-
-    datachecks_etree = etree.parse(datachecks_xml)
-    datachecks_etree.xinclude()
-    datachecks = xmlschema.XMLSchema11(etree.tostring(datachecks_etree, encoding="unicode"))
-
-config_tools_dir = os.path.join(os.path.dirname(__file__), "..")
-schema_dir = os.path.join(config_tools_dir, "schema")
-schema = None
-schema_etree = None
-datachecks = None
-load_schema(os.path.join(schema_dir, "config.xsd"), os.path.join(schema_dir, "datachecks.xsd"))
-
-def validate_one(board_xml, scenario_xml):
-    nr_schema_errors = 0
-    nr_check_errors = 0
-    nr_check_warnings = 0
-    board_name = os.path.basename(board_xml)
-    scenario_name = os.path.basename(scenario_xml)
-
-    scenario_etree = etree.parse(scenario_xml, etree.XMLParser(remove_blank_text=True))
-    DefaultValuePopulator(schema_etree).transform(scenario_etree)
-    it = schema.iter_errors(scenario_etree)
-    for error in it:
-        logging.debug(error)
-        nr_schema_errors += 1
-
-    if nr_schema_errors == 0:
-        main_etree = etree.parse(board_xml)
-        main_etree.getroot().extend(scenario_etree.getroot()[:])
-        it = datachecks.iter_errors(main_etree)
-        for error in it:
-            logging.debug(error)
-            anno = error.validator.annotation
-            severity = anno.elem.get("{https://projectacrn.org}severity")
-            if severity == "error":
-                nr_check_errors += 1
-            elif severity == "warning":
-                nr_check_warnings += 1
-        if nr_check_errors > 0:
-            logging.error(f"Board {board_name} and scenario {scenario_name} have inconsistent data: {nr_check_errors} errors, {nr_check_warnings} warnings.")
-        elif nr_check_warnings > 0:
-            logging.warning(f"Board {board_name} and scenario {scenario_name} have inconsistent data: {nr_check_warnings} warnings.")
-        else:
-            logging.info(f"Board {board_name} and scenario {scenario_name} are valid and consistent.")
-    else:
-        logging.warning(f"Scenario {scenario_name} is invalid: {nr_schema_errors} schema errors.")
-
-    return nr_schema_errors + nr_check_errors + nr_check_warnings
+class ValidationError(dict):
+    def __init__(self, paths, message, severity):
+        super().__init__(paths = paths, message = message, severity = severity)
+
+    def __str__(self):
+        return f"{', '.join(self['paths'])}: {self['message']}"
+
+class ScenarioValidator:
+    def __init__(self, schema_etree, datachecks_etree):
+        """Initialize the validator with preprocessed schemas in ElementTree."""
+        self.schema = xmlschema.XMLSchema11(schema_etree)
+        self.datachecks = xmlschema.XMLSchema11(datachecks_etree)
+
+    def check_syntax(self, scenario_etree):
+        errors = []
+
+        it = self.schema.iter_errors(scenario_etree)
+        for error in it:
+            # Syntactic errors are always critical.
+            e = ValidationError([error.path], error.reason, "critical")
+            logging.debug(e)
+            errors.append(e)
+
+        return errors
+
+    def check_semantics(self, board_etree, scenario_etree):
+        errors = []
+
+        unified_node = copy(scenario_etree.getroot())
+        unified_node.extend(board_etree.getroot())
+        it = self.datachecks.iter_errors(unified_node)
+        for error in it:
+            logging.debug(f"{error.elem}: {error.message}")
+            anno = error.validator.annotation
+            severity = anno.elem.get("{https://projectacrn.org}severity")
+            errors.append(ValidationError([error.elem.tag], error.message, severity))
+
+        return errors
+
+class ValidatorConstructionStage(PipelineStage):
+    # The schema etree may still be useful for schema-based transformation. Do not consume it.
+    uses = {"schema_etree"}
+    consumes = {"datachecks_etree"}
+    provides = {"validator"}
+
+    def run(self, obj):
+        validator = ScenarioValidator(obj.get("schema_etree"), obj.get("datachecks_etree"))
+        obj.set("validator", validator)
+
+class ValidatorConstructionByFileStage(PipelineStage):
+    uses = {"schema_path", "datachecks_path"}
+    provides = {"validator"}
+
+    def run(self, obj):
+        validator = ScenarioValidator(obj.get("schema_path"), obj.get("datachecks_path"))
+        obj.set("validator", validator)
+
+class SyntacticValidationStage(PipelineStage):
+    uses = {"validator", "scenario_etree"}
+    provides = {"syntactic_errors"}
+
+    def run(self, obj):
+        errors = obj.get("validator").check_syntax(obj.get("scenario_etree"))
+        obj.set("syntactic_errors", errors)
+
+class SemanticValidationStage(PipelineStage):
+    uses = {"validator", "board_etree", "scenario_etree"}
+    provides = {"semantic_errors"}
+
+    def run(self, obj):
+        errors = obj.get("validator").check_semantics(obj.get("board_etree"), obj.get("scenario_etree"))
+        obj.set("semantic_errors", errors)
+
+class ReportValidationResultStage(PipelineStage):
+    consumes = {"board_etree", "scenario_etree", "syntactic_errors", "semantic_errors"}
+    provides = {"nr_all_errors"}
+
+    def run(self, obj):
+        board_name = obj.get("board_etree").getroot().get("board")
+        scenario_name = obj.get("scenario_etree").getroot().get("scenario")
+
+        nr_critical = len(obj.get("syntactic_errors"))
+        nr_error = len(list(filter(lambda e: e["severity"] == "error", obj.get("semantic_errors"))))
+        nr_warning = len(list(filter(lambda e: e["severity"] == "warning", obj.get("semantic_errors"))))
+
+        if nr_critical > 0 or nr_error > 0:
+            logging.error(f"Board {board_name} and scenario {scenario_name} are inconsistent: {nr_critical} syntax errors, {nr_error} data errors, {nr_warning} warnings.")
+        elif nr_warning > 0:
+            logging.warning(f"Board {board_name} and scenario {scenario_name} are potentially inconsistent: {nr_warning} warnings.")
+        else:
+            logging.info(f"Board {board_name} and scenario {scenario_name} are valid and consistent.")
+
+        obj.set("nr_all_errors", nr_critical + nr_error + nr_warning)
+
+def validate_one(validation_pipeline, pipeline_obj, board_xml, scenario_xml):
+    pipeline_obj.set("board_path", board_xml)
+    pipeline_obj.set("scenario_path", scenario_xml)
+    validation_pipeline.run(pipeline_obj)
+    return pipeline_obj.consume("nr_all_errors")

-def validate_board(board_xml):
+def validate_board(validation_pipeline, pipeline_obj, board_xml):
     board_dir = os.path.dirname(board_xml)
-    nr_violations = 0
+    nr_all_errors = 0
     for f in os.listdir(board_dir):
         if not f.endswith(".xml"):
             continue
         if f == os.path.basename(board_xml) or "launch" in f:
             continue
-        nr_violations += validate_one(board_xml, os.path.join(board_dir, f))
-    return nr_violations
+        nr_all_errors += validate_one(validation_pipeline, pipeline_obj, board_xml, os.path.join(board_dir, f))
+    return nr_all_errors

-def validate_all(data_dir):
-    nr_violations = 0
+def validate_all(validation_pipeline, pipeline_obj, data_dir):
+    nr_all_errors = 0
     for f in os.listdir(data_dir):
         board_xml = os.path.join(data_dir, f, f"{f}.xml")
         if os.path.isfile(board_xml):
-            nr_violations += validate_board(board_xml)
+            nr_all_errors += validate_board(validation_pipeline, pipeline_obj, board_xml)
         else:
             logging.warning(f"Cannot find a board XML under {os.path.join(data_dir, f)}")
-    return nr_violations
+    return nr_all_errors
+
+def main(args):
+    from xml_loader import XMLLoadStage
+    from lxml_loader import LXMLLoadStage
+
+    validator_construction_pipeline = PipelineEngine(["schema_path", "datachecks_path"])
+    validator_construction_pipeline.add_stages([
+        LXMLLoadStage("schema"),
+        LXMLLoadStage("datachecks"),
+        ValidatorConstructionStage(),
+    ])
+
+    validation_pipeline = PipelineEngine(["board_path", "scenario_path", "schema_etree", "validator"])
+    validation_pipeline.add_stages([
+        XMLLoadStage("board"),
+        XMLLoadStage("scenario"),
+        DefaultValuePopulatingStage(),
+        SyntacticValidationStage(),
+        SemanticValidationStage(),
+        ReportValidationResultStage(),
+    ])
+
+    obj = PipelineObject(schema_path = args.schema, datachecks_path = args.datachecks)
+    validator_construction_pipeline.run(obj)
+
+    if args.board and args.scenario:
+        nr_all_errors = validate_one(validation_pipeline, obj, args.board, args.scenario)
+    elif args.board:
+        nr_all_errors = validate_board(validation_pipeline, obj, args.board)
+    else:
+        nr_all_errors = validate_all(validation_pipeline, obj, os.path.join(config_tools_dir, "data"))
+
+    sys.exit(1 if nr_all_errors > 0 else 0)

 if __name__ == "__main__":
+    config_tools_dir = os.path.join(os.path.dirname(__file__), "..")
+    schema_dir = os.path.join(config_tools_dir, "schema")
+
     parser = argparse.ArgumentParser()
     parser.add_argument("board", nargs="?", type=existing_file_type(parser), help="the board XML file to be validated")
     parser.add_argument("scenario", nargs="?", type=existing_file_type(parser), help="the scenario XML file to be validated")
     parser.add_argument("--loglevel", default="warning", type=log_level_type(parser), help="choose log level, e.g. debug, info, warning or error")
+    parser.add_argument("--schema", default=os.path.join(schema_dir, "config.xsd"), help="the XML schema that defines the syntax of scenario XMLs")
+    parser.add_argument("--datachecks", default=os.path.join(schema_dir, "datachecks.xsd"), help="the XML schema that defines the semantic rules against board and scenario data")
     args = parser.parse_args()
     logging.basicConfig(level=args.loglevel.upper())

-    if args.board and args.scenario:
-        nr_violations = validate_one(args.board, args.scenario)
-    elif args.board:
-        nr_violations = validate_board(args.board)
-    else:
-        nr_violations = validate_all(os.path.join(config_tools_dir, "data"))
-    sys.exit(1 if nr_violations > 0 else 0)
+    main(args)
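
ValidatorConstructionByFileStage is defined above but not wired into main(); a
sketch, not part of the patch, of how it could be used, assuming xmlschema
accepts schema file paths directly (the paths are illustrative):

    from pipeline import PipelineObject, PipelineEngine

    pipeline = PipelineEngine(["schema_path", "datachecks_path"])
    pipeline.add_stages([ValidatorConstructionByFileStage()])

    obj = PipelineObject(schema_path = "schema/config.xsd", datachecks_path = "schema/datachecks.xsd")
    pipeline.run(obj)
    validator = obj.get("validator")

Note that this shortcut skips the XInclude resolution that LXMLLoadStage
performs, which is presumably why main() builds the validator from
preprocessed etrees instead.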

File: xml_loader.py (new file)

@@ -0,0 +1,19 @@
#!/usr/bin/env python3
#
# Copyright (C) 2022 Intel Corporation.
#
# SPDX-License-Identifier: BSD-3-Clause
#

from defusedxml.ElementTree import parse
from pipeline import PipelineStage

class XMLLoadStage(PipelineStage):
    def __init__(self, tag):
        self.consumes = f"{tag}_path"
        self.provides = f"{tag}_etree"

    def run(self, obj):
        xml_path = obj.get(self.consumes)
        etree = parse(xml_path)
        obj.set(self.provides, etree)
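
A final sketch, not part of the patch, showing the consume semantics when this
loader runs inside an engine; "scenario.xml" is an illustrative path.

    from pipeline import PipelineObject, PipelineEngine
    from xml_loader import XMLLoadStage

    engine = PipelineEngine(["scenario_path"])
    engine.add_stages([XMLLoadStage("scenario")])

    obj = PipelineObject(scenario_path = "scenario.xml")
    engine.run(obj)
    print(obj.has("scenario_path"))    # False: the path was consumed by the loader
    print(obj.has("scenario_etree"))   # True: the parsed tree is now available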