Skip to content

Validating BPMN Documents

This tutorial covers BPMN document validation in PyBPMN Parser.

Overview

PyBPMN Parser automatically validates BPMN documents against the BPMN 2.0 schema during parsing. This ensures that only valid BPMN documents are processed.

Automatic Validation

Validation happens automatically when you parse:

from pathlib import Path
from pybpmn_parser.parse import Parser
from pybpmn_parser.validator import ValidationError

# Validation happens automatically
parser = Parser()
try:
    definitions = parser.parse_file(Path("my_process.bpmn"))
    print("✓ Document is valid")
except ValidationError as e:
    print(f"✗ Validation failed: {e}")

Understanding Validation Errors

ValidationError exceptions provide detailed information about what went wrong:

from pybpmn_parser.parse import Parser
from pybpmn_parser.validator import ValidationError

invalid_xml = """<?xml version="1.0"?>
<definitions xmlns="http://www.omg.org/spec/BPMN/20100524/MODEL"
             targetNamespace="http://bpmn.io/schema/bpmn">
    <process id="proc1">
        <invalidElement id="invalid" />
    </process>
</definitions>"""

try:
    definitions = Parser().parse_string(invalid_xml)
except ValidationError as e:
    print(f"Error: {e}")
    # Error will describe the invalid element

Common Validation Errors

Empty XML

from pybpmn_parser.validator import validate

result = validate("")
if result.errors:
    # First error will be the empty XML error
    print(result.errors[0])  # "EMPTY_XML: Value cannot be empty"

Invalid Root Element

from pybpmn_parser.validator import validate

invalid_xml = """<?xml version="1.0"?>
<root>
    <child>Invalid</child>
</root>"""

result = validate(invalid_xml)
for err in result.errors:
    print(err)  # SCHEMA_ERROR: ... (root)

Invalid BPMN Elements

from pybpmn_parser.validator import validate

invalid_xml = """<?xml version="1.0"?>
<definitions xmlns="http://www.omg.org/spec/BPMN/20100524/MODEL">
    <invalidTag />
</definitions>"""

result = validate(invalid_xml)
for err in result.errors:
    print(err)  # SCHEMA_ERROR: unexpected child element (definitions)

Manual Validation

You can validate without parsing:

from pybpmn_parser.validator import validate

bpmn_xml = """<?xml version="1.0"?>
<definitions xmlns="http://www.omg.org/spec/BPMN/20100524/MODEL"
             targetNamespace="http://bpmn.io/schema/bpmn">
    <process id="proc1" />
</definitions>"""

# Validate returns a validation result
result = validate(bpmn_xml)

if result.errors:
    print("Validation failed:")
    for error in result.errors:
        print(f"  - {error}")
else:
    print("✓ Valid BPMN")

Validation Best Practices

Validate Early

from pathlib import Path
from pybpmn_parser.validator import validate

def validate_file(file_path: Path) -> bool:
    """Validate a BPMN file before processing."""
    xml_content = file_path.read_text(encoding="utf-8")
    result = validate(xml_content)

    if result.errors:
        print(f"Validation failed for {file_path.name}:")
        for error in result.errors:
            print(f"  - {error}")
        return False

    return True

# Validate before parsing
from pybpmn_parser.parse import Parser

if validate_file(Path("my_process.bpmn")):
    parser = Parser()
    definitions = parser.parse_file(Path("my_process.bpmn"))

Batch Validation

from pathlib import Path
from pybpmn_parser.validator import validate

def validate_directory(directory: Path):
    """Validate all BPMN files in a directory."""
    results = {"valid": [], "invalid": []}

    for bpmn_file in directory.glob("*.bpmn"):
        xml_content = bpmn_file.read_text(encoding="utf-8")
        result = validate(xml_content)

        if result.errors:
            results["invalid"].append({
                "file": bpmn_file.name,
                "errors": [str(e) for e in result.errors]
            })
        else:
            results["valid"].append(bpmn_file.name)

    return results

# Validate all files
results = validate_directory(Path("bpmn_models/"))
print(f"Valid: {len(results['valid'])}")
print(f"Invalid: {len(results['invalid'])}")

Handle Validation Gracefully

from pathlib import Path
from pybpmn_parser.parse import Parser
from pybpmn_parser.validator import ValidationError

def safe_parse(file_path: Path):
    """Parse with graceful error handling."""
    parser = Parser()
    try:
        return parser.parse_file(file_path), None
    except ValidationError as e:
        error_msg = {
            "file": str(file_path),
            "error_type": "validation",
            "message": str(e)
        }
        return None, error_msg
    except FileNotFoundError:
        error_msg = {
            "file": str(file_path),
            "error_type": "not_found",
            "message": "File does not exist"
        }
        return None, error_msg

# Use the safe parser
definitions, error = safe_parse(Path("my_process.bpmn"))
if error:
    print(f"Failed: {error['message']}")
else:
    print("Success!")

Custom Validation Rules

While PyBPMN Parser validates against the BPMN schema, you can add custom business rules:

from pathlib import Path
from pybpmn_parser.parse import Parser
from pybpmn_parser.bpmn.event.start_event import StartEvent
from pybpmn_parser.bpmn.event.end_event import EndEvent

def validate_process_structure(process):
    """Custom validation: ensure process has start and end events."""
    errors = []

    # Check for start events
    start_events = [
        el for el in process.flow_elements
        if isinstance(el, StartEvent)
    ]
    if not start_events:
        errors.append(f"Process {process.id} has no start event")

    # Check for end events
    end_events = [
        el for el in process.flow_elements
        if isinstance(el, EndEvent)
    ]
    if not end_events:
        errors.append(f"Process {process.id} has no end event")

    return errors

# Validate structure
parser = Parser()
definitions = parser.parse_file(Path("my_process.bpmn"))
for process in definitions.processes:
    errors = validate_process_structure(process)
    if errors:
        print(f"Structure errors in {process.id}:")
        for error in errors:
            print(f"  - {error}")
    else:
        print(f"✓ Process {process.id} structure is valid")

Check for Disconnected Elements

from pathlib import Path
from pybpmn_parser.parse import Parser

def find_disconnected_elements(process):
    """Find elements not connected to any sequence flows."""
    disconnected = []

    # Build a set of connected element IDs
    connected = set()
    for flow in process.sequence_flows:
        connected.add(flow.source_ref)
        connected.add(flow.target_ref)

    # Check each flow element
    for element in process.flow_elements:
        if element.id not in connected:
            disconnected.append(element.id)

    return disconnected

parser = Parser()
definitions = parser.parse_file(Path("my_process.bpmn"))
process = definitions.processes[0]

disconnected = find_disconnected_elements(process)
if disconnected:
    print("Disconnected elements:")
    for element_id in disconnected:
        print(f"  - {element_id}")

Validate Naming Conventions

def validate_naming_conventions(process):
    """Ensure elements follow naming conventions."""
    errors = []

    for element in process.flow_elements:
        # Check if element has a name
        if hasattr(element, 'name'):
            if not element.name:
                errors.append(f"Element {element.id} has no name")
            elif len(element.name) < 3:
                errors.append(f"Element {element.id} name too short: {element.name}")

    return errors