Prevent missing regex_patterns (#15)

This commit is contained in:
Johan 2026-01-10 17:51:12 +02:00 committed by GitHub
parent 599fd6209f
commit 783609d2e3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
195 changed files with 1617 additions and 562 deletions

11
tests/__init__.py Normal file
View file

@ -0,0 +1,11 @@
# Run tests on output data stored in regex_patterns, custom_formats and profiles
# Ensure all references are correct and no broken links have been generated.
# Every custom format pattern should exist in regex_patterns by a file with the same name.
# Every profile custom format reference should exist in custom_formats by a file with the same name.
# If a file cannot be found, the test should fail.
# Tests should cover all the files in the output directories; it can be assumed the data has been updated already.
# Instructions should exist in README on how to run the tests.
# Pytest framework is used for the tests
# Tests are run in github actions - after generation completion.

View file

@ -0,0 +1,98 @@
"""Tests to ensure custom format patterns reference valid regex pattern files."""
from pathlib import Path
import pytest
import yaml
# Get the project root directory: two levels up from this file, i.e. the
# parent of the directory that contains this test module.
PROJECT_ROOT = Path(__file__).parent.parent
# Directory holding the custom format YAML files that are validated here.
CUSTOM_FORMATS_DIR = PROJECT_ROOT / "custom_formats"
# Directory holding the regex pattern YAML files that custom formats refer to.
REGEX_PATTERNS_DIR = PROJECT_ROOT / "regex_patterns"
def get_all_custom_formats():
    """Return every ``*.yml`` file directly inside ``CUSTOM_FORMATS_DIR``.

    The result feeds ``pytest.mark.parametrize`` at collection time, so it is
    sorted: a stable order keeps test IDs and execution order reproducible
    regardless of the order in which the filesystem enumerates entries.

    Returns:
        A sorted list of ``Path`` objects, or an empty list when the
        directory is missing.
    """
    if not CUSTOM_FORMATS_DIR.is_dir():
        return []
    return sorted(CUSTOM_FORMATS_DIR.glob("*.yml"))
@pytest.fixture(scope="module")
def available_regex_patterns_file_names():
    """Collect the file stems of all ``*.yml`` files in ``REGEX_PATTERNS_DIR``.

    Module-scoped, so the directory is scanned once per test module.

    Returns:
        A set of file names without their extension; empty when the
        directory does not exist.
    """
    if REGEX_PATTERNS_DIR.exists():
        # The stem (file name minus ".yml") serves as the pattern name.
        return {pattern_file.stem for pattern_file in REGEX_PATTERNS_DIR.glob("*.yml")}
    return set()
@pytest.fixture(scope="module")
def available_regex_patterns_names():
    """Collect the ``name`` field of every YAML file in ``REGEX_PATTERNS_DIR``.

    Unlike the file-name based fixture, this one reads each file and uses the
    value stored under its top-level ``name`` key. Module-scoped, so the files
    are parsed once per test module.

    Returns:
        A set of the ``name`` values found; empty when the directory does not
        exist. Files that are empty, lack a ``name`` key, or whose top-level
        YAML node is not a mapping contribute nothing.
    """
    if not REGEX_PATTERNS_DIR.exists():
        return set()
    pattern_names = set()
    for yml_file in REGEX_PATTERNS_DIR.glob("*.yml"):
        # Read name from YAML file content
        with open(yml_file, encoding="utf-8") as f:
            data = yaml.safe_load(f)
        # Only a mapping can carry a "name" key. A list or scalar document
        # could satisfy `"name" in data` and then raise on `data["name"]`.
        if isinstance(data, dict) and "name" in data:
            pattern_names.add(data["name"])
    return pattern_names
@pytest.mark.parametrize("custom_format_file", get_all_custom_formats())
def test_custom_format_patterns_exist(
    custom_format_file,
    available_regex_patterns_file_names,
    available_regex_patterns_names,
):
    """Test that every pattern referenced in a custom format exists in regex_patterns.

    Each condition's ``pattern`` value must match both a regex pattern file
    name (without extension) and the ``name`` field stored inside some regex
    pattern file. Both lookups are evaluated and reported in one failure so
    a file-name mismatch does not hide a ``name`` field mismatch.

    Args:
        custom_format_file: Path of the custom format YAML file under test.
        available_regex_patterns_file_names: Set of regex pattern file stems.
        available_regex_patterns_names: Set of ``name`` values read from the
            regex pattern files.

    The test is skipped when the file has no ``conditions`` key.
    """
    with open(custom_format_file, encoding="utf-8") as f:
        data = yaml.safe_load(f)
    if not isinstance(data, dict) or "conditions" not in data:
        pytest.skip(f"No conditions found in {custom_format_file.name}")

    # `conditions:` with no value loads as None; treat it as an empty list.
    # Entries that are not mappings cannot reference a pattern and are ignored.
    referenced_patterns = [
        condition["pattern"]
        for condition in data["conditions"] or []
        if isinstance(condition, dict) and condition.get("pattern")
    ]
    missing_patterns_files = [
        pattern
        for pattern in referenced_patterns
        if pattern not in available_regex_patterns_file_names
    ]
    missing_patterns_names = [
        pattern
        for pattern in referenced_patterns
        if pattern not in available_regex_patterns_names
    ]

    # Build one report with a distinct section per lookup so the reader can
    # tell which kind of reference is broken.
    problems = []
    if missing_patterns_files:
        problems.append(
            "No regex pattern file with a matching file name:\n"
            + "\n".join(f" - {pattern}" for pattern in missing_patterns_files)
        )
    if missing_patterns_names:
        problems.append(
            "No regex pattern file with a matching 'name' field:\n"
            + "\n".join(f" - {pattern}" for pattern in missing_patterns_names)
        )
    if problems:
        pytest.fail(
            f"Custom format '{custom_format_file.name}' references missing regex patterns:\n"
            + "\n".join(problems)
        )
def test_custom_formats_directory_exists():
    """Test that the custom_formats directory exists.

    Uses ``is_dir()`` rather than ``exists()`` so that a regular file that
    happens to be named ``custom_formats`` does not satisfy the check.
    """
    assert CUSTOM_FORMATS_DIR.is_dir(), f"Custom formats directory not found: {CUSTOM_FORMATS_DIR}"
def test_regex_patterns_directory_exists():
    """Test that the regex_patterns directory exists.

    Uses ``is_dir()`` rather than ``exists()`` so that a regular file that
    happens to be named ``regex_patterns`` does not satisfy the check.
    """
    assert REGEX_PATTERNS_DIR.is_dir(), f"Regex patterns directory not found: {REGEX_PATTERNS_DIR}"

100
tests/test_profiles.py Normal file
View file

@ -0,0 +1,100 @@
"""Tests to ensure profiles reference valid custom format files."""
from pathlib import Path
import pytest
import yaml
# Get the project root directory: two levels up from this file, i.e. the
# parent of the directory that contains this test module.
PROJECT_ROOT = Path(__file__).parent.parent
# Directory holding the profile YAML files that are validated here.
PROFILES_DIR = PROJECT_ROOT / "profiles"
# Directory holding the custom format YAML files that profiles refer to.
CUSTOM_FORMATS_DIR = PROJECT_ROOT / "custom_formats"
def get_all_profiles():
    """Return every ``*.yml`` file directly inside ``PROFILES_DIR``.

    The result feeds ``pytest.mark.parametrize`` at collection time, so it is
    sorted: a stable order keeps test IDs and execution order reproducible
    regardless of the order in which the filesystem enumerates entries.

    Returns:
        A sorted list of ``Path`` objects, or an empty list when the
        directory is missing.
    """
    if not PROFILES_DIR.is_dir():
        return []
    return sorted(PROFILES_DIR.glob("*.yml"))
@pytest.fixture(scope="module")
def available_custom_formats_file_names():
    """Collect the file stems of all ``*.yml`` files in ``CUSTOM_FORMATS_DIR``.

    Module-scoped, so the directory is scanned once per test module.

    Returns:
        A set of file names without their extension; empty when the
        directory does not exist.
    """
    if CUSTOM_FORMATS_DIR.exists():
        # The stem (file name minus ".yml") serves as the format name.
        return {format_file.stem for format_file in CUSTOM_FORMATS_DIR.glob("*.yml")}
    return set()
@pytest.fixture(scope="module")
def available_custom_formats_names():
    """Collect the ``name`` field of every YAML file in ``CUSTOM_FORMATS_DIR``.

    Unlike the file-name based fixture, this one reads each file and uses the
    value stored under its top-level ``name`` key. Module-scoped, so the files
    are parsed once per test module.

    Returns:
        A set of the ``name`` values found; empty when the directory does not
        exist. Files that are empty, lack a ``name`` key, or whose top-level
        YAML node is not a mapping contribute nothing.
    """
    if not CUSTOM_FORMATS_DIR.exists():
        return set()
    format_names = set()
    for yml_file in CUSTOM_FORMATS_DIR.glob("*.yml"):
        # Read name from YAML file content
        with open(yml_file, encoding="utf-8") as f:
            data = yaml.safe_load(f)
        # Only a mapping can carry a "name" key. A list or scalar document
        # could satisfy `"name" in data` and then raise on `data["name"]`.
        if isinstance(data, dict) and "name" in data:
            format_names.add(data["name"])
    return format_names
@pytest.mark.parametrize("profile_file", get_all_profiles())
def test_profile_custom_formats_exist(
    profile_file, available_custom_formats_file_names, available_custom_formats_names
):
    """Test that every custom format referenced in a profile exists in custom_formats.

    Each entry's ``name`` value must match both a custom format file name
    (without extension) and the ``name`` field stored inside some custom
    format file. Both lookups are evaluated and reported in one failure so
    a file-name mismatch does not hide a ``name`` field mismatch.

    Args:
        profile_file: Path of the profile YAML file under test.
        available_custom_formats_file_names: Set of custom format file stems.
        available_custom_formats_names: Set of ``name`` values read from the
            custom format files.

    The test is skipped when the file has no ``custom_formats`` key.
    """
    with open(profile_file, encoding="utf-8") as f:
        data = yaml.safe_load(f)
    if not isinstance(data, dict) or "custom_formats" not in data:
        pytest.skip(f"No custom_formats found in {profile_file.name}")

    # `custom_formats:` with no value loads as None; treat it as an empty list.
    # Entries that are not mappings cannot reference a format and are ignored.
    referenced_formats = [
        custom_format_ref["name"]
        for custom_format_ref in data["custom_formats"] or []
        if isinstance(custom_format_ref, dict) and custom_format_ref.get("name")
    ]
    missing_formats_files = [
        format_name
        for format_name in referenced_formats
        if format_name not in available_custom_formats_file_names
    ]
    missing_formats_names = [
        format_name
        for format_name in referenced_formats
        if format_name not in available_custom_formats_names
    ]

    # Build one report with a distinct section per lookup so the reader can
    # tell which kind of reference is broken.
    problems = []
    if missing_formats_files:
        problems.append(
            "No custom format file with a matching file name:\n"
            + "\n".join(f" - {format_name}" for format_name in missing_formats_files)
        )
    if missing_formats_names:
        problems.append(
            "No custom format file with a matching 'name' field:\n"
            + "\n".join(f" - {format_name}" for format_name in missing_formats_names)
        )
    if problems:
        pytest.fail(
            f"Profile '{profile_file.name}' references missing custom formats:\n"
            + "\n".join(problems)
        )
def test_profiles_directory_exists():
    """Test that the profiles directory exists.

    Uses ``is_dir()`` rather than ``exists()`` so that a regular file that
    happens to be named ``profiles`` does not satisfy the check.
    """
    assert PROFILES_DIR.is_dir(), f"Profiles directory not found: {PROFILES_DIR}"
def test_custom_formats_directory_exists():
    """Test that the custom_formats directory exists.

    Uses ``is_dir()`` rather than ``exists()`` so that a regular file that
    happens to be named ``custom_formats`` does not satisfy the check.
    """
    assert CUSTOM_FORMATS_DIR.is_dir(), f"Custom formats directory not found: {CUSTOM_FORMATS_DIR}"