Prevent missing regex_patterns (#15)

This commit is contained in:
Johan 2026-01-10 17:51:12 +02:00 committed by GitHub
parent 599fd6209f
commit 783609d2e3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
195 changed files with 1617 additions and 562 deletions

View file

@ -1,15 +1,16 @@
import os
import json
import yaml
import yaml
from markdownify import markdownify
from utils.mappings.languages import LANGUAGE_MAPPING
from utils.file_utils import iterate_json_files
from utils.mappings.indexer_flags import INDEXER_FLAG_MAPPING
from utils.mappings.release_type import RELEASE_TYPE_MAPPING
from utils.mappings.languages import LANGUAGE_MAPPING
from utils.mappings.quality_modifiers import QUALITY_MODIFIER_MAPPING
from utils.mappings.release_type import RELEASE_TYPE_MAPPING
from utils.mappings.source import SOURCE_MAPPING
from utils.strings import get_name, get_regex_pattern_name
from utils.strings import get_name
IMPLEMENTATION_TO_TAG_MAPPING = {
"ReleaseTitleSpecification": "Release Title",
@ -39,54 +40,68 @@ SERVICE_TO_TRASH_GUIDES_URL = {
}
def collect_custom_format(
service, file_name, input_json, output_dir, custom_regex_patterns
def _create_condition_base(service, spec):
    """Build the fields shared by every condition, whatever its implementation.

    Args:
        service: Service identifier forwarded to ``get_name``.
        spec: One specification mapping taken from a custom format.

    Returns:
        A dict with the keys ``name``, ``negate``, ``required`` and ``type``.
        ``type`` falls back to ``"unknown"`` when the implementation has no
        entry in ``IMPLEMENTATION_TO_TYPE_MAPPING``.
    """
    condition = {"name": get_name(service, spec.get("name", ""))}

    # Both boolean flags default to False when the specification omits them.
    for flag in ("negate", "required"):
        condition[flag] = spec.get(flag, False)

    implementation = spec.get("implementation")
    condition["type"] = IMPLEMENTATION_TO_TYPE_MAPPING.get(implementation, "unknown")
    return condition
def _add_condition_value(
condition, implementation, spec, *, service, regex_patterns, file_name
):
"""Add implementation-specific value to condition."""
fields = spec.get("fields", {})
value = fields.get("value")
if implementation in ["ReleaseTitleSpecification", "ReleaseGroupSpecification"]:
pattern_name = regex_patterns["by_pattern"].get(value)["name"]
if not pattern_name:
raise ValueError(
f"Pattern '{value}' not found in collected regex patterns "
f"for {service} in custom format {file_name}."
)
condition["pattern"] = pattern_name
elif implementation == "ResolutionSpecification":
condition["resolution"] = f"{value}p"
elif implementation == "SourceSpecification":
condition["source"] = SOURCE_MAPPING[service][value]
elif implementation == "LanguageSpecification":
condition["language"] = LANGUAGE_MAPPING[service][value]
elif implementation == "IndexerFlagSpecification":
condition["flag"] = INDEXER_FLAG_MAPPING[service][value]
elif implementation == "QualityModifierSpecification":
condition["qualityModifier"] = QUALITY_MODIFIER_MAPPING[service][value]
elif implementation == "ReleaseTypeSpecification":
condition["releaseType"] = RELEASE_TYPE_MAPPING[service][value]
else:
return False
return True
def _collect_custom_format(
service, file_name, input_json, output_dir, regex_patterns
):
conditions = []
implementation_tags = set()
for spec in input_json.get("specifications", []):
condition = {
"name": get_name(service, spec.get("name", "")),
"negate": spec.get("negate", False),
"required": spec.get("required", False),
"type": IMPLEMENTATION_TO_TYPE_MAPPING.get(
spec.get("implementation"), "unknown"
),
}
implementation = spec.get("implementation")
implementation_tags.add(IMPLEMENTATION_TO_TAG_MAPPING[implementation])
if implementation in ["ReleaseTitleSpecification", "ReleaseGroupSpecification"]:
pattern = spec.get("fields", {}).get("value")
condition["pattern"] = custom_regex_patterns.get(
pattern, get_regex_pattern_name(service, spec.get("name", ""))
)
elif implementation in ["ResolutionSpecification"]:
condition["resolution"] = f"{spec.get('fields', {}).get('value')}p"
elif implementation in ["SourceSpecification"]:
condition["source"] = SOURCE_MAPPING[service][
spec.get("fields", {}).get("value")
]
elif implementation in ["LanguageSpecification"]:
# TODO: exceptLanguage
condition["language"] = LANGUAGE_MAPPING[service][
spec.get("fields", {}).get("value")
]
elif implementation in ["IndexerFlagSpecification"]:
condition["flag"] = INDEXER_FLAG_MAPPING[service][
spec.get("fields", {}).get("value")
]
elif implementation in ["QualityModifierSpecification"]:
condition["qualityModifier"] = QUALITY_MODIFIER_MAPPING[service][
spec.get("fields", {}).get("value")
]
elif implementation in ["ReleaseTypeSpecification"]:
condition["releaseType"] = RELEASE_TYPE_MAPPING[service][
spec.get("fields", {}).get("value")
]
else:
condition = _create_condition_base(service, spec)
if not _add_condition_value(
condition,
implementation,
spec,
service=service,
regex_patterns=regex_patterns,
file_name=file_name,
):
print(f"Unrecognised implementation ({implementation}), skipping for now.")
continue
@ -120,23 +135,14 @@ def collect_custom_format(
def collect_custom_formats(service, input_dir, output_dir, custom_regex_patterns):
    """Process every custom format found under ``input_dir``.

    Each item yielded by ``iterate_json_files`` is handed to
    ``_collect_custom_format``, and the scores of every format that declares
    a ``trash_id`` are recorded.

    Args:
        service: Service identifier passed through to ``_collect_custom_format``.
        input_dir: Directory handed to ``iterate_json_files``.
        output_dir: Output directory passed through to ``_collect_custom_format``.
        custom_regex_patterns: Collected regex patterns passed through to
            ``_collect_custom_format``.

    Returns:
        A dict mapping each ``trash_id`` to its ``trash_scores`` mapping
        (an empty dict when the format declares no scores).
    """
    trash_id_to_scoring_mapping = {}

    # NOTE(review): iterate_json_files presumably yields
    # (path, file stem, parsed JSON) tuples - confirm against its definition.
    for _, file_stem, data in iterate_json_files(input_dir):
        trash_id = data.get("trash_id")
        trash_scores = data.get("trash_scores", {})
        # Formats without a trash_id are still processed, just not scored.
        if trash_id:
            trash_id_to_scoring_mapping[trash_id] = trash_scores

        _collect_custom_format(
            service, file_stem, data, output_dir, custom_regex_patterns
        )

    return trash_id_to_scoring_mapping