Prevent missing regex_patterns (#15)
This commit is contained in:
parent
599fd6209f
commit
783609d2e3
195 changed files with 1617 additions and 562 deletions
|
|
@ -1,15 +1,16 @@
|
|||
import os
|
||||
import json
|
||||
import yaml
|
||||
|
||||
import yaml
|
||||
from markdownify import markdownify
|
||||
|
||||
from utils.mappings.languages import LANGUAGE_MAPPING
|
||||
from utils.file_utils import iterate_json_files
|
||||
from utils.mappings.indexer_flags import INDEXER_FLAG_MAPPING
|
||||
from utils.mappings.release_type import RELEASE_TYPE_MAPPING
|
||||
from utils.mappings.languages import LANGUAGE_MAPPING
|
||||
from utils.mappings.quality_modifiers import QUALITY_MODIFIER_MAPPING
|
||||
from utils.mappings.release_type import RELEASE_TYPE_MAPPING
|
||||
from utils.mappings.source import SOURCE_MAPPING
|
||||
from utils.strings import get_name, get_regex_pattern_name
|
||||
from utils.strings import get_name
|
||||
|
||||
|
||||
IMPLEMENTATION_TO_TAG_MAPPING = {
|
||||
"ReleaseTitleSpecification": "Release Title",
|
||||
|
|
@ -39,54 +40,68 @@ SERVICE_TO_TRASH_GUIDES_URL = {
|
|||
}
|
||||
|
||||
|
||||
def collect_custom_format(
|
||||
service, file_name, input_json, output_dir, custom_regex_patterns
|
||||
def _create_condition_base(service, spec):
    """Build the fields every condition shares from one specification.

    Args:
        service: Service identifier, forwarded to ``get_name``.
        spec: Specification dict; ``name``, ``negate``, ``required`` and
            ``implementation`` are read with defaults when absent.

    Returns:
        A new dict with the keys ``name``, ``negate``, ``required`` and
        ``type`` (in that order). ``type`` falls back to ``"unknown"`` when
        the implementation has no entry in ``IMPLEMENTATION_TO_TYPE_MAPPING``.
    """
    display_name = get_name(service, spec.get("name", ""))
    is_negated = spec.get("negate", False)
    is_required = spec.get("required", False)
    condition_type = IMPLEMENTATION_TO_TYPE_MAPPING.get(
        spec.get("implementation"), "unknown"
    )

    base = {}
    base["name"] = display_name
    base["negate"] = is_negated
    base["required"] = is_required
    base["type"] = condition_type
    return base
|
||||
|
||||
|
||||
def _add_condition_value(
    condition, implementation, spec, *, service, regex_patterns, file_name
):
    """Add the implementation-specific value to ``condition`` in place.

    Args:
        condition: Dict to mutate; receives exactly one extra key on success.
        implementation: The specification's ``implementation`` string. It
            selects which key is written and how the raw value is translated.
        spec: Specification dict. The raw value is read from
            ``spec["fields"]["value"]``; missing keys yield ``None``.
        service: Key into the per-service mapping tables; also reported in
            the error message.
        regex_patterns: Collected patterns. ``regex_patterns["by_pattern"]``
            maps a raw regex string to an entry carrying a ``"name"`` key.
        file_name: Custom format name, used only in the error message.

    Returns:
        ``True`` if a value was added, ``False`` if ``implementation`` is not
        one of the handled kinds (``condition`` is then left untouched).

    Raises:
        ValueError: If a release title/group pattern has no entry in
            ``regex_patterns["by_pattern"]``, or its entry has an empty name.
        KeyError: If ``service`` or the raw value is missing from the
            relevant mapping table.
    """
    fields = spec.get("fields", {})
    value = fields.get("value")

    if implementation in ("ReleaseTitleSpecification", "ReleaseGroupSpecification"):
        # Look the entry up first and only then read its name: subscripting
        # the result of .get() directly raises TypeError on a missing pattern,
        # which would bypass the descriptive ValueError below.
        entry = regex_patterns["by_pattern"].get(value)
        pattern_name = entry["name"] if entry else None
        if not pattern_name:
            raise ValueError(
                f"Pattern '{value}' not found in collected regex patterns "
                f"for {service} in custom format {file_name}."
            )
        condition["pattern"] = pattern_name
    elif implementation == "ResolutionSpecification":
        condition["resolution"] = f"{value}p"
    elif implementation == "SourceSpecification":
        condition["source"] = SOURCE_MAPPING[service][value]
    elif implementation == "LanguageSpecification":
        condition["language"] = LANGUAGE_MAPPING[service][value]
    elif implementation == "IndexerFlagSpecification":
        condition["flag"] = INDEXER_FLAG_MAPPING[service][value]
    elif implementation == "QualityModifierSpecification":
        condition["qualityModifier"] = QUALITY_MODIFIER_MAPPING[service][value]
    elif implementation == "ReleaseTypeSpecification":
        condition["releaseType"] = RELEASE_TYPE_MAPPING[service][value]
    else:
        return False
    return True
|
||||
|
||||
|
||||
def _collect_custom_format(
|
||||
service, file_name, input_json, output_dir, regex_patterns
|
||||
):
|
||||
conditions = []
|
||||
implementation_tags = set()
|
||||
for spec in input_json.get("specifications", []):
|
||||
condition = {
|
||||
"name": get_name(service, spec.get("name", "")),
|
||||
"negate": spec.get("negate", False),
|
||||
"required": spec.get("required", False),
|
||||
"type": IMPLEMENTATION_TO_TYPE_MAPPING.get(
|
||||
spec.get("implementation"), "unknown"
|
||||
),
|
||||
}
|
||||
|
||||
implementation = spec.get("implementation")
|
||||
|
||||
implementation_tags.add(IMPLEMENTATION_TO_TAG_MAPPING[implementation])
|
||||
|
||||
if implementation in ["ReleaseTitleSpecification", "ReleaseGroupSpecification"]:
|
||||
pattern = spec.get("fields", {}).get("value")
|
||||
condition["pattern"] = custom_regex_patterns.get(
|
||||
pattern, get_regex_pattern_name(service, spec.get("name", ""))
|
||||
)
|
||||
elif implementation in ["ResolutionSpecification"]:
|
||||
condition["resolution"] = f"{spec.get('fields', {}).get('value')}p"
|
||||
elif implementation in ["SourceSpecification"]:
|
||||
condition["source"] = SOURCE_MAPPING[service][
|
||||
spec.get("fields", {}).get("value")
|
||||
]
|
||||
elif implementation in ["LanguageSpecification"]:
|
||||
# TODO: exceptLanguage
|
||||
condition["language"] = LANGUAGE_MAPPING[service][
|
||||
spec.get("fields", {}).get("value")
|
||||
]
|
||||
elif implementation in ["IndexerFlagSpecification"]:
|
||||
condition["flag"] = INDEXER_FLAG_MAPPING[service][
|
||||
spec.get("fields", {}).get("value")
|
||||
]
|
||||
elif implementation in ["QualityModifierSpecification"]:
|
||||
condition["qualityModifier"] = QUALITY_MODIFIER_MAPPING[service][
|
||||
spec.get("fields", {}).get("value")
|
||||
]
|
||||
elif implementation in ["ReleaseTypeSpecification"]:
|
||||
condition["releaseType"] = RELEASE_TYPE_MAPPING[service][
|
||||
spec.get("fields", {}).get("value")
|
||||
]
|
||||
else:
|
||||
condition = _create_condition_base(service, spec)
|
||||
if not _add_condition_value(
|
||||
condition,
|
||||
implementation,
|
||||
spec,
|
||||
service=service,
|
||||
regex_patterns=regex_patterns,
|
||||
file_name=file_name,
|
||||
):
|
||||
print(f"Unrecognised implementation ({implementation}), skipping for now.")
|
||||
continue
|
||||
|
||||
|
|
@ -120,23 +135,14 @@ def collect_custom_format(
|
|||
|
||||
def collect_custom_formats(service, input_dir, output_dir, custom_regex_patterns):
    """Collect every custom format under ``input_dir`` and gather its scores.

    Each item produced by ``iterate_json_files(input_dir)`` is handed to
    ``_collect_custom_format``. The helper's second and third tuple items are
    used as the file stem and the parsed JSON data (presumably a dict, since
    ``.get`` is called on it; confirm against ``utils.file_utils``).

    Args:
        service: Service identifier forwarded to ``_collect_custom_format``.
        input_dir: Directory passed to ``iterate_json_files``.
        output_dir: Output directory forwarded to ``_collect_custom_format``.
        custom_regex_patterns: Collected regex patterns forwarded to
            ``_collect_custom_format``.

    Returns:
        Dict mapping each truthy ``trash_id`` to that file's ``trash_scores``
        (``{}`` when the file has none). Files without a ``trash_id`` are
        still collected but contribute no entry.
    """
    trash_id_to_scoring_mapping = {}
    for _, file_stem, data in iterate_json_files(input_dir):
        trash_id = data.get("trash_id")
        trash_scores = data.get("trash_scores", {})
        if trash_id:
            trash_id_to_scoring_mapping[trash_id] = trash_scores

        _collect_custom_format(
            service, file_stem, data, output_dir, custom_regex_patterns
        )

    return trash_id_to_scoring_mapping
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue