Prevent duplicate regex_patterns

This commit is contained in:
Johan van Eck 2025-07-27 20:32:00 +03:00
parent d4582e7330
commit 06a7c06933
235 changed files with 467 additions and 782 deletions

View file

@@ -9,7 +9,7 @@ from utils.mappings.indexer_flags import INDEXER_FLAG_MAPPING
from utils.mappings.release_type import RELEASE_TYPE_MAPPING
from utils.mappings.quality_modifiers import QUALITY_MODIFIER_MAPPING
from utils.mappings.source import SOURCE_MAPPING
from utils.strings import get_name
from utils.strings import get_name, get_regex_pattern_name
IMPLEMENTATION_TO_TAG_MAPPING = {
"ReleaseTitleSpecification": "Release Title",
@@ -57,7 +57,7 @@ def collect_custom_format(service, file_name, input_json, output_dir):
implementation_tags.add(IMPLEMENTATION_TO_TAG_MAPPING[implementation])
if implementation in ["ReleaseTitleSpecification", "ReleaseGroupSpecification"]:
condition["pattern"] = get_name(service, spec.get("name", ""))
condition["pattern"] = get_regex_pattern_name(service, spec.get("name", ""))
elif implementation in ["ResolutionSpecification"]:
condition["resolution"] = f"{spec.get('fields', {}).get('value')}p"
elif implementation in ["SourceSpecification"]:
@@ -120,7 +120,7 @@ def collect_custom_formats(
):
trash_id_to_scoring_mapping = {}
for root, _, files in os.walk(input_dir):
for filename in files:
for filename in sorted(files):
if not filename.endswith(".json"):
continue

View file

@@ -112,7 +112,7 @@ def collect_profiles(
trash_id_to_scoring_mapping,
):
for root, _, files in os.walk(input_dir):
for filename in files:
for filename in sorted(files):
if not filename.endswith(".json"):
continue

View file

@@ -2,11 +2,7 @@ import os
import json
import yaml
from utils.strings import get_name
# TODO: prevent duplicates by only writing unique regex patterns to files
# In some cases negations will result in a new regex pattern as of now
# NOTE: would need to keep track of all duplicate patterns so that trash_id can still be matched
from utils.strings import get_regex_pattern_name
def collect_regex_pattern(service, file_name, input_json, output_dir):
@@ -28,7 +24,7 @@ def collect_regex_pattern(service, file_name, input_json, output_dir):
# Compose YAML structure
name = spec.get("name", "")
yml_data = {
"name": get_name(service, name),
"name": get_regex_pattern_name(service, name),
"pattern": pattern,
"description": "",
"tags": [],
@@ -38,8 +34,13 @@ def collect_regex_pattern(service, file_name, input_json, output_dir):
# Output path
output_path = os.path.join(
output_dir,
f"{get_name(service, name)}.yml",
f"{get_regex_pattern_name(service, name)}.yml",
)
if os.path.exists(output_path):
print(f"exists{output_path}, skipping")
continue
with open(output_path, "w", encoding="utf-8") as f:
yaml.dump(yml_data, f, sort_keys=False, allow_unicode=True)
print(f"Generated: {output_path}")
@@ -47,7 +48,7 @@ def collect_regex_pattern(service, file_name, input_json, output_dir):
def collect_regex_patterns(service, input_dir, output_dir):
for root, _, files in os.walk(input_dir):
for filename in files:
for filename in sorted(files):
if not filename.endswith(".json"):
continue

View file

@@ -8,3 +8,7 @@ def get_name(service, profile_name):
.replace("Atmos", "ATMOS")
)
return f"{service.capitalize()} - {safe_profile_name}"
def get_regex_pattern_name(service, regex_pattern_name):
return get_name(service, regex_pattern_name).replace("Not ", "")