Prevent duplicate regex_patterns
This commit is contained in:
parent
06a7c06933
commit
84e5f806c9
2747 changed files with 6157 additions and 10149 deletions
|
|
@ -39,7 +39,9 @@ SERVICE_TO_TRASH_GUIDES_URL = {
|
|||
}
|
||||
|
||||
|
||||
def collect_custom_format(service, file_name, input_json, output_dir):
|
||||
def collect_custom_format(
|
||||
service, file_name, input_json, output_dir, custom_regex_patterns
|
||||
):
|
||||
conditions = []
|
||||
implementation_tags = set()
|
||||
for spec in input_json.get("specifications", []):
|
||||
|
|
@ -57,7 +59,10 @@ def collect_custom_format(service, file_name, input_json, output_dir):
|
|||
implementation_tags.add(IMPLEMENTATION_TO_TAG_MAPPING[implementation])
|
||||
|
||||
if implementation in ["ReleaseTitleSpecification", "ReleaseGroupSpecification"]:
|
||||
condition["pattern"] = get_regex_pattern_name(service, spec.get("name", ""))
|
||||
pattern = spec.get("fields", {}).get("value")
|
||||
condition["pattern"] = custom_regex_patterns.get(
|
||||
pattern, get_regex_pattern_name(service, spec.get("name", ""))
|
||||
)
|
||||
elif implementation in ["ResolutionSpecification"]:
|
||||
condition["resolution"] = f"{spec.get('fields', {}).get('value')}p"
|
||||
elif implementation in ["SourceSpecification"]:
|
||||
|
|
@ -113,11 +118,7 @@ def collect_custom_format(service, file_name, input_json, output_dir):
|
|||
print(f"Generated: {output_path}")
|
||||
|
||||
|
||||
def collect_custom_formats(
|
||||
service,
|
||||
input_dir,
|
||||
output_dir,
|
||||
):
|
||||
def collect_custom_formats(service, input_dir, output_dir, custom_regex_patterns):
|
||||
trash_id_to_scoring_mapping = {}
|
||||
for root, _, files in os.walk(input_dir):
|
||||
for filename in sorted(files):
|
||||
|
|
@ -135,10 +136,7 @@ def collect_custom_formats(
|
|||
trash_id_to_scoring_mapping[trash_id] = trash_scores
|
||||
|
||||
collect_custom_format(
|
||||
service,
|
||||
file_stem,
|
||||
data,
|
||||
output_dir,
|
||||
service, file_stem, data, output_dir, custom_regex_patterns
|
||||
)
|
||||
|
||||
return trash_id_to_scoring_mapping
|
||||
|
|
|
|||
|
|
@ -2,7 +2,9 @@ import os
|
|||
import json
|
||||
import yaml
|
||||
|
||||
from utils.strings import get_regex_pattern_name
|
||||
from utils.strings import get_regex_pattern_name, get_safe_name
|
||||
|
||||
duplicate_regex_patterns = {}
|
||||
|
||||
|
||||
def collect_regex_pattern(service, file_name, input_json, output_dir):
|
||||
|
|
@ -18,11 +20,43 @@ def collect_regex_pattern(service, file_name, input_json, output_dir):
|
|||
continue
|
||||
|
||||
pattern = spec.get("fields", {}).get("value")
|
||||
|
||||
if not pattern:
|
||||
print(f"No pattern found in {file_name} for {implementation}")
|
||||
continue
|
||||
|
||||
# Compose YAML structure
|
||||
name = spec.get("name", "")
|
||||
|
||||
existing_pattern_name = duplicate_regex_patterns.get(pattern)
|
||||
if existing_pattern_name:
|
||||
existing_pattern_path = os.path.join(
|
||||
output_dir,
|
||||
f"{existing_pattern_name}.yml",
|
||||
)
|
||||
if (
|
||||
os.path.exists(existing_pattern_path)
|
||||
and service.capitalize() not in existing_pattern_path
|
||||
):
|
||||
new_path = os.path.join(
|
||||
output_dir,
|
||||
f"{get_safe_name(name)}.yml",
|
||||
)
|
||||
os.rename(
|
||||
existing_pattern_path,
|
||||
new_path,
|
||||
)
|
||||
with open(new_path, "r+", encoding="utf-8") as f:
|
||||
yml_data = yaml.safe_load(f)
|
||||
yml_data["name"] = get_safe_name(name)
|
||||
f.seek(0)
|
||||
yaml.dump(yml_data, f, sort_keys=False, allow_unicode=True)
|
||||
f.truncate()
|
||||
duplicate_regex_patterns[pattern] = get_safe_name(name)
|
||||
continue
|
||||
else:
|
||||
duplicate_regex_patterns[pattern] = get_regex_pattern_name(service, name)
|
||||
|
||||
yml_data = {
|
||||
"name": get_regex_pattern_name(service, name),
|
||||
"pattern": pattern,
|
||||
|
|
@ -57,3 +91,5 @@ def collect_regex_patterns(service, input_dir, output_dir):
|
|||
with open(file_path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
collect_regex_pattern(service, file_stem, data, output_dir)
|
||||
|
||||
return duplicate_regex_patterns
|
||||
|
|
|
|||
|
|
@ -1,13 +1,16 @@
|
|||
def get_name(service, profile_name):
|
||||
safe_profile_name = (
|
||||
profile_name.replace("/", "-")
|
||||
def get_safe_name(name):
    """Return ``name`` with a fixed set of textual substitutions applied.

    The substitutions are applied in this order:

    * ``/`` -> ``-``
    * ``[`` -> ``(``
    * ``]`` -> ``)``
    * ``HDR10Plus`` -> ``HDR10+``
    * ``10 bit`` -> ``10bit``
    * ``Atmos`` -> ``ATMOS``

    Every occurrence of each substring is replaced; text that matches none
    of them is returned unchanged.
    """
    return (
        name.replace("/", "-")
        .replace("[", "(")
        .replace("]", ")")
        .replace("HDR10Plus", "HDR10+")
        .replace("10 bit", "10bit")
        .replace("Atmos", "ATMOS")
    )
|
||||
return f"{service.capitalize()} - {safe_profile_name}"
|
||||
|
||||
|
||||
def get_name(service, name):
    """Return the display name ``"<Service> - <safe name>"``.

    ``service`` is passed through ``str.capitalize()`` and ``name`` through
    ``get_safe_name`` before the two are joined with ``" - "``.
    """
    return f"{service.capitalize()} - {get_safe_name(name)}"
|
||||
|
||||
|
||||
def get_regex_pattern_name(service, regex_pattern_name):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue