Prevent duplicate regex_patterns
This commit is contained in:
parent
d4582e7330
commit
06a7c06933
235 changed files with 467 additions and 782 deletions
|
|
@ -9,7 +9,7 @@ from utils.mappings.indexer_flags import INDEXER_FLAG_MAPPING
|
|||
from utils.mappings.release_type import RELEASE_TYPE_MAPPING
|
||||
from utils.mappings.quality_modifiers import QUALITY_MODIFIER_MAPPING
|
||||
from utils.mappings.source import SOURCE_MAPPING
|
||||
from utils.strings import get_name
|
||||
from utils.strings import get_name, get_regex_pattern_name
|
||||
|
||||
IMPLEMENTATION_TO_TAG_MAPPING = {
|
||||
"ReleaseTitleSpecification": "Release Title",
|
||||
|
|
@ -57,7 +57,7 @@ def collect_custom_format(service, file_name, input_json, output_dir):
|
|||
implementation_tags.add(IMPLEMENTATION_TO_TAG_MAPPING[implementation])
|
||||
|
||||
if implementation in ["ReleaseTitleSpecification", "ReleaseGroupSpecification"]:
|
||||
condition["pattern"] = get_name(service, spec.get("name", ""))
|
||||
condition["pattern"] = get_regex_pattern_name(service, spec.get("name", ""))
|
||||
elif implementation in ["ResolutionSpecification"]:
|
||||
condition["resolution"] = f"{spec.get('fields', {}).get('value')}p"
|
||||
elif implementation in ["SourceSpecification"]:
|
||||
|
|
@ -120,7 +120,7 @@ def collect_custom_formats(
|
|||
):
|
||||
trash_id_to_scoring_mapping = {}
|
||||
for root, _, files in os.walk(input_dir):
|
||||
for filename in files:
|
||||
for filename in sorted(files):
|
||||
if not filename.endswith(".json"):
|
||||
continue
|
||||
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ def collect_profiles(
|
|||
trash_id_to_scoring_mapping,
|
||||
):
|
||||
for root, _, files in os.walk(input_dir):
|
||||
for filename in files:
|
||||
for filename in sorted(files):
|
||||
if not filename.endswith(".json"):
|
||||
continue
|
||||
|
||||
|
|
|
|||
|
|
@ -2,11 +2,7 @@ import os
|
|||
import json
|
||||
import yaml
|
||||
|
||||
from utils.strings import get_name
|
||||
|
||||
# TODO: prevent duplicates by only writing unique regex patterns to files
|
||||
# In some cases negations will result in a new regex pattern as of now
|
||||
# NOTE: would need to keep track of all duplicate patterns so that trash_id can still be matched
|
||||
from utils.strings import get_regex_pattern_name
|
||||
|
||||
|
||||
def collect_regex_pattern(service, file_name, input_json, output_dir):
|
||||
|
|
@ -28,7 +24,7 @@ def collect_regex_pattern(service, file_name, input_json, output_dir):
|
|||
# Compose YAML structure
|
||||
name = spec.get("name", "")
|
||||
yml_data = {
|
||||
"name": get_name(service, name),
|
||||
"name": get_regex_pattern_name(service, name),
|
||||
"pattern": pattern,
|
||||
"description": "",
|
||||
"tags": [],
|
||||
|
|
@ -38,8 +34,13 @@ def collect_regex_pattern(service, file_name, input_json, output_dir):
|
|||
# Output path
|
||||
output_path = os.path.join(
|
||||
output_dir,
|
||||
f"{get_name(service, name)}.yml",
|
||||
f"{get_regex_pattern_name(service, name)}.yml",
|
||||
)
|
||||
|
||||
if os.path.exists(output_path):
|
||||
print(f"exists{output_path}, skipping")
|
||||
continue
|
||||
|
||||
with open(output_path, "w", encoding="utf-8") as f:
|
||||
yaml.dump(yml_data, f, sort_keys=False, allow_unicode=True)
|
||||
print(f"Generated: {output_path}")
|
||||
|
|
@ -47,7 +48,7 @@ def collect_regex_pattern(service, file_name, input_json, output_dir):
|
|||
|
||||
def collect_regex_patterns(service, input_dir, output_dir):
|
||||
for root, _, files in os.walk(input_dir):
|
||||
for filename in files:
|
||||
for filename in sorted(files):
|
||||
if not filename.endswith(".json"):
|
||||
continue
|
||||
|
||||
|
|
|
|||
|
|
@ -8,3 +8,7 @@ def get_name(service, profile_name):
|
|||
.replace("Atmos", "ATMOS")
|
||||
)
|
||||
return f"{service.capitalize()} - {safe_profile_name}"
|
||||
|
||||
|
||||
def get_regex_pattern_name(service, regex_pattern_name):
    """Build the name used for a regex pattern, with every "Not " removed.

    The name is first produced by ``get_name(service, regex_pattern_name)``;
    each occurrence of the exact, case-sensitive substring ``"Not "`` is then
    deleted from that result.

    NOTE(review): presumably this lets a negated condition ("Not X") resolve
    to the same pattern name as its non-negated counterpart ("X") so that no
    duplicate pattern file is produced -- confirm against the callers.

    Args:
        service: Service identifier, passed through unchanged to ``get_name``.
        regex_pattern_name: Raw pattern/specification name, passed through
            unchanged to ``get_name``.

    Returns:
        The string returned by ``get_name`` with all "Not " substrings
        stripped.
    """
    full_name = get_name(service, regex_pattern_name)
    # The removal applies to the whole formatted name, not only to a leading
    # "Not " prefix.
    return full_name.replace("Not ", "")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue