profilarr-trash-guides/scripts/utils/regex_patterns.py
2025-07-27 21:28:21 +03:00

96 lines
3.2 KiB
Python

import os
import json
import yaml
from utils.strings import get_regex_pattern_name, get_safe_name
# Module-level registry: regex pattern string -> name of the YAML file already
# generated for it. Lets a pattern seen again (e.g. from another service) merge
# into the existing file instead of producing a duplicate.
duplicate_regex_patterns = {}
def collect_regex_pattern(service, file_name, input_json, output_dir):
    """Extract release-title/release-group regex patterns from one parsed
    custom-format JSON and write each as a ``<name>.yml`` file in *output_dir*.

    Args:
        service: Source service name (e.g. ``"radarr"``); used both in the
            generated pattern name and as a tag in the YAML.
        file_name: Stem of the source JSON file — only used in log messages.
        input_json: Parsed JSON dict expected to carry a ``"specifications"``
            list of spec dicts.
        output_dir: Directory that receives the generated YAML files.

    Side effects:
        Mutates the module-level ``duplicate_regex_patterns`` registry and
        creates, renames, or rewrites files under *output_dir*.
    """
    # Process EVERY matching specification, not just the first one.
    for spec in input_json.get("specifications", []):
        implementation = spec.get("implementation")
        if implementation not in (
            "ReleaseTitleSpecification",
            "ReleaseGroupSpecification",
        ):
            continue
        pattern = spec.get("fields", {}).get("value")
        if not pattern:
            print(f"No pattern found in {file_name} for {implementation}")
            continue

        name = spec.get("name", "")
        existing_pattern_name = duplicate_regex_patterns.get(pattern)
        if existing_pattern_name:
            # Pattern was already emitted. If the existing file's path does
            # not carry this service's tag, it came from the other service:
            # rename it to the service-neutral safe name and append this
            # service's tag to the YAML, so one file covers both services.
            existing_pattern_path = os.path.join(
                output_dir,
                f"{existing_pattern_name}.yml",
            )
            if (
                os.path.exists(existing_pattern_path)
                and service.capitalize() not in existing_pattern_path
            ):
                new_path = os.path.join(output_dir, f"{get_safe_name(name)}.yml")
                os.rename(existing_pattern_path, new_path)
                # Rewrite the renamed file in place: update its name and tags.
                with open(new_path, "r+", encoding="utf-8") as f:
                    yml_data = yaml.safe_load(f)
                    yml_data["name"] = get_safe_name(name)
                    yml_data["tags"].append(service.capitalize())
                    f.seek(0)
                    yaml.dump(yml_data, f, sort_keys=False, allow_unicode=True)
                    f.truncate()  # new dump may be shorter than the old text
                duplicate_regex_patterns[pattern] = get_safe_name(name)
            continue

        # First time we see this pattern: register it and emit a fresh file.
        duplicate_regex_patterns[pattern] = get_regex_pattern_name(service, name)
        yml_data = {
            "name": get_regex_pattern_name(service, name),
            "pattern": pattern,
            "description": "",
            "tags": [service.capitalize()],
            "tests": [],
        }
        output_path = os.path.join(
            output_dir,
            f"{get_regex_pattern_name(service, name)}.yml",
        )
        if os.path.exists(output_path):
            # Bug fix: original message ran the word and path together
            # ("exists/some/path") — add the missing separator.
            print(f"exists {output_path}, skipping")
            continue
        with open(output_path, "w", encoding="utf-8") as f:
            yaml.dump(yml_data, f, sort_keys=False, allow_unicode=True)
        print(f"Generated: {output_path}")
def collect_regex_patterns(service, input_dir, output_dir):
    """Recursively scan *input_dir* for ``*.json`` custom formats and convert
    each into YAML regex-pattern files in *output_dir*.

    Files are handled in sorted order within each directory. Returns the
    module-level ``duplicate_regex_patterns`` mapping (pattern -> file name).
    """
    for dirpath, _subdirs, entries in os.walk(input_dir):
        json_names = sorted(e for e in entries if e.endswith(".json"))
        for json_name in json_names:
            source_path = os.path.join(dirpath, json_name)
            stem, _ext = os.path.splitext(json_name)
            with open(source_path, "r", encoding="utf-8") as handle:
                parsed = json.load(handle)
            collect_regex_pattern(service, stem, parsed, output_dir)
    return duplicate_regex_patterns