Fix missing links and removal of trash data

This commit is contained in:
Johan van Eck 2025-07-26 15:49:21 +03:00
parent 2cbea8c226
commit f7aa3da4b0
239 changed files with 88 additions and 996 deletions

View file

@ -12,6 +12,7 @@ import yaml
from utils.custom_formats import collect_custom_formats
from utils.regex_patterns import collect_regex_patterns
from utils.profiles import collect_profiles
from utils.files import clean_files
# Prevent aliases from showing up
yaml.Dumper.ignore_aliases = lambda *args: True
@ -62,9 +63,24 @@ def main():
)
continue
collect_regex_patterns(service, trash_custom_formats_dir, regex_patterns_dir)
collect_custom_formats(service, trash_custom_formats_dir, custom_formats_dir)
collect_profiles(service, trash_profiles_dir, profiles_dir)
collect_regex_patterns(
service,
trash_custom_formats_dir,
regex_patterns_dir,
)
trash_id_to_scoring_mapping = collect_custom_formats(
service,
trash_custom_formats_dir,
custom_formats_dir,
)
collect_profiles(
service,
trash_profiles_dir,
profiles_dir,
trash_id_to_scoring_mapping,
)
clean_files([regex_patterns_dir, custom_formats_dir, profiles_dir])
if __name__ == "__main__":

View file

@ -62,11 +62,8 @@ def collect_custom_format(service, file_name, input_json, output_dir):
# Compose YAML structure
name = input_json.get("name", "")
trash_id = input_json.get("trash_id", "")
yml_data = {
"name": get_file_name(name),
"trash_id": trash_id,
"trash_scores": input_json.get("trash_scores", {}),
"description": f"""[Custom format from TRaSH-Guides.](https://trash-guides.info/{service.capitalize()}/{service.capitalize()}-collection-of-custom-formats/#{file_name})
{markdownify(input_json.get('description', ''))}""".strip(),
@ -75,9 +72,12 @@ def collect_custom_format(service, file_name, input_json, output_dir):
"tests": [],
}
include_in_rename = input_json.get("includeCustomFormatWhenRenaming", False)
if include_in_rename:
yml_data["metadata"] = {"includeInRename": include_in_rename}
# Include in rename is currently not supported from the file system
# It would require inserting into the DB
# TODO: Write a script that can do this?
# include_in_rename = input_json.get("includeCustomFormatWhenRenaming", False)
# if include_in_rename:
# yml_data["metadata"] = {"includeInRename": include_in_rename}
# Output path
output_path = os.path.join(output_dir, f"{get_file_name(name)}.yml")
@ -86,7 +86,12 @@ def collect_custom_format(service, file_name, input_json, output_dir):
print(f"Generated: {output_path}")
def collect_custom_formats(service, input_dir, output_dir):
def collect_custom_formats(
service,
input_dir,
output_dir,
):
trash_id_to_scoring_mapping = {}
for root, _, files in os.walk(input_dir):
for filename in files:
if not filename.endswith(".json"):
@ -96,4 +101,17 @@ def collect_custom_formats(service, input_dir, output_dir):
file_stem = os.path.splitext(filename)[0] # Filename without extension
with open(file_path, "r", encoding="utf-8") as f:
data = json.load(f)
collect_custom_format(service, file_stem, data, output_dir)
trash_id = data.get("trash_id")
trash_scores = data.get("trash_scores", {})
if trash_id:
trash_id_to_scoring_mapping[trash_id] = trash_scores
collect_custom_format(
service,
file_stem,
data,
output_dir,
)
return trash_id_to_scoring_mapping

21
scripts/utils/files.py Normal file
View file

@ -0,0 +1,21 @@
import os
import yaml
# TODO: Consider not writing these values to the file and rather keeping track in runtime
def remove_trash_references(file_path):
    """Strip the TRaSH bookkeeping keys from a YAML file, in place.

    Loads the document at ``file_path``, removes the top-level ``trash_id``
    and ``trash_scores`` keys when present, and writes the result back to the
    same path. The file is left untouched when it holds no mapping or when
    neither key is present.

    Args:
        file_path: Path of the YAML file to rewrite.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f)

    # An empty file loads as None and a list/scalar document has no top-level
    # keys; in both cases there is nothing to strip.
    if not isinstance(data, dict):
        return

    changed = False
    for key in ("trash_id", "trash_scores"):
        if key in data:
            del data[key]
            changed = True

    # Skip the rewrite when nothing was removed so untouched files keep their
    # exact on-disk formatting.
    if not changed:
        return

    # Persist the stripped document; without this write the pops above only
    # affect the in-memory copy.
    # NOTE(review): dump options should mirror the ones the generators use
    # when they first write these files - confirm against those writers.
    with open(file_path, "w", encoding="utf-8") as f:
        yaml.dump(data, f, sort_keys=False, allow_unicode=True)
def clean_files(dirs):
    """Remove TRaSH references from every YAML file under the given directories.

    Walks each directory recursively and passes every file whose name ends in
    ``.yml`` or ``.yaml`` to ``remove_trash_references``. Directories that do
    not exist are skipped silently by ``os.walk``.

    Args:
        dirs: Iterable of directory paths to walk.
    """
    for directory in dirs:
        for root, _, files in os.walk(directory):
            for filename in files:
                # The generators write ".yml" files; ".yaml" is accepted as
                # well so files matched by the previous filter still qualify.
                if not filename.endswith((".yml", ".yaml")):
                    continue

                remove_trash_references(os.path.join(root, filename))

View file

@ -7,57 +7,12 @@ from markdownify import markdownify
from utils.qualities import QUALITIES
from utils.strings import get_file_name
cache = {}
# Resolve the score of one custom format for a given score set.
#
# Checks the module-level `cache` first; on a miss it lists the sibling
# "custom_formats" directory (relative to output_dir) and reads the ".yml"
# files found there until one declares the requested trash_id.
#
# Returns trash_scores[trash_score_set], falling back to the "default" entry
# and then to 0. Also returns 0 when the matching data has no trash_scores.
# NOTE(review): no return follows the scan loop, so an unmatched trash_id
# appears to yield None rather than 0 - confirm callers tolerate that.
def find_score_for_custom_format(
trash_score_set, custom_format_name, trash_id, output_dir
):
custom_formats_dir = os.path.join(output_dir, "..", "custom_formats")
target_file = None
# Cache hit: answer from the stored document without touching the disk.
if cache.get(trash_id):
trash_scores = cache[trash_id].get("trash_scores", {})
if not trash_scores:
print(f"No trash scores found in cache for {custom_format_name}")
return 0
return trash_scores.get(trash_score_set, trash_scores.get("default", 0))
# Cache miss: scan the generated custom format files.
for fname in os.listdir(custom_formats_dir):
if fname.endswith(".yml"):
target_file = os.path.join(custom_formats_dir, fname)
if not target_file or not os.path.exists(target_file):
print(f"Target file {target_file} does not exist. Skipping...")
continue
with open(target_file, "r", encoding="utf-8") as f:
data = yaml.safe_load(f)
if not data or "trash_id" not in data:
print(f"Invalid custom format data for {custom_format_name}")
continue
# NOTE(review): the document is stored under the *requested* trash_id before
# the comparison below, so a non-matching file appears to be cached under
# that id and served by the cache branch on later calls - confirm.
cache[trash_id] = data
if data["trash_id"] != trash_id:
continue
trash_scores = data.get("trash_scores", {})
if not trash_scores:
print(f"No trash scores found in {custom_format_name}")
return 0
return trash_scores.get(trash_score_set, trash_scores.get("default", 0))
def collect_profile_formats(trash_score_set, format_items, output_dir):
def collect_profile_formats(trash_score_set, format_items, trash_id_to_scoring_mapping):
profile_format = []
for name, trash_id in format_items.items():
score = find_score_for_custom_format(
trash_score_set, name, trash_id, output_dir
)
scoring = trash_id_to_scoring_mapping[trash_id]
score = scoring.get(trash_score_set, scoring.get("default", 0))
if score == 0:
continue
@ -103,13 +58,13 @@ def get_upgrade_until(quality_name, profile_qualities):
)
if found_quality:
found_quality = found_quality.copy()
if not found_quality.get("description"):
if found_quality.get("description", "") == "":
found_quality.pop("description", None)
found_quality.pop("qualities", None)
return found_quality
def collect_profile(service, input_json, output_dir):
def collect_profile(service, input_json, output_dir, trash_id_to_scoring_mapping):
# Compose YAML structure
name = input_json.get("name", "")
trash_id = input_json.get("trash_id", "")
@ -119,7 +74,6 @@ def collect_profile(service, input_json, output_dir):
"description": f"""[Profile from TRaSH-Guides.](https://trash-guides.info/{service.capitalize()}/{service}-setup-quality-profiles)
{markdownify(input_json.get('trash_description', ''))}""".strip(),
"trash_id": trash_id,
"tags": [],
"upgradesAllowed": input_json.get("upgradeAllowed", True),
"minCustomFormatScore": input_json.get("minFormatScore", 0),
@ -130,7 +84,7 @@ def collect_profile(service, input_json, output_dir):
"custom_formats": collect_profile_formats(
input_json.get("trash_score_set"),
input_json.get("formatItems", {}),
output_dir,
trash_id_to_scoring_mapping,
),
"language": input_json.get("language", "any").lower(),
}
@ -142,7 +96,12 @@ def collect_profile(service, input_json, output_dir):
print(f"Generated: {output_path}")
def collect_profiles(service, input_dir, output_dir):
def collect_profiles(
service,
input_dir,
output_dir,
trash_id_to_scoring_mapping,
):
for root, _, files in os.walk(input_dir):
for filename in files:
if not filename.endswith(".json"):
@ -151,4 +110,4 @@ def collect_profiles(service, input_dir, output_dir):
file_path = os.path.join(root, filename)
with open(file_path, "r", encoding="utf-8") as f:
data = json.load(f)
collect_profile(service, data, output_dir)
collect_profile(service, data, output_dir, trash_id_to_scoring_mapping)

View file

@ -3,6 +3,7 @@ def get_file_name(profile_name):
profile_name.replace("/", "-")
.replace("[", "(")
.replace("]", ")")
# TODO: This triggers too often, how to be more specific?
.replace("HDR10Plus", "HDR10+")
.replace("10 bit", "10bit")
.replace("Atmos", "ATMOS")
)