#!/usr/bin/env python3
#
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (c) 2013-2023 Igor Pecovnik, igor@armbian.com
#
# This file is a part of the Armbian Build Framework
# https://github.com/armbian/build/
#
import email.utils
import logging
import mailbox
import os
import re
import subprocess
import tempfile
import git # GitPython
from unidecode import unidecode
from unidiff import PatchSet
from common.patching_config import PatchingConfig
MAGIC_MBOX_MARKER_STANDARD = "Mon Sep 17 00:00:00 2001"
MAGIC_MBOX_MARKER_B4 = "git@z Thu Jan 1 00:00:00 1970"
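# The first marker is the fixed magic timestamp that git format-patch writes into the leading
# "From <hash> <date>" separator of every patch; the second is the equivalent marker used by
# b4-generated mboxes. Their presence and count are used below to sanity-check mbox parsing.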
REGEX_PATCH_FILENAMES = r"^patching file \"(.+)\""
log: logging.Logger = logging.getLogger("patching_utils")
class PatchRootDir:
def __init__(self, abs_dir, root_type, patch_type, root_dir):
self.abs_dir = abs_dir
self.root_type = root_type
self.patch_type = patch_type
self.root_dir = root_dir
class PatchSubDir:
def __init__(self, rel_dir, sub_type):
self.rel_dir = rel_dir
self.sub_type = sub_type
class PatchDir:
def __init__(self, patch_root_dir: PatchRootDir, patch_sub_dir: PatchSubDir, abs_root_dir: str):
self.patch_root_dir: PatchRootDir = patch_root_dir
self.patch_sub_dir: PatchSubDir = patch_sub_dir
self.full_dir = os.path.realpath(os.path.join(self.patch_root_dir.abs_dir, self.patch_sub_dir.rel_dir))
self.rel_dir = os.path.relpath(self.full_dir, abs_root_dir)
self.root_type = self.patch_root_dir.root_type
self.sub_type = self.patch_sub_dir.sub_type
self.patch_files: list[PatchFileInDir] = []
self.is_autogen_dir: bool = False
def __str__(self) -> str:
return "<PatchDir: full_dir:'" + str(self.full_dir) + "'>"
def find_series_patch_files(self) -> list["PatchFileInDir"]:
# do nothing if self.full_dir is not a real, existing directory
if not os.path.isdir(self.full_dir):
return []
# If the directory contains a series.conf file.
series_patches: list[PatchFileInDir] = []
series_conf_path = os.path.join(self.full_dir, "series.conf")
if os.path.isfile(series_conf_path):
counter = 0
patches_in_series = self.parse_series_conf(series_conf_path)
for patch_file_name in patches_in_series:
patch_file_path = os.path.join(self.full_dir, patch_file_name)
if os.path.isfile(patch_file_path):
counter += 1
patch_file = PatchFileInDir(patch_file_path, self)
patch_file.from_series = True
patch_file.series_counter = counter
# Fix basename for patches in series.conf
relative_path = os.path.relpath(patch_file_path, self.full_dir)
patch_file.relative_dirs_and_base_file_name = os.path.splitext(relative_path)[0]
series_patches.append(patch_file)
else:
raise Exception(
f"series.conf file {series_conf_path} contains a patch file {patch_file_name} that does not exist")
return series_patches
def find_files_patch_files(self) -> list["PatchFileInDir"]:
# do nothing if self.full_dir is not a real, existing directory
if not os.path.isdir(self.full_dir):
return []
# Find the files in self.full_dir that end in .patch; do not consider subdirectories.
# Add them to self.patch_files.
for file in os.listdir(self.full_dir):
# noinspection PyTypeChecker
if file.endswith(".patch"):
self.patch_files.append(PatchFileInDir(file, self))
return self.patch_files
@staticmethod
def parse_series_conf(series_conf_path):
patches_in_series = []
with open(series_conf_path, "r") as series_conf_file:
for line in series_conf_file:
line = line.strip()
if line.startswith("#"):
continue
# if line begins with "-", skip it
if line.startswith("-"):
continue
if line == "":
continue
patches_in_series.append(line)
return patches_in_series
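# Illustrative example (hypothetical series.conf): comment lines, "-"-prefixed lines and
# blank lines are skipped; everything else is taken as a patch path, in application order:
#
#   # patches applied in order
#   fixes/0001-first.patch
#   -fixes/0002-disabled.patch
#   fixes/0003-second.patch
#
# parse_series_conf() returns ["fixes/0001-first.patch", "fixes/0003-second.patch"].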
class PatchFileInDir:
def __init__(self, file_name, patch_dir: PatchDir):
self.file_name = file_name
self.patch_dir: PatchDir = patch_dir
self.relative_dirs_and_base_file_name = os.path.splitext(self.file_name)[0]
self.file_name_no_ext_no_dirs = os.path.basename(self.relative_dirs_and_base_file_name)
self.from_series = False
self.series_counter = None
def __str__(self) -> str:
desc: str = f"<PatchFileInDir: file_name:'{self.file_name}', dir:{self.patch_dir.__str__()} >"
return desc
def full_file_path(self):
return os.path.join(self.patch_dir.full_dir, self.file_name)
def relative_to_src_filepath(self):
return os.path.join(self.patch_dir.rel_dir, self.file_name)
def split_patches_from_file(self) -> list["PatchInPatchFile"]:
# Hack: for autogen dirs, we just need to be as fast as possible, don't parse anything.
if self.patch_dir.is_autogen_dir:
contents_bytes = read_file_as_bytes(self.full_file_path())
# @TODO: date?
bare_patch = PatchInPatchFile(
self, 1, "", "Autogenerated patch",
"Armbian Autopatcher <auto.patch@armbian.com>",
f"[AUTOGEN] {self.relative_dirs_and_base_file_name}", None)
bare_patch.diff_bytes = contents_bytes
log.debug(f"Patch file {self.full_file_path()} is autogenerated.")
return [bare_patch]
counter: int = 1
mbox: mailbox.mbox = mailbox.mbox(self.full_file_path())
is_invalid_mbox: bool = False
# Sanity check: if the file is understood as mailbox, make sure the first line is a valid "From " line,
# and has the magic marker 'Mon Sep 17 00:00:00 2001' in it; otherwise, it could be a combined
# bare patch + mbox-formatted patch in a single file, and we'd lose the bare patch.
if len(mbox) > 0:
contents, contents_read_problems = read_file_as_utf8(self.full_file_path())
first_line = contents.splitlines()[0].strip()
if not first_line.startswith("From ") or ((MAGIC_MBOX_MARKER_STANDARD not in first_line) and (MAGIC_MBOX_MARKER_B4 not in first_line)):
# is_invalid_mbox = True # we might try to recover from this if there are too many
# log.error(
raise Exception(
f"File {self.full_file_path()} seems to be a valid mbox file, but it begins with"
f" '{first_line}'; in an mbox, the first line should be a valid 'From ' separator"
f" line with the magic date.")
# Count how many times the magic marker dates appear in the contents
magic_marker_count = contents.count(MAGIC_MBOX_MARKER_STANDARD) + contents.count(MAGIC_MBOX_MARKER_B4)
if magic_marker_count != len(mbox):
# is_invalid_mbox = True # we might try to recover from this if there are too many
# log.error(
raise Exception(
f"File {self.full_file_path()} seems to be a valid mbox file, but it contains"
f" {magic_marker_count} magic marker dates, while the mbox file has been parsed as"
f" {len(mbox)} patches. Check the file for mbox formatting errors.")
# if there are no emails, it's a diff-only patch file.
if is_invalid_mbox or len(mbox) == 0:
# read the file into a string; explicitly use utf-8 to not depend on the system locale
diff, read_problems = read_file_as_utf8(self.full_file_path())
bare_patch = PatchInPatchFile(self, counter, diff, None, None, None, None)
bare_patch.problems.append("not_mbox")
bare_patch.problems.extend(read_problems)
log.debug(f"Patch file {self.full_file_path()} is not properly mbox-formatted.")
return [bare_patch]
# loop over the emails in the mbox
patches: list[PatchInPatchFile] = []
msg: mailbox.mboxMessage
for msg in mbox:
problems: list[str] = []
patch: str
try:
patch = msg.get_payload(decode=True).decode("utf-8")
except UnicodeDecodeError as e:
log.warning(f"Invalid UTF-8 mbox payload in file {self.full_file_path()}(:{counter}): {e}")
problems.append("invalid_utf8_mbox")
patch = msg.get_payload() # this will mangle valid utf-8; go ahead and use that anyway
# split the patch itself and the description from the payload
desc, patch_contents = self.split_description_and_patch(patch)
if len(patch_contents) == 0:
log.warning(f"WARNING: patch file {self.full_file_path()} fragment {counter} contains an empty patch")
continue
# Sanity check: if patch_contents still contains the magic marker, something is _very_ wrong and we would silently swallow a patch.
if (MAGIC_MBOX_MARKER_STANDARD in patch_contents) or (MAGIC_MBOX_MARKER_B4 in patch_contents):
raise Exception(
f"File {self.full_file_path()} fragment {counter} seems to be a valid mbox file, but it contains"
f" the magic date in the patch contents, shouldn't happen. Check the mbox formatting.")
patches.append(PatchInPatchFile(
self, counter, patch_contents, desc, msg['From'], msg['Subject'], msg['Date']))
counter += 1
# sanity check, throw exception if there are no patches
if len(patches) == 0:
raise Exception("No valid patches found in file " + self.full_file_path())
return patches
@staticmethod
def split_description_and_patch(full_message_text: str) -> tuple["str | None", str]:
separator = "\n---\n"
# check if the separator is in the patch, if so, split
if separator in full_message_text:
# find the _last_ occurrence of the separator, and split two chunks from that position
separator_pos = full_message_text.rfind(separator)
desc = full_message_text[:separator_pos]
patch = full_message_text[separator_pos + len(separator):]
return desc, patch
else: # no separator, so no description, patch is the full message
desc = None
patch = full_message_text
return desc, patch
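# Illustrative example (hypothetical message body): the split happens at the *last*
# "\n---\n" separator, so "---" lines inside the description survive:
#
#   split_description_and_patch("Fix foo\n\nDetails.\n---\ndiff --git a/foo b/foo\n...")
#   -> ("Fix foo\n\nDetails.", "diff --git a/foo b/foo\n...")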
def rewrite_patch_file(self, patches: list["PatchInPatchFile"]):
# Produce a mailbox file from the patches.
# The patches are assumed to be in the same order as they were in the original file.
# The original file is overwritten.
output_file = self.full_file_path()
log.info(f"Rewriting {output_file} with new patches...")
with open(output_file, "w") as f:
for patch in patches:
log.info(f"Writing patch {patch.counter} to {output_file}...")
f.write(patch.rewritten_patch)
# Placeholder for future manual work
def shorten_patched_file_name_for_stats(path):
return os.path.basename(path)
def parse_patch_stdout_for_files(stdout_output: str):
# run the REGEX_PATCH_FILENAMES on the output; get the group 1 (the filename) for each match
# log.debug(f"Running regex on {stdout_output}")
ret: list[str] = re.findall(REGEX_PATCH_FILENAMES, stdout_output, re.MULTILINE)
# log.debug(f"Found {len(ret)} patched files in patch output: {','.join(ret)}.")
return ret # warning: this includes files the patch *deleted* too
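# Illustrative example (hypothetical output of GNU patch run with --quoting-style=c,
# which double-quotes the file names the regex above expects):
#
#   patching file "drivers/net/foo.c"
#   patching file "Makefile"
#
# -> ["drivers/net/foo.c", "Makefile"]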
class PatchInPatchFile:
def __init__(self, parent: PatchFileInDir, counter: int, diff: str, desc, from_hdr, sbj_hdr, date_hdr):
self.problems: list[str] = []
self.applied_ok: bool = False
self.rewritten_patch: str | None = None
self.git_commit_hash: str | None = None
self.actually_patched_files: list[str] = []
self.parent: PatchFileInDir = parent
self.counter: int = counter
self.diff: str | None = diff
self.diff_bytes: bytes | None = None
self.failed_to_parse: bool = False
# Basic parsing of properly mbox-formatted patches
self.desc: str | None = downgrade_to_ascii(desc) if desc is not None else None
self.from_name, self.from_email = self.parse_from_name_email(from_hdr) if from_hdr is not None else (
None, None)
self.subject: str | None = downgrade_to_ascii(fix_patch_subject(sbj_hdr)) if sbj_hdr is not None else None
self.date = email.utils.parsedate_to_datetime(date_hdr) if date_hdr is not None else None
self.patched_file_stats_dict: dict = {}
self.total_additions: int = 0
self.total_deletions: int = 0
self.files_modified: int = 0
self.files_added: int = 0
self.files_renamed: int = 0
self.files_removed: int = 0
self.created_file_names = []
self.deleted_file_names = []
self.renamed_file_names_source = [] # The original file names of renamed files
self.all_file_names_touched = []
self.rejects: str | None = None
self.patch_output: str | None = None
def parse_from_name_email(self, from_str: str) -> tuple["str | None", "str | None"]:
m = re.match(r'(?P<name>.*)\s*<\s*(?P<email>.*)\s*>', from_str)
if m is None:
self.problems.append("invalid_author")
log.debug(
f"Failed to parse name and email from: '{from_str}' while parsing patch {self.counter} in file {self.parent.full_file_path()}")
return downgrade_to_ascii(remove_quotes(from_str)), "unknown-email@domain.tld"
else:
# Return the name and email
return downgrade_to_ascii(remove_quotes(m.group("name"))), remove_quotes(m.group("email"))
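# Illustrative examples (hypothetical headers):
#   '"Jane Doe" <jane@example.com>' -> ("Jane Doe", "jane@example.com")
#   'jane@example.com' (no angle brackets) -> ("jane@example.com", "unknown-email@domain.tld"),
#   recording an "invalid_author" problem.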
def one_line_patch_stats(self) -> str:
if (not self.failed_to_parse) and (not self.parent.patch_dir.is_autogen_dir):
files_desc = ", ".join(self.patched_file_stats_dict)
return f"{self.text_diffstats()} {{{files_desc}}}"
return ""
def text_diffstats(self) -> str:
operations: list[str] = []
operations.append(f"{self.files_modified}M") if self.files_modified > 0 else None
operations.append(f"{self.files_added}A") if self.files_added > 0 else None
operations.append(f"{self.files_removed}D") if self.files_removed > 0 else None
operations.append(f"{self.files_renamed}R") if self.files_renamed > 0 else None
return f"(+{self.total_additions}/-{self.total_deletions})[{', '.join(operations)}]"
def parse_patch(self):
# Hack: don't parse if autogenned; this could also be "don't parse if larger than X megabytes" since
# large patches cause trouble
if self.parent.patch_dir.is_autogen_dir:
log.debug(
f"Skipping parsing of auto-generated patch {self.counter} in file {self.parent.full_file_path()}")
return
else:
# parse the patch, using the unidiff package
try:
patch = PatchSet(self.diff, encoding=None)
except Exception as e:
self.problems.append("invalid_diff")
self.failed_to_parse = True
log.warning(f"Failed to parse unidiff for file {self.parent.full_file_path()}(:{self.counter}): '{str(e).strip()}'")
return # no point in continuing; the patch is invalid; might be recovered during apply
self.total_additions = 0
self.total_deletions = 0
self.files_renamed = 0
self.files_modified = len(patch.modified_files)
self.files_added = len(patch.added_files)
self.files_removed = len(patch.removed_files)
self.created_file_names = [f.path for f in patch.added_files]
self.deleted_file_names = [f.path for f in patch.removed_files]
self.renamed_file_names_source = [] # computed below
self.all_file_names_touched = \
[f.path for f in patch.added_files] + \
[f.path for f in patch.modified_files] + \
[f.path for f in patch.removed_files]
self.patched_file_stats_dict = {}
for f in patch:
if not f.is_binary_file:
self.total_additions += f.added
self.total_deletions += f.removed
self.patched_file_stats_dict[shorten_patched_file_name_for_stats(f.path)] = {
"abs_changed_lines": f.added + f.removed}
if f.is_rename:
self.files_renamed += 1
sans_prefix = f.source_file[2:] if f.source_file.startswith("a/") else f.source_file
self.renamed_file_names_source.append(sans_prefix)
# sort the self.patched_file_stats_dict by the abs_changed_lines, descending
self.patched_file_stats_dict = dict(sorted(
self.patched_file_stats_dict.items(),
key=lambda item: item[1]["abs_changed_lines"],
reverse=True))
# sanity check; if all the values are zeroes, throw an exception
if self.total_additions == 0 and self.total_deletions == 0 and \
self.files_modified == 0 and self.files_added == 0 and self.files_removed == 0:
self.problems.append("diff_has_no_changes")
raise Exception(
f"Patch file {self.parent.full_file_path()} has no changes. diff is {len(self.diff)} bytes: '{self.diff}'")
def __str__(self) -> str:
return self.str_oneline_around("->", "<-")
def str_oneline_around(self, prefix, suffix):
# extra_email = f"{self.from_email}" if self.from_email is not None else ""
# extra_subject = f":'{self.subject}'" if self.subject is not None else ""
# extra_author = f":{extra_email}{extra_subject}"
desc: str = \
f"{prefix}{self.parent.relative_dirs_and_base_file_name}(:{self.counter}) " + \
f"{self.one_line_patch_stats()}{suffix}"
return desc
def apply_patch(self, working_dir: str, options: dict[str, bool]):
# Sanity check: if patch would create files, make sure they don't exist to begin with.
# This avoids patches being able to overwrite the mainline.
for would_be_created_file in self.created_file_names:
full_path = os.path.join(working_dir, would_be_created_file)
if os.path.exists(full_path):
self.problems.append("overwrites")
if options["allow_recreate_existing_files"]:
log.debug(f"Tolerating recreation of {would_be_created_file} in {self} as instructed.")
os.remove(full_path)
else:
log.warning(
f"File {would_be_created_file} already exists, but patch {self} would re-create it.")
# Use the 'patch' utility to apply the patch.
if self.diff_bytes is None:
real_input = self.diff.encode("utf-8")
else:
real_input = self.diff_bytes
# create a temporary filename (don't create the file yet: patch will create it only if there are rejects)
rejects_file = tempfile.mktemp()
# log.debug(f"Rejects file is going to be '{rejects_file}'...")
proc = subprocess.run(
["patch", "--batch", "-p1", "-N", f"--reject-file={rejects_file}", "--quoting-style=c"],
cwd=working_dir,
input=real_input,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
check=False)
# read the output of the patch command
stdout_output = proc.stdout.decode("utf-8").strip()
stderr_output = proc.stderr.decode("utf-8").strip()
# if stdout_output != "":
# log.debug(f"patch stdout: {stdout_output}")
# if stderr_output != "":
# log.warning(f"patch stderr: {stderr_output}")
# Check if the rejects exists:
if os.path.exists(rejects_file):
log.debug(f"Rejects file {rejects_file} exists.")
# Show its contents
with open(rejects_file, "r") as f:
reject_contents = f.read()
self.rejects = reject_contents
# delete it
os.remove(rejects_file)
# Look at stdout. If it contains:
if " (offset" in stdout_output or " with fuzz " in stdout_output:
log.debug(f"Patch {self} needs rebase: offset/fuzz used during apply.")
self.problems.append("needs_rebase")
if "can't find file to patch at input line" in stdout_output:
log.warning(f"Patch {self} needs fixing: can't find file to patch.")
self.problems.append("missing_file")
# parse the stdout output for the files actually patched.
if options["set_patch_date"]:
self.actually_patched_files = parse_patch_stdout_for_files(stdout_output)
self.apply_patch_date_to_files(working_dir, options)
# Store the stdout and stderr output
patch_output = ""
patch_output += f"{stdout_output}\n" if stdout_output != "" else ""
patch_output += f"{stderr_output}\n" if stderr_output != "" else ""
self.patch_output = patch_output
# Check if the exit code is not zero and bomb
if proc.returncode != 0:
self.problems.append("failed_apply")
raise Exception(f"Failed to apply patch {self.parent.full_file_path()}")
def commit_changes_to_git(self, repo: git.Repo, add_rebase_tags: bool, split_patches: bool, pconfig: PatchingConfig):
log.info(f"Committing changes to git: {self.parent.relative_dirs_and_base_file_name}")
# add all the files that were touched by the patch
# if the patch failed to parse, this will be an empty list, so we'll just add all changes.
add_all_changes_in_git = False
if (not self.failed_to_parse) and (not self.parent.patch_dir.is_autogen_dir):
# sanity check.
if len(self.all_file_names_touched) == 0:
raise Exception(
f"Patch {self} has no files touched, but is not marked as failed to parse.")
# add all files to git staging area
all_files_to_add: list[str] = []
for file_name in self.all_file_names_touched:
is_delete = False
# Check if deleted, don't complain if so.
if file_name in self.deleted_file_names:
is_delete = True
log.info(f"Adding file {file_name} to git")
full_path = os.path.join(repo.working_tree_dir, file_name)
if (not os.path.exists(full_path)) and (not is_delete):
self.problems.append("wrong_strip_level")
log.error(f"File '{full_path}' does not exist, but is touched by {self}")
add_all_changes_in_git = True
else:
all_files_to_add.append(file_name)
# Also add all source (pre-rename) files that were renamed, sans-checking, since they won't exist.
for file_name in self.renamed_file_names_source:
# But the file has to be at least inside the repo; it's not a real rename if it's outside (e.g. bad /dev/null patches)
if file_name.startswith("/"):
log.warning(f"File {file_name} claims to be a renamed source file, but is outside the repo.")
continue
log.info(f"Adding file {file_name} (rename/source) to git")
all_files_to_add.append(file_name)
if split_patches:
return self.commit_changes_to_git_grouped(all_files_to_add, repo)
if not add_all_changes_in_git:
log.debug(f"Adding (pre-config ) {len(all_files_to_add)} files to git: {' '.join(all_files_to_add)}")
do_not_commit_files = pconfig.patches_to_git_config.do_not_commit_files # from config
do_not_commit_regexes = pconfig.patches_to_git_config.do_not_commit_regexes # from config
final_files_to_add = [f for f in all_files_to_add if f not in do_not_commit_files]
final_files_to_add = [f for f in final_files_to_add if not any(re.match(r, f) for r in do_not_commit_regexes)]
log.debug(f"Adding (post-config) {len(final_files_to_add)} files to git: {' '.join(final_files_to_add)}")
repo.git.add("-f", final_files_to_add)
if self.failed_to_parse or self.parent.patch_dir.is_autogen_dir or add_all_changes_in_git:
log.warning(f"Rescue: adding all changed files to git for {self}")
repo.git.add(repo.working_tree_dir)
desc_no_none = self.desc if self.desc is not None else ""
commit_message = f"{self.subject}\n\n{desc_no_none}"
if add_rebase_tags:
commit_message = f"{self.parent.relative_dirs_and_base_file_name}(:{self.counter})\n\nOriginal-Subject: {self.subject}\n{desc_no_none}"
commit_message = f"{commit_message}\n{self.patch_rebase_tags_desc()}"
author: git.Actor = git.Actor(self.from_name, self.from_email)
committer: git.Actor = git.Actor("Armbian AutoPatcher", "patching@armbian.com")
commit = repo.index.commit(
message=commit_message,
author=author,
committer=committer,
author_date=self.date,
commit_date=self.date,
skip_hooks=True
)
log.info(f"Committed changes to git: {commit.hexsha}")
# Make sure the commit is not empty
if commit.stats.total["files"] == 0:
self.problems.append("empty_commit")
raise Exception(
f"Commit {commit.hexsha} ended up empty; source patch is {self} at {self.parent.full_file_path()}(:{self.counter})")
return {"commit_hash": commit.hexsha, "patch": self}
def commit_changes_to_git_grouped(self, all_files_to_add: list[str], repo: git.Repo):
all_commits = []
prefix = "Feiteng "
grouped_files = {}
# group files by directory
for file_name in all_files_to_add:
dir_name = os.path.dirname(file_name)
if dir_name not in grouped_files:
grouped_files[dir_name] = []
grouped_files[dir_name].append(file_name)
for group_name, files in grouped_files.items():
for one_file in files:
repo.git.add(one_file)
commit_message = f"{prefix}{group_name}\n\n{prefix}{group_name}"
author: git.Actor = git.Actor("Ricardo Pardini", "ricardo@pardini.net")
commit = repo.index.commit(
message=commit_message,
author=author,
committer=author,
author_date=self.date,
commit_date=self.date,
skip_hooks=True
)
log.info(f"Committed changes to git: {commit.hexsha}")
# Make sure the commit is not empty
if commit.stats.total["files"] == 0:
self.problems.append("empty_commit")
raise Exception(
f"Commit {commit.hexsha} ended up empty; source patch is {self} at {self.parent.full_file_path()}(:{self.counter})")
all_commits.append({"commit_hash": commit.hexsha, "patch": self})
return all_commits
def patch_rebase_tags_desc(self):
tags = {}
tags["Patch-File"] = self.parent.relative_dirs_and_base_file_name
tags["Patch-File-Counter"] = self.counter
tags["Patch-Rel-Directory"] = self.parent.patch_dir.rel_dir
tags["Patch-Type"] = self.parent.patch_dir.patch_root_dir.patch_type
tags["Patch-Root-Type"] = self.parent.patch_dir.root_type
tags["Patch-Sub-Type"] = self.parent.patch_dir.sub_type
if self.subject is not None:
tags["Original-Subject"] = self.subject
ret = ""
for k, v in tags.items():
ret += f"X-Armbian: {k}: {v}\n"
return ret
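# Illustrative example of the trailer block this emits (hypothetical values):
#
#   X-Armbian: Patch-File: board-fix-foo
#   X-Armbian: Patch-File-Counter: 1
#   X-Armbian: Patch-Rel-Directory: patch/kernel/archive/sunxi-6.6
#   X-Armbian: Patch-Type: kernel
#   X-Armbian: Patch-Root-Type: core
#   X-Armbian: Patch-Sub-Type: common
#   X-Armbian: Original-Subject: clk: sunxi: fix foo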
def markdown_applied(self):
if self.applied_ok:
return ""
return ""
def markdown_problems(self):
ret = []
# if it's a patch in a series, add emoji
if self.parent.from_series:
ret.append(f" 📜 ")
if self.parent.patch_dir.is_autogen_dir:
ret.append(f" 🤖 ")
if len(self.problems) == 0:
ret.append("")
for problem in self.problems:
if problem in ["not_mbox", "needs_rebase"]:
# warning emoji
ret.append(f"⚠️`[{problem}]` ")
elif problem in ["autogen"]:
ret.append(f"`[{problem}]` ")
else:
ret.append(f"❌`[{problem}]` ")
# if it's a user patch, add smiley
if self.parent.patch_dir.patch_root_dir.root_type == "user":
ret.append(" 🫠`[user]` ")
return " ".join(ret)
def markdown_diffstat(self):
return f"`{self.text_diffstats()}`"
def text_files(self):
ret = []
max_files_shown = 15
file_names = list(self.patched_file_stats_dict.keys())
if len(file_names) == 0:
return "?"
for file_name in file_names[:max_files_shown]:
ret.append(f"{file_name}")
if len(file_names) > max_files_shown:
ret.append(f"and {len(file_names) - max_files_shown} more")
return ", ".join(ret)
def markdown_files(self):
ret = []
max_files_shown = 15
# Use the keys of patched_file_stats_dict, which is already sorted by most-changed files
file_names = list(self.patched_file_stats_dict.keys())
# if no files were touched, just return a placeholder
if len(file_names) == 0:
return "`?`"
for file_name in file_names[:max_files_shown]:
ret.append(f"`{file_name}`")
if len(file_names) > max_files_shown:
ret.append(f"_and {len(file_names) - max_files_shown} more_")
return ", ".join(ret)
def text_author(self):
if self.from_name:
return f"{self.from_name.strip()}"
return "[no Author]"
def markdown_author(self):
if self.from_name:
return f"`{self.from_name.strip()}`"
return "`[no Author]`"
def markdown_subject(self):
if self.subject:
return f"_{self.subject}_"
return "`[no Subject]`"
def text_subject(self):
if self.subject:
return f"{self.subject}"
return "[no Subject]"
def markdown_link_to_patch(self):
if self.git_commit_hash is None:
return ""
return f"{self.git_commit_hash} "
def markdown_name(self, skip_markdown=False):
ret = []
escape = "`" if not skip_markdown else ""
patch_name = self.parent.relative_dirs_and_base_file_name
# if the relative name includes slashes, split at the last slash: the first part is the directory, the second the file
if "/" in self.parent.relative_dirs_and_base_file_name:
dir_name, patch_name = self.parent.relative_dirs_and_base_file_name.rsplit("/", 1)
if dir_name is not None:
# get only the last part of the dir_name
dir_name = dir_name.split("/")[-1]
ret.append(f"{escape}[{dir_name}/]{escape}")
ret.append(f"{escape}{patch_name}{escape}")
return " ".join(ret)
def rich_name_status(self):
color = "green"
for problem in self.problems:
if problem in ["not_mbox", "needs_rebase"]:
# warning-level problem: go yellow, but never mask an earlier error
if color == "green": color = "yellow"
else:
color = "red" # error-level problems always win
# @TODO: once our ansi-haste supports it, use [link url=file://blaaa]
return f"[bold {color}]{self.markdown_name(skip_markdown=True)}"
def rich_patch_output(self):
ret = self.patch_output
color_tags = {
'green': ['Reversed (or previously applied) patch detected!'],
'yellow': ['with fuzz', 'offset ', ' hunks ignored', ' hunk ignored'],
'red': ['hunk FAILED', 'hunks FAILED']
}
# use Rich's syntax highlighting to highlight with color
for color in color_tags:
for tag in color_tags[color]:
ret = ret.replace(tag, f"[bold {color}]{tag}[/bold {color}]")
return ret
def apply_patch_date_to_files(self, working_dir, options):
# The date applied to the patched files is the newer of:
# 1) the mtime of the root Makefile
# 2) the mtime of the patch file
patch_mtime = os.path.getmtime(self.parent.full_file_path())
makefile_mtime = options["root_makefile_date"]
final_mtime = makefile_mtime
if patch_mtime > makefile_mtime:
log.debug(f"Patch {self.parent.full_file_path()} is newer than root Makefile, using patch date")
final_mtime = patch_mtime
else:
log.debug(
f"Root Makefile is newer than patch '{self.parent.full_file_path()}', using Makefile date")
# Apply the date to all files that were touched by the patch
# If the patch parsed OK, avoid trying to touch files the patch deleted.
files_to_touch = self.actually_patched_files
if not self.failed_to_parse:
# remove self.deleted_file_names from files_to_touch
files_to_touch = [f for f in files_to_touch if f not in self.deleted_file_names]
for file_name in files_to_touch:
# log.debug(f"Setting mtime of '{file_name}' to '{final_mtime}'.")
file_path = os.path.join(working_dir, file_name)
try:
os.utime(file_path, (final_mtime, final_mtime))
except FileNotFoundError:
log.warning(f"File '{file_path}' not found in patch {self}, can't set mtime.")
def fix_patch_subject(subject):
# replace newlines with one space
subject = re.sub(r"\s+", " ", subject.strip())
# replace every non-printable character with a space
subject = re.sub(r"[^\x20-\x7e]", " ", subject)
# replace two consecutive spaces with one
subject = re.sub(r" {2}", " ", subject).strip()
# remove tags from the beginning of the subject
tags = ['PATCH']
for tag in tags:
# subject might begin with "[tag xxxxx]"; remove it
if subject.startswith(f"[{tag}"):
subject = subject[subject.find("]") + 1:].strip()
prefixes = ['FROMLIST(v1): ']
for prefix in prefixes:
if subject.startswith(prefix):
subject = subject[len(prefix):].strip()
return subject
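# Illustrative examples (hypothetical subjects):
#   "[PATCH v2 3/7] clk: sunxi: fix\n foo" -> "clk: sunxi: fix foo"
#   "FROMLIST(v1): drm: bridge: add thing" -> "drm: bridge: add thing"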
# This is definitely not the right way to do this, but it works for now.
def prepare_clean_git_tree_for_patching(repo: git.Repo, revision_sha: str, branch_name: str):
# Let's find the Commit object for the revision_sha
log.debug("Resetting git tree to revision '%s'", revision_sha)
commit = repo.commit(revision_sha)
# Let's check out that Commit, with a detached HEAD
repo.head.reference = commit
repo.head.reset(index=True, working_tree=True)
# Let's create a new branch and check it out, discarding any existing branch
log.debug("Creating branch '%s'", branch_name)
repo.create_head(branch_name, revision_sha, force=True)
repo.head.reference = repo.heads[branch_name]
repo.head.reset(index=True, working_tree=True)
# Let's remove all the untracked, but not ignored, files from the working copy
for file in repo.untracked_files:
full_name = os.path.join(repo.working_tree_dir, file)
log.debug(f"Removing untracked file '{file}'")
os.remove(full_name)
def export_commit_as_patch(repo: git.Repo, commit: str):
# Export the commit as a patch
proc = subprocess.run([
"git", "format-patch",
"--unified=3", # force 3 lines of diff context
"--keep-subject", # do not add a prefix to the subject "[PATCH] "
# "--add-header=Organization: Armbian", # add a header to the patch (ugly, changes the header)
"--no-encode-email-headers", # do not encode email headers
# "--signature=66666" # add a signature; this does not work and causes patch to not be emitted.
'--signature', "Armbian",
'--zero-commit', # do not use the git revision, instead 000000...0000
'--stat=120', # 'wider' stat output; default is 80
'--stat-graph-width=10', # shorten the diffgraph graph part, it's too long
"-1", "--stdout", commit
],
cwd=repo.working_tree_dir,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
check=False)
# read the output of the patch command
stdout_output = proc.stdout.decode("utf-8")
stderr_output = proc.stderr.decode("utf-8")
# Check if the exit code is not zero and bomb
if proc.returncode != 0:
raise Exception(f"Failed to export commit {commit} to patch: {stderr_output}")
if stdout_output == "":
raise Exception(f"Failed to export commit {commit} to patch: no output")
return stdout_output
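# Roughly the equivalent CLI invocation (illustrative):
#   git format-patch --unified=3 --keep-subject --no-encode-email-headers \
#     --signature Armbian --zero-commit --stat=120 --stat-graph-width=10 -1 --stdout <commit>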
# Hack
def downgrade_to_ascii(utf8: str) -> str:
return unidecode(utf8)
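# Illustrative example: downgrade_to_ascii("Pecovník") -> "Pecovnik" (unidecode
# transliterates non-ASCII characters to their closest ASCII equivalents).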
def remove_quotes(utf8: str) -> str:
return utf8.replace('"', '')
# Try hard to read a possibly invalid utf-8 file
def read_file_as_utf8(file_name: str) -> tuple[str, list[str]]:
with open(file_name, "rb") as f:
content = f.read() # Read the file as bytes
try:
return content.decode("utf-8"), [] # no problems if this worked
except UnicodeDecodeError as ude:
log.warning(f"File '{file_name}' is not valid utf-8, trying to fix it...: '{ude}'")
# If decoding failed, try to decode as iso-8859-1 # @TODO: or big5?
return content.decode("iso-8859-1"), ["invalid_utf8"] # utf-8 problems
def read_file_as_bytes(file_name: str) -> bytes:
with open(file_name, "rb") as f:
return f.read() # Read the file as bytes
# Extremely Armbian-specific.
def perform_git_archeology(
base_armbian_src_dir: str, armbian_git_repo: git.Repo, patch: PatchInPatchFile,
bad_archeology_hexshas: list[str], fast: bool) -> bool:
log.info(f"Trying to recover description for {patch.parent.file_name}:{patch.counter}")
file_name_for_search = f"{patch.parent.file_name_no_ext_no_dirs}.patch"
patch_file_paths: list[str] = []
if fast:
patch_file_paths = [patch.parent.full_file_path()]
else:
# Use the UNIX find command to locate all files in the repo with the same name as the patch file.
proc = subprocess.run(
[
"find", base_armbian_src_dir,
"-name", file_name_for_search,
"-type", "f"
],
cwd=base_armbian_src_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
patch_file_paths = proc.stdout.decode("utf-8").splitlines()
log.info(f"Found {len(patch_file_paths)} files with name {file_name_for_search}")
all_commits: list = []
for found_file in patch_file_paths:
relative_file_path = os.path.relpath(found_file, base_armbian_src_dir)
hexshas = armbian_git_repo.git.log('--pretty=%H', '--follow', '--find-copies-harder', '--', relative_file_path) \
.split('\n')
log.info(f"- Trying to recover description for {relative_file_path} from {len(hexshas)} commits")
# filter out hexshas that are in the known-bad archeology list
hexshas = [hexsha for hexsha in hexshas if hexsha.strip() != '' and hexsha not in bad_archeology_hexshas]
log.debug(f"hexshas: {hexshas}")
commits = [armbian_git_repo.rev_parse(c) for c in hexshas]
all_commits.extend(commits)
unique_commits: list[git.Commit] = []
for commit in all_commits:
if commit not in unique_commits:
unique_commits.append(commit)
unique_commits.sort(key=lambda c: c.committed_datetime)
if len(unique_commits) == 0:
log.warning(f"Could not find any commits for '{file_name_for_search}'.")
return False
main_suspect: git.Commit = unique_commits[0]
log.info(f"- Main suspect: {main_suspect}: {main_suspect.message.rstrip()} Author: {main_suspect.author}")
# From the main_suspect, take the subject, the author, and the dates.
main_suspect_msg_lines = main_suspect.message.splitlines()
# strip each line
main_suspect_msg_lines = [line.strip() for line in main_suspect_msg_lines]
# remove empty lines
main_suspect_msg_lines = [line for line in main_suspect_msg_lines if line != ""]
main_suspect_subject = main_suspect_msg_lines[0].strip()
# remove the first line, which is the subject
suspect_desc_lines = main_suspect_msg_lines[1:]
# Now, build a description listing all the other suspects.
other_suspects_desc: list[str] = []
other_suspects_desc.extend(
[f"> recovered message: > {suspect_desc_line}" for suspect_desc_line in suspect_desc_lines])
other_suspects_desc.append("") # blank separator line
for commit in unique_commits:
subject = commit.message.splitlines()[0].strip()
rfc822_date = commit.committed_datetime.strftime("%a, %d %b %Y %H:%M:%S %z")
other_suspects_desc.extend([
f"- Revision {commit.hexsha}: https://github.com/armbian/build/commit/{commit.hexsha}",
f" Date: {rfc822_date}",
f" From: {commit.author.name} <{commit.author.email}>",
f" Subject: {subject}",
""
])
patch.desc = downgrade_to_ascii("\n".join([f"> X-Git-Archeology: {line}" for line in other_suspects_desc]))
if patch.subject is None:
patch.subject = downgrade_to_ascii("[ARCHEOLOGY] " + main_suspect_subject)
if patch.date is None:
patch.date = main_suspect.committed_datetime
if patch.from_name is None or patch.from_email is None:
patch.from_name, patch.from_email = downgrade_to_ascii(
main_suspect.author.name), main_suspect.author.email
return True