#!/usr/bin/env python3
#
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (c) 2013-2023 Igor Pecovnik, igor@armbian.com
#
# This file is a part of the Armbian Build Framework
# https://github.com/armbian/build/
#
import email.utils
import logging
import mailbox
import os
import re
import subprocess
import tempfile
import git # GitPython
from unidecode import unidecode
from unidiff import PatchSet
from common.patching_config import PatchingConfig
MAGIC_MBOX_MARKER_STANDARD = "Mon Sep 17 00:00:00 2001"
MAGIC_MBOX_MARKER_B4 = "git@z Thu Jan 1 00:00:00 1970"
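# For reference, a hedged sketch of the separator lines these markers match (the
# hash below is illustrative, not from a real patch):
#   From 8f2e1c9b... Mon Sep 17 00:00:00 2001    <- standard 'git format-patch' output
#   From git@z Thu Jan 1 00:00:00 1970           <- b4/lore-style mbox separator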
REGEX_PATCH_FILENAMES = r"^patching file \"(.+)\""
log: logging.Logger = logging.getLogger("patching_utils")
class PatchRootDir:
def __init__(self, abs_dir, root_type, patch_type, root_dir):
self.abs_dir = abs_dir
self.root_type = root_type
self.patch_type = patch_type
self.root_dir = root_dir
class PatchSubDir:
def __init__(self, rel_dir, sub_type):
self.rel_dir = rel_dir
self.sub_type = sub_type
class PatchDir:
def __init__(self, patch_root_dir: PatchRootDir, patch_sub_dir: PatchSubDir, abs_root_dir: str):
self.patch_root_dir: PatchRootDir = patch_root_dir
self.patch_sub_dir: PatchSubDir = patch_sub_dir
self.full_dir = os.path.realpath(os.path.join(self.patch_root_dir.abs_dir, self.patch_sub_dir.rel_dir))
self.rel_dir = os.path.relpath(self.full_dir, abs_root_dir)
self.root_type = self.patch_root_dir.root_type
self.sub_type = self.patch_sub_dir.sub_type
self.patch_files: list[PatchFileInDir] = []
self.is_autogen_dir: bool = False
def __str__(self) -> str:
return "<PatchDir: full_dir:'" + str(self.full_dir) + "'>"
def find_series_patch_files(self) -> list["PatchFileInDir"]:
		# do nothing if self.full_dir is not a real, existing directory
if not os.path.isdir(self.full_dir):
return []
# If the directory contains a series.conf file.
series_patches: list[PatchFileInDir] = []
series_conf_path = os.path.join(self.full_dir, "series.conf")
if os.path.isfile(series_conf_path):
counter = 0
patches_in_series = self.parse_series_conf(series_conf_path)
for patch_file_name in patches_in_series:
patch_file_path = os.path.join(self.full_dir, patch_file_name)
if os.path.isfile(patch_file_path):
counter += 1
patch_file = PatchFileInDir(patch_file_path, self)
patch_file.from_series = True
patch_file.series_counter = counter
# Fix basename for patches in series.conf
relative_path = os.path.relpath(patch_file_path, self.full_dir)
patch_file.relative_dirs_and_base_file_name = os.path.splitext(relative_path)[0]
series_patches.append(patch_file)
else:
raise Exception(
f"series.conf file {series_conf_path} contains a patch file {patch_file_name} that does not exist")
return series_patches
def find_files_patch_files(self) -> list["PatchFileInDir"]:
		# do nothing if self.full_dir is not a real, existing directory
if not os.path.isdir(self.full_dir):
return []
# Find the files in self.full_dir that end in .patch; do not consider subdirectories.
# Add them to self.patch_files.
for file in os.listdir(self.full_dir):
# noinspection PyTypeChecker
if file.endswith(".patch"):
self.patch_files.append(PatchFileInDir(file, self))
return self.patch_files
@staticmethod
def parse_series_conf(series_conf_path):
patches_in_series = []
with open(series_conf_path, "r") as series_conf_file:
for line in series_conf_file:
line = line.strip()
if line.startswith("#"):
continue
# if line begins with "-", skip it
if line.startswith("-"):
continue
if line == "":
continue
patches_in_series.append(line)
return patches_in_series
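# A minimal, self-contained sketch of how parse_series_conf() behaves; the sample
# series.conf contents below are illustrative, and this helper is hypothetical,
# never called by the framework itself:
def _sketch_parse_series_conf():
	sample = "# comments are skipped\n-disabled.patch\nfirst.patch\n\nsecond.patch\n"
	with tempfile.NamedTemporaryFile("w", suffix=".conf", delete=False) as f:
		f.write(sample)
	try:
		# "-"-prefixed and empty lines are dropped, order is preserved
		assert PatchDir.parse_series_conf(f.name) == ["first.patch", "second.patch"]
	finally:
		os.remove(f.name)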
class PatchFileInDir:
def __init__(self, file_name, patch_dir: PatchDir):
self.file_name = file_name
self.patch_dir: PatchDir = patch_dir
self.relative_dirs_and_base_file_name = os.path.splitext(self.file_name)[0]
self.file_name_no_ext_no_dirs = os.path.basename(self.relative_dirs_and_base_file_name)
self.from_series = False
self.series_counter = None
def __str__(self) -> str:
desc: str = f"<PatchFileInDir: file_name:'{self.file_name}', dir:{self.patch_dir.__str__()} >"
return desc
def full_file_path(self):
return os.path.join(self.patch_dir.full_dir, self.file_name)
def relative_to_src_filepath(self):
return os.path.join(self.patch_dir.rel_dir, self.file_name)
def split_patches_from_file(self) -> list["PatchInPatchFile"]:
# Hack: for autogen dirs, we just need to be as fast as possible, don't parse anything.
if self.patch_dir.is_autogen_dir:
contents_bytes = read_file_as_bytes(self.full_file_path())
# @TODO: date?
			bare_patch = PatchInPatchFile(
				self, 1, "", "Autogenerated patch",
				"Armbian Autopatcher <auto.patch@armbian.com>",
				f"[AUTOGEN] {self.relative_dirs_and_base_file_name}", None)
bare_patch.diff_bytes = contents_bytes
log.debug(f"Patch file {self.full_file_path()} is autogenerated.")
return [bare_patch]
counter: int = 1
mbox: mailbox.mbox = mailbox.mbox(self.full_file_path())
is_invalid_mbox: bool = False
# Sanity check: if the file is understood as mailbox, make sure the first line is a valid "From " line,
# and has the magic marker 'Mon Sep 17 00:00:00 2001' in it; otherwise, it could be a combined
# bare patch + mbox-formatted patch in a single file, and we'd lose the bare patch.
if len(mbox) > 0:
contents, contents_read_problems = read_file_as_utf8(self.full_file_path())
first_line = contents.splitlines()[0].strip()
if not first_line.startswith("From ") or ((MAGIC_MBOX_MARKER_STANDARD not in first_line) and (MAGIC_MBOX_MARKER_B4 not in first_line)):
				# is_invalid_mbox = True  # we might try to recover from this if there are too many
				# log.error(
				raise Exception(
					f"File {self.full_file_path()} parses as a valid mbox file, but it begins with"
					f" '{first_line}'; in an mbox patch the first line should be a valid 'From ' separator"
					f" containing the magic date.")
# Obtain how many times the magic marker date string is present in the contents
magic_marker_count = contents.count(MAGIC_MBOX_MARKER_STANDARD) + contents.count(MAGIC_MBOX_MARKER_B4)
if magic_marker_count != len(mbox):
				# is_invalid_mbox = True  # we might try to recover from this if there are too many
				# log.error(
raise Exception(
f"File {self.full_file_path()} seems to be a valid mbox file, but it contains"
f" {magic_marker_count} magic marker dates, while the mbox file has been parsed as"
f" {len(mbox)} patches. Check the file for mbox formatting errors.")
		# if there are no emails, it's a diff-only patch file.
if is_invalid_mbox or len(mbox) == 0:
# read the file into a string; explicitly use utf-8 to not depend on the system locale
diff, read_problems = read_file_as_utf8(self.full_file_path())
bare_patch = PatchInPatchFile(self, counter, diff, None, None, None, None)
bare_patch.problems.append("not_mbox")
bare_patch.problems.extend(read_problems)
log.debug(f"Patch file {self.full_file_path()} is not properly mbox-formatted.")
return [bare_patch]
# loop over the emails in the mbox
patches: list[PatchInPatchFile] = []
msg: mailbox.mboxMessage
for msg in mbox:
problems: list[str] = []
patch: str
try:
patch = msg.get_payload(decode=True).decode("utf-8")
except UnicodeDecodeError as e:
log.warning(f"Invalid UTF-8 mbox payload in file {self.full_file_path()}(:{counter}): {e}")
problems.append("invalid_utf8_mbox")
patch = msg.get_payload() # this will mangle valid utf-8; go ahead and use that anyway
# split the patch itself and the description from the payload
desc, patch_contents = self.split_description_and_patch(patch)
if len(patch_contents) == 0:
log.warning(f"WARNING: patch file {self.full_file_path()} fragment {counter} contains an empty patch")
continue
			# Sanity check: if patch_contents still contains the magic marker, something is _very_ wrong,
			# and we would silently swallow a patch.
			if (MAGIC_MBOX_MARKER_STANDARD in patch_contents) or (MAGIC_MBOX_MARKER_B4 in patch_contents):
				raise Exception(
					f"File {self.full_file_path()} fragment {counter} parses as a valid mbox file, but the"
					f" magic date ended up inside the patch contents; this shouldn't happen."
					f" Check the mbox formatting.")
patches.append(PatchInPatchFile(
self, counter, patch_contents, desc, msg['From'], msg['Subject'], msg['Date']))
counter += 1
# sanity check, throw exception if there are no patches
if len(patches) == 0:
raise Exception("No valid patches found in file " + self.full_file_path())
return patches
@staticmethod
def split_description_and_patch(full_message_text: str) -> tuple["str | None", str]:
separator = "\n---\n"
# check if the separator is in the patch, if so, split
if separator in full_message_text:
# find the _last_ occurrence of the separator, and split two chunks from that position
separator_pos = full_message_text.rfind(separator)
desc = full_message_text[:separator_pos]
patch = full_message_text[separator_pos + len(separator):]
return desc, patch
else: # no separator, so no description, patch is the full message
desc = None
patch = full_message_text
return desc, patch
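	# Illustration (hedged; the payload is made up): for a message body like
	#   "Commit description here\n---\ndiff --git a/file b/file\n..."
	# this returns ("Commit description here", "diff --git a/file b/file\n...").
	# Splitting on the *last* "\n---\n" keeps descriptions that themselves
	# contain a "---" line intact.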
def rewrite_patch_file(self, patches: list["PatchInPatchFile"]):
# Produce a mailbox file from the patches.
# The patches are assumed to be in the same order as they were in the original file.
# The original file is overwritten.
output_file = self.full_file_path()
log.info(f"Rewriting {output_file} with new patches...")
with open(output_file, "w") as f:
for patch in patches:
log.info(f"Writing patch {patch.counter} to {output_file}...")
f.write(patch.rewritten_patch)
# Placeholder for future manual work
def shorten_patched_file_name_for_stats(path):
return os.path.basename(path)
def parse_patch_stdout_for_files(stdout_output: str):
# run the REGEX_PATCH_FILENAMES on the output; get the group 1 (the filename) for each match
# log.debug(f"Running regex on {stdout_output}")
ret: list[str] = re.findall(REGEX_PATCH_FILENAMES, stdout_output, re.MULTILINE)
# log.debug(f"Found {len(ret)} patched files in patch output: {','.join(ret)}.")
return ret # warning: this includes files the patch *deleted* too
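# A tiny sketch of what the regex above extracts; the stdout sample mimics GNU
# patch output under --quoting-style=c (illustrative, not captured from a real
# run), and this helper is hypothetical, never called by the framework:
def _sketch_parse_patch_stdout():
	stdout = 'patching file "drivers/gpu/demo.c"\npatching file "Makefile"\n'
	assert parse_patch_stdout_for_files(stdout) == ["drivers/gpu/demo.c", "Makefile"]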
class PatchInPatchFile:
def __init__(self, parent: PatchFileInDir, counter: int, diff: str, desc, from_hdr, sbj_hdr, date_hdr):
self.problems: list[str] = []
self.applied_ok: bool = False
self.rewritten_patch: str | None = None
self.git_commit_hash: str | None = None
self.actually_patched_files: list[str] = []
self.parent: PatchFileInDir = parent
self.counter: int = counter
self.diff: str | None = diff
self.diff_bytes: bytes | None = None
self.failed_to_parse: bool = False
# Basic parsing of properly mbox-formatted patches
		self.desc: str | None = downgrade_to_ascii(desc) if desc is not None else None
self.from_name, self.from_email = self.parse_from_name_email(from_hdr) if from_hdr is not None else (
None, None)
		self.subject: str | None = downgrade_to_ascii(fix_patch_subject(sbj_hdr)) if sbj_hdr is not None else None
self.date = email.utils.parsedate_to_datetime(date_hdr) if date_hdr is not None else None
self.patched_file_stats_dict: dict = {}
self.total_additions: int = 0
self.total_deletions: int = 0
self.files_modified: int = 0
self.files_added: int = 0
self.files_renamed: int = 0
self.files_removed: int = 0
self.created_file_names = []
self.deleted_file_names = []
self.renamed_file_names_source = [] # The original file names of renamed files
self.all_file_names_touched = []
self.rejects: str | None = None
self.patch_output: str | None = None
def parse_from_name_email(self, from_str: str) -> tuple["str | None", "str | None"]:
m = re.match(r'(?P<name>.*)\s*<\s*(?P<email>.*)\s*>', from_str)
if m is None:
self.problems.append("invalid_author")
log.debug(
f"Failed to parse name and email from: '{from_str}' while parsing patch {self.counter} in file {self.parent.full_file_path()}")
return downgrade_to_ascii(remove_quotes(from_str)), "unknown-email@domain.tld"
else:
# Return the name and email
return downgrade_to_ascii(remove_quotes(m.group("name"))), remove_quotes(m.group("email"))
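	# Example (hedged, illustrative input): '"Jane Doe" <jane@example.com>' parses
	# into name 'Jane Doe' (double quotes removed; whitespace is trimmed later by
	# text_author()) and email 'jane@example.com'. Inputs without an '<email>'
	# part are flagged as an "invalid_author" problem and get the fallback email
	# 'unknown-email@domain.tld'.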
def one_line_patch_stats(self) -> str:
if (not self.failed_to_parse) and (not self.parent.patch_dir.is_autogen_dir):
files_desc = ", ".join(self.patched_file_stats_dict)
return f"{self.text_diffstats()} {{{files_desc}}}"
return ""
def text_diffstats(self) -> str:
operations: list[str] = []
		if self.files_modified > 0:
			operations.append(f"{self.files_modified}M")
		if self.files_added > 0:
			operations.append(f"{self.files_added}A")
		if self.files_removed > 0:
			operations.append(f"{self.files_removed}D")
		if self.files_renamed > 0:
			operations.append(f"{self.files_renamed}R")
return f"(+{self.total_additions}/-{self.total_deletions})[{', '.join(operations)}]"
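	# Sample rendering (illustrative): a patch adding 10 lines across 2 modified
	# files plus 1 new file would come out as "(+10/-0)[2M, 1A]".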
def parse_patch(self):
# Hack: don't parse if autogenned; this could also be "don't parse if larger than X megabytes" since
# large patches cause trouble
if self.parent.patch_dir.is_autogen_dir:
log.debug(
f"Skipping parsing of auto-generated patch {self.counter} in file {self.parent.full_file_path()}")
return
else:
# parse the patch, using the unidiff package
try:
patch = PatchSet(self.diff, encoding=None)
except Exception as e:
self.problems.append("invalid_diff")
self.failed_to_parse = True
log.warning(f"Failed to parse unidiff for file {self.parent.full_file_path()}(:{self.counter}): '{str(e).strip()}'")
return # no point in continuing; the patch is invalid; might be recovered during apply
self.total_additions = 0
self.total_deletions = 0
self.files_renamed = 0
self.files_modified = len(patch.modified_files)
self.files_added = len(patch.added_files)
self.files_removed = len(patch.removed_files)
self.created_file_names = [f.path for f in patch.added_files]
self.deleted_file_names = [f.path for f in patch.removed_files]
self.renamed_file_names_source = [] # computed below
self.all_file_names_touched = \
[f.path for f in patch.added_files] + \
[f.path for f in patch.modified_files] + \
[f.path for f in patch.removed_files]
self.patched_file_stats_dict = {}
for f in patch:
if not f.is_binary_file:
self.total_additions += f.added
self.total_deletions += f.removed
self.patched_file_stats_dict[shorten_patched_file_name_for_stats(f.path)] = {
"abs_changed_lines": f.added + f.removed}
				if f.is_rename:
					self.files_renamed += 1
					sans_prefix = f.source_file[2:] if f.source_file.startswith("a/") else f.source_file
					self.renamed_file_names_source.append(sans_prefix)
# sort the self.patched_file_stats_dict by the abs_changed_lines, descending
self.patched_file_stats_dict = dict(sorted(
self.patched_file_stats_dict.items(),
key=lambda item: item[1]["abs_changed_lines"],
reverse=True))
# sanity check; if all the values are zeroes, throw an exception
if self.total_additions == 0 and self.total_deletions == 0 and \
self.files_modified == 0 and self.files_added == 0 and self.files_removed == 0:
self.problems.append("diff_has_no_changes")
raise Exception(
f"Patch file {self.parent.full_file_path()} has no changes. diff is {len(self.diff)} bytes: '{self.diff}'")
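		# For reference, a hedged sketch of the unidiff API used above (the diff
		# text is made up; the attribute names are from the unidiff package):
		#   ps = PatchSet("--- a/f\n+++ b/f\n@@ -1 +1 @@\n-old\n+new\n")
		#   ps.modified_files[0].path   -> "f"
		#   ps[0].added, ps[0].removed  -> 1, 1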
def __str__(self) -> str:
return self.str_oneline_around("->", "<-")
def str_oneline_around(self, prefix, suffix):
# extra_email = f"{self.from_email}" if self.from_email is not None else ""
# extra_subject = f":'{self.subject}'" if self.subject is not None else ""
# extra_author = f":{extra_email}{extra_subject}"
desc: str = \
f"{prefix}{self.parent.relative_dirs_and_base_file_name}(:{self.counter}) " + \
f"{self.one_line_patch_stats()}{suffix}"
return desc
def apply_patch(self, working_dir: str, options: dict[str, bool]):
# Sanity check: if patch would create files, make sure they don't exist to begin with.
# This avoids patches being able to overwrite the mainline.
for would_be_created_file in self.created_file_names:
full_path = os.path.join(working_dir, would_be_created_file)
if os.path.exists(full_path):
self.problems.append("overwrites")
if options["allow_recreate_existing_files"]:
log.debug(f"Tolerating recreation of {would_be_created_file} in {self} as instructed.")
os.remove(full_path)
else:
log.warning(
f"File {would_be_created_file} already exists, but patch {self} would re-create it.")
# Use the 'patch' utility to apply the patch.
if self.diff_bytes is None:
real_input = self.diff.encode("utf-8")
else:
real_input = self.diff_bytes
# create a temporary filename (don't create the file yet: patch will maybe create it)
rejects_file = tempfile.mktemp()
# log.debug(f"Rejects file is going to be '{rejects_file}'...")
proc = subprocess.run(
["patch", "--batch", "-p1", "-N", f"--reject-file={rejects_file}", "--quoting-style=c"],
cwd=working_dir,
input=real_input,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
check=False)
# read the output of the patch command
stdout_output = proc.stdout.decode("utf-8").strip()
stderr_output = proc.stderr.decode("utf-8").strip()
# if stdout_output != "":
# log.debug(f"patch stdout: {stdout_output}")
# if stderr_output != "":
# log.warning(f"patch stderr: {stderr_output}")
# Check if the rejects exists:
if os.path.exists(rejects_file):
log.debug(f"Rejects file {rejects_file} exists.")
# Show its contents
with open(rejects_file, "r") as f:
reject_contents = f.read()
self.rejects = reject_contents
# delete it
os.remove(rejects_file)
# Look at stdout. If it contains:
if " (offset" in stdout_output or " with fuzz " in stdout_output:
log.debug(f"Patch {self} needs rebase: offset/fuzz used during apply.")
self.problems.append("needs_rebase")
if "can't find file to patch at input line" in stdout_output:
log.warning(f"Patch {self} needs fixing: can't find file to patch.")
self.problems.append("missing_file")
# parse the stdout output for the files actually patched.
if options["set_patch_date"]:
self.actually_patched_files = parse_patch_stdout_for_files(stdout_output)
self.apply_patch_date_to_files(working_dir, options)
# Store the stdout and stderr output
patch_output = ""
patch_output += f"{stdout_output}\n" if stdout_output != "" else ""
patch_output += f"{stderr_output}\n" if stderr_output != "" else ""
self.patch_output = f"{patch_output}"
# Check if the exit code is not zero and bomb
if proc.returncode != 0:
self.problems.append("failed_apply")
raise Exception(f"Failed to apply patch {self.parent.full_file_path()}")
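	# The subprocess call above is roughly equivalent to running, from the working
	# tree (sketch; the reject-file path is a temp name chosen at runtime):
	#   patch --batch -p1 -N --reject-file=/tmp/XXXXXX --quoting-style=c < one.patch
	# where -N (--forward) ignores already-applied patches instead of reversing them.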
def commit_changes_to_git(self, repo: git.Repo, add_rebase_tags: bool, split_patches: bool, pconfig: PatchingConfig):
log.info(f"Committing changes to git: {self.parent.relative_dirs_and_base_file_name}")
# add all the files that were touched by the patch
# if the patch failed to parse, this will be an empty list, so we'll just add all changes.
add_all_changes_in_git = False
if (not self.failed_to_parse) and (not self.parent.patch_dir.is_autogen_dir):
# sanity check.
if len(self.all_file_names_touched) == 0:
raise Exception(
f"Patch {self} has no files touched, but is not marked as failed to parse.")
# add all files to git staging area
all_files_to_add: list[str] = []
for file_name in self.all_file_names_touched:
is_delete = False
# Check if deleted, don't complain if so.
if file_name in self.deleted_file_names:
is_delete = True
log.info(f"Adding file {file_name} to git")
full_path = os.path.join(repo.working_tree_dir, file_name)
if (not os.path.exists(full_path)) and (not is_delete):
self.problems.append("wrong_strip_level")
log.error(f"File '{full_path}' does not exist, but is touched by {self}")
add_all_changes_in_git = True
else:
all_files_to_add.append(file_name)
# Also add all source (pre-rename) files that were renamed, sans-checking, since they won't exist.
for file_name in self.renamed_file_names_source:
				# But the file has to be at least inside the repo; it's not a real rename if it points outside (e.g. bad /dev/null patches)
if file_name.startswith("/"):
log.warning(f"File {file_name} claims to be a renamed source file, but is outside the repo.")
continue
log.info(f"Adding file {file_name} (rename/source) to git")
all_files_to_add.append(file_name)
if split_patches:
return self.commit_changes_to_git_grouped(all_files_to_add, repo)
if not add_all_changes_in_git:
			log.debug(f"Adding (pre-config) {len(all_files_to_add)} files to git: {' '.join(all_files_to_add)}")
do_not_commit_files = pconfig.patches_to_git_config.do_not_commit_files # from config
do_not_commit_regexes = pconfig.patches_to_git_config.do_not_commit_regexes # from config
final_files_to_add = [f for f in all_files_to_add if f not in do_not_commit_files]
final_files_to_add = [f for f in final_files_to_add if not any(re.match(r, f) for r in do_not_commit_regexes)]
log.debug(f"Adding (post-config) {len(final_files_to_add)} files to git: {' '.join(final_files_to_add)}")
repo.git.add("-f", final_files_to_add)
if self.failed_to_parse or self.parent.patch_dir.is_autogen_dir or add_all_changes_in_git:
log.warning(f"Rescue: adding all changed files to git for {self}")
repo.git.add(repo.working_tree_dir)
desc_no_none = self.desc if self.desc is not None else ""
commit_message = f"{self.subject}\n\n{desc_no_none}"
if add_rebase_tags:
			commit_message = f"{self.parent.relative_dirs_and_base_file_name}(:{self.counter})\n\nOriginal-Subject: {self.subject}\n{desc_no_none}"
commit_message = f"{commit_message}\n{self.patch_rebase_tags_desc()}"
author: git.Actor = git.Actor(self.from_name, self.from_email)
committer: git.Actor = git.Actor("Armbian AutoPatcher", "patching@armbian.com")
commit = repo.index.commit(
message=commit_message,
author=author,
committer=committer,
author_date=self.date,
commit_date=self.date,
skip_hooks=True
)
log.info(f"Committed changes to git: {commit.hexsha}")
# Make sure the commit is not empty
if commit.stats.total["files"] == 0:
self.problems.append("empty_commit")
raise Exception(
f"Commit {commit.hexsha} ended up empty; source patch is {self} at {self.parent.full_file_path()}(:{self.counter})")
return {"commit_hash": commit.hexsha, "patch": self}
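	# Minimal sketch of the GitPython calls used above (the repo path and file
	# names are illustrative):
	#   repo = git.Repo("/path/to/kernel")
	#   repo.git.add("-f", ["file1", "file2"])
	#   commit = repo.index.commit(message="subject\n\nbody", author=git.Actor("Name", "a@b.c"))
	#   commit.hexsha, commit.stats.total["files"]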
def commit_changes_to_git_grouped(self, all_files_to_add: list[str], repo: git.Repo):
all_commits = []
prefix = "Feiteng "
grouped_files = {}
# group files by directory
for file_name in all_files_to_add:
dir_name = os.path.dirname(file_name)
if dir_name not in grouped_files:
grouped_files[dir_name] = []
grouped_files[dir_name].append(file_name)
for group_name, files in grouped_files.items():
for one_file in files:
repo.git.add(one_file)
commit_message = f"{prefix}{group_name}\n\n{prefix}{group_name}"
author: git.Actor = git.Actor("Ricardo Pardini", "ricardo@pardini.net")
commit = repo.index.commit(
message=commit_message,
author=author,
committer=author,
author_date=self.date,
commit_date=self.date,
skip_hooks=True
)
log.info(f"Committed changes to git: {commit.hexsha}")
# Make sure the commit is not empty
if commit.stats.total["files"] == 0:
self.problems.append("empty_commit")
raise Exception(
f"Commit {commit.hexsha} ended up empty; source patch is {self} at {self.parent.full_file_path()}(:{self.counter})")
all_commits.append({"commit_hash": commit.hexsha, "patch": self})
return all_commits
def patch_rebase_tags_desc(self):
tags = {}
tags["Patch-File"] = self.parent.relative_dirs_and_base_file_name
tags["Patch-File-Counter"] = self.counter
tags["Patch-Rel-Directory"] = self.parent.patch_dir.rel_dir
tags["Patch-Type"] = self.parent.patch_dir.patch_root_dir.patch_type
tags["Patch-Root-Type"] = self.parent.patch_dir.root_type
tags["Patch-Sub-Type"] = self.parent.patch_dir.sub_type
if self.subject is not None:
tags["Original-Subject"] = self.subject
ret = ""
for k, v in tags.items():
ret += f"X-Armbian: {k}: {v}\n"
return ret
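	# Example rendering (values illustrative):
	#   X-Armbian: Patch-File: board-fix-thing
	#   X-Armbian: Patch-File-Counter: 1
	#   X-Armbian: Patch-Type: kernel
	#   X-Armbian: Original-Subject: drm: fix thing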
	def markdown_applied(self):
		# Placeholder: no marker is currently emitted, whether or not the patch applied.
		if self.applied_ok:
			return ""
		return ""
def markdown_problems(self):
ret = []
# if it's a patch in a series, add emoji
		if self.parent.from_series:
			ret.append(" 📜 ")
		if self.parent.patch_dir.is_autogen_dir:
			ret.append(" 🤖 ")
if len(self.problems) == 0:
ret.append("")
for problem in self.problems:
if problem in ["not_mbox", "needs_rebase"]:
# warning emoji
ret.append(f"⚠️`[{problem}]` ")
elif problem in ["autogen"]:
ret.append(f"`[{problem}]` ")
else:
ret.append(f"❌`[{problem}]` ")
# if it's a user patch, add smiley
if self.parent.patch_dir.patch_root_dir.root_type == "user":
ret.append(" 🫠`[user]` ")
return " ".join(ret)
def markdown_diffstat(self):
return f"`{self.text_diffstats()}`"
def text_files(self):
ret = []
max_files_shown = 15
file_names = list(self.patched_file_stats_dict.keys())
if len(file_names) == 0:
return "?"
for file_name in file_names[:max_files_shown]:
ret.append(f"{file_name}")
if len(file_names) > max_files_shown:
ret.append(f"and {len(file_names) - max_files_shown} more")
return ", ".join(ret)
def markdown_files(self):
ret = []
max_files_shown = 15
# Use the keys of the patch_file_stats_dict which is already sorted by the larger files
file_names = list(self.patched_file_stats_dict.keys())
# if no files were touched, just return an ?
if len(file_names) == 0:
return "`?`"
for file_name in file_names[:max_files_shown]:
ret.append(f"`{file_name}`")
if len(file_names) > max_files_shown:
ret.append(f"_and {len(file_names) - max_files_shown} more_")
return ", ".join(ret)
def text_author(self):
if self.from_name:
return f"{self.from_name.strip()}"
return "[no Author]"
def markdown_author(self):
if self.from_name:
return f"`{self.from_name.strip()}`"
return "`[no Author]`"
def markdown_subject(self):
if self.subject:
return f"_{self.subject}_"
return "`[no Subject]`"
def text_subject(self):
if self.subject:
return f"{self.subject}"
return "[no Subject]"
def markdown_link_to_patch(self):
if self.git_commit_hash is None:
return ""
return f"{self.git_commit_hash} "
def markdown_name(self, skip_markdown=False):
ret = []
escape = "`" if not skip_markdown else ""
patch_name = self.parent.relative_dirs_and_base_file_name
		# if the name includes slashes, split at the last slash: the first part is the directory, the second the file
if "/" in self.parent.relative_dirs_and_base_file_name:
dir_name, patch_name = self.parent.relative_dirs_and_base_file_name.rsplit("/", 1)
if dir_name is not None:
# get only the last part of the dir_name
dir_name = dir_name.split("/")[-1]
ret.append(f"{escape}[{dir_name}/]{escape}")
ret.append(f"{escape}{patch_name}{escape}")
return " ".join(ret)
def rich_name_status(self):
		color = "green"
		for problem in self.problems:
			if problem in ["not_mbox", "needs_rebase"]:
				if color != "red":
					color = "yellow"  # warnings never downgrade an earlier "red"
			else:
				color = "red"
# @TODO: once our ansi-haste supports it, use [link url=file://blaaa]
return f"[bold {color}]{self.markdown_name(skip_markdown=True)}"
def rich_patch_output(self):
ret = self.patch_output
color_tags = {
'green': ['Reversed (or previously applied) patch detected!'],
'yellow': ['with fuzz', 'offset ', ' hunks ignored', ' hunk ignored'],
'red': ['hunk FAILED', 'hunks FAILED']
}
		# use Rich markup tags to colorize the known phrases in the patch output
for color in color_tags:
for tag in color_tags[color]:
ret = ret.replace(tag, f"[bold {color}]{tag}[/bold {color}]")
return ret
def apply_patch_date_to_files(self, working_dir, options):
		# The mtime applied to the patched files is either:
		#  1) the date of the root Makefile, or
		#  2) the date of the patch file itself,
		# whichever of the two is newer.
patch_mtime = os.path.getmtime(self.parent.full_file_path())
makefile_mtime = options["root_makefile_date"]
final_mtime = makefile_mtime
if patch_mtime > makefile_mtime:
log.debug(f"Patch {self.parent.full_file_path()} is newer than root Makefile, using patch date")
final_mtime = patch_mtime
else:
log.debug(
f"Root Makefile is newer than patch '{self.parent.full_file_path()}', using Makefile date")
# Apply the date to all files that were touched by the patch
# If the patch parsed OK, avoid trying to touch files the patch deleted.
files_to_touch = self.actually_patched_files
if not self.failed_to_parse:
			# remove self.deleted_file_names from files_to_touch
files_to_touch = [f for f in files_to_touch if f not in self.deleted_file_names]
for file_name in files_to_touch:
# log.debug(f"Setting mtime of '{file_name}' to '{final_mtime}'.")
file_path = os.path.join(working_dir, file_name)
try:
os.utime(file_path, (final_mtime, final_mtime))
except FileNotFoundError:
log.warning(f"File '{file_path}' not found in patch {self}, can't set mtime.")
def fix_patch_subject(subject):
# replace newlines with one space
subject = re.sub(r"\s+", " ", subject.strip())
# replace every non-printable character with a space
subject = re.sub(r"[^\x20-\x7e]", " ", subject)
# replace two consecutive spaces with one
subject = re.sub(r" {2}", " ", subject).strip()
# remove tags from the beginning of the subject
tags = ['PATCH']
for tag in tags:
# subject might begin with "[tag xxxxx]"; remove it
if subject.startswith(f"[{tag}"):
subject = subject[subject.find("]") + 1:].strip()
prefixes = ['FROMLIST(v1): ']
for prefix in prefixes:
if subject.startswith(prefix):
subject = subject[len(prefix):].strip()
return subject
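# Example (illustrative): a subject of '[PATCH v2 3/7] drm: fix  thing' comes out
# as 'drm: fix thing' (whitespace collapsed, non-printables replaced, and the
# leading "[PATCH...]" tag removed).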
# This is definitely not the right way to do this, but it works for now.
def prepare_clean_git_tree_for_patching(repo: git.Repo, revision_sha: str, branch_name: str):
# Let's find the Commit object for the revision_sha
log.debug("Resetting git tree to revision '%s'", revision_sha)
commit = repo.commit(revision_sha)
# Lets checkout, detached HEAD, to that Commit
repo.head.reference = commit
repo.head.reset(index=True, working_tree=True)
# Let's create a new branch, and checkout to it, discarding any existing branch
log.debug("Creating branch '%s'", branch_name)
repo.create_head(branch_name, revision_sha, force=True)
repo.head.reference = repo.heads[branch_name]
repo.head.reset(index=True, working_tree=True)
# Let's remove all the untracked, but not ignored, files from the working copy
for file in repo.untracked_files:
full_name = os.path.join(repo.working_tree_dir, file)
log.debug(f"Removing untracked file '{file}'")
os.remove(full_name)
def export_commit_as_patch(repo: git.Repo, commit: str):
# Export the commit as a patch
proc = subprocess.run([
"git", "format-patch",
"--unified=3", # force 3 lines of diff context
"--keep-subject", # do not add a prefix to the subject "[PATCH] "
# "--add-header=Organization: Armbian", # add a header to the patch (ugly, changes the header)
"--no-encode-email-headers", # do not encode email headers
# "--signature=66666" # add a signature; this does not work and causes patch to not be emitted.
'--signature', "Armbian",
'--zero-commit', # do not use the git revision, instead 000000...0000
'--stat=120', # 'wider' stat output; default is 80
'--stat-graph-width=10', # shorten the diffgraph graph part, it's too long
"-1", "--stdout", commit
],
cwd=repo.working_tree_dir,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
check=False)
# read the output of the patch command
stdout_output = proc.stdout.decode("utf-8")
stderr_output = proc.stderr.decode("utf-8")
# Check if the exit code is not zero and bomb
if proc.returncode != 0:
raise Exception(f"Failed to export commit {commit} to patch: {stderr_output}")
if stdout_output == "":
raise Exception(f"Failed to export commit {commit} to patch: no output")
return stdout_output
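# Usage sketch (repo path and revision are illustrative):
#   repo = git.Repo("/path/to/kernel")
#   patch_text = export_commit_as_patch(repo, "HEAD")
# which shells out to roughly:
#   git format-patch --unified=3 --keep-subject ... -1 --stdout HEAD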
# Hack
def downgrade_to_ascii(utf8: str) -> str:
return unidecode(utf8)
def remove_quotes(utf8: str) -> str:
return utf8.replace('"', '')
# Try hard to read a possibly invalid utf-8 file
def read_file_as_utf8(file_name: str) -> tuple[str, list[str]]:
with open(file_name, "rb") as f:
content = f.read() # Read the file as bytes
try:
return content.decode("utf-8"), [] # no problems if this worked
except UnicodeDecodeError as ude:
log.warning(f"File '{file_name}' is not valid utf-8, trying to fix it...: '{ude}'")
# If decoding failed, try to decode as iso-8859-1 # @TODO: or big5?
return content.decode("iso-8859-1"), ["invalid_utf8"] # utf-8 problems
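# Example (hedged): a file containing the latin-1 bytes b"caf\xe9" fails utf-8
# decoding and comes back as ("café", ["invalid_utf8"]) via the iso-8859-1
# fallback; valid utf-8 content returns with an empty problems list.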
def read_file_as_bytes(file_name: str) -> bytes:
with open(file_name, "rb") as f:
return f.read() # Read the file as bytes
# Extremely Armbian-specific.
def perform_git_archeology(
base_armbian_src_dir: str, armbian_git_repo: git.Repo, patch: PatchInPatchFile,
bad_archeology_hexshas: list[str], fast: bool) -> bool:
log.info(f"Trying to recover description for {patch.parent.file_name}:{patch.counter}")
file_name_for_search = f"{patch.parent.file_name_no_ext_no_dirs}.patch"
patch_file_paths: list[str] = []
if fast:
patch_file_paths = [patch.parent.full_file_path()]
else:
# Find all the files in the repo with the same name as the patch file.
# Use the UNIX find command to find all the files with the same name as the patch file.
proc = subprocess.run(
[
"find", base_armbian_src_dir,
"-name", file_name_for_search,
"-type", "f"
],
cwd=base_armbian_src_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
patch_file_paths = proc.stdout.decode("utf-8").splitlines()
log.info(f"Found {len(patch_file_paths)} files with name {file_name_for_search}")
all_commits: list = []
for found_file in patch_file_paths:
relative_file_path = os.path.relpath(found_file, base_armbian_src_dir)
hexshas = armbian_git_repo.git.log('--pretty=%H', '--follow', '--find-copies-harder', '--', relative_file_path) \
.split('\n')
log.info(f"- Trying to recover description for {relative_file_path} from {len(hexshas)} commits")
# filter out hexshas that are in the known-bad archeology list
hexshas = [hexsha for hexsha in hexshas if hexsha.strip() != '' and hexsha not in bad_archeology_hexshas]
log.debug(f"hexshas: {hexshas}")
commits = [armbian_git_repo.rev_parse(c) for c in hexshas]
all_commits.extend(commits)
unique_commits: list[git.Commit] = []
for commit in all_commits:
if commit not in unique_commits:
unique_commits.append(commit)
unique_commits.sort(key=lambda c: c.committed_datetime)
if len(unique_commits) == 0:
log.warning(f"Could not find any commits for '{file_name_for_search}'.")
return False
main_suspect: git.Commit = unique_commits[0]
log.info(f"- Main suspect: {main_suspect}: {main_suspect.message.rstrip()} Author: {main_suspect.author}")
# From the main_suspect, set the subject and the author, and the dates.
main_suspect_msg_lines = main_suspect.message.splitlines()
# strip each line
main_suspect_msg_lines = [line.strip() for line in main_suspect_msg_lines]
# remove empty lines
main_suspect_msg_lines = [line for line in main_suspect_msg_lines if line != ""]
main_suspect_subject = main_suspect_msg_lines[0].strip()
# remove the first line, which is the subject
suspect_desc_lines = main_suspect_msg_lines[1:]
# Now, create a list for all other non-main suspects.
other_suspects_desc: list[str] = []
other_suspects_desc.extend(
[f"> recovered message: > {suspect_desc_line}" for suspect_desc_line in suspect_desc_lines])
	other_suspects_desc.append("")  # add a blank separator line
for commit in unique_commits:
subject = commit.message.splitlines()[0].strip()
rfc822_date = commit.committed_datetime.strftime("%a, %d %b %Y %H:%M:%S %z")
other_suspects_desc.extend([
f"- Revision {commit.hexsha}: https://github.com/armbian/build/commit/{commit.hexsha}",
f" Date: {rfc822_date}",
f" From: {commit.author.name} <{commit.author.email}>",
f" Subject: {subject}",
""
])
patch.desc = downgrade_to_ascii("\n".join([f"> X-Git-Archeology: {line}" for line in other_suspects_desc]))
if patch.subject is None:
patch.subject = downgrade_to_ascii("[ARCHEOLOGY] " + main_suspect_subject)
if patch.date is None:
patch.date = main_suspect.committed_datetime
if patch.from_name is None or patch.from_email is None:
patch.from_name, patch.from_email = downgrade_to_ascii(
main_suspect.author.name), main_suspect.author.email
return True