From 27a1facd486dc178a1c2e597ccb006b7ccc30258 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Wed, 25 Mar 2020 16:34:18 +0100 Subject: [PATCH 1/7] Factor out list_files_to_merge No behavior changes. Signed-off-by: Gilles Peskine --- scripts/assemble_changelog.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/scripts/assemble_changelog.py b/scripts/assemble_changelog.py index a3f720167..adea8bd4b 100755 --- a/scripts/assemble_changelog.py +++ b/scripts/assemble_changelog.py @@ -260,6 +260,15 @@ def remove_merged_entries(files_to_remove): for filename in files_to_remove: os.remove(filename) +def list_files_to_merge(options): + """List the entry files to merge, oldest first. + + A file is considered older if it was merged earlier. See + `FileMergeTimestamp` for details. + """ + files_to_merge = glob.glob(os.path.join(options.dir, '*.md')) + return files_to_merge + def merge_entries(options): """Merge changelog entries into the changelog file. @@ -270,7 +279,7 @@ def merge_entries(options): """ with open(options.input, 'rb') as input_file: changelog = ChangeLog(input_file) - files_to_merge = glob.glob(os.path.join(options.dir, '*.md')) + files_to_merge = list_files_to_merge(options) if not files_to_merge: sys.stderr.write('There are no pending changelog entries.\n') return From 8f46bbf46f3387fa93dd69040d039ad5cdc1ed4b Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Wed, 25 Mar 2020 16:34:43 +0100 Subject: [PATCH 2/7] Sort changelog entries by their merge date Changelog entry files were listed in reverse alphabetical order of the file name, by happenstance. Now, changelog entry files are listed in the order in which the changes were merged. More precisely: look for the git commit where the entry file was created, and look where this commit was merged into the current branch. List older merges first. List never-merged commits in date order after all the merged ones. 
List never-committed files in file timestamp order after all the committed ones. Signed-off-by: Gilles Peskine --- scripts/assemble_changelog.py | 129 ++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/scripts/assemble_changelog.py b/scripts/assemble_changelog.py index adea8bd4b..928f73bf7 100755 --- a/scripts/assemble_changelog.py +++ b/scripts/assemble_changelog.py @@ -26,9 +26,12 @@ Remove the input files unless --keep-entries is specified. import argparse from collections import OrderedDict +import datetime +import functools import glob import os import re +import subprocess import sys class InputFormatError(Exception): @@ -219,6 +222,115 @@ class ChangeLog: for line in self.trailer: out.write(line) + +@functools.total_ordering +class FileMergeTimestamp: + """A timestamp indicating when a file was merged. + + If file1 was merged before file2, then + FileMergeTimestamp(file1) <= FileMergeTimestamp(file2). + """ + + # Categories of files. A lower number is considered older. + MERGED = 0 + COMMITTED = 1 + LOCAL = 2 + + @staticmethod + def creation_hash(filename): + """Return the git commit id at which the given file was created. + + Return None if the file was never checked into git. + """ + hashes = subprocess.check_output(['git', 'log', '--format=%H', '--', filename]) + if not hashes: + # The file was never checked in. + return None + hashes = hashes.rstrip(b'\n') + last_hash = hashes[hashes.rfind(b'\n')+1:] + return last_hash + + @staticmethod + def list_merges(some_hash, target, *options): + """List merge commits from some_hash to target. + + Pass options to git to select which commits are included. + """ + text = subprocess.check_output(['git', 'rev-list', + '--merges', *options, + b'..'.join([some_hash, target])]) + return text.rstrip(b'\n').split(b'\n') + + @classmethod + def merge_hash(cls, some_hash): + """Return the git commit id at which the given commit was merged. + + Return None if the given commit was never merged. 
+ """ + target = b'HEAD' + # List the merges from some_hash to the target in two ways. + # The ancestry list is the ones that are both descendants of + # some_hash and ancestors of the target. + ancestry = frozenset(cls.list_merges(some_hash, target, + '--ancestry-path')) + # The first_parents list only contains merges that are directly + # on the target branch. We want it in reverse order (oldest first). + first_parents = cls.list_merges(some_hash, target, + '--first-parent', '--reverse') + # Look for the oldest merge commit that's both on the direct path + # and directly on the target branch. That's the place where some_hash + # was merged on the target branch. See + # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit + for commit in first_parents: + if commit in ancestry: + return commit + return None + + @staticmethod + def commit_timestamp(commit_id): + """Return the timestamp of the given commit.""" + text = subprocess.check_output(['git', 'show', '-s', + '--format=%ct', + commit_id]) + return datetime.datetime.utcfromtimestamp(int(text)) + + @staticmethod + def file_timestamp(filename): + """Return the modification timestamp of the given file.""" + mtime = os.stat(filename).st_mtime + return datetime.datetime.fromtimestamp(mtime) + + def __init__(self, filename): + """Determine the timestamp at which the file was merged.""" + self.filename = filename + creation_hash = self.creation_hash(filename) + if not creation_hash: + self.category = self.LOCAL + self.datetime = self.file_timestamp(filename) + return + merge_hash = self.merge_hash(creation_hash) + if not merge_hash: + self.category = self.COMMITTED + self.datetime = self.commit_timestamp(creation_hash) + return + self.category = self.MERGED + self.datetime = self.commit_timestamp(merge_hash) + + def sort_key(self): + """"Return a sort key for this merge timestamp object. 
+ + ts1.sort_key() < ts2.sort_key() if and only if ts1 is + considered to be older than ts2. + """ + return (self.category, self.datetime, self.filename) + + def __eq__(self, other): + return self.sort_key() == other.sort_key() + + def __lt__(self, other): + return self.sort_key() < other.sort_key() + + def check_output(generated_output_file, main_input_file, merged_files): """Make sanity checks on the generated output. @@ -267,6 +379,7 @@ def list_files_to_merge(options): `FileMergeTimestamp` for details. """ files_to_merge = glob.glob(os.path.join(options.dir, '*.md')) + files_to_merge.sort(key=lambda f: FileMergeTimestamp(f).sort_key()) return files_to_merge def merge_entries(options): @@ -290,6 +403,16 @@ def merge_entries(options): if not options.keep_entries: remove_merged_entries(files_to_merge) +def show_file_timestamps(options): + """List the files to merge and their timestamp. + + This is only intended for debugging purposes. + """ + files = list_files_to_merge(options) + for filename in files: + ts = FileMergeTimestamp(filename) + print(ts.category, ts.datetime, filename) + def set_defaults(options): """Add default values for missing options.""" output_file = getattr(options, 'output', None) @@ -320,8 +443,14 @@ def main(): parser.add_argument('--output', '-o', metavar='FILE', help='Output changelog file' ' (default: overwrite the input)') + parser.add_argument('--list-files-only', + action='store_true', + help='Only list the files that would be processed (with some debugging information)') options = parser.parse_args() set_defaults(options) + if options.list_files_only: + show_file_timestamps(options) + return merge_entries(options) if __name__ == '__main__': From 28af958ea4299eea9e0e3a112f04fcf7e8fc7049 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 26 Mar 2020 22:39:18 +0100 Subject: [PATCH 3/7] Documentation improvements Document that git is needed. Be clearer about the entry sort key being an entry sort key, not just a merge order. 
Be clearer about what "merge order" means. Signed-off-by: Gilles Peskine --- scripts/assemble_changelog.py | 45 +++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/scripts/assemble_changelog.py b/scripts/assemble_changelog.py index 928f73bf7..ea91578c9 100755 --- a/scripts/assemble_changelog.py +++ b/scripts/assemble_changelog.py @@ -5,6 +5,17 @@ Add changelog entries to the first level-2 section. Create a new level-2 section for unreleased changes if needed. Remove the input files unless --keep-entries is specified. + +In each level-3 section, entries are sorted in chronological order +(oldest first). From oldest to newest: +* Merged entry files are sorted according to their merge date (date of + the merge commit that brought the commit that created the file into + the target branch). +* Committed but unmerged entry files are sorted according to the date + of the commit that adds them. +* Uncommitted entry files are sorted according to their modification time. + +You must run this program from within a git working directory. """ # Copyright (C) 2019, Arm Limited, All Rights Reserved @@ -224,11 +235,18 @@ class ChangeLog: @functools.total_ordering -class FileMergeTimestamp: - """A timestamp indicating when a file was merged. +class EntryFileSortKey: + """This class defines an ordering on changelog entry files: older < newer. - If file1 was merged before file2, then - FileMergeTimestamp(file1) <= FileMergeTimestamp(file2). + * Merged entry files are sorted according to their merge date (date of + the merge commit that brought the commit that created the file into + the target branch). + * Committed but unmerged entry files are sorted according to the date + of the commit that adds them. + * Uncommitted entry files are sorted according to their modification time. + + This class assumes that the file is in a git working directory with + the target branch checked out. """ # Categories of files. 
A lower number is considered older. @@ -301,7 +319,12 @@ class FileMergeTimestamp: return datetime.datetime.fromtimestamp(mtime) def __init__(self, filename): - """Determine the timestamp at which the file was merged.""" + """Determine position of the file in the changelog entry order. + + This constructor returns an object that can be used with comparison + operators, with `sort` and `sorted`, etc. Older entries are sorted + before newer entries. + """ self.filename = filename creation_hash = self.creation_hash(filename) if not creation_hash: @@ -317,10 +340,9 @@ class FileMergeTimestamp: self.datetime = self.commit_timestamp(merge_hash) def sort_key(self): - """"Return a sort key for this merge timestamp object. + """Return a concrete sort key for this entry file sort key object. - - ts1.sort_key() < ts2.sort_key() if and only if ts1 is - considered to be older than ts2. + ``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``. """ return (self.category, self.datetime, self.filename) @@ -375,11 +397,10 @@ def remove_merged_entries(files_to_remove): def list_files_to_merge(options): """List the entry files to merge, oldest first. - A file is considered older if it was merged earlier. See - `FileMergeTimestamp` for details. + "Oldest" is defined by `EntryFileSortKey`. 
""" files_to_merge = glob.glob(os.path.join(options.dir, '*.md')) - files_to_merge.sort(key=lambda f: FileMergeTimestamp(f).sort_key()) + files_to_merge.sort(key=lambda f: EntryFileSortKey(f).sort_key()) return files_to_merge def merge_entries(options): @@ -410,7 +431,7 @@ def show_file_timestamps(options): """ files = list_files_to_merge(options) for filename in files: - ts = FileMergeTimestamp(filename) + ts = EntryFileSortKey(filename) print(ts.category, ts.datetime, filename) def set_defaults(options): From 7fa3eb7d44bd01beaeff145bc1f00a5af37e2751 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 26 Mar 2020 22:41:32 +0100 Subject: [PATCH 4/7] Simplify sorting Calling sort_key explicitly is marginally faster, but less readable. Signed-off-by: Gilles Peskine --- scripts/assemble_changelog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/assemble_changelog.py b/scripts/assemble_changelog.py index ea91578c9..ebb056e4a 100755 --- a/scripts/assemble_changelog.py +++ b/scripts/assemble_changelog.py @@ -400,7 +400,7 @@ def list_files_to_merge(options): "Oldest" is defined by `EntryFileSortKey`. """ files_to_merge = glob.glob(os.path.join(options.dir, '*.md')) - files_to_merge.sort(key=lambda f: EntryFileSortKey(f).sort_key()) + files_to_merge.sort(key=EntryFileSortKey) return files_to_merge def merge_entries(options): From 13dc6342822bd4158f8dc4ed30bb2f88af2de356 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 26 Mar 2020 22:46:47 +0100 Subject: [PATCH 5/7] Simplify the matching of the last line Signed-off-by: Gilles Peskine --- scripts/assemble_changelog.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/scripts/assemble_changelog.py b/scripts/assemble_changelog.py index ebb056e4a..6eeea6124 100755 --- a/scripts/assemble_changelog.py +++ b/scripts/assemble_changelog.py @@ -261,12 +261,14 @@ class EntryFileSortKey: Return None if the file was never checked into git. 
""" hashes = subprocess.check_output(['git', 'log', '--format=%H', '--', filename]) - if not hashes: - # The file was never checked in. + m = re.search(b'(.+)$', hashes) + if not m: + # The git output is empty. This means that the file was + # never checked in. return None - hashes = hashes.rstrip(b'\n') - last_hash = hashes[hashes.rfind(b'\n')+1:] - return last_hash + # The last commit in the log is the oldest one, which is when the + # file was created. + return m.group(0) @staticmethod def list_merges(some_hash, target, *options): From 98a53aa3990a6a7730bb15d27e215d3163485ac2 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 26 Mar 2020 22:47:07 +0100 Subject: [PATCH 6/7] Follow renames when looking for the creation of a file Signed-off-by: Gilles Peskine --- scripts/assemble_changelog.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/assemble_changelog.py b/scripts/assemble_changelog.py index 6eeea6124..50e912881 100755 --- a/scripts/assemble_changelog.py +++ b/scripts/assemble_changelog.py @@ -260,7 +260,9 @@ class EntryFileSortKey: Return None if the file was never checked into git. """ - hashes = subprocess.check_output(['git', 'log', '--format=%H', '--', filename]) + hashes = subprocess.check_output(['git', 'log', '--format=%H', + '--follow', + '--', filename]) m = re.search(b'(.+)$', hashes) if not m: # The git output is empty. 
This means that the file was From 42f384c186db02c0a96b9a0ae5c95df6eae92726 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Fri, 27 Mar 2020 09:23:38 +0100 Subject: [PATCH 7/7] Mbed Crypto isn't a separate project anymore Signed-off-by: Gilles Peskine --- scripts/assemble_changelog.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/assemble_changelog.py b/scripts/assemble_changelog.py index 50e912881..d8a6e4c1a 100755 --- a/scripts/assemble_changelog.py +++ b/scripts/assemble_changelog.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -"""Assemble Mbed Crypto change log entries into the change log file. +"""Assemble Mbed TLS change log entries into the change log file. Add changelog entries to the first level-2 section. Create a new level-2 section for unreleased changes if needed. @@ -33,7 +33,7 @@ You must run this program from within a git working directory. # See the License for the specific language governing permissions and # limitations under the License. # -# This file is part of Mbed Crypto (https://tls.mbed.org) +# This file is part of Mbed TLS (https://tls.mbed.org) import argparse from collections import OrderedDict @@ -70,7 +70,7 @@ STANDARD_SECTIONS = ( ) class ChangeLog: - """An Mbed Crypto changelog. + """An Mbed TLS changelog. A changelog is a file in Markdown format. Each level 2 section title starts a version, and versions are sorted in reverse chronological