From c425822e17ca7f599644fde133a629a24677f070 Mon Sep 17 00:00:00 2001 From: nicoo Date: Fri, 13 Sep 2024 09:35:26 +0000 Subject: [PATCH 1/6] maintainers/scripts/sha-to-sri: format --- maintainers/scripts/sha-to-sri.py | 68 +++++++++++++++++-------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/maintainers/scripts/sha-to-sri.py b/maintainers/scripts/sha-to-sri.py index 1af7ff215ad3..657d5ac5d87f 100755 --- a/maintainers/scripts/sha-to-sri.py +++ b/maintainers/scripts/sha-to-sri.py @@ -26,11 +26,12 @@ class Encoding(ABC): assert len(digest) == self.n from base64 import b64encode + return f"{self.hashName}-{b64encode(digest).decode()}" @classmethod - def all(cls, h) -> 'List[Encoding]': - return [ c(h) for c in cls.__subclasses__() ] + def all(cls, h) -> "List[Encoding]": + return [c(h) for c in cls.__subclasses__()] def __init__(self, h): self.n = h.digest_size @@ -38,54 +39,57 @@ class Encoding(ABC): @property @abstractmethod - def length(self) -> int: - ... + def length(self) -> int: ... @property def regex(self) -> str: return f"[{self.alphabet}]{{{self.length}}}" @abstractmethod - def decode(self, s: str) -> bytes: - ... + def decode(self, s: str) -> bytes: ... class Nix32(Encoding): alphabet = "0123456789abcdfghijklmnpqrsvwxyz" - inverted = { c: i for i, c in enumerate(alphabet) } + inverted = {c: i for i, c in enumerate(alphabet)} @property def length(self): return 1 + (8 * self.n) // 5 + def decode(self, s: str): assert len(s) == self.length - out = [ 0 for _ in range(self.n) ] + out = [0 for _ in range(self.n)] # TODO: Do better than a list of byte-sized ints for n, c in enumerate(reversed(s)): digit = self.inverted[c] i, j = divmod(5 * n, 8) - out[i] = out[i] | (digit << j) & 0xff + out[i] = out[i] | (digit << j) & 0xFF rem = digit >> (8 - j) if rem == 0: continue elif i < self.n: - out[i+1] = rem + out[i + 1] = rem else: raise ValueError(f"Invalid nix32 hash: '{s}'") return bytes(out) + class Hex(Encoding): alphabet = "0-9A-Fa-f" @property def length(self): return 2 * self.n + def decode(self, s: str): from binascii import unhexlify + return unhexlify(s) + class Base64(Encoding): alphabet = "A-Za-z0-9+/" @@ -94,36 +98,39 @@ class Base64(Encoding): """Number of characters in data and padding.""" i, k = divmod(self.n, 3) return 4 * i + (0 if k == 0 else k + 1), (3 - k) % 3 + @property def length(self): return sum(self.format) + @property def regex(self): data, padding = self.format return f"[{self.alphabet}]{{{data}}}={{{padding}}}" + def decode(self, s): from base64 import b64decode + return b64decode(s, validate = True) -_HASHES = (hashlib.new(n) for n in ('SHA-256', 'SHA-512')) -ENCODINGS = { - h.name: Encoding.all(h) - for h in _HASHES -} +_HASHES = (hashlib.new(n) for n in ("SHA-256", "SHA-512")) +ENCODINGS = {h.name: Encoding.all(h) for h in _HASHES} RE = { h: "|".join( - (f"({h}-)?" if e.name == 'base64' else '') + - f"(?P<{h}_{e.name}>{e.regex})" + (f"({h}-)?" if e.name == "base64" else "") + f"(?P<{h}_{e.name}>{e.regex})" for e in encodings - ) for h, encodings in ENCODINGS.items() + ) + for h, encodings in ENCODINGS.items() } -_DEF_RE = re.compile("|".join( - f"(?P<{h}>{h} = (?P<{h}_quote>['\"])({re})(?P={h}_quote);)" - for h, re in RE.items() -)) +_DEF_RE = re.compile( + "|".join( + f"(?P<{h}>{h} = (?P<{h}_quote>['\"])({re})(?P={h}_quote);)" + for h, re in RE.items() + ) +) def defToSRI(s: str) -> str: @@ -153,7 +160,7 @@ def defToSRI(s: str) -> str: @contextmanager def atomicFileUpdate(target: Path): - '''Atomically replace the contents of a file. + """Atomically replace the contents of a file. Guarantees that no temporary files are left behind, and `target` is either left untouched, or overwritten with new content if no exception was raised. @@ -164,9 +171,10 @@ def atomicFileUpdate(target: Path): Upon exiting the context, the files are closed; if no exception was raised, `new` (atomically) replaces the `target`, otherwise it is deleted. - ''' + """ # That's mostly copied from noto-emoji.py, should DRY it out from tempfile import mkstemp + fd, _p = mkstemp( dir = target.parent, prefix = target.name, @@ -175,7 +183,7 @@ def atomicFileUpdate(target: Path): try: with target.open() as original: - with tmpPath.open('w') as new: + with tmpPath.open("w") as new: yield (original, new) tmpPath.replace(target) @@ -188,22 +196,20 @@ def atomicFileUpdate(target: Path): def fileToSRI(p: Path): with atomicFileUpdate(p) as (og, new): for i, line in enumerate(og): - with log_context(line=i): + with log_context(line = i): new.write(defToSRI(line)) -_SKIP_RE = re.compile( - "(generated by)|(do not edit)", - re.IGNORECASE -) +_SKIP_RE = re.compile("(generated by)|(do not edit)", re.IGNORECASE) if __name__ == "__main__": from sys import argv, stderr + logger.info("Starting!") for arg in argv[1:]: p = Path(arg) - with log_context(path=str(p)): + with log_context(path = str(p)): try: if p.name == "yarn.nix" or p.name.find("generated") != -1: logger.warning("File looks autogenerated, skipping!") From 915799a2b9e59a3b32d596a216893554f9c87f07 Mon Sep 17 00:00:00 2001 From: nicoo Date: Fri, 13 Sep 2024 08:00:12 +0000 Subject: [PATCH 2/6] maintainers/scripts/sha-to-sri: fix file-descriptor leak --- maintainers/scripts/sha-to-sri.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/maintainers/scripts/sha-to-sri.py b/maintainers/scripts/sha-to-sri.py index 657d5ac5d87f..881e03071913 100755 --- a/maintainers/scripts/sha-to-sri.py +++ b/maintainers/scripts/sha-to-sri.py @@ -173,17 +173,18 @@ def atomicFileUpdate(target: Path): raised, `new` (atomically) replaces the `target`, otherwise it is deleted. """ # That's mostly copied from noto-emoji.py, should DRY it out - from tempfile import mkstemp - - fd, _p = mkstemp( - dir = target.parent, - prefix = target.name, - ) - tmpPath = Path(_p) + from tempfile import NamedTemporaryFile try: with target.open() as original: - with tmpPath.open("w") as new: + with NamedTemporaryFile( + dir = target.parent, + prefix = target.stem, + suffix = target.suffix, + delete = False, + mode="w", # otherwise the file would be opened in binary mode by default + ) as new: + tmpPath = Path(new.name) yield (original, new) tmpPath.replace(target) From bf6b5f7f85e4d0b71392241f3824d880dd9414e5 Mon Sep 17 00:00:00 2001 From: nicoo Date: Fri, 13 Sep 2024 08:00:53 +0000 Subject: [PATCH 3/6] maintainers/scripts/sha-to-sri: accept directories as input Doesn't skip files passed as CLI arguments based on their name anymore, since bulk changes can now be done without resorting to `xargs` or equivalent. --- maintainers/scripts/sha-to-sri.py | 32 +++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/maintainers/scripts/sha-to-sri.py b/maintainers/scripts/sha-to-sri.py index 881e03071913..35ce2ca3389a 100755 --- a/maintainers/scripts/sha-to-sri.py +++ b/maintainers/scripts/sha-to-sri.py @@ -7,7 +7,7 @@ from pathlib import Path from structlog.contextvars import bound_contextvars as log_context from typing import ClassVar, List, Tuple -import hashlib, re, structlog +import hashlib, logging, re, structlog logger = structlog.getLogger("sha-to-SRI") @@ -208,24 +208,20 @@ if __name__ == "__main__": logger.info("Starting!") - for arg in argv[1:]: - p = Path(arg) - with log_context(path = str(p)): + def handleFile(p: Path, skipLevel = logging.INFO): + with log_context(file = str(p)): try: - if p.name == "yarn.nix" or p.name.find("generated") != -1: - logger.warning("File looks autogenerated, skipping!") - continue - with p.open() as f: for line in f: if line.strip(): break if _SKIP_RE.search(line): - logger.warning("File looks autogenerated, skipping!") - continue + logger.log(skipLevel, "File looks autogenerated, skipping!") + return fileToSRI(p) + except Exception as exn: logger.error( "Unhandled exception, skipping file!", @@ -233,3 +229,19 @@ if __name__ == "__main__": ) else: logger.info("Finished processing file") + + for arg in argv[1:]: + p = Path(arg) + with log_context(arg = arg): + if p.is_file(): + handleFile(p, skipLevel = logging.WARNING) + + elif p.is_dir(): + logger.info("Recursing into directory") + for q in p.glob("**/*.nix"): + if q.is_file(): + if q.name == "yarn.nix" or q.name.find("generated") != -1: + logger.info("File looks autogenerated, skipping!") + continue + + handleFile(q) From e079a279f4be68e45d47c2d424df6f314616ab5c Mon Sep 17 00:00:00 2001 From: nicoo Date: Fri, 13 Sep 2024 08:25:54 +0000 Subject: [PATCH 4/6] maintainers/scripts/sha-to-sri: drop unused imports --- maintainers/scripts/sha-to-sri.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/maintainers/scripts/sha-to-sri.py b/maintainers/scripts/sha-to-sri.py index 35ce2ca3389a..e31091242341 100755 --- a/maintainers/scripts/sha-to-sri.py +++ b/maintainers/scripts/sha-to-sri.py @@ -1,7 +1,7 @@ #!/usr/bin/env nix-shell #! nix-shell -i "python3 -I" -p "python3.withPackages(p: with p; [ rich structlog ])" -from abc import ABC, abstractclassmethod, abstractmethod +from abc import ABC, abstractmethod from contextlib import contextmanager from pathlib import Path from structlog.contextvars import bound_contextvars as log_context @@ -204,7 +204,7 @@ def fileToSRI(p: Path): _SKIP_RE = re.compile("(generated by)|(do not edit)", re.IGNORECASE) if __name__ == "__main__": - from sys import argv, stderr + from sys import argv logger.info("Starting!") From 011daf916106828a70a5adf3a55b393fffec4a11 Mon Sep 17 00:00:00 2001 From: nicoo Date: Fri, 13 Sep 2024 10:15:19 +0000 Subject: [PATCH 5/6] maintainers/scripts: document sha-to-sri --- maintainers/scripts/README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/maintainers/scripts/README.md b/maintainers/scripts/README.md index 2b99a4e75114..44a5fc9bc590 100644 --- a/maintainers/scripts/README.md +++ b/maintainers/scripts/README.md @@ -56,3 +56,16 @@ The maintainer is designated by a `selector` which must be one of: see [`maintainer-list.nix`] for the fields' definition. [`maintainer-list.nix`]: ../maintainer-list.nix + + +## Conventions + +### `sha-to-sri.py` + +`sha-to-sri.py path ...` (atomically) rewrites hash attributes (named `hash` or `sha(1|256|512)`) +into the SRI format: `hash = "{hash name}-{base64 encoded value}"`. + +`path` must point to either a nix file, or a directory which will be automatically traversed. + +`sha-to-sri.py` automatically skips files whose first non-empty line contains `generated by` or `do not edit`. +Moreover, when walking a directory tree, the script will skip files whose name is `yarn.nix` or contains `generated`. From 9259479c421ede2348d756f739b5690578ad4a38 Mon Sep 17 00:00:00 2001 From: nicoo Date: Fri, 13 Sep 2024 14:11:34 +0000 Subject: [PATCH 6/6] maintainers/scripts/sha-to-sri: minor efficiency improvement of the `Nix32` decoder --- maintainers/scripts/sha-to-sri.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/maintainers/scripts/sha-to-sri.py b/maintainers/scripts/sha-to-sri.py index e31091242341..971c24fe1fff 100755 --- a/maintainers/scripts/sha-to-sri.py +++ b/maintainers/scripts/sha-to-sri.py @@ -59,8 +59,7 @@ class Nix32(Encoding): def decode(self, s: str): assert len(s) == self.length - out = [0 for _ in range(self.n)] - # TODO: Do better than a list of byte-sized ints + out = bytearray(self.n) for n, c in enumerate(reversed(s)): digit = self.inverted[c]