From c425822e17ca7f599644fde133a629a24677f070 Mon Sep 17 00:00:00 2001
From: nicoo <nicoo@mur.at>
Date: Fri, 13 Sep 2024 09:35:26 +0000
Subject: [PATCH 1/6] maintainers/scripts/sha-to-sri: format

---
 maintainers/scripts/sha-to-sri.py | 68 +++++++++++++++++--------------
 1 file changed, 37 insertions(+), 31 deletions(-)

diff --git a/maintainers/scripts/sha-to-sri.py b/maintainers/scripts/sha-to-sri.py
index 1af7ff215ad3..657d5ac5d87f 100755
--- a/maintainers/scripts/sha-to-sri.py
+++ b/maintainers/scripts/sha-to-sri.py
@@ -26,11 +26,12 @@ class Encoding(ABC):
         assert len(digest) == self.n
 
         from base64 import b64encode
+
         return f"{self.hashName}-{b64encode(digest).decode()}"
 
     @classmethod
-    def all(cls, h) -> 'List[Encoding]':
-        return [ c(h) for c in cls.__subclasses__() ]
+    def all(cls, h) -> "List[Encoding]":
+        return [c(h) for c in cls.__subclasses__()]
 
     def __init__(self, h):
         self.n = h.digest_size
@@ -38,54 +39,57 @@ class Encoding(ABC):
 
     @property
     @abstractmethod
-    def length(self) -> int:
-        ...
+    def length(self) -> int: ...
 
     @property
     def regex(self) -> str:
         return f"[{self.alphabet}]{{{self.length}}}"
 
     @abstractmethod
-    def decode(self, s: str) -> bytes:
-        ...
+    def decode(self, s: str) -> bytes: ...
 
 
 class Nix32(Encoding):
     alphabet = "0123456789abcdfghijklmnpqrsvwxyz"
-    inverted  = { c: i for i, c in enumerate(alphabet) }
+    inverted = {c: i for i, c in enumerate(alphabet)}
 
     @property
     def length(self):
         return 1 + (8 * self.n) // 5
+
     def decode(self, s: str):
         assert len(s) == self.length
-        out = [ 0 for _ in range(self.n) ]
+        out = [0 for _ in range(self.n)]
         # TODO: Do better than a list of byte-sized ints
 
         for n, c in enumerate(reversed(s)):
             digit = self.inverted[c]
             i, j = divmod(5 * n, 8)
-            out[i] = out[i] | (digit << j) & 0xff
+            out[i] = out[i] | (digit << j) & 0xFF
             rem = digit >> (8 - j)
             if rem == 0:
                 continue
             elif i < self.n:
-                out[i+1] = rem
+                out[i + 1] = rem
             else:
                 raise ValueError(f"Invalid nix32 hash: '{s}'")
 
         return bytes(out)
 
+
 class Hex(Encoding):
     alphabet = "0-9A-Fa-f"
 
     @property
     def length(self):
         return 2 * self.n
+
     def decode(self, s: str):
         from binascii import unhexlify
+
         return unhexlify(s)
 
+
 class Base64(Encoding):
     alphabet = "A-Za-z0-9+/"
 
@@ -94,36 +98,39 @@ class Base64(Encoding):
         """Number of characters in data and padding."""
         i, k = divmod(self.n, 3)
         return 4 * i + (0 if k == 0 else k + 1), (3 - k) % 3
+
     @property
     def length(self):
         return sum(self.format)
+
     @property
     def regex(self):
         data, padding = self.format
         return f"[{self.alphabet}]{{{data}}}={{{padding}}}"
+
     def decode(self, s):
         from base64 import b64decode
+
         return b64decode(s, validate = True)
 
 
-_HASHES = (hashlib.new(n) for n in ('SHA-256', 'SHA-512'))
-ENCODINGS = {
-    h.name: Encoding.all(h)
-    for h in _HASHES
-}
+_HASHES = (hashlib.new(n) for n in ("SHA-256", "SHA-512"))
+ENCODINGS = {h.name: Encoding.all(h) for h in _HASHES}
 
 RE = {
     h: "|".join(
-        (f"({h}-)?" if e.name == 'base64' else '') +
-        f"(?P<{h}_{e.name}>{e.regex})"
+        (f"({h}-)?" if e.name == "base64" else "") + f"(?P<{h}_{e.name}>{e.regex})"
         for e in encodings
-    ) for h, encodings in ENCODINGS.items()
+    )
+    for h, encodings in ENCODINGS.items()
 }
 
-_DEF_RE = re.compile("|".join(
-    f"(?P<{h}>{h} = (?P<{h}_quote>['\"])({re})(?P={h}_quote);)"
-    for h, re in RE.items()
-))
+_DEF_RE = re.compile(
+    "|".join(
+        f"(?P<{h}>{h} = (?P<{h}_quote>['\"])({re})(?P={h}_quote);)"
+        for h, re in RE.items()
+    )
+)
 
 
 def defToSRI(s: str) -> str:
@@ -153,7 +160,7 @@ def defToSRI(s: str) -> str:
 
 @contextmanager
 def atomicFileUpdate(target: Path):
-    '''Atomically replace the contents of a file.
+    """Atomically replace the contents of a file.
 
     Guarantees that no temporary files are left behind, and `target` is either
     left untouched, or overwritten with new content if no exception was raised.
@@ -164,9 +171,10 @@ def atomicFileUpdate(target: Path):
 
     Upon exiting the context, the files are closed; if no exception was
     raised, `new` (atomically) replaces the `target`, otherwise it is deleted.
-    '''
+    """
     # That's mostly copied from noto-emoji.py, should DRY it out
     from tempfile import mkstemp
+
     fd, _p = mkstemp(
         dir = target.parent,
         prefix = target.name,
@@ -175,7 +183,7 @@ def atomicFileUpdate(target: Path):
 
     try:
         with target.open() as original:
-            with tmpPath.open('w') as new:
+            with tmpPath.open("w") as new:
                 yield (original, new)
 
         tmpPath.replace(target)
@@ -188,22 +196,20 @@ def atomicFileUpdate(target: Path):
 def fileToSRI(p: Path):
     with atomicFileUpdate(p) as (og, new):
         for i, line in enumerate(og):
-            with log_context(line=i):
+            with log_context(line = i):
                 new.write(defToSRI(line))
 
 
-_SKIP_RE = re.compile(
-    "(generated by)|(do not edit)",
-    re.IGNORECASE
-)
+_SKIP_RE = re.compile("(generated by)|(do not edit)", re.IGNORECASE)
 
 if __name__ == "__main__":
     from sys import argv, stderr
+
     logger.info("Starting!")
 
     for arg in argv[1:]:
         p = Path(arg)
-        with log_context(path=str(p)):
+        with log_context(path = str(p)):
             try:
                 if p.name == "yarn.nix" or p.name.find("generated") != -1:
                     logger.warning("File looks autogenerated, skipping!")

From 915799a2b9e59a3b32d596a216893554f9c87f07 Mon Sep 17 00:00:00 2001
From: nicoo <nicoo@mur.at>
Date: Fri, 13 Sep 2024 08:00:12 +0000
Subject: [PATCH 2/6] maintainers/scripts/sha-to-sri: fix file-descriptor leak

---
 maintainers/scripts/sha-to-sri.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/maintainers/scripts/sha-to-sri.py b/maintainers/scripts/sha-to-sri.py
index 657d5ac5d87f..881e03071913 100755
--- a/maintainers/scripts/sha-to-sri.py
+++ b/maintainers/scripts/sha-to-sri.py
@@ -173,17 +173,18 @@ def atomicFileUpdate(target: Path):
     raised, `new` (atomically) replaces the `target`, otherwise it is deleted.
     """
     # That's mostly copied from noto-emoji.py, should DRY it out
-    from tempfile import mkstemp
-
-    fd, _p = mkstemp(
-        dir = target.parent,
-        prefix = target.name,
-    )
-    tmpPath = Path(_p)
+    from tempfile import NamedTemporaryFile
 
     try:
         with target.open() as original:
-            with tmpPath.open("w") as new:
+            with NamedTemporaryFile(
+                dir = target.parent,
+                prefix = target.stem,
+                suffix = target.suffix,
+                delete = False,
+                mode="w",  # otherwise the file would be opened in binary mode by default
+            ) as new:
+                tmpPath = Path(new.name)
                 yield (original, new)
 
         tmpPath.replace(target)

From bf6b5f7f85e4d0b71392241f3824d880dd9414e5 Mon Sep 17 00:00:00 2001
From: nicoo <nicoo@mur.at>
Date: Fri, 13 Sep 2024 08:00:53 +0000
Subject: [PATCH 3/6] maintainers/scripts/sha-to-sri: accept directories as
 input

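Files passed explicitly as CLI arguments are no longer skipped based on their name, since bulk changes can now be done by passing a directory instead of resorting to `xargs` or equivalent.
The name-based filter now only applies to files found while walking a directory.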
---
 maintainers/scripts/sha-to-sri.py | 32 +++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/maintainers/scripts/sha-to-sri.py b/maintainers/scripts/sha-to-sri.py
index 881e03071913..35ce2ca3389a 100755
--- a/maintainers/scripts/sha-to-sri.py
+++ b/maintainers/scripts/sha-to-sri.py
@@ -7,7 +7,7 @@ from pathlib import Path
 from structlog.contextvars import bound_contextvars as log_context
 from typing import ClassVar, List, Tuple
 
-import hashlib, re, structlog
+import hashlib, logging, re, structlog
 
 
 logger = structlog.getLogger("sha-to-SRI")
@@ -208,24 +208,20 @@ if __name__ == "__main__":
 
     logger.info("Starting!")
 
-    for arg in argv[1:]:
-        p = Path(arg)
-        with log_context(path = str(p)):
+    def handleFile(p: Path, skipLevel = logging.INFO):
+        with log_context(file = str(p)):
             try:
-                if p.name == "yarn.nix" or p.name.find("generated") != -1:
-                    logger.warning("File looks autogenerated, skipping!")
-                    continue
-
                 with p.open() as f:
                     for line in f:
                         if line.strip():
                             break
 
                     if _SKIP_RE.search(line):
-                        logger.warning("File looks autogenerated, skipping!")
-                        continue
+                        logger.log(skipLevel, "File looks autogenerated, skipping!")
+                        return
 
                 fileToSRI(p)
+
             except Exception as exn:
                 logger.error(
                     "Unhandled exception, skipping file!",
@@ -233,3 +229,19 @@ if __name__ == "__main__":
                 )
             else:
                 logger.info("Finished processing file")
+
+    for arg in argv[1:]:
+        p = Path(arg)
+        with log_context(arg = arg):
+            if p.is_file():
+                handleFile(p, skipLevel = logging.WARNING)
+
+            elif p.is_dir():
+                logger.info("Recursing into directory")
+                for q in p.glob("**/*.nix"):
+                    if q.is_file():
+                        if q.name == "yarn.nix" or q.name.find("generated") != -1:
+                            logger.info("File looks autogenerated, skipping!")
+                            continue
+
+                        handleFile(q)

From e079a279f4be68e45d47c2d424df6f314616ab5c Mon Sep 17 00:00:00 2001
From: nicoo <nicoo@mur.at>
Date: Fri, 13 Sep 2024 08:25:54 +0000
Subject: [PATCH 4/6] maintainers/scripts/sha-to-sri: drop unused imports

---
 maintainers/scripts/sha-to-sri.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/maintainers/scripts/sha-to-sri.py b/maintainers/scripts/sha-to-sri.py
index 35ce2ca3389a..e31091242341 100755
--- a/maintainers/scripts/sha-to-sri.py
+++ b/maintainers/scripts/sha-to-sri.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env nix-shell
 #! nix-shell -i "python3 -I" -p "python3.withPackages(p: with p; [ rich structlog ])"
 
-from abc import ABC, abstractclassmethod, abstractmethod
+from abc import ABC, abstractmethod
 from contextlib import contextmanager
 from pathlib import Path
 from structlog.contextvars import bound_contextvars as log_context
@@ -204,7 +204,7 @@ def fileToSRI(p: Path):
 _SKIP_RE = re.compile("(generated by)|(do not edit)", re.IGNORECASE)
 
 if __name__ == "__main__":
-    from sys import argv, stderr
+    from sys import argv
 
     logger.info("Starting!")
 

From 011daf916106828a70a5adf3a55b393fffec4a11 Mon Sep 17 00:00:00 2001
From: nicoo <nicoo@mur.at>
Date: Fri, 13 Sep 2024 10:15:19 +0000
Subject: [PATCH 5/6] maintainers/scripts: document sha-to-sri

---
 maintainers/scripts/README.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/maintainers/scripts/README.md b/maintainers/scripts/README.md
index 2b99a4e75114..44a5fc9bc590 100644
--- a/maintainers/scripts/README.md
+++ b/maintainers/scripts/README.md
@@ -56,3 +56,16 @@ The maintainer is designated by a `selector` which must be one of:
   see [`maintainer-list.nix`] for the fields' definition.
 
 [`maintainer-list.nix`]: ../maintainer-list.nix
+
+
+## Conventions
+
+### `sha-to-sri.py`
+
+`sha-to-sri.py path ...` (atomically) rewrites hash attributes (named `hash` or `sha(1|256|512)`)
+into the SRI format: `hash = "{hash name}-{base64 encoded value}"`.
+
+`path` must point to either a Nix file or a directory, which is traversed recursively for `*.nix` files.
+
+`sha-to-sri.py` automatically skips files whose first non-empty line contains `generated by` or `do not edit`.
+Moreover, when walking a directory tree, the script will skip files whose name is `yarn.nix` or contains `generated`.

From 9259479c421ede2348d756f739b5690578ad4a38 Mon Sep 17 00:00:00 2001
From: nicoo <nicoo@mur.at>
Date: Fri, 13 Sep 2024 14:11:34 +0000
Subject: [PATCH 6/6] maintainers/scripts/sha-to-sri: minor efficiency
 improvement of the `Nix32` decoder

---
 maintainers/scripts/sha-to-sri.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/maintainers/scripts/sha-to-sri.py b/maintainers/scripts/sha-to-sri.py
index e31091242341..971c24fe1fff 100755
--- a/maintainers/scripts/sha-to-sri.py
+++ b/maintainers/scripts/sha-to-sri.py
@@ -59,8 +59,7 @@ class Nix32(Encoding):
 
     def decode(self, s: str):
         assert len(s) == self.length
-        out = [0 for _ in range(self.n)]
-        # TODO: Do better than a list of byte-sized ints
+        out = bytearray(self.n)
 
         for n, c in enumerate(reversed(s)):
             digit = self.inverted[c]