diff options
Diffstat (limited to 'scripts/crypto/gen-hash-testvecs.py')
| -rwxr-xr-x | scripts/crypto/gen-hash-testvecs.py | 364 |
1 files changed, 364 insertions, 0 deletions
diff --git a/scripts/crypto/gen-hash-testvecs.py b/scripts/crypto/gen-hash-testvecs.py new file mode 100755 index 000000000000..f356f87e1c77 --- /dev/null +++ b/scripts/crypto/gen-hash-testvecs.py @@ -0,0 +1,364 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-or-later +# +# Script that generates test vectors for the given hash function. +# +# Requires that python-cryptography be installed. +# +# Copyright 2025 Google LLC + +import cryptography.hazmat.primitives.ciphers +import cryptography.hazmat.primitives.cmac +import hashlib +import hmac +import sys + +DATA_LENS = [0, 1, 2, 3, 16, 32, 48, 49, 63, 64, 65, 127, 128, 129, 256, 511, + 513, 1000, 3333, 4096, 4128, 4160, 4224, 16384] + +# Generate the given number of random bytes, using the length itself as the seed +# for a simple linear congruential generator (LCG). The C test code uses the +# same LCG with the same seeding strategy to reconstruct the data, ensuring +# reproducibility without explicitly storing the data in the test vectors. +def rand_bytes(length): + seed = length + out = [] + for _ in range(length): + seed = (seed * 25214903917 + 11) % 2**48 + out.append((seed >> 16) % 256) + return bytes(out) + +AES_256_KEY_SIZE = 32 + +# AES-CMAC. Just wraps the implementation from python-cryptography. +class AesCmac: + def __init__(self, key): + aes = cryptography.hazmat.primitives.ciphers.algorithms.AES(key) + self.cmac = cryptography.hazmat.primitives.cmac.CMAC(aes) + + def update(self, data): + self.cmac.update(data) + + def digest(self): + return self.cmac.finalize() + +POLY1305_KEY_SIZE = 32 + +# A straightforward, unoptimized implementation of Poly1305. +# Reference: https://cr.yp.to/mac/poly1305-20050329.pdf +class Poly1305: + def __init__(self, key): + assert len(key) == POLY1305_KEY_SIZE + self.h = 0 + rclamp = 0x0ffffffc0ffffffc0ffffffc0fffffff + self.r = int.from_bytes(key[:16], byteorder='little') & rclamp + self.s = int.from_bytes(key[16:], byteorder='little') + + # Note: this supports partial blocks only at the end. + def update(self, data): + for i in range(0, len(data), 16): + chunk = data[i:i+16] + c = int.from_bytes(chunk, byteorder='little') + 2**(8 * len(chunk)) + self.h = ((self.h + c) * self.r) % (2**130 - 5) + return self + + # Note: gen_additional_poly1305_testvecs() relies on this being + # nondestructive, i.e. not changing any field of self. + def digest(self): + m = (self.h + self.s) % 2**128 + return m.to_bytes(16, byteorder='little') + +GHASH_POLY = sum((1 << i) for i in [128, 7, 2, 1, 0]) +GHASH_BLOCK_SIZE = 16 + +# A straightforward, unoptimized implementation of GHASH. +class Ghash: + + @staticmethod + def reflect_bits_in_bytes(v): + res = 0 + for offs in range(0, 128, 8): + for bit in range(8): + if (v & (1 << (offs + bit))) != 0: + res ^= 1 << (offs + 7 - bit) + return res + + @staticmethod + def bytes_to_poly(data): + return Ghash.reflect_bits_in_bytes(int.from_bytes(data, byteorder='little')) + + @staticmethod + def poly_to_bytes(poly): + return Ghash.reflect_bits_in_bytes(poly).to_bytes(16, byteorder='little') + + def __init__(self, key): + assert len(key) == 16 + self.h = Ghash.bytes_to_poly(key) + self.acc = 0 + + # Note: this supports partial blocks only at the end. + def update(self, data): + for i in range(0, len(data), 16): + # acc += block + self.acc ^= Ghash.bytes_to_poly(data[i:i+16]) + # acc = (acc * h) mod GHASH_POLY + product = 0 + for j in range(127, -1, -1): + if (self.h & (1 << j)) != 0: + product ^= self.acc << j + if (product & (1 << (128 + j))) != 0: + product ^= GHASH_POLY << j + self.acc = product + return self + + def digest(self): + return Ghash.poly_to_bytes(self.acc) + +POLYVAL_POLY = sum((1 << i) for i in [128, 127, 126, 121, 0]) +POLYVAL_BLOCK_SIZE = 16 + +# A straightforward, unoptimized implementation of POLYVAL. +# Reference: https://datatracker.ietf.org/doc/html/rfc8452 +class Polyval: + def __init__(self, key): + assert len(key) == 16 + self.h = int.from_bytes(key, byteorder='little') + self.acc = 0 + + # Note: this supports partial blocks only at the end. + def update(self, data): + for i in range(0, len(data), 16): + # acc += block + self.acc ^= int.from_bytes(data[i:i+16], byteorder='little') + # acc = (acc * h * x^-128) mod POLYVAL_POLY + product = 0 + for j in range(128): + if (self.h & (1 << j)) != 0: + product ^= self.acc << j + if (product & (1 << j)) != 0: + product ^= POLYVAL_POLY << j + self.acc = product >> 128 + return self + + def digest(self): + return self.acc.to_bytes(16, byteorder='little') + +def hash_init(alg): + # The keyed hash functions are assigned a fixed random key here, to present + # them as unkeyed hash functions. This allows all the test cases for + # unkeyed hash functions to work on them. + if alg == 'aes-cmac': + return AesCmac(rand_bytes(AES_256_KEY_SIZE)) + if alg == 'ghash': + return Ghash(rand_bytes(GHASH_BLOCK_SIZE)) + if alg == 'poly1305': + return Poly1305(rand_bytes(POLY1305_KEY_SIZE)) + if alg == 'polyval': + return Polyval(rand_bytes(POLYVAL_BLOCK_SIZE)) + return hashlib.new(alg) + +def hash_update(ctx, data): + ctx.update(data) + +def hash_final(ctx): + return ctx.digest() + +def compute_hash(alg, data): + ctx = hash_init(alg) + hash_update(ctx, data) + return hash_final(ctx) + +def print_bytes(prefix, value, bytes_per_line): + for i in range(0, len(value), bytes_per_line): + line = prefix + ''.join(f'0x{b:02x}, ' for b in value[i:i+bytes_per_line]) + print(f'{line.rstrip()}') + +def print_static_u8_array_definition(name, value): + print('') + print(f'static const u8 {name} = {{') + print_bytes('\t', value, 8) + print('};') + +def print_c_struct_u8_array_field(name, value): + print(f'\t\t.{name} = {{') + print_bytes('\t\t\t', value, 8) + print('\t\t},') + +def alg_digest_size_const(alg): + if alg == 'aes-cmac': + return 'AES_BLOCK_SIZE' + if alg.startswith('blake2'): + return f'{alg.upper()}_HASH_SIZE' + return f"{alg.upper().replace('-', '_')}_DIGEST_SIZE" + +def gen_unkeyed_testvecs(alg): + print('') + print('static const struct {') + print('\tsize_t data_len;') + print(f'\tu8 digest[{alg_digest_size_const(alg)}];') + print('} hash_testvecs[] = {') + for data_len in DATA_LENS: + data = rand_bytes(data_len) + print('\t{') + print(f'\t\t.data_len = {data_len},') + print_c_struct_u8_array_field('digest', compute_hash(alg, data)) + print('\t},') + print('};') + + data = rand_bytes(4096) + ctx = hash_init(alg) + for data_len in range(len(data) + 1): + hash_update(ctx, compute_hash(alg, data[:data_len])) + print_static_u8_array_definition( + f'hash_testvec_consolidated[{alg_digest_size_const(alg)}]', + hash_final(ctx)) + +def gen_additional_sha3_testvecs(): + max_len = 4096 + in_data = rand_bytes(max_len) + for alg in ['shake128', 'shake256']: + ctx = hashlib.new('sha3-256') + for in_len in range(max_len + 1): + out_len = (in_len * 293) % (max_len + 1) + out = hashlib.new(alg, data=in_data[:in_len]).digest(out_len) + ctx.update(out) + print_static_u8_array_definition(f'{alg}_testvec_consolidated[SHA3_256_DIGEST_SIZE]', + ctx.digest()) + +def gen_hmac_testvecs(alg): + ctx = hmac.new(rand_bytes(32), digestmod=alg) + data = rand_bytes(4096) + for data_len in range(len(data) + 1): + ctx.update(data[:data_len]) + key_len = data_len % 293 + key = rand_bytes(key_len) + mac = hmac.digest(key, data[:data_len], alg) + ctx.update(mac) + print_static_u8_array_definition( + f'hmac_testvec_consolidated[{alg.upper()}_DIGEST_SIZE]', + ctx.digest()) + +def gen_additional_blake2_testvecs(alg): + if alg == 'blake2s': + (max_key_size, max_hash_size) = (32, 32) + elif alg == 'blake2b': + (max_key_size, max_hash_size) = (64, 64) + else: + raise ValueError(f'Unsupported alg: {alg}') + hashes = b'' + for key_len in range(max_key_size + 1): + for out_len in range(1, max_hash_size + 1): + h = hashlib.new(alg, digest_size=out_len, key=rand_bytes(key_len)) + h.update(rand_bytes(100)) + hashes += h.digest() + print_static_u8_array_definition( + f'{alg}_keyed_testvec_consolidated[{alg_digest_size_const(alg)}]', + compute_hash(alg, hashes)) + +def nh_extract_int(bytestr, pos, length): + assert pos % 8 == 0 and length % 8 == 0 + return int.from_bytes(bytestr[pos//8 : pos//8 + length//8], byteorder='little') + +# The NH "almost-universal hash function" used in Adiantum. This is a +# straightforward translation of the pseudocode from Section 6.3 of the Adiantum +# paper (https://eprint.iacr.org/2018/720.pdf), except the outer loop is omitted +# because we assume len(msg) <= 1024. (The kernel's nh() function is only +# expected to handle up to 1024 bytes; it's just called repeatedly as needed.) +def nh(key, msg): + (w, s, r, u) = (32, 2, 4, 8192) + l = 8 * len(msg) + assert l <= u + assert l % (2*s*w) == 0 + h = bytes() + for i in range(0, 2*s*w*r, 2*s*w): + p = 0 + for j in range(0, l, 2*s*w): + for k in range(0, w*s, w): + a0 = nh_extract_int(key, i + j + k, w) + a1 = nh_extract_int(key, i + j + k + s*w, w) + b0 = nh_extract_int(msg, j + k, w) + b1 = nh_extract_int(msg, j + k + s*w, w) + p += ((a0 + b0) % 2**w) * ((a1 + b1) % 2**w) + h += (p % 2**64).to_bytes(8, byteorder='little') + return h + +def gen_nh_testvecs(): + NH_KEY_BYTES = 1072 + NH_MESSAGE_BYTES = 1024 + key = rand_bytes(NH_KEY_BYTES) + msg = rand_bytes(NH_MESSAGE_BYTES) + print_static_u8_array_definition('nh_test_key[NH_KEY_BYTES]', key) + print_static_u8_array_definition('nh_test_msg[NH_MESSAGE_BYTES]', msg) + for length in [16, 96, 256, 1024]: + print_static_u8_array_definition(f'nh_test_val{length}[NH_HASH_BYTES]', + nh(key, msg[:length])) + +def gen_additional_poly1305_testvecs(): + key = b'\xff' * POLY1305_KEY_SIZE + data = b'' + ctx = Poly1305(key) + for _ in range(32): + for j in range(0, 4097, 16): + ctx.update(b'\xff' * j) + data += ctx.digest() + print_static_u8_array_definition( + 'poly1305_allones_macofmacs[POLY1305_DIGEST_SIZE]', + Poly1305(key).update(data).digest()) + +def gen_additional_ghash_testvecs(): + key = b'\xff' * GHASH_BLOCK_SIZE + hashes = b'' + for data_len in range(0, 4097, 16): + hashes += Ghash(key).update(b'\xff' * data_len).digest() + print_static_u8_array_definition( + 'ghash_allones_hashofhashes[GHASH_DIGEST_SIZE]', + Ghash(key).update(hashes).digest()) + +def gen_additional_polyval_testvecs(): + key = b'\xff' * POLYVAL_BLOCK_SIZE + hashes = b'' + for data_len in range(0, 4097, 16): + hashes += Polyval(key).update(b'\xff' * data_len).digest() + print_static_u8_array_definition( + 'polyval_allones_hashofhashes[POLYVAL_DIGEST_SIZE]', + Polyval(key).update(hashes).digest()) + +if len(sys.argv) != 2: + sys.stderr.write('Usage: gen-hash-testvecs.py ALGORITHM\n') + sys.stderr.write('ALGORITHM may be any supported by Python hashlib;\n') + sys.stderr.write(' or aes-cmac, ghash, nh, poly1305, polyval, or sha3.\n') + sys.stderr.write('Example: gen-hash-testvecs.py sha512\n') + sys.exit(1) + +alg = sys.argv[1] +print('/* SPDX-License-Identifier: GPL-2.0-or-later */') +print(f'/* This file was generated by: {sys.argv[0]} {" ".join(sys.argv[1:])} */') +if alg == 'aes-cmac': + gen_unkeyed_testvecs(alg) +elif alg.startswith('blake2'): + gen_unkeyed_testvecs(alg) + gen_additional_blake2_testvecs(alg) +elif alg == 'ghash': + gen_unkeyed_testvecs(alg) + gen_additional_ghash_testvecs() +elif alg == 'nh': + gen_nh_testvecs() +elif alg == 'poly1305': + gen_unkeyed_testvecs(alg) + gen_additional_poly1305_testvecs() +elif alg == 'polyval': + gen_unkeyed_testvecs(alg) + gen_additional_polyval_testvecs() +elif alg == 'sha3': + print() + print('/* SHA3-256 test vectors */') + gen_unkeyed_testvecs('sha3-256') + print() + print('/* SHAKE test vectors */') + gen_additional_sha3_testvecs() +elif alg == 'sm3': + gen_unkeyed_testvecs(alg) + # Kernel doesn't implement HMAC-SM3 library functions yet. +else: + gen_unkeyed_testvecs(alg) + gen_hmac_testvecs(alg) |
