/* SPDX-License-Identifier: GPL-2.0 */
/*
 * NH - ε-almost-universal hash function, ARM64 NEON accelerated version
 *
 * Copyright 2018 Google LLC
 *
 * Author: Eric Biggers
 */

#include <linux/linkage.h>

	/* Arguments of nh_neon(), in AAPCS64 argument-register order. */
	KEY		.req	x0
	MESSAGE		.req	x1
	MESSAGE_LEN	.req	x2
	HASH		.req	x3

	/* One 2 x 64-bit accumulator per NH pass. */
	PASS0_SUMS	.req	v0
	PASS1_SUMS	.req	v1
	PASS2_SUMS	.req	v2
	PASS3_SUMS	.req	v3

	/* Sliding window of four consecutive 16-byte key strides. */
	K0		.req	v4
	K1		.req	v5
	K2		.req	v6
	K3		.req	v7

	/*
	 * Scratch registers.  These must be caller-saved: AAPCS64 requires a
	 * callee to preserve the low 64 bits of v8-v15, and this code neither
	 * saves nor restores any vector register, so v16-v23 are used instead.
	 */
	T0		.req	v16
	T1		.req	v17
	T2		.req	v18
	T3		.req	v19
	T4		.req	v20
	T5		.req	v21
	T6		.req	v22
	T7		.req	v23

/*
 * _nh_stride k0, k1, k2, k3
 *
 * Process one 16-byte message stride for all four passes.
 *
 * \k0, \k1, \k2 must already hold the key strides used by passes 0-2; the
 * key stride for pass 3 is loaded from [KEY] into \k3 (overwriting it).
 * For each pass, the four 32-bit message words are added to the four 32-bit
 * key words, then the two low sums are multiplied by the two high sums
 * (32x32 => 64) and accumulated into that pass's PASSn_SUMS register.
 *
 * Side effects: MESSAGE and KEY are each post-incremented by 16;
 * T0-T7 and \k3 are clobbered.
 */
.macro _nh_stride	k0, k1, k2, k3

	// Load next message stride
	ld1		{T3.16b}, [MESSAGE], #16

	// Load next key stride
	ld1		{\k3\().4s}, [KEY], #16

	// Add message words to key words
	// (T3 is written last because it also holds the message words)
	add		T0.4s, T3.4s, \k0\().4s
	add		T1.4s, T3.4s, \k1\().4s
	add		T2.4s, T3.4s, \k2\().4s
	add		T3.4s, T3.4s, \k3\().4s

	// Multiply 32x32 => 64 and accumulate:
	// copy the high two words of each sum into the low half of a second
	// register so umlal can multiply word 0 by word 2 and word 1 by word 3
	mov		T4.d[0], T0.d[1]
	mov		T5.d[0], T1.d[1]
	mov		T6.d[0], T2.d[1]
	mov		T7.d[0], T3.d[1]
	umlal		PASS0_SUMS.2d, T0.2s, T4.2s
	umlal		PASS1_SUMS.2d, T1.2s, T5.2s
	umlal		PASS2_SUMS.2d, T2.2s, T6.2s
	umlal		PASS3_SUMS.2d, T3.2s, T7.2s
.endm

/*
 * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
 *		u8 hash[NH_HASH_BYTES])
 *
 * It's guaranteed that message_len % 16 == 0.
 *
 * In:       x0 = key, x1 = message, x2 = message_len, x3 = hash
 * Out:      four 64-bit sums (one per pass, 32 bytes total) stored at 'hash'
 * Clobbers: x0-x2, v0-v7, v16-v23, condition flags
 *
 * Reads 48 + message_len bytes starting at 'key': three strides are preloaded
 * and one further stride is loaded per message stride.
 */
ENTRY(nh_neon)

	// Preload the first three key strides and zero the accumulators
	ld1		{K0.4s,K1.4s}, [KEY], #32
	movi		PASS0_SUMS.2d, #0
	movi		PASS1_SUMS.2d, #0
	ld1		{K2.4s}, [KEY], #16
	movi		PASS2_SUMS.2d, #0
	movi		PASS3_SUMS.2d, #0

	// Main loop: four strides (64 bytes) per iteration.  The key register
	// roles rotate by one each stride, so after four strides K0-K2 again
	// hold the strides needed by the next iteration.
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	blt		.Lloop4_done
.Lloop4:
	_nh_stride	K0, K1, K2, K3
	_nh_stride	K1, K2, K3, K0
	_nh_stride	K2, K3, K0, K1
	_nh_stride	K3, K0, K1, K2
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	bge		.Lloop4

.Lloop4_done:
	// MESSAGE_LEN is now (remaining - 64); its low 6 bits are the number
	// of bytes left, which is 0, 16, 32 or 48 given message_len % 16 == 0.
	ands		MESSAGE_LEN, MESSAGE_LEN, #63
	beq		.Ldone
	_nh_stride	K0, K1, K2, K3

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Ldone
	_nh_stride	K1, K2, K3, K0

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Ldone
	_nh_stride	K2, K3, K0, K1

.Ldone:
	// Sum the accumulators for each pass, then store the sums to 'hash'
	addp		T0.2d, PASS0_SUMS.2d, PASS1_SUMS.2d
	addp		T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d
	st1		{T0.16b,T1.16b}, [HASH]
	ret
ENDPROC(nh_neon)