diff options
author | Sascha Hauer <s.hauer@pengutronix.de> | 2015-04-13 12:57:12 +0200 |
---|---|---|
committer | Sascha Hauer <s.hauer@pengutronix.de> | 2015-04-13 12:57:12 +0200 |
commit | 909dbe5334d54a5538128ebf18391608315c2f9a (patch) | |
tree | ded97d91bba0ba00dabef434dd98611ea5cd6e15 | |
parent | 881d2e4ac607d8545dd2e62892436c759b8c0958 (diff) | |
parent | 238ffb68f62525f12ef1af1ebe316360634ec529 (diff) | |
download | barebox-909dbe5334d54a5538128ebf18391608315c2f9a.tar.gz barebox-909dbe5334d54a5538128ebf18391608315c2f9a.tar.xz |
Merge branch 'for-next/crypto'
50 files changed, 6620 insertions, 966 deletions
diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 96c9f5797e..721aa9bd9d 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -269,6 +269,7 @@ endif common-y += $(BOARD) arch/arm/boards/ $(MACH) common-y += arch/arm/lib/ arch/arm/cpu/ +common-y += arch/arm/crypto/ common-$(CONFIG_OFTREE) += arch/arm/dts/ diff --git a/arch/arm/configs/at91rm9200ek_defconfig b/arch/arm/configs/at91rm9200ek_defconfig index 54e3b1d8b8..03fca04cb7 100644 --- a/arch/arm/configs/at91rm9200ek_defconfig +++ b/arch/arm/configs/at91rm9200ek_defconfig @@ -76,5 +76,5 @@ CONFIG_LED=y CONFIG_LED_GPIO=y CONFIG_LED_TRIGGERS=y CONFIG_FS_CRAMFS=y -CONFIG_SHA1=y -CONFIG_SHA256=y +CONFIG_DIGEST_SHA1_GENERIC=y +CONFIG_DIGEST_SHA256_GENERIC=y diff --git a/arch/arm/configs/highbank_defconfig b/arch/arm/configs/highbank_defconfig index cf42d29622..8c965c83a5 100644 --- a/arch/arm/configs/highbank_defconfig +++ b/arch/arm/configs/highbank_defconfig @@ -59,5 +59,5 @@ CONFIG_DISK=y CONFIG_DISK_AHCI=y CONFIG_GPIO_PL061=y CONFIG_FS_TFTP=y -CONFIG_SHA1=y -CONFIG_SHA256=y +CONFIG_DIGEST_SHA1_GENERIC=y +CONFIG_DIGEST_SHA256_GENERIC=y diff --git a/arch/arm/configs/mioa701_defconfig b/arch/arm/configs/mioa701_defconfig index d405edf01b..cc4587ccbd 100644 --- a/arch/arm/configs/mioa701_defconfig +++ b/arch/arm/configs/mioa701_defconfig @@ -104,4 +104,4 @@ CONFIG_FS_UBIFS_COMPRESSION_ZLIB=y CONFIG_BZLIB=y CONFIG_BMP=y CONFIG_PNG=y -CONFIG_SHA256=y +CONFIG_DIGEST_SHA256_GENERIC=y diff --git a/arch/arm/configs/module-mb7707_defconfig b/arch/arm/configs/module-mb7707_defconfig index 843dd59889..83a798bac1 100644 --- a/arch/arm/configs/module-mb7707_defconfig +++ b/arch/arm/configs/module-mb7707_defconfig @@ -45,5 +45,5 @@ CONFIG_USB_HOST=y CONFIG_USB_EHCI=y CONFIG_USB_STORAGE=y CONFIG_CLOCKSOURCE_DUMMY=y -CONFIG_SHA1=y -CONFIG_SHA256=y +CONFIG_DIGEST_SHA1_GENERIC=y +CONFIG_DIGEST_SHA256_GENERIC=y diff --git a/arch/arm/configs/nhk8815_defconfig b/arch/arm/configs/nhk8815_defconfig index dcb00c0755..46c3a68e53 100644 --- a/arch/arm/configs/nhk8815_defconfig +++ b/arch/arm/configs/nhk8815_defconfig @@ -55,4 +55,4 @@ CONFIG_MTD_NAND_NOMADIK=y CONFIG_UBI=y CONFIG_FS_CRAMFS=y CONFIG_MD5=y -CONFIG_SHA256=y +CONFIG_DIGEST_SHA256_GENERIC=y diff --git a/arch/arm/configs/phytec-phycard-omap3_defconfig b/arch/arm/configs/phytec-phycard-omap3_defconfig index aefc78d5ca..a2564d4459 100644 --- a/arch/arm/configs/phytec-phycard-omap3_defconfig +++ b/arch/arm/configs/phytec-phycard-omap3_defconfig @@ -173,6 +173,6 @@ CONFIG_CRC32=y CONFIG_CRC16=y CONFIG_DIGEST=y CONFIG_MD5=y -CONFIG_SHA1=y -CONFIG_SHA224=y -CONFIG_SHA256=y +CONFIG_DIGEST_SHA1_GENERIC=y +CONFIG_DIGEST_SHA224_GENERIC=y +CONFIG_DIGEST_SHA256_GENERIC=y diff --git a/arch/arm/configs/rpi_defconfig b/arch/arm/configs/rpi_defconfig index c6b2c50c7b..25770a007f 100644 --- a/arch/arm/configs/rpi_defconfig +++ b/arch/arm/configs/rpi_defconfig @@ -64,5 +64,5 @@ CONFIG_FS_EXT4=y CONFIG_FS_FAT=y CONFIG_FS_FAT_WRITE=y CONFIG_FS_FAT_LFN=y -CONFIG_SHA1=y -CONFIG_SHA256=y +CONFIG_DIGEST_SHA1_GENERIC=y +CONFIG_DIGEST_SHA256_GENERIC=y diff --git a/arch/arm/configs/versatilepb_arm1176_defconfig b/arch/arm/configs/versatilepb_arm1176_defconfig index ca0ab3603f..cefdb296a1 100644 --- a/arch/arm/configs/versatilepb_arm1176_defconfig +++ b/arch/arm/configs/versatilepb_arm1176_defconfig @@ -95,5 +95,5 @@ CONFIG_FS_EXT4=y CONFIG_FS_TFTP=y CONFIG_FS_NFS=y CONFIG_PNG=y -CONFIG_SHA1=y -CONFIG_SHA256=y +CONFIG_DIGEST_SHA1_GENERIC=y +CONFIG_DIGEST_SHA256_GENERIC=y diff --git a/arch/arm/configs/versatilepb_defconfig b/arch/arm/configs/versatilepb_defconfig index 54a6fec919..0876824f66 100644 --- a/arch/arm/configs/versatilepb_defconfig +++ b/arch/arm/configs/versatilepb_defconfig @@ -81,5 +81,5 @@ CONFIG_FS_CRAMFS=y CONFIG_FS_EXT4=y CONFIG_FS_TFTP=y CONFIG_FS_NFS=y -CONFIG_SHA1=y -CONFIG_SHA256=y +CONFIG_DIGEST_SHA1_GENERIC=y +CONFIG_DIGEST_SHA256_GENERIC=y diff --git a/arch/arm/configs/vexpress_ca9_defconfig b/arch/arm/configs/vexpress_ca9_defconfig index 84171c473d..c5ad315a8c 100644 --- a/arch/arm/configs/vexpress_ca9_defconfig +++ b/arch/arm/configs/vexpress_ca9_defconfig @@ -58,5 +58,5 @@ CONFIG_DRIVER_CFI=y # CONFIG_DRIVER_CFI_BANK_WIDTH_1 is not set # CONFIG_DRIVER_CFI_BANK_WIDTH_2 is not set CONFIG_FS_TFTP=y -CONFIG_SHA1=y -CONFIG_SHA256=y +CONFIG_DIGEST_SHA1_GENERIC=y +CONFIG_DIGEST_SHA256_GENERIC=y diff --git a/arch/arm/configs/vexpress_defconfig b/arch/arm/configs/vexpress_defconfig index c7928c45a5..beea11aece 100644 --- a/arch/arm/configs/vexpress_defconfig +++ b/arch/arm/configs/vexpress_defconfig @@ -57,5 +57,5 @@ CONFIG_DRIVER_CFI=y # CONFIG_DRIVER_CFI_BANK_WIDTH_1 is not set # CONFIG_DRIVER_CFI_BANK_WIDTH_2 is not set CONFIG_FS_TFTP=y -CONFIG_SHA1=y -CONFIG_SHA256=y +CONFIG_DIGEST_SHA1_GENERIC=y +CONFIG_DIGEST_SHA256_GENERIC=y diff --git a/arch/arm/configs/virt2real_defconfig b/arch/arm/configs/virt2real_defconfig index a81d18b25b..0c686caadf 100644 --- a/arch/arm/configs/virt2real_defconfig +++ b/arch/arm/configs/virt2real_defconfig @@ -45,5 +45,5 @@ CONFIG_DRIVER_SERIAL_NS16550=y CONFIG_LED=y CONFIG_LED_GPIO=y CONFIG_LED_GPIO_OF=y -CONFIG_SHA1=y -CONFIG_SHA256=y +CONFIG_DIGEST_SHA1_GENERIC=y +CONFIG_DIGEST_SHA256_GENERIC=y diff --git a/arch/arm/configs/zylonite310_defconfig b/arch/arm/configs/zylonite310_defconfig index 77e4f84ff1..fa6587c0ee 100644 --- a/arch/arm/configs/zylonite310_defconfig +++ b/arch/arm/configs/zylonite310_defconfig @@ -114,4 +114,4 @@ CONFIG_FS_UBIFS_COMPRESSION_ZLIB=y CONFIG_BZLIB=y CONFIG_BMP=y CONFIG_PNG=y -CONFIG_SHA256=y +CONFIG_DIGEST_SHA256_GENERIC=y diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile new file mode 100644 index 0000000000..372bf8d699 --- /dev/null +++ b/arch/arm/crypto/Makefile @@ -0,0 +1,17 @@ +# +# Arch-specific CryptoAPI modules. +# + +obj-$(CONFIG_DIGEST_SHA1_ARM) += sha1-arm.o +obj-$(CONFIG_DIGEST_SHA256_ARM) += sha256-arm.o + +sha1-arm-y := sha1-armv4-large.o sha1_glue.o +sha256-arm-y := sha256-core.o sha256_glue.o + +quiet_cmd_perl = PERL $@ + cmd_perl = $(PERL) $(<) > $(@) + +$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl + $(call cmd,perl) + +.PRECIOUS: $(obj)/sha256-core.S diff --git a/arch/arm/crypto/sha1-armv4-large.S b/arch/arm/crypto/sha1-armv4-large.S new file mode 100644 index 0000000000..99207c45ec --- /dev/null +++ b/arch/arm/crypto/sha1-armv4-large.S @@ -0,0 +1,497 @@ +#define __ARM_ARCH__ __LINUX_ARM_ARCH__ +@ ==================================================================== +@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ ==================================================================== + +@ sha1_block procedure for ARMv4. +@ +@ January 2007. + +@ Size/performance trade-off +@ ==================================================================== +@ impl size in bytes comp cycles[*] measured performance +@ ==================================================================== +@ thumb 304 3212 4420 +@ armv4-small 392/+29% 1958/+64% 2250/+96% +@ armv4-compact 740/+89% 1552/+26% 1840/+22% +@ armv4-large 1420/+92% 1307/+19% 1370/+34%[***] +@ full unroll ~5100/+260% ~1260/+4% ~1300/+5% +@ ==================================================================== +@ thumb = same as 'small' but in Thumb instructions[**] and +@ with recurring code in two private functions; +@ small = detached Xload/update, loops are folded; +@ compact = detached Xload/update, 5x unroll; +@ large = interleaved Xload/update, 5x unroll; +@ full unroll = interleaved Xload/update, full unroll, estimated[!]; +@ +@ [*] Manually counted instructions in "grand" loop body. Measured +@ performance is affected by prologue and epilogue overhead, +@ i-cache availability, branch penalties, etc. +@ [**] While each Thumb instruction is twice smaller, they are not as +@ diverse as ARM ones: e.g., there are only two arithmetic +@ instructions with 3 arguments, no [fixed] rotate, addressing +@ modes are limited. As result it takes more instructions to do +@ the same job in Thumb, therefore the code is never twice as +@ small and always slower. +@ [***] which is also ~35% better than compiler generated code. Dual- +@ issue Cortex A8 core was measured to process input block in +@ ~990 cycles. + +@ August 2010. +@ +@ Rescheduling for dual-issue pipeline resulted in 13% improvement on +@ Cortex A8 core and in absolute terms ~870 cycles per input block +@ [or 13.6 cycles per byte]. + +@ February 2011. +@ +@ Profiler-assisted and platform-specific optimization resulted in 10% +@ improvement on Cortex A8 core and 12.2 cycles per byte. + +#include <linux/linkage.h> + +.text + +.align 2 +ENTRY(sha1_block_data_order) + stmdb sp!,{r4-r12,lr} + add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 + ldmia r0,{r3,r4,r5,r6,r7} +.Lloop: + ldr r8,.LK_00_19 + mov r14,sp + sub sp,sp,#15*4 + mov r5,r5,ror#30 + mov r6,r6,ror#30 + mov r7,r7,ror#30 @ [6] +.L_00_15: +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r7,r8,r7,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r5,r6 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r7,r8,r7,ror#2 @ E+=K_00_19 + eor r10,r5,r6 @ F_xx_xx + add r7,r7,r3,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r4,r10,ror#2 + add r7,r7,r9 @ E+=X[i] + eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r7,r7,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r6,r8,r6,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r4,r5 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r6,r8,r6,ror#2 @ E+=K_00_19 + eor r10,r4,r5 @ F_xx_xx + add r6,r6,r7,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r3,r10,ror#2 + add r6,r6,r9 @ E+=X[i] + eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r6,r6,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r5,r8,r5,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r3,r4 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r5,r8,r5,ror#2 @ E+=K_00_19 + eor r10,r3,r4 @ F_xx_xx + add r5,r5,r6,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r7,r10,ror#2 + add r5,r5,r9 @ E+=X[i] + eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r5,r5,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r4,r8,r4,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r7,r3 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r4,r8,r4,ror#2 @ E+=K_00_19 + eor r10,r7,r3 @ F_xx_xx + add r4,r4,r5,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r6,r10,ror#2 + add r4,r4,r9 @ E+=X[i] + eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r4,r4,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r3,r8,r3,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r6,r7 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r3,r8,r3,ror#2 @ E+=K_00_19 + eor r10,r6,r7 @ F_xx_xx + add r3,r3,r4,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r5,r10,ror#2 + add r3,r3,r9 @ E+=X[i] + eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r3,r3,r10 @ E+=F_00_19(B,C,D) + cmp r14,sp + bne .L_00_15 @ [((11+4)*5+2)*3] + sub sp,sp,#25*4 +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r7,r8,r7,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r5,r6 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r7,r8,r7,ror#2 @ E+=K_00_19 + eor r10,r5,r6 @ F_xx_xx + add r7,r7,r3,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r4,r10,ror#2 + add r7,r7,r9 @ E+=X[i] + eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r7,r7,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r3,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) + add r6,r6,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r7,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) + add r5,r5,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r6,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) + add r4,r4,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r5,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) + add r3,r3,r10 @ E+=F_00_19(B,C,D) + + ldr r8,.LK_20_39 @ [+15+16*4] + cmn sp,#0 @ [+3], clear carry to denote 20_39 +.L_20_39_or_60_79: + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r7,r8,r7,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r5,r6 @ F_xx_xx + mov r9,r9,ror#31 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r4,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r7,r7,r9 @ E+=X[i] + add r7,r7,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r3,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + add r6,r6,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r7,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + add r5,r5,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r6,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + add r4,r4,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r5,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + add r3,r3,r10 @ E+=F_20_39(B,C,D) + ARM( teq r14,sp ) @ preserve carry + THUMB( mov r11,sp ) + THUMB( teq r14,r11 ) @ preserve carry + bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] + bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes + + ldr r8,.LK_40_59 + sub sp,sp,#20*4 @ [+2] +.L_40_59: + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r7,r8,r7,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r5,r6 @ F_xx_xx + mov r9,r9,ror#31 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r4,r10,ror#2 @ F_xx_xx + and r11,r5,r6 @ F_xx_xx + add r7,r7,r9 @ E+=X[i] + add r7,r7,r10 @ E+=F_40_59(B,C,D) + add r7,r7,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r3,r10,ror#2 @ F_xx_xx + and r11,r4,r5 @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + add r6,r6,r10 @ E+=F_40_59(B,C,D) + add r6,r6,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r7,r10,ror#2 @ F_xx_xx + and r11,r3,r4 @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + add r5,r5,r10 @ E+=F_40_59(B,C,D) + add r5,r5,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r6,r10,ror#2 @ F_xx_xx + and r11,r7,r3 @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + add r4,r4,r10 @ E+=F_40_59(B,C,D) + add r4,r4,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r5,r10,ror#2 @ F_xx_xx + and r11,r6,r7 @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + add r3,r3,r10 @ E+=F_40_59(B,C,D) + add r3,r3,r11,ror#2 + cmp r14,sp + bne .L_40_59 @ [+((12+5)*5+2)*4] + + ldr r8,.LK_60_79 + sub sp,sp,#20*4 + cmp sp,#0 @ set carry to denote 60_79 + b .L_20_39_or_60_79 @ [+4], spare 300 bytes +.L_done: + add sp,sp,#80*4 @ "deallocate" stack frame + ldmia r0,{r8,r9,r10,r11,r12} + add r3,r8,r3 + add r4,r9,r4 + add r5,r10,r5,ror#2 + add r6,r11,r6,ror#2 + add r7,r12,r7,ror#2 + stmia r0,{r3,r4,r5,r6,r7} + teq r1,r2 + bne .Lloop @ [+18], total 1307 + + ldmia sp!,{r4-r12,pc} +.align 2 +.LK_00_19: .word 0x5a827999 +.LK_20_39: .word 0x6ed9eba1 +.LK_40_59: .word 0x8f1bbcdc +.LK_60_79: .word 0xca62c1d6 +ENDPROC(sha1_block_data_order) +.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>" +.align 2 diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c new file mode 100644 index 0000000000..176aa9ec69 --- /dev/null +++ b/arch/arm/crypto/sha1_glue.c @@ -0,0 +1,137 @@ +/* + * Cryptographic API. + * Glue code for the SHA1 Secure Hash Algorithm assembler implementation + * + * This file is based on sha1_generic.c and sha1_ssse3_glue.c + * + * Copyright (c) Alan Smithee. + * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> + * Copyright (c) Mathias Krause <minipli@googlemail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <common.h> +#include <digest.h> +#include <init.h> +#include <crypto/sha.h> +#include <crypto/internal.h> +#include <asm/byteorder.h> + +void sha1_block_data_order(u32 *digest, + const unsigned char *data, unsigned int rounds); + + +static int sha1_init(struct digest *desc) +{ + struct sha1_state *sctx = digest_ctx(desc); + + *sctx = (struct sha1_state){ + .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, + }; + + return 0; +} + + +static int __sha1_update(struct sha1_state *sctx, const u8 *data, + unsigned int len, unsigned int partial) +{ + unsigned int done = 0; + + sctx->count += len; + + if (partial) { + done = SHA1_BLOCK_SIZE - partial; + memcpy(sctx->buffer + partial, data, done); + sha1_block_data_order(sctx->state, sctx->buffer, 1); + } + + if (len - done >= SHA1_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; + sha1_block_data_order(sctx->state, data + done, rounds); + done += rounds * SHA1_BLOCK_SIZE; + } + + memcpy(sctx->buffer, data + done, len - done); + return 0; +} + + +int sha1_update_arm(struct digest *desc, const void *data, + unsigned long len) +{ + struct sha1_state *sctx = digest_ctx(desc); + unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; + int res; + + /* Handle the fast case right here */ + if (partial + len < SHA1_BLOCK_SIZE) { + sctx->count += len; + memcpy(sctx->buffer + partial, data, len); + return 0; + } + res = __sha1_update(sctx, data, len, partial); + return res; +} +EXPORT_SYMBOL_GPL(sha1_update_arm); + + +/* Add padding and return the message digest. */ +static int sha1_final(struct digest *desc, u8 *out) +{ + struct sha1_state *sctx = digest_ctx(desc); + unsigned int i, index, padlen; + __be32 *dst = (__be32 *)out; + __be64 bits; + static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; + + bits = cpu_to_be64(sctx->count << 3); + + /* Pad out to 56 mod 64 and append length */ + index = sctx->count % SHA1_BLOCK_SIZE; + padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); + /* We need to fill a whole block for __sha1_update() */ + if (padlen <= 56) { + sctx->count += padlen; + memcpy(sctx->buffer + index, padding, padlen); + } else { + __sha1_update(sctx, padding, padlen, index); + } + __sha1_update(sctx, (const u8 *)&bits, sizeof(bits), 56); + + /* Store state in digest */ + for (i = 0; i < 5; i++) + dst[i] = cpu_to_be32(sctx->state[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + return 0; +} + +static struct digest_algo m = { + .base = { + .name = "sha1", + .driver_name = "sha1-asm", + .priority = 150, + }, + + .init = sha1_init, + .update = sha1_update_arm, + .final = sha1_final, + .digest = digest_generic_digest, + .verify = digest_generic_verify, + .length = SHA1_DIGEST_SIZE, + .ctx_length = sizeof(struct sha1_state), +}; + +static int sha1_mod_init(void) +{ + return digest_algo_register(&m); +} +device_initcall(sha1_mod_init); diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl new file mode 100644 index 0000000000..2b186a034e --- /dev/null +++ b/arch/arm/crypto/sha256-armv4.pl @@ -0,0 +1,717 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# +# Permission to use under GPL terms is granted. +# ==================================================================== + +# SHA256 block procedure for ARMv4. May 2007. + +# Performance is ~2x better than gcc 3.4 generated code and in "abso- +# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per +# byte [on single-issue Xscale PXA250 core]. + +# July 2010. +# +# Rescheduling for dual-issue pipeline resulted in 22% improvement on +# Cortex A8 core and ~20 cycles per processed byte. + +# February 2011. +# +# Profiler-assisted and platform-specific optimization resulted in 16% +# improvement on Cortex A8 core and ~15.4 cycles per processed byte. + +# September 2013. +# +# Add NEON implementation. On Cortex A8 it was measured to process one +# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon +# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only +# code (meaning that latter performs sub-optimally, nothing was done +# about it). + +# May 2014. +# +# Add ARMv8 code path performing at 2.0 cpb on Apple A7. + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +$ctx="r0"; $t0="r0"; +$inp="r1"; $t4="r1"; +$len="r2"; $t1="r2"; +$T1="r3"; $t3="r3"; +$A="r4"; +$B="r5"; +$C="r6"; +$D="r7"; +$E="r8"; +$F="r9"; +$G="r10"; +$H="r11"; +@V=($A,$B,$C,$D,$E,$F,$G,$H); +$t2="r12"; +$Ktbl="r14"; + +@Sigma0=( 2,13,22); +@Sigma1=( 6,11,25); +@sigma0=( 7,18, 3); +@sigma1=(17,19,10); + +sub BODY_00_15 { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; + +$code.=<<___ if ($i<16); +#if __ARM_ARCH__>=7 + @ ldr $t1,[$inp],#4 @ $i +# if $i==15 + str $inp,[sp,#17*4] @ make room for $t4 +# endif + eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` + add $a,$a,$t2 @ h+=Maj(a,b,c) from the past + eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) + rev $t1,$t1 +#else + @ ldrb $t1,[$inp,#3] @ $i + add $a,$a,$t2 @ h+=Maj(a,b,c) from the past + ldrb $t2,[$inp,#2] + ldrb $t0,[$inp,#1] + orr $t1,$t1,$t2,lsl#8 + ldrb $t2,[$inp],#4 + orr $t1,$t1,$t0,lsl#16 +# if $i==15 + str $inp,[sp,#17*4] @ make room for $t4 +# endif + eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` + orr $t1,$t1,$t2,lsl#24 + eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) +#endif +___ +$code.=<<___; + ldr $t2,[$Ktbl],#4 @ *K256++ + add $h,$h,$t1 @ h+=X[i] + str $t1,[sp,#`$i%16`*4] + eor $t1,$f,$g + add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e) + and $t1,$t1,$e + add $h,$h,$t2 @ h+=K256[i] + eor $t1,$t1,$g @ Ch(e,f,g) + eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]` + add $h,$h,$t1 @ h+=Ch(e,f,g) +#if $i==31 + and $t2,$t2,#0xff + cmp $t2,#0xf2 @ done? +#endif +#if $i<15 +# if __ARM_ARCH__>=7 + ldr $t1,[$inp],#4 @ prefetch +# else + ldrb $t1,[$inp,#3] +# endif + eor $t2,$a,$b @ a^b, b^c in next round +#else + ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx + eor $t2,$a,$b @ a^b, b^c in next round + ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx +#endif + eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a) + and $t3,$t3,$t2 @ (b^c)&=(a^b) + add $d,$d,$h @ d+=h + eor $t3,$t3,$b @ Maj(a,b,c) + add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a) + @ add $h,$h,$t3 @ h+=Maj(a,b,c) +___ + ($t2,$t3)=($t3,$t2); +} + +sub BODY_16_XX { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; + +$code.=<<___; + @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i + @ ldr $t4,[sp,#`($i+14)%16`*4] + mov $t0,$t1,ror#$sigma0[0] + add $a,$a,$t2 @ h+=Maj(a,b,c) from the past + mov $t2,$t4,ror#$sigma1[0] + eor $t0,$t0,$t1,ror#$sigma0[1] + eor $t2,$t2,$t4,ror#$sigma1[1] + eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1]) + ldr $t1,[sp,#`($i+0)%16`*4] + eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14]) + ldr $t4,[sp,#`($i+9)%16`*4] + + add $t2,$t2,$t0 + eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15 + add $t1,$t1,$t2 + eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) + add $t1,$t1,$t4 @ X[i] +___ + &BODY_00_15(@_); +} + +$code=<<___; +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 +#endif + +.text +#if __ARM_ARCH__<7 +.code 32 +#else +.syntax unified +# ifdef __thumb2__ +.thumb +# else +.code 32 +# endif +#endif + +#ifdef __thumb__ +#define adrl adr +#endif + +.type K256,%object +.align 5 +K256: +.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.size K256,.-K256 +.word 0 @ terminator +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha256_block_data_order +#endif +.align 5 + +.global sha256_block_data_order +.type sha256_block_data_order,%function +sha256_block_data_order: +#if __ARM_ARCH__<7 + sub r3,pc,#8 @ sha256_block_data_order +#else + adr r3,sha256_block_data_order +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#ARMV8_SHA256 + bne .LARMv8 + tst r12,#ARMV7_NEON + bne .LNEON +#endif + add $len,$inp,$len,lsl#6 @ len to point at the end of inp + stmdb sp!,{$ctx,$inp,$len,r4-r11,lr} + ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H} + sub $Ktbl,r3,#256+32 @ K256 + sub sp,sp,#16*4 @ alloca(X[16]) +.Loop: +# if __ARM_ARCH__>=7 + ldr $t1,[$inp],#4 +# else + ldrb $t1,[$inp,#3] +# endif + eor $t3,$B,$C @ magic + eor $t2,$t2,$t2 +___ +for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); } +$code.=".Lrounds_16_xx:\n"; +for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; +#if __ARM_ARCH__>=7 + ite eq @ Thumb2 thing, sanity check in ARM +#endif + ldreq $t3,[sp,#16*4] @ pull ctx + bne .Lrounds_16_xx + + add $A,$A,$t2 @ h+=Maj(a,b,c) from the past + ldr $t0,[$t3,#0] + ldr $t1,[$t3,#4] + ldr $t2,[$t3,#8] + add $A,$A,$t0 + ldr $t0,[$t3,#12] + add $B,$B,$t1 + ldr $t1,[$t3,#16] + add $C,$C,$t2 + ldr $t2,[$t3,#20] + add $D,$D,$t0 + ldr $t0,[$t3,#24] + add $E,$E,$t1 + ldr $t1,[$t3,#28] + add $F,$F,$t2 + ldr $inp,[sp,#17*4] @ pull inp + ldr $t2,[sp,#18*4] @ pull inp+len + add $G,$G,$t0 + add $H,$H,$t1 + stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H} + cmp $inp,$t2 + sub $Ktbl,$Ktbl,#256 @ rewind Ktbl + bne .Loop + + add sp,sp,#`16+3`*4 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + bx lr @ interoperable with Thumb ISA:-) +#endif +.size sha256_block_data_order,.-sha256_block_data_order +___ +###################################################################### +# NEON stuff +# +{{{ +my @X=map("q$_",(0..3)); +my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25"); +my $Xfer=$t4; +my $j=0; + +sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } +sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } + +sub AUTOLOAD() # thunk [simplified] x86-style perlasm +{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./; + my $arg = pop; + $arg = "#$arg" if ($arg*1 eq $arg); + $code .= "\t$opcode\t".join(',',@_,$arg)."\n"; +} + +sub Xupdate() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e,$f,$g,$h); + + &vext_8 ($T0,@X[0],@X[1],4); # X[1..4] + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vext_8 ($T1,@X[2],@X[3],4); # X[9..12] + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T2,$T0,$sigma0[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12] + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T1,$T0,$sigma0[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T2,$T0,32-$sigma0[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T3,$T0,$sigma0[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T1,$T1,$T2); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T3,$T0,32-$sigma0[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T1,$T1,$T3); # sigma0(X[1..4]) + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4]) + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); # sigma1(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vld1_32 ("{$T0}","[$Ktbl,:128]!"); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); # sigma1(X[16..17]) + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17]) + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 ($T0,$T0,@X[0]); + while($#insns>=2) { eval(shift(@insns)); } + &vst1_32 ("{$T0}","[$Xfer,:128]!"); + eval(shift(@insns)); + eval(shift(@insns)); + + push(@X,shift(@X)); # "rotate" X[] +} + +sub Xpreload() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e,$f,$g,$h); + + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vld1_32 ("{$T0}","[$Ktbl,:128]!"); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vrev32_8 (@X[0],@X[0]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 ($T0,$T0,@X[0]); + foreach (@insns) { eval; } # remaining instructions + &vst1_32 ("{$T0}","[$Xfer,:128]!"); + + push(@X,shift(@X)); # "rotate" X[] +} + +sub body_00_15 () { + ( + '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'. + '&add ($h,$h,$t1)', # h+=X[i]+K[i] + '&eor ($t1,$f,$g)', + '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))', + '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past + '&and ($t1,$t1,$e)', + '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e) + '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))', + '&eor ($t1,$t1,$g)', # Ch(e,f,g) + '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e) + '&eor ($t2,$a,$b)', # a^b, b^c in next round + '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a) + '&add ($h,$h,$t1)', # h+=Ch(e,f,g) + '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'. + '&ldr ($t1,"[$Ktbl]") if ($j==15);'. + '&ldr ($t1,"[sp,#64]") if ($j==31)', + '&and ($t3,$t3,$t2)', # (b^c)&=(a^b) + '&add ($d,$d,$h)', # d+=h + '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a) + '&eor ($t3,$t3,$b)', # Maj(a,b,c) + '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);' + ) +} + +$code.=<<___; +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.global sha256_block_data_order_neon +.type sha256_block_data_order_neon,%function +.align 4 +sha256_block_data_order_neon: +.LNEON: + stmdb sp!,{r4-r12,lr} + + sub $H,sp,#16*4+16 + adrl $Ktbl,K256 + bic $H,$H,#15 @ align for 128-bit stores + mov $t2,sp + mov sp,$H @ alloca + add $len,$inp,$len,lsl#6 @ len to point at the end of inp + + vld1.8 {@X[0]},[$inp]! + vld1.8 {@X[1]},[$inp]! + vld1.8 {@X[2]},[$inp]! + vld1.8 {@X[3]},[$inp]! + vld1.32 {$T0},[$Ktbl,:128]! + vld1.32 {$T1},[$Ktbl,:128]! + vld1.32 {$T2},[$Ktbl,:128]! + vld1.32 {$T3},[$Ktbl,:128]! + vrev32.8 @X[0],@X[0] @ yes, even on + str $ctx,[sp,#64] + vrev32.8 @X[1],@X[1] @ big-endian + str $inp,[sp,#68] + mov $Xfer,sp + vrev32.8 @X[2],@X[2] + str $len,[sp,#72] + vrev32.8 @X[3],@X[3] + str $t2,[sp,#76] @ save original sp + vadd.i32 $T0,$T0,@X[0] + vadd.i32 $T1,$T1,@X[1] + vst1.32 {$T0},[$Xfer,:128]! + vadd.i32 $T2,$T2,@X[2] + vst1.32 {$T1},[$Xfer,:128]! + vadd.i32 $T3,$T3,@X[3] + vst1.32 {$T2},[$Xfer,:128]! + vst1.32 {$T3},[$Xfer,:128]! + + ldmia $ctx,{$A-$H} + sub $Xfer,$Xfer,#64 + ldr $t1,[sp,#0] + eor $t2,$t2,$t2 + eor $t3,$B,$C + b .L_00_48 + +.align 4 +.L_00_48: +___ + &Xupdate(\&body_00_15); + &Xupdate(\&body_00_15); + &Xupdate(\&body_00_15); + &Xupdate(\&body_00_15); +$code.=<<___; + teq $t1,#0 @ check for K256 terminator + ldr $t1,[sp,#0] + sub $Xfer,$Xfer,#64 + bne .L_00_48 + + ldr $inp,[sp,#68] + ldr $t0,[sp,#72] + sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl + teq $inp,$t0 + it eq + subeq $inp,$inp,#64 @ avoid SEGV + vld1.8 {@X[0]},[$inp]! @ load next input block + vld1.8 {@X[1]},[$inp]! + vld1.8 {@X[2]},[$inp]! + vld1.8 {@X[3]},[$inp]! + it ne + strne $inp,[sp,#68] + mov $Xfer,sp +___ + &Xpreload(\&body_00_15); + &Xpreload(\&body_00_15); + &Xpreload(\&body_00_15); + &Xpreload(\&body_00_15); +$code.=<<___; + ldr $t0,[$t1,#0] + add $A,$A,$t2 @ h+=Maj(a,b,c) from the past + ldr $t2,[$t1,#4] + ldr $t3,[$t1,#8] + ldr $t4,[$t1,#12] + add $A,$A,$t0 @ accumulate + ldr $t0,[$t1,#16] + add $B,$B,$t2 + ldr $t2,[$t1,#20] + add $C,$C,$t3 + ldr $t3,[$t1,#24] + add $D,$D,$t4 + ldr $t4,[$t1,#28] + add $E,$E,$t0 + str $A,[$t1],#4 + add $F,$F,$t2 + str $B,[$t1],#4 + add $G,$G,$t3 + str $C,[$t1],#4 + add $H,$H,$t4 + str $D,[$t1],#4 + stmia $t1,{$E-$H} + + ittte ne + movne $Xfer,sp + ldrne $t1,[sp,#0] + eorne $t2,$t2,$t2 + ldreq sp,[sp,#76] @ restore original sp + itt ne + eorne $t3,$B,$C + bne .L_00_48 + + ldmia sp!,{r4-r12,pc} +.size sha256_block_data_order_neon,.-sha256_block_data_order_neon +#endif +___ +}}} +###################################################################### +# ARMv8 stuff +# +{{{ +my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2)); +my @MSG=map("q$_",(8..11)); +my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15)); +my $Ktbl="r3"; + +$code.=<<___; +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + +# ifdef __thumb2__ +# define INST(a,b,c,d) .byte c,d|0xc,a,b +# else +# define INST(a,b,c,d) .byte a,b,c,d +# endif + +.type sha256_block_data_order_armv8,%function +.align 5 +sha256_block_data_order_armv8: +.LARMv8: + vld1.32 {$ABCD,$EFGH},[$ctx] +# ifdef __thumb2__ + adr $Ktbl,.LARMv8 + sub $Ktbl,$Ktbl,#.LARMv8-K256 +# else + adrl $Ktbl,K256 +# endif + add $len,$inp,$len,lsl#6 @ len to point at the end of inp + +.Loop_v8: + vld1.8 {@MSG[0]-@MSG[1]},[$inp]! + vld1.8 {@MSG[2]-@MSG[3]},[$inp]! + vld1.32 {$W0},[$Ktbl]! + vrev32.8 @MSG[0],@MSG[0] + vrev32.8 @MSG[1],@MSG[1] + vrev32.8 @MSG[2],@MSG[2] + vrev32.8 @MSG[3],@MSG[3] + vmov $ABCD_SAVE,$ABCD @ offload + vmov $EFGH_SAVE,$EFGH + teq $inp,$len +___ +for($i=0;$i<12;$i++) { +$code.=<<___; + vld1.32 {$W1},[$Ktbl]! + vadd.i32 $W0,$W0,@MSG[0] + sha256su0 @MSG[0],@MSG[1] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + sha256su1 @MSG[0],@MSG[2],@MSG[3] +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); +} +$code.=<<___; + vld1.32 {$W1},[$Ktbl]! + vadd.i32 $W0,$W0,@MSG[0] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + + vld1.32 {$W0},[$Ktbl]! + vadd.i32 $W1,$W1,@MSG[1] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W1 + sha256h2 $EFGH,$abcd,$W1 + + vld1.32 {$W1},[$Ktbl] + vadd.i32 $W0,$W0,@MSG[2] + sub $Ktbl,$Ktbl,#256-16 @ rewind + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + + vadd.i32 $W1,$W1,@MSG[3] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W1 + sha256h2 $EFGH,$abcd,$W1 + + vadd.i32 $ABCD,$ABCD,$ABCD_SAVE + vadd.i32 $EFGH,$EFGH,$EFGH_SAVE + it ne + bne .Loop_v8 + + vst1.32 {$ABCD,$EFGH},[$ctx] + + ret @ bx lr +.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 +#endif +___ +}}} +$code.=<<___; +.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>" +.align 2 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.comm OPENSSL_armcap_P,4,4 +#endif +___ + +open SELF,$0; +while(<SELF>) { + next if (/^#!/); + last if (!s/^#/@/ and !/^$/); + print; +} +close SELF; + +{ my %opcode = ( + "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40, + "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 ); + + sub unsha256 { + my ($mnemonic,$arg)=@_; + + if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) { + my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19) + |(($2&7)<<17)|(($2&8)<<4) + |(($3&7)<<1) |(($3&8)<<2); + # since ARMv7 instructions are always encoded little-endian. + # correct solution is to use .inst directive, but older + # assemblers don't implement it:-( + sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s", + $word&0xff,($word>>8)&0xff, + ($word>>16)&0xff,($word>>24)&0xff, + $mnemonic,$arg; + } + } +} + +foreach (split($/,$code)) { + + s/\`([^\`]*)\`/eval $1/geo; + + s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo; + + s/\bret\b/bx lr/go or + s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 + + print $_,"\n"; +} + +close STDOUT; # enforce flush diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped new file mode 100644 index 0000000000..15e0f4ad1e --- /dev/null +++ b/arch/arm/crypto/sha256-core.S_shipped @@ -0,0 +1,2779 @@ + +@ ==================================================================== +@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ +@ Permission to use under GPL terms is granted. +@ ==================================================================== + +@ SHA256 block procedure for ARMv4. May 2007. + +@ Performance is ~2x better than gcc 3.4 generated code and in "abso- +@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per +@ byte [on single-issue Xscale PXA250 core]. + +@ July 2010. +@ +@ Rescheduling for dual-issue pipeline resulted in 22% improvement on +@ Cortex A8 core and ~20 cycles per processed byte. + +@ February 2011. +@ +@ Profiler-assisted and platform-specific optimization resulted in 16% +@ improvement on Cortex A8 core and ~15.4 cycles per processed byte. + +@ September 2013. +@ +@ Add NEON implementation. On Cortex A8 it was measured to process one +@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon +@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only +@ code (meaning that latter performs sub-optimally, nothing was done +@ about it). + +@ May 2014. +@ +@ Add ARMv8 code path performing at 2.0 cpb on Apple A7. + +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 +#endif + +.text +#if __ARM_ARCH__<7 +.code 32 +#else +.syntax unified +# ifdef __thumb2__ +.thumb +# else +.code 32 +# endif +#endif + +#ifdef __thumb__ +#define adrl adr +#endif + +.type K256,%object +.align 5 +K256: +.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.size K256,.-K256 +.word 0 @ terminator +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha256_block_data_order +#endif +.align 5 + +.global sha256_block_data_order +.type sha256_block_data_order,%function +sha256_block_data_order: +#if __ARM_ARCH__<7 + sub r3,pc,#8 @ sha256_block_data_order +#else + adr r3,sha256_block_data_order +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#ARMV8_SHA256 + bne .LARMv8 + tst r12,#ARMV7_NEON + bne .LNEON +#endif + add r2,r1,r2,lsl#6 @ len to point at the end of inp + stmdb sp!,{r0,r1,r2,r4-r11,lr} + ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} + sub r14,r3,#256+32 @ K256 + sub sp,sp,#16*4 @ alloca(X[16]) +.Loop: +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ magic + eor r12,r12,r12 +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 0 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r8,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 0 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 0==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 0<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 1 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 1 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 1==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 1<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 2 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 2 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 2==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 2<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 3 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 3 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 3==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 3<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 4 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 4 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 4==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 4<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 5 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 5==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 5<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 6 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 6 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 6==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 6<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 7 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 7==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 7<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 8 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r8,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 8 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 8==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 8<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 9 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 9 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 9==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 9<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 10 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 10 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 10==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 10<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 11 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 11 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 11==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 11<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 12 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 12 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 12==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 12<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 13 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 13 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 13==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 13<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 14 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 14 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 14==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 14<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 15 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 15 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 15==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 15<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +.Lrounds_16_xx: + @ ldr r2,[sp,#1*4] @ 16 + @ ldr r1,[sp,#14*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#0*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#9*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 16==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 16<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#2*4] @ 17 + @ ldr r1,[sp,#15*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#1*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#10*4] + + add r3,r3,r0 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 17==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 17<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#3*4] @ 18 + @ ldr r1,[sp,#0*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#2*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#11*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 18==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 18<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#4*4] @ 19 + @ ldr r1,[sp,#1*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#3*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#12*4] + + add r3,r3,r0 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 19==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 19<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#5*4] @ 20 + @ ldr r1,[sp,#2*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#4*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#13*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 20==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 20<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#6*4] @ 21 + @ ldr r1,[sp,#3*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#5*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#14*4] + + add r3,r3,r0 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 21==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 21<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#7*4] @ 22 + @ ldr r1,[sp,#4*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#6*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#15*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 22==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 22<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#8*4] @ 23 + @ ldr r1,[sp,#5*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#7*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#0*4] + + add r3,r3,r0 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 23==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 23<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#9*4] @ 24 + @ ldr r1,[sp,#6*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#8*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#1*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 24==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 24<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#10*4] @ 25 + @ ldr r1,[sp,#7*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#9*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#2*4] + + add r3,r3,r0 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 25==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 25<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#11*4] @ 26 + @ ldr r1,[sp,#8*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#10*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#3*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 26==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 26<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#12*4] @ 27 + @ ldr r1,[sp,#9*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#11*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#4*4] + + add r3,r3,r0 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 27==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 27<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#13*4] @ 28 + @ ldr r1,[sp,#10*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#12*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#5*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 28==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 28<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#14*4] @ 29 + @ ldr r1,[sp,#11*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#13*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#6*4] + + add r3,r3,r0 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 29==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 29<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#15*4] @ 30 + @ ldr r1,[sp,#12*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#14*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#7*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 30==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 30<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#0*4] @ 31 + @ ldr r1,[sp,#13*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#15*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#8*4] + + add r3,r3,r0 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 31==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 31<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + ite eq @ Thumb2 thing, sanity check in ARM +#endif + ldreq r3,[sp,#16*4] @ pull ctx + bne .Lrounds_16_xx + + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldr r0,[r3,#0] + ldr r2,[r3,#4] + ldr r12,[r3,#8] + add r4,r4,r0 + ldr r0,[r3,#12] + add r5,r5,r2 + ldr r2,[r3,#16] + add r6,r6,r12 + ldr r12,[r3,#20] + add r7,r7,r0 + ldr r0,[r3,#24] + add r8,r8,r2 + ldr r2,[r3,#28] + add r9,r9,r12 + ldr r1,[sp,#17*4] @ pull inp + ldr r12,[sp,#18*4] @ pull inp+len + add r10,r10,r0 + add r11,r11,r2 + stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} + cmp r1,r12 + sub r14,r14,#256 @ rewind Ktbl + bne .Loop + + add sp,sp,#19*4 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size sha256_block_data_order,.-sha256_block_data_order +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.global sha256_block_data_order_neon +.type sha256_block_data_order_neon,%function +.align 4 +sha256_block_data_order_neon: +.LNEON: + stmdb sp!,{r4-r12,lr} + + sub r11,sp,#16*4+16 + adrl r14,K256 + bic r11,r11,#15 @ align for 128-bit stores + mov r12,sp + mov sp,r11 @ alloca + add r2,r1,r2,lsl#6 @ len to point at the end of inp + + vld1.8 {q0},[r1]! + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + vld1.32 {q8},[r14,:128]! + vld1.32 {q9},[r14,:128]! + vld1.32 {q10},[r14,:128]! + vld1.32 {q11},[r14,:128]! + vrev32.8 q0,q0 @ yes, even on + str r0,[sp,#64] + vrev32.8 q1,q1 @ big-endian + str r1,[sp,#68] + mov r1,sp + vrev32.8 q2,q2 + str r2,[sp,#72] + vrev32.8 q3,q3 + str r12,[sp,#76] @ save original sp + vadd.i32 q8,q8,q0 + vadd.i32 q9,q9,q1 + vst1.32 {q8},[r1,:128]! + vadd.i32 q10,q10,q2 + vst1.32 {q9},[r1,:128]! + vadd.i32 q11,q11,q3 + vst1.32 {q10},[r1,:128]! + vst1.32 {q11},[r1,:128]! + + ldmia r0,{r4-r11} + sub r1,r1,#64 + ldr r2,[sp,#0] + eor r12,r12,r12 + eor r3,r5,r6 + b .L_00_48 + +.align 4 +.L_00_48: + vext.8 q8,q0,q1,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q2,q3,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q0,q0,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#4] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d7,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d7,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d7,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q0,q0,q9 + add r10,r10,r2 + ldr r2,[sp,#8] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d7,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d7,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d0,d0,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d0,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d0,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d0,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#12] + and r3,r3,r12 + vshr.u32 d24,d0,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! + add r8,r8,r2 + vsli.32 d24,d0,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d1,d1,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q0 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q1,q2,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q3,q0,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q1,q1,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#20] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d1,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d1,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d1,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q1,q1,q9 + add r6,r6,r2 + ldr r2,[sp,#24] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d1,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d1,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d2,d2,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d2,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d2,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d2,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#28] + and r3,r3,r12 + vshr.u32 d24,d2,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! + add r4,r4,r2 + vsli.32 d24,d2,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d3,d3,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q1 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vext.8 q8,q2,q3,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q0,q1,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q2,q2,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#36] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d3,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d3,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d3,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q2,q2,q9 + add r10,r10,r2 + ldr r2,[sp,#40] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d3,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d3,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d4,d4,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d4,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d4,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d4,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#44] + and r3,r3,r12 + vshr.u32 d24,d4,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! + add r8,r8,r2 + vsli.32 d24,d4,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d5,d5,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q2 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q3,q0,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q1,q2,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q3,q3,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#52] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d5,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d5,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d5,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q3,q3,q9 + add r6,r6,r2 + ldr r2,[sp,#56] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d5,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d5,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d6,d6,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d6,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d6,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d6,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#60] + and r3,r3,r12 + vshr.u32 d24,d6,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! + add r4,r4,r2 + vsli.32 d24,d6,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d7,d7,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q3 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[r14] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + teq r2,#0 @ check for K256 terminator + ldr r2,[sp,#0] + sub r1,r1,#64 + bne .L_00_48 + + ldr r1,[sp,#68] + ldr r0,[sp,#72] + sub r14,r14,#256 @ rewind r14 + teq r1,r0 + it eq + subeq r1,r1,#64 @ avoid SEGV + vld1.8 {q0},[r1]! @ load next input block + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + it ne + strne r1,[sp,#68] + mov r1,sp + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q0,q0 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q0 + ldr r2,[sp,#4] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#8] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#12] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q1,q1 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q1 + ldr r2,[sp,#20] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#24] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#28] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q2,q2 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q2 + ldr r2,[sp,#36] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#40] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#44] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q3,q3 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q3 + ldr r2,[sp,#52] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#56] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#60] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#64] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! + ldr r0,[r2,#0] + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldr r12,[r2,#4] + ldr r3,[r2,#8] + ldr r1,[r2,#12] + add r4,r4,r0 @ accumulate + ldr r0,[r2,#16] + add r5,r5,r12 + ldr r12,[r2,#20] + add r6,r6,r3 + ldr r3,[r2,#24] + add r7,r7,r1 + ldr r1,[r2,#28] + add r8,r8,r0 + str r4,[r2],#4 + add r9,r9,r12 + str r5,[r2],#4 + add r10,r10,r3 + str r6,[r2],#4 + add r11,r11,r1 + str r7,[r2],#4 + stmia r2,{r8-r11} + + ittte ne + movne r1,sp + ldrne r2,[sp,#0] + eorne r12,r12,r12 + ldreq sp,[sp,#76] @ restore original sp + itt ne + eorne r3,r5,r6 + bne .L_00_48 + + ldmia sp!,{r4-r12,pc} +.size sha256_block_data_order_neon,.-sha256_block_data_order_neon +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + +# ifdef __thumb2__ +# define INST(a,b,c,d) .byte c,d|0xc,a,b +# else +# define INST(a,b,c,d) .byte a,b,c,d +# endif + +.type sha256_block_data_order_armv8,%function +.align 5 +sha256_block_data_order_armv8: +.LARMv8: + vld1.32 {q0,q1},[r0] +# ifdef __thumb2__ + adr r3,.LARMv8 + sub r3,r3,#.LARMv8-K256 +# else + adrl r3,K256 +# endif + add r2,r1,r2,lsl#6 @ len to point at the end of inp + +.Loop_v8: + vld1.8 {q8-q9},[r1]! + vld1.8 {q10-q11},[r1]! + vld1.32 {q12},[r3]! + vrev32.8 q8,q8 + vrev32.8 q9,q9 + vrev32.8 q10,q10 + vrev32.8 q11,q11 + vmov q14,q0 @ offload + vmov q15,q1 + teq r1,r2 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + + vld1.32 {q13},[r3] + vadd.i32 q12,q12,q10 + sub r3,r3,#256-16 @ rewind + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + + vadd.i32 q13,q13,q11 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + + vadd.i32 q0,q0,q14 + vadd.i32 q1,q1,q15 + it ne + bne .Loop_v8 + + vst1.32 {q0,q1},[r0] + + bx lr @ bx lr +.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 +#endif +.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>" +.align 2 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.comm OPENSSL_armcap_P,4,4 +#endif diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c new file mode 100644 index 0000000000..f8086f6ac7 --- /dev/null +++ b/arch/arm/crypto/sha256_glue.c @@ -0,0 +1,213 @@ +/* + * Glue code for the SHA256 Secure Hash Algorithm assembly implementation + * using optimized ARM assembler and NEON instructions. + * + * Copyright © 2015 Google Inc. + * + * This file is based on sha256_ssse3_glue.c: + * Copyright (C) 2013 Intel Corporation + * Author: Tim Chen <tim.c.chen@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <common.h> +#include <digest.h> +#include <init.h> +#include <crypto/sha.h> +#include <crypto/internal.h> +#include <asm/byteorder.h> + +void sha256_block_data_order(u32 *digest, const void *data, + unsigned int num_blks); + + +int sha256_init(struct digest *desc) +{ + struct sha256_state *sctx = digest_ctx(desc); + + sctx->state[0] = SHA256_H0; + sctx->state[1] = SHA256_H1; + sctx->state[2] = SHA256_H2; + sctx->state[3] = SHA256_H3; + sctx->state[4] = SHA256_H4; + sctx->state[5] = SHA256_H5; + sctx->state[6] = SHA256_H6; + sctx->state[7] = SHA256_H7; + sctx->count = 0; + + return 0; +} + +int sha224_init(struct digest *desc) +{ + struct sha256_state *sctx = digest_ctx(desc); + + sctx->state[0] = SHA224_H0; + sctx->state[1] = SHA224_H1; + sctx->state[2] = SHA224_H2; + sctx->state[3] = SHA224_H3; + sctx->state[4] = SHA224_H4; + sctx->state[5] = SHA224_H5; + sctx->state[6] = SHA224_H6; + sctx->state[7] = SHA224_H7; + sctx->count = 0; + + return 0; +} + +int __sha256_update(struct digest *desc, const u8 *data, unsigned int len, + unsigned int partial) +{ + struct sha256_state *sctx = digest_ctx(desc); + unsigned int done = 0; + + sctx->count += len; + + if (partial) { + done = SHA256_BLOCK_SIZE - partial; + memcpy(sctx->buf + partial, data, done); + sha256_block_data_order(sctx->state, sctx->buf, 1); + } + + if (len - done >= SHA256_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; + + sha256_block_data_order(sctx->state, data + done, rounds); + done += rounds * SHA256_BLOCK_SIZE; + } + + memcpy(sctx->buf, data + done, len - done); + + return 0; +} + +int sha256_update(struct digest *desc, const void *data, + unsigned long len) +{ + struct sha256_state *sctx = digest_ctx(desc); + unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; + + /* Handle the fast case right here */ + if (partial + len < SHA256_BLOCK_SIZE) { + sctx->count += len; + memcpy(sctx->buf + partial, data, len); + + return 0; + } + + return __sha256_update(desc, data, len, partial); +} + +/* Add padding and return the message digest. */ +static int sha256_final(struct digest *desc, u8 *out) +{ + struct sha256_state *sctx = digest_ctx(desc); + unsigned int i, index, padlen; + __be32 *dst = (__be32 *)out; + __be64 bits; + static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; + + /* save number of bits */ + bits = cpu_to_be64(sctx->count << 3); + + /* Pad out to 56 mod 64 and append length */ + index = sctx->count % SHA256_BLOCK_SIZE; + padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index); + + /* We need to fill a whole block for __sha256_update */ + if (padlen <= 56) { + sctx->count += padlen; + memcpy(sctx->buf + index, padding, padlen); + } else { + __sha256_update(desc, padding, padlen, index); + } + __sha256_update(desc, (const u8 *)&bits, sizeof(bits), 56); + + /* Store state in digest */ + for (i = 0; i < 8; i++) + dst[i] = cpu_to_be32(sctx->state[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int sha224_final(struct digest *desc, u8 *out) +{ + u8 D[SHA256_DIGEST_SIZE]; + + sha256_final(desc, D); + + memcpy(out, D, SHA224_DIGEST_SIZE); + memset(D, 0, SHA256_DIGEST_SIZE); + + return 0; +} + +int sha256_export(struct digest *desc, void *out) +{ + struct sha256_state *sctx = digest_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +int sha256_import(struct digest *desc, const void *in) +{ + struct sha256_state *sctx = digest_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + +static struct digest_algo sha224 = { + .base = { + .name = "sha224", + .driver_name = "sha224-asm", + .priority = 150, + }, + + .length = SHA224_DIGEST_SIZE, + .init = sha224_init, + .update = sha256_update, + .final = sha224_final, + .digest = digest_generic_digest, + .verify = digest_generic_verify, + .ctx_length = sizeof(struct sha256_state), +}; + +static int sha224_digest_register(void) +{ + return digest_algo_register(&sha224); +} +device_initcall(sha224_digest_register); + +static struct digest_algo sha256 = { + .base = { + .name = "sha256", + .driver_name = "sha256-asm", + .priority = 150, + }, + + .length = SHA256_DIGEST_SIZE, + .init = sha256_init, + .update = sha256_update, + .final = sha256_final, + .digest = digest_generic_digest, + .verify = digest_generic_verify, + .ctx_length = sizeof(struct sha256_state), +}; + +static int sha256_digest_register(void) +{ + return digest_algo_register(&sha256); +} +device_initcall(sha256_digest_register); diff --git a/arch/arm/crypto/sha256_glue.h b/arch/arm/crypto/sha256_glue.h new file mode 100644 index 0000000000..0312f4ffe8 --- /dev/null +++ b/arch/arm/crypto/sha256_glue.h @@ -0,0 +1,23 @@ +#ifndef _CRYPTO_SHA256_GLUE_H +#define _CRYPTO_SHA256_GLUE_H + +#include <linux/crypto.h> +#include <crypto/sha.h> + +extern struct shash_alg sha256_neon_algs[2]; + +extern int sha256_init(struct shash_desc *desc); + +extern int sha224_init(struct shash_desc *desc); + +extern int __sha256_update(struct shash_desc *desc, const u8 *data, + unsigned int len, unsigned int partial); + +extern int sha256_update(struct shash_desc *desc, const u8 *data, + unsigned int len); + +extern int sha256_export(struct shash_desc *desc, void *out); + +extern int sha256_import(struct shash_desc *desc, const void *in); + +#endif /* _CRYPTO_SHA256_GLUE_H */ diff --git a/arch/mips/configs/img-ci20_defconfig b/arch/mips/configs/img-ci20_defconfig index 56235c4cf3..6702c88b65 100644 --- a/arch/mips/configs/img-ci20_defconfig +++ b/arch/mips/configs/img-ci20_defconfig @@ -37,6 +37,6 @@ CONFIG_OFDEVICE=y # CONFIG_SPI is not set CONFIG_CLOCKSOURCE_DUMMY=y CONFIG_CLOCKSOURCE_DUMMY_RATE=3500 -CONFIG_SHA1=y -CONFIG_SHA224=y -CONFIG_SHA256=y +CONFIG_DIGEST_SHA1_GENERIC=y +CONFIG_DIGEST_SHA224_GENERIC=y +CONFIG_DIGEST_SHA256_GENERIC=y diff --git a/arch/mips/configs/ritmix-rzx50_defconfig b/arch/mips/configs/ritmix-rzx50_defconfig index 0814883e89..eedb329ee5 100644 --- a/arch/mips/configs/ritmix-rzx50_defconfig +++ b/arch/mips/configs/ritmix-rzx50_defconfig @@ -46,6 +46,6 @@ CONFIG_LED_TRIGGERS=y CONFIG_GPIO_JZ4740=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_JZ4740=y -CONFIG_SHA1=y -CONFIG_SHA224=y -CONFIG_SHA256=y +CONFIG_DIGEST_SHA1_GENERIC=y +CONFIG_DIGEST_SHA224_GENERIC=y +CONFIG_DIGEST_SHA256_GENERIC=y diff --git a/arch/mips/configs/tplink-mr3020_defconfig b/arch/mips/configs/tplink-mr3020_defconfig index d249919bed..9f81ce39dc 100644 --- a/arch/mips/configs/tplink-mr3020_defconfig +++ b/arch/mips/configs/tplink-mr3020_defconfig @@ -30,5 +30,5 @@ CONFIG_MTD=y # CONFIG_MTD_OOB_DEVICE is not set CONFIG_MTD_M25P80=y CONFIG_MD5=y -CONFIG_SHA224=y -CONFIG_SHA256=y +CONFIG_DIGEST_SHA224_GENERIC=y +CONFIG_DIGEST_SHA256_GENERIC=y diff --git a/commands/Kconfig b/commands/Kconfig index e4f68e7bda..2618eda2f6 100644 --- a/commands/Kconfig +++ b/commands/Kconfig @@ -12,9 +12,9 @@ config HAS_POWEROFF if COMMAND_SUPPORT -config COMPILE_DIGEST +config COMPILE_HASH tristate - select DIGEST + select CMD_DIGEST help Turns on compilation of digest.c @@ -842,6 +842,16 @@ config CMD_CMP Returns successfully if the two files are the same, return with an error if not +config CMD_DIGEST + tristate + select DIGEST + prompt "digest" + help + Usage: digest -a <algo> [-k <key> | -K <file>] [-s <sig> | -S <file>] FILE|AREA + + Calculate a digest over a FILE or a memory area with the possibility + to checkit. + config CMD_DIRNAME tristate prompt "dirname" @@ -917,7 +927,7 @@ config CMD_LS config CMD_MD5SUM tristate - select COMPILE_DIGEST + select COMPILE_HASH select MD5 prompt "md5sum" help @@ -982,7 +992,7 @@ config CMD_RMDIR config CMD_SHA1SUM tristate - select COMPILE_DIGEST + select COMPILE_HASH select SHA1 prompt "sha1sum" help @@ -994,7 +1004,7 @@ config CMD_SHA1SUM config CMD_SHA224SUM tristate - select COMPILE_DIGEST + select COMPILE_HASH select SHA224 prompt "sha224sum" help @@ -1006,7 +1016,7 @@ config CMD_SHA224SUM config CMD_SHA256SUM tristate - select COMPILE_DIGEST + select COMPILE_HASH select SHA256 prompt "sha256sum" help @@ -1016,6 +1026,30 @@ config CMD_SHA256SUM Calculate a SHA256 digest over a FILE or a memory area. +config CMD_SHA384SUM + tristate + select COMPILE_HASH + select SHA384 + prompt "sha384sum" + help + Calculate SHA384 digest + + Usage: sha384sum FILE|AREA + + Calculate a SHA384 digest over a FILE or a memory area. + +config CMD_SHA512SUM + tristate + select COMPILE_HASH + select SHA512 + prompt "sha512sum" + help + sha512sum - calculate SHA512 digest + + Usage: sha512sum FILE|AREA + + Calculate a SHA512 digest over a FILE or a memory area. + config CMD_UNCOMPRESS bool select UNCOMPRESS diff --git a/commands/Makefile b/commands/Makefile index 99a65d4609..d69e3f0b06 100644 --- a/commands/Makefile +++ b/commands/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_STDDEV) += stddev.o -obj-$(CONFIG_COMPILE_DIGEST) += digest.o +obj-$(CONFIG_CMD_DIGEST) += digest.o +obj-$(CONFIG_COMPILE_HASH) += hashsum.o obj-$(CONFIG_COMPILE_MEMORY) += mem.o obj-$(CONFIG_CMD_BOOTM) += bootm.o obj-$(CONFIG_CMD_UIMAGE) += uimage.o diff --git a/commands/digest.c b/commands/digest.c index 092fda21af..340c07a248 100644 --- a/commands/digest.c +++ b/commands/digest.c @@ -1,20 +1,7 @@ /* - * digest.c - Calculate a md5/sha1/sha256 checksum of a memory area - * - * Copyright (c) 2011 Peter Korsgaard <jacmet@sunsite.dk> - * - * See file CREDITS for list of people who contributed to this - * project. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Copyright (c) 2015 Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com> * + * GPLv2 ONLY */ #include <common.h> @@ -25,27 +12,27 @@ #include <xfuncs.h> #include <malloc.h> #include <digest.h> +#include <getopt.h> +#include <libfile.h> + +#include "internal.h" -static int do_digest(char *algorithm, int argc, char *argv[]) +int __do_digest(struct digest *d, unsigned char *sig, + int argc, char *argv[]) { - struct digest *d; - int ret = 0; + int ret = COMMAND_ERROR_USAGE; int i; unsigned char *hash; - d = digest_get_by_name(algorithm); - BUG_ON(!d); + if (argc < 1) + goto err; - if (argc < 2) - return COMMAND_ERROR_USAGE; - - hash = calloc(d->length, sizeof(unsigned char)); + hash = calloc(digest_length(d), sizeof(unsigned char)); if (!hash) { perror("calloc"); - return COMMAND_ERROR_USAGE; + goto err; } - argv++; while (*argv) { char *filename = "/dev/mem"; loff_t start = 0, size = ~0; @@ -57,104 +44,157 @@ static int do_digest(char *algorithm, int argc, char *argv[]) argv++; } - if (digest_file_window(d, filename, hash, start, size) < 0) { + ret = digest_file_window(d, filename, + hash, sig, start, size); + if (ret < 0) { ret = 1; } else { - for (i = 0; i < d->length; i++) - printf("%02x", hash[i]); + if (!sig) { + for (i = 0; i < digest_length(d); i++) + printf("%02x", hash[i]); - printf(" %s\t0x%08llx ... 0x%08llx\n", - filename, start, start + size); + printf(" %s\t0x%08llx ... 0x%08llx\n", + filename, start, start + size); + } } argv++; } free(hash); +err: + digest_free(d); return ret; } -#ifdef CONFIG_CMD_MD5SUM - -static int do_md5(int argc, char *argv[]) +static void prints_algo_help(void) { - return do_digest("md5", argc, argv); + puts("\navailable algo:\n"); + digest_algo_prints("\t"); } -BAREBOX_CMD_HELP_START(md5sum) -BAREBOX_CMD_HELP_TEXT("Calculate a MD5 digest over a FILE or a memory area.") -BAREBOX_CMD_HELP_END - -BAREBOX_CMD_START(md5sum) - .cmd = do_md5, - BAREBOX_CMD_DESC("calculate MD5 checksum") - BAREBOX_CMD_OPTS("FILE|AREA...") - BAREBOX_CMD_GROUP(CMD_GRP_FILE) - BAREBOX_CMD_HELP(cmd_md5sum_help) -BAREBOX_CMD_END - -#endif /* CMD_CMD_MD5SUM */ - -#ifdef CONFIG_CMD_SHA1SUM - -static int do_sha1(int argc, char *argv[]) +static int do_digest(int argc, char *argv[]) { - return do_digest("sha1", argc, argv); -} + struct digest *d; + unsigned char *tmp_key = NULL; + unsigned char *tmp_sig = NULL; + char *sig = NULL; + char *sigfile = NULL; + size_t siglen = 0; + char *key = NULL; + char *keyfile = NULL; + size_t keylen = 0; + size_t digestlen = 0; + char *algo = NULL; + int opt; + int ret = COMMAND_ERROR; -BAREBOX_CMD_HELP_START(sha1sum) -BAREBOX_CMD_HELP_TEXT("Calculate a SHA1 digest over a FILE or a memory area.") -BAREBOX_CMD_HELP_END + if (argc < 2) + return COMMAND_ERROR_USAGE; -BAREBOX_CMD_START(sha1sum) - .cmd = do_sha1, - BAREBOX_CMD_DESC("calculate SHA1 digest") - BAREBOX_CMD_OPTS("FILE|AREA") - BAREBOX_CMD_GROUP(CMD_GRP_FILE) - BAREBOX_CMD_HELP(cmd_sha1sum_help) -BAREBOX_CMD_END + while((opt = getopt(argc, argv, "a:k:K:s:S:")) > 0) { + switch(opt) { + case 'k': + key = optarg; + keylen = strlen(key); + break; + case 'K': + keyfile = optarg; + break; + case 'a': + algo = optarg; + break; + case 's': + sig = optarg; + siglen = strlen(sig); + break; + case 'S': + sigfile = optarg; + break; + } + } -#endif /* CMD_CMD_SHA1SUM */ + if (!algo) + return COMMAND_ERROR_USAGE; -#ifdef CONFIG_CMD_SHA224SUM + d = digest_alloc(algo); + if (!d) { + eprintf("algo '%s' not found\n", algo); + return COMMAND_ERROR_USAGE; + } -static int do_sha224(int argc, char *argv[]) -{ - return do_digest("sha224", argc, argv); -} + argc -= optind; + argv += optind; -BAREBOX_CMD_HELP_START(sha224sum) -BAREBOX_CMD_HELP_TEXT("Calculate a SHA224 digest over a FILE or a memory area.") -BAREBOX_CMD_HELP_END + if (keyfile) { + tmp_key = key = read_file(keyfile, &keylen); + if (!key) { + eprintf("file '%s' not found\n", keyfile); + goto err; + } + } -BAREBOX_CMD_START(sha224sum) - .cmd = do_sha224, - BAREBOX_CMD_DESC("calculate SHA224 digest") - BAREBOX_CMD_OPTS("FILE|AREA") - BAREBOX_CMD_GROUP(CMD_GRP_FILE) - BAREBOX_CMD_HELP(cmd_sha224sum_help) -BAREBOX_CMD_END + if (key) { + ret = digest_set_key(d, key, keylen); + free(tmp_key); + if (ret) + goto err; + } else if (digest_is_flags(d, DIGEST_ALGO_NEED_KEY)) { + eprintf("%s need a key to be used\n", digest_name(d)); + goto err; + } -#endif /* CMD_CMD_SHA224SUM */ + if (sigfile) { + sig = tmp_sig = read_file(sigfile, &siglen); + if (!tmp_sig) { + eprintf("file '%s' not found\n", sigfile); + goto err; + } + } -#ifdef CONFIG_CMD_SHA256SUM + if (sig) { + digestlen = digest_length(d); + if (siglen == 2 * digestlen) { + if (!tmp_sig) + tmp_sig = xmalloc(digestlen); + + ret = hex2bin(tmp_sig, sig, digestlen); + if (ret) + goto err; + + sig = tmp_sig; + } else if (siglen != digestlen) { + eprintf("%s wrong size %zu, expected %zu\n", + sigfile, siglen, digestlen); + goto err; + } + } -static int do_sha256(int argc, char *argv[]) -{ - return do_digest("sha256", argc, argv); + ret = __do_digest(d, sig, argc, argv); + free(tmp_sig); + return ret; + +err: + digest_free(d); + return ret; } -BAREBOX_CMD_HELP_START(sha256sum) -BAREBOX_CMD_HELP_TEXT("Calculate a SHA256 digest over a FILE or a memory area.") +BAREBOX_CMD_HELP_START(digest) +BAREBOX_CMD_HELP_TEXT("Calculate a digest over a FILE or a memory area.") +BAREBOX_CMD_HELP_TEXT("Options:") +BAREBOX_CMD_HELP_OPT ("-a <algo>\t", "hash or signature algorithm to use") +BAREBOX_CMD_HELP_OPT ("-k <key>\t", "use supplied <key> (ASCII or hex) for MAC") +BAREBOX_CMD_HELP_OPT ("-K <file>\t", "use key from <file> (binary) for MAC") +BAREBOX_CMD_HELP_OPT ("-v <hex>\t", "verify data against supplied <hex> (hash, MAC or signature)") +BAREBOX_CMD_HELP_OPT ("-V <file>\t", "verify data against <file> (hash, MAC or signature)") BAREBOX_CMD_HELP_END -BAREBOX_CMD_START(sha256sum) - .cmd = do_sha256, - BAREBOX_CMD_DESC("calculate SHA256 digest") - BAREBOX_CMD_OPTS("FILE|AREA") +BAREBOX_CMD_START(digest) + .cmd = do_digest, + BAREBOX_CMD_DESC("calculate digest") + BAREBOX_CMD_OPTS("-a <algo> [-k <key> | -K <file>] [-s <sig> | -S <file>] FILE|AREA") BAREBOX_CMD_GROUP(CMD_GRP_FILE) - BAREBOX_CMD_HELP(cmd_sha256sum_help) + BAREBOX_CMD_HELP(cmd_digest_help) + BAREBOX_CMD_USAGE(prints_algo_help) BAREBOX_CMD_END - -#endif /* CMD_CMD_SHA256SUM */ diff --git a/commands/hashsum.c b/commands/hashsum.c new file mode 100644 index 0000000000..8d3694fa78 --- /dev/null +++ b/commands/hashsum.c @@ -0,0 +1,194 @@ +/* + * digest.c - Calculate a md5/sha1/sha256 checksum of a memory area + * + * Copyright (c) 2011 Peter Korsgaard <jacmet@sunsite.dk> + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <common.h> +#include <command.h> +#include <fs.h> +#include <fcntl.h> +#include <errno.h> +#include <xfuncs.h> +#include <malloc.h> +#include <digest.h> +#include <getopt.h> + +#include "internal.h" + +static int do_hash(char *algo, int argc, char *argv[]) +{ + struct digest *d; + unsigned char *key = NULL; + size_t keylen = 0; + int opt, ret; + + while ((opt = getopt(argc, argv, "h:")) > 0) { + switch(opt) { + case 'h': + key = optarg; + keylen = strlen(key); + break; + } + } + + if (key) { + char *tmp = asprintf("hmac(%s)", algo); + d = digest_alloc(tmp); + free(tmp); + BUG_ON(!d); + + ret = digest_set_key(d, key, keylen); + if (ret) { + perror("set_key"); + return ret; + } + } else { + d = digest_alloc(algo); + BUG_ON(!d); + } + + argc -= optind; + argv += optind; + + return __do_digest(d, NULL, argc, argv); +} + +#ifdef CONFIG_CMD_MD5SUM + +static int do_md5(int argc, char *argv[]) +{ + return do_hash("md5", argc, argv); +} + +BAREBOX_CMD_HELP_START(md5sum) +BAREBOX_CMD_HELP_TEXT("Calculate a MD5 digest over a FILE or a memory area.") +BAREBOX_CMD_HELP_END + +BAREBOX_CMD_START(md5sum) + .cmd = do_md5, + BAREBOX_CMD_DESC("calculate MD5 checksum") + BAREBOX_CMD_OPTS("FILE|AREA...") + BAREBOX_CMD_GROUP(CMD_GRP_FILE) + BAREBOX_CMD_HELP(cmd_md5sum_help) +BAREBOX_CMD_END + +#endif /* CMD_CMD_MD5SUM */ + +#ifdef CONFIG_CMD_SHA1SUM + +static int do_sha1(int argc, char *argv[]) +{ + return do_hash("sha1", argc, argv); +} + +BAREBOX_CMD_HELP_START(sha1sum) +BAREBOX_CMD_HELP_TEXT("Calculate a SHA1 digest over a FILE or a memory area.") +BAREBOX_CMD_HELP_END + +BAREBOX_CMD_START(sha1sum) + .cmd = do_sha1, + BAREBOX_CMD_DESC("calculate SHA1 digest") + BAREBOX_CMD_OPTS("FILE|AREA") + BAREBOX_CMD_GROUP(CMD_GRP_FILE) + BAREBOX_CMD_HELP(cmd_sha1sum_help) +BAREBOX_CMD_END + +#endif /* CMD_CMD_SHA1SUM */ + +#ifdef CONFIG_CMD_SHA224SUM + +static int do_sha224(int argc, char *argv[]) +{ + return do_hash("sha224", argc, argv); +} + +BAREBOX_CMD_HELP_START(sha224sum) +BAREBOX_CMD_HELP_TEXT("Calculate a SHA224 digest over a FILE or a memory area.") +BAREBOX_CMD_HELP_END + +BAREBOX_CMD_START(sha224sum) + .cmd = do_sha224, + BAREBOX_CMD_DESC("calculate SHA224 digest") + BAREBOX_CMD_OPTS("FILE|AREA") + BAREBOX_CMD_GROUP(CMD_GRP_FILE) + BAREBOX_CMD_HELP(cmd_sha224sum_help) +BAREBOX_CMD_END + +#endif /* CMD_CMD_SHA224SUM */ + +#ifdef CONFIG_CMD_SHA256SUM + +static int do_sha256(int argc, char *argv[]) +{ + return do_hash("sha256", argc, argv); +} + +BAREBOX_CMD_HELP_START(sha256sum) +BAREBOX_CMD_HELP_TEXT("Calculate a SHA256 digest over a FILE or a memory area.") +BAREBOX_CMD_HELP_END + +BAREBOX_CMD_START(sha256sum) + .cmd = do_sha256, + BAREBOX_CMD_DESC("calculate SHA256 digest") + BAREBOX_CMD_OPTS("FILE|AREA") + BAREBOX_CMD_GROUP(CMD_GRP_FILE) + BAREBOX_CMD_HELP(cmd_sha256sum_help) +BAREBOX_CMD_END + +#endif /* CMD_CMD_SHA256SUM */ + +#ifdef CONFIG_CMD_SHA384SUM + +static int do_sha384(int argc, char *argv[]) +{ + return do_hash("sha384", argc, argv); +} + +BAREBOX_CMD_HELP_START(sha384sum) +BAREBOX_CMD_HELP_TEXT("Calculate a SHA384 digest over a FILE or a memory area.") +BAREBOX_CMD_HELP_END + +BAREBOX_CMD_START(sha384sum) + .cmd = do_sha384, + BAREBOX_CMD_DESC("calculate SHA384 digest") + BAREBOX_CMD_OPTS("FILE|AREA") + BAREBOX_CMD_GROUP(CMD_GRP_FILE) + BAREBOX_CMD_HELP(cmd_sha384sum_help) +BAREBOX_CMD_END + +#endif /* CMD_CMD_SHA384SUM */ + +#ifdef CONFIG_CMD_SHA512SUM + +static int do_sha512(int argc, char *argv[]) +{ + return do_hash("sha512", argc, argv); +} + +BAREBOX_CMD_HELP_START(sha512sum) +BAREBOX_CMD_HELP_TEXT("Calculate a SHA512 digest over a FILE or a memory area.") +BAREBOX_CMD_HELP_END + +BAREBOX_CMD_START(sha512sum) + .cmd = do_sha512, + BAREBOX_CMD_DESC("calculate SHA512 digest") + BAREBOX_CMD_OPTS("FILE|AREA") + BAREBOX_CMD_GROUP(CMD_GRP_FILE) + BAREBOX_CMD_HELP(cmd_sha512sum_help) +BAREBOX_CMD_END + +#endif /* CMD_CMD_SHA512SUM */ diff --git a/commands/internal.h b/commands/internal.h new file mode 100644 index 0000000000..21d1408c91 --- /dev/null +++ b/commands/internal.h @@ -0,0 +1,2 @@ +int __do_digest(struct digest *d, unsigned char *sig, + int argc, char *argv[]); diff --git a/common/Kconfig b/common/Kconfig index b68371358d..242f1e4047 100644 --- a/common/Kconfig +++ b/common/Kconfig @@ -449,6 +449,14 @@ config PASSWD_SUM_SHA256 bool "SHA256" select SHA256 +config PASSWD_SUM_SHA512 + bool "SHA512" + select SHA512 + +config PASSWD_CRYPTO_PBKDF2 + bool "PBKDF2" + select CRYPTO_PBKDF2 + endchoice endif diff --git a/common/Makefile b/common/Makefile index ee5dca7023..6bfbfb4745 100644 --- a/common/Makefile +++ b/common/Makefile @@ -20,7 +20,6 @@ obj-$(CONFIG_CMD_MEMTEST) += memtest.o obj-$(CONFIG_COMMAND_SUPPORT) += command.o obj-$(CONFIG_CONSOLE_FULL) += console.o obj-$(CONFIG_CONSOLE_SIMPLE) += console_simple.o -obj-$(CONFIG_DIGEST) += digest.o obj-$(CONFIG_DDR_SPD) += ddr_spd.o obj-$(CONFIG_ENV_HANDLING) += environment.o obj-$(CONFIG_ENVIRONMENT_VARIABLES) += env.o diff --git a/common/command.c b/common/command.c index 61191c2d62..dc2cb88eaf 100644 --- a/common/command.c +++ b/common/command.c @@ -47,6 +47,8 @@ void barebox_cmd_usage(struct command *cmdtp) puts(cmdtp->help); putchar('\n'); } + if (cmdtp->usage) + cmdtp->usage(); #endif } EXPORT_SYMBOL(barebox_cmd_usage); diff --git a/common/digest.c b/common/digest.c deleted file mode 100644 index ae414ba5d5..0000000000 --- a/common/digest.c +++ /dev/null @@ -1,174 +0,0 @@ -/* - * (C) Copyright 2008-2010 Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com> - * - * See file CREDITS for list of people who contributed to this - * project. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; version 2 of - * the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include <common.h> -#include <digest.h> -#include <malloc.h> -#include <fs.h> -#include <fcntl.h> -#include <linux/stat.h> -#include <errno.h> -#include <module.h> -#include <linux/err.h> - -static LIST_HEAD(digests); - -static int dummy_init(struct digest *d) -{ - return 0; -} - -int digest_register(struct digest *d) -{ - if (!d || !d->name || !d->update || !d->final || d->length < 1) - return -EINVAL; - - if (!d->init) - d->init = dummy_init; - - if (digest_get_by_name(d->name)) - return -EEXIST; - - list_add_tail(&d->list, &digests); - - return 0; -} -EXPORT_SYMBOL(digest_register); - -void digest_unregister(struct digest *d) -{ - if (!d) - return; - - list_del(&d->list); -} -EXPORT_SYMBOL(digest_unregister); - -struct digest* digest_get_by_name(char* name) -{ - struct digest* d; - - if (!name) - return NULL; - - list_for_each_entry(d, &digests, list) { - if(strcmp(d->name, name) == 0) - return d; - } - - return NULL; -} -EXPORT_SYMBOL_GPL(digest_get_by_name); - -int digest_file_window(struct digest *d, char *filename, - unsigned char *hash, - ulong start, ulong size) -{ - ulong len = 0; - int fd, now, ret = 0; - unsigned char *buf; - int flags = 0; - - d->init(d); - - fd = open(filename, O_RDONLY); - if (fd < 0) { - perror(filename); - return fd; - } - - buf = memmap(fd, PROT_READ); - if (buf == (void *)-1) { - buf = xmalloc(4096); - flags = 1; - } - - if (start > 0) { - if (flags) { - ret = lseek(fd, start, SEEK_SET); - if (ret == -1) { - perror("lseek"); - goto out; - } - } else { - buf += start; - } - } - - while (size) { - now = min((ulong)4096, size); - if (flags) { - now = read(fd, buf, now); - if (now < 0) { - ret = now; - perror("read"); - goto out_free; - } - if (!now) - break; - } - - if (ctrlc()) { - ret = -EINTR; - goto out_free; - } - - d->update(d, buf, now); - size -= now; - len += now; - } - - d->final(d, hash); - -out_free: - if (flags) - free(buf); -out: - close(fd); - - return ret; -} -EXPORT_SYMBOL_GPL(digest_file_window); - -int digest_file(struct digest *d, char *filename, - unsigned char *hash) -{ - struct stat st; - int ret; - - ret = stat(filename, &st); - - if (ret < 0) - return ret; - - return digest_file_window(d, filename, hash, 0, st.st_size); -} -EXPORT_SYMBOL_GPL(digest_file); - -int digest_file_by_name(char *algo, char *filename, - unsigned char *hash) -{ - struct digest *d; - - d = digest_get_by_name(algo); - if (!d) - return -EIO; - - return digest_file(d, filename, hash); -} -EXPORT_SYMBOL_GPL(digest_file_by_name); diff --git a/common/password.c b/common/password.c index 111c139286..c8454228d0 100644 --- a/common/password.c +++ b/common/password.c @@ -25,7 +25,9 @@ #include <malloc.h> #include <xfuncs.h> #include <clock.h> +#include <stdlib.h> #include <generated/passwd.h> +#include <crypto/pbkdf2.h> #if defined(CONFIG_PASSWD_SUM_MD5) #define PASSWD_SUM "md5" @@ -33,8 +35,16 @@ #define PASSWD_SUM "sha1" #elif defined(CONFIG_PASSWD_SUM_SHA256) #define PASSWD_SUM "sha256" +#elif defined(CONFIG_PASSWD_SUM_SHA512) +#define PASSWD_SUM "sha512" +#else +#define PASSWD_SUM NULL #endif +#define PBKDF2_SALT_LEN 32 +#define PBKDF2_LENGTH 64 +#define PBKDF2_COUNT 10000 + int password(unsigned char *passwd, size_t length, int flags, int timeout) { unsigned char *buf = passwd; @@ -275,46 +285,59 @@ EXPORT_SYMBOL(write_env_passwd); static int __check_passwd(unsigned char* passwd, size_t length, int std) { - struct digest *d; + struct digest *d = NULL; unsigned char *passwd1_sum; unsigned char *passwd2_sum; int ret = 0; + int hash_len; - d = digest_get_by_name(PASSWD_SUM); + if (IS_ENABLED(CONFIG_PASSWD_CRYPTO_PBKDF2)) { + hash_len = PBKDF2_LENGTH; + } else { + d = digest_alloc(PASSWD_SUM); - passwd1_sum = calloc(d->length, sizeof(unsigned char)); + hash_len = digest_length(d); + } + passwd1_sum = calloc(hash_len * 2, sizeof(unsigned char)); if (!passwd1_sum) return -ENOMEM; - passwd2_sum = calloc(d->length, sizeof(unsigned char)); + passwd2_sum = passwd1_sum + hash_len; - if (!passwd2_sum) { - ret = -ENOMEM; - goto err1; - } + if (std) + ret = read_env_passwd(passwd2_sum, hash_len); + else + ret = read_default_passwd(passwd2_sum, hash_len); - d->init(d); + if (ret < 0) + goto err; - d->update(d, passwd, length); + if (IS_ENABLED(CONFIG_PASSWD_CRYPTO_PBKDF2)) { + char *key = passwd2_sum + PBKDF2_SALT_LEN; + char *salt = passwd2_sum; + int keylen = PBKDF2_LENGTH - PBKDF2_SALT_LEN; - d->final(d, passwd1_sum); + ret = pkcs5_pbkdf2_hmac_sha1(passwd, length, salt, + PBKDF2_SALT_LEN, PBKDF2_COUNT, keylen, passwd1_sum); + if (ret) + goto err; - if (std) - ret = read_env_passwd(passwd2_sum, d->length); - else - ret = read_default_passwd(passwd2_sum, d->length); + if (strncmp(passwd1_sum, key, keylen) == 0) + ret = 1; + } else { + ret = digest_digest(d, passwd, length, passwd1_sum); - if (ret < 0) - goto err2; + if (ret) + goto err; - if (strncmp(passwd1_sum, passwd2_sum, d->length) == 0) - ret = 1; + if (strncmp(passwd1_sum, passwd2_sum, hash_len) == 0) + ret = 1; + } -err2: - free(passwd2_sum); -err1: +err: free(passwd1_sum); + digest_free(d); return ret; } @@ -343,25 +366,41 @@ int check_passwd(unsigned char* passwd, size_t length) int set_env_passwd(unsigned char* passwd, size_t length) { - struct digest *d; + struct digest *d = NULL; unsigned char *passwd_sum; - int ret; + int ret, hash_len; - d = digest_get_by_name(PASSWD_SUM); + if (IS_ENABLED(CONFIG_PASSWD_CRYPTO_PBKDF2)) { + hash_len = PBKDF2_LENGTH; + } else { + d = digest_alloc(PASSWD_SUM); - passwd_sum = calloc(d->length, sizeof(unsigned char)); + hash_len = digest_length(d); + } + passwd_sum = calloc(hash_len, sizeof(unsigned char)); if (!passwd_sum) return -ENOMEM; - d->init(d); + if (IS_ENABLED(CONFIG_PASSWD_CRYPTO_PBKDF2)) { + char *key = passwd_sum + PBKDF2_SALT_LEN; + char *salt = passwd_sum; + int keylen = PBKDF2_LENGTH - PBKDF2_SALT_LEN; - d->update(d, passwd, length); + get_random_bytes(passwd_sum, PBKDF2_SALT_LEN); - d->final(d, passwd_sum); + ret = pkcs5_pbkdf2_hmac_sha1(passwd, length, salt, + PBKDF2_SALT_LEN, PBKDF2_COUNT, keylen, key); + } else { + ret = digest_digest(d, passwd, length, passwd_sum); + } + if (ret) + goto err; - ret = write_env_passwd(passwd_sum, d->length); + ret = write_env_passwd(passwd_sum, hash_len); +err: + digest_free(d); free(passwd_sum); return ret; diff --git a/crypto/Kconfig b/crypto/Kconfig index 4bd8dcf359..24f8b410ed 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -13,15 +13,74 @@ menuconfig DIGEST if DIGEST config MD5 - bool "MD5" + bool config SHA1 - bool "SHA1" + bool config SHA224 - bool "SHA224" + bool config SHA256 + bool + +config SHA384 + bool + +config SHA512 + bool + +config DIGEST_HMAC + bool + +config DIGEST_MD5_GENERIC + bool "MD5" + select MD5 + +config DIGEST_SHA1_GENERIC + bool "SHA1" + select SHA1 + +config DIGEST_SHA224_GENERIC + bool "SHA224" + select SHA224 + +config DIGEST_SHA256_GENERIC bool "SHA256" + select SHA256 + +config DIGEST_SHA384_GENERIC + bool "SHA384" + select SHA384 + +config DIGEST_SHA512_GENERIC + bool "SHA512" + select SHA512 + +config DIGEST_HMAC_GENERIC + bool "HMAC" + select DIGEST_HMAC + +config DIGEST_SHA1_ARM + tristate "SHA1 digest algorithm (ARM-asm)" + depends on ARM + select SHA1 + help + SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented + using optimized ARM assembler. + +config DIGEST_SHA256_ARM + tristate "SHA-224/256 digest algorithm (ARM-asm and NEON)" + depends on ARM + select SHA256 + select SHA224 + help + SHA-256 secure hash standard (DFIPS 180-2) implemented + using optimized ARM assembler and NEON, when available. endif + +config CRYPTO_PBKDF2 + select DIGEST + select SHA1 + bool diff --git a/crypto/Makefile b/crypto/Makefile index 7c5b035b73..f39de718e5 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -1,7 +1,13 @@ obj-$(CONFIG_CRC32) += crc32.o obj-$(CONFIG_CRC16) += crc16.o obj-$(CONFIG_CRC7) += crc7.o -obj-$(CONFIG_MD5) += md5.o -obj-$(CONFIG_SHA1) += sha1.o -obj-$(CONFIG_SHA224) += sha2.o -obj-$(CONFIG_SHA256) += sha2.o +obj-$(CONFIG_DIGEST) += digest.o +obj-$(CONFIG_DIGEST_HMAC_GENERIC) += hmac.o +obj-$(CONFIG_DIGEST_MD5_GENERIC) += md5.o +obj-$(CONFIG_DIGEST_SHA1_GENERIC) += sha1.o +obj-$(CONFIG_DIGEST_SHA224_GENERIC) += sha2.o +obj-$(CONFIG_DIGEST_SHA256_GENERIC) += sha2.o +obj-$(CONFIG_DIGEST_SHA384_GENERIC) += sha4.o +obj-$(CONFIG_DIGEST_SHA512_GENERIC) += sha4.o + +obj-$(CONFIG_CRYPTO_PBKDF2) += pbkdf2.o diff --git a/crypto/digest.c b/crypto/digest.c new file mode 100644 index 0000000000..b3f514c8ad --- /dev/null +++ b/crypto/digest.c @@ -0,0 +1,282 @@ +/* + * (C) Copyright 2008-2010 Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com> + * + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; version 2 of + * the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <common.h> +#include <digest.h> +#include <malloc.h> +#include <fs.h> +#include <fcntl.h> +#include <linux/stat.h> +#include <errno.h> +#include <module.h> +#include <linux/err.h> +#include <crypto/internal.h> + +static LIST_HEAD(digests); + +static struct digest_algo *digest_algo_get_by_name(const char *name); + +static int dummy_init(struct digest *d) +{ + return 0; +} + +static void dummy_free(struct digest *d) {} + +int digest_generic_verify(struct digest *d, const unsigned char *md) +{ + int ret; + int len = digest_length(d); + unsigned char *tmp; + + tmp = xmalloc(len); + + ret = digest_final(d, tmp); + if (ret) + goto end; + + ret = memcmp(md, tmp, len); + ret = ret ? -EINVAL : 0; +end: + free(tmp); + return ret; +} + +int digest_generic_digest(struct digest *d, const void *data, + unsigned int len, u8 *md) + +{ + int ret; + + if (!data || len == 0 || !md) + return -EINVAL; + + ret = digest_init(d); + if (ret) + return ret; + ret = digest_update(d, data, len); + if (ret) + return ret; + return digest_final(d, md); +} + +int digest_algo_register(struct digest_algo *d) +{ + if (!d || !d->base.name || !d->update || !d->final || !d->verify) + return -EINVAL; + + if (!d->init) + d->init = dummy_init; + + if (!d->alloc) + d->alloc = dummy_init; + + if (!d->free) + d->free = dummy_free; + + list_add_tail(&d->list, &digests); + + return 0; +} +EXPORT_SYMBOL(digest_algo_register); + +void digest_algo_unregister(struct digest_algo *d) +{ + if (!d) + return; + + list_del(&d->list); +} +EXPORT_SYMBOL(digest_algo_unregister); + +static struct digest_algo *digest_algo_get_by_name(const char *name) +{ + struct digest_algo *d = NULL; + struct digest_algo *tmp; + int priority = -1; + + if (!name) + return NULL; + + list_for_each_entry(tmp, &digests, list) { + if (strcmp(tmp->base.name, name) != 0) + continue; + + if (tmp->base.priority <= priority) + continue; + + d = tmp; + priority = tmp->base.priority; + } + + return d; +} + +void digest_algo_prints(const char *prefix) +{ + struct digest_algo* d; + + printf("%s%-15s\t%-20s\t%-15s\n", prefix, "name", "driver", "priority"); + printf("%s--------------------------------------------------\n", prefix); + list_for_each_entry(d, &digests, list) { + printf("%s%-15s\t%-20s\t%d\n", prefix, d->base.name, + d->base.driver_name, d->base.priority); + } +} + +struct digest *digest_alloc(const char *name) +{ + struct digest *d; + struct digest_algo *algo; + + algo = digest_algo_get_by_name(name); + if (!algo) + return NULL; + + d = xzalloc(sizeof(*d)); + d->algo = algo; + d->ctx = xzalloc(algo->ctx_length); + if (d->algo->alloc(d)) { + digest_free(d); + return NULL; + } + + return d; +} +EXPORT_SYMBOL_GPL(digest_alloc); + +void digest_free(struct digest *d) +{ + if (!d) + return; + d->algo->free(d); + free(d->ctx); + free(d); +} +EXPORT_SYMBOL_GPL(digest_free); + +int digest_file_window(struct digest *d, const char *filename, + unsigned char *hash, + unsigned char *sig, + ulong start, ulong size) +{ + ulong len = 0; + int fd, now, ret = 0; + unsigned char *buf; + int flags = 0; + + ret = digest_init(d); + if (ret) + return ret; + + fd = open(filename, O_RDONLY); + if (fd < 0) { + perror(filename); + return fd; + } + + buf = memmap(fd, PROT_READ); + if (buf == (void *)-1) { + buf = xmalloc(4096); + flags = 1; + } + + if (start > 0) { + if (flags) { + ret = lseek(fd, start, SEEK_SET); + if (ret == -1) { + perror("lseek"); + goto out; + } + } else { + buf += start; + } + } + + while (size) { + now = min((ulong)4096, size); + if (flags) { + now = read(fd, buf, now); + if (now < 0) { + ret = now; + perror("read"); + goto out_free; + } + if (!now) + break; + } + + if (ctrlc()) { + ret = -EINTR; + goto out_free; + } + + ret = digest_update(d, buf, now); + if (ret) + goto out_free; + size -= now; + len += now; + } + + if (sig) + ret = digest_verify(d, sig); + else + ret = digest_final(d, hash); + +out_free: + if (flags) + free(buf); +out: + close(fd); + + return ret; +} +EXPORT_SYMBOL_GPL(digest_file_window); + +int digest_file(struct digest *d, const char *filename, + unsigned char *hash, + unsigned char *sig) +{ + struct stat st; + int ret; + + ret = stat(filename, &st); + + if (ret < 0) + return ret; + + return digest_file_window(d, filename, hash, sig, 0, st.st_size); +} +EXPORT_SYMBOL_GPL(digest_file); + +int digest_file_by_name(const char *algo, const char *filename, + unsigned char *hash, + unsigned char *sig) +{ + struct digest *d; + int ret; + + d = digest_alloc(algo); + if (!d) + return -EIO; + + ret = digest_file(d, filename, hash, sig); + digest_free(d); + return ret; +} +EXPORT_SYMBOL_GPL(digest_file_by_name); diff --git a/crypto/hmac.c b/crypto/hmac.c new file mode 100644 index 0000000000..20af2a56de --- /dev/null +++ b/crypto/hmac.c @@ -0,0 +1,199 @@ +/* + * (C) Copyright 2015 Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com> + * + * GPL v2 only + */ + +#include <common.h> +#include <digest.h> +#include <malloc.h> +#include <init.h> +#include <crypto/internal.h> + +struct digest_hmac { + char *name; + unsigned int pad_length; + struct digest_algo algo; +}; + +struct digest_hmac_ctx { + struct digest *d; + + unsigned char *ipad; + unsigned char *opad; + + unsigned char *key; + unsigned int keylen; +}; + +static inline struct digest_hmac *to_digest_hmac(struct digest_algo *algo) +{ + return container_of(algo, struct digest_hmac, algo); +} + +static int digest_hmac_alloc(struct digest *d) +{ + struct digest_hmac_ctx *dh = d->ctx; + struct digest_hmac *hmac = to_digest_hmac(d->algo); + + dh->d = digest_alloc(hmac->name); + if (!dh->d) + return -EINVAL; + + d->length = dh->d->algo->length; + + dh->ipad = xmalloc(hmac->pad_length); + dh->opad = xmalloc(hmac->pad_length); + + return 0; +} + +static void digest_hmac_free(struct digest *d) +{ + struct digest_hmac_ctx *dh = d->ctx; + + free(dh->ipad); + free(dh->opad); + free(dh->key); + + digest_free(dh->d); +} + +static int digest_hmac_set_key(struct digest *d, const unsigned char *key, + unsigned int len) +{ + struct digest_hmac_ctx *dh = d->ctx; + struct digest_hmac *hmac = to_digest_hmac(d->algo); + unsigned char *sum = NULL; + int ret; + + free(dh->key); + if (len > hmac->pad_length) { + sum = xmalloc(digest_length(dh->d)); + ret = digest_digest(dh->d, dh->key, dh->keylen, sum); + if (ret) + goto err; + dh->keylen = digest_length(dh->d); + dh->key = sum; + } else { + dh->key = xmalloc(len); + memcpy(dh->key, key, len); + dh->keylen = len; + } + + ret = 0; +err: + free(sum); + return ret; +} + +static int digest_hmac_init(struct digest *d) +{ + struct digest_hmac_ctx *dh = d->ctx; + struct digest_hmac *hmac = to_digest_hmac(d->algo); + int i, ret; + unsigned char *key = dh->key; + unsigned int keylen = dh->keylen; + + memset(dh->ipad, 0x36, hmac->pad_length); + memset(dh->opad, 0x5C, hmac->pad_length); + + for (i = 0; i < keylen; i++) { + dh->ipad[i] = (unsigned char)(dh->ipad[i] ^ key[i]); + dh->opad[i] = (unsigned char)(dh->opad[i] ^ key[i]); + } + + ret = digest_init(dh->d); + if (ret) + return ret; + return digest_update(dh->d, dh->ipad, hmac->pad_length); +} + +static int digest_hmac_update(struct digest *d, const void *data, + unsigned long len) +{ + struct digest_hmac_ctx *dh = d->ctx; + + return digest_update(dh->d, data, len); +} + +static int digest_hmac_final(struct digest *d, unsigned char *md) +{ + struct digest_hmac_ctx *dh = d->ctx; + struct digest_hmac *hmac = to_digest_hmac(d->algo); + unsigned char *tmp = NULL; + int ret; + + tmp = xmalloc(digest_length(d)); + + ret = digest_final(dh->d, tmp); + if (ret) + goto err; + ret = digest_init(dh->d); + if (ret) + goto err; + ret = digest_update(dh->d, dh->opad, hmac->pad_length); + if (ret) + goto err; + ret = digest_update(dh->d, tmp, digest_length(d)); + if (ret) + goto err; + ret = digest_final(dh->d, md); + +err: + free(tmp); + + return ret; +} + +struct digest_algo hmac_algo = { + .base = { + .priority = 0, + .flags = DIGEST_ALGO_NEED_KEY, + }, + .alloc = digest_hmac_alloc, + .init = digest_hmac_init, + .update = digest_hmac_update, + .final = digest_hmac_final, + .digest = digest_generic_digest, + .verify = digest_generic_verify, + .set_key = digest_hmac_set_key, + .free = digest_hmac_free, + .ctx_length = sizeof(struct digest_hmac), +}; + +static int digest_hmac_register(char *name, unsigned int pad_length) +{ + struct digest_hmac *dh; + + if (!name || !pad_length) + return -EINVAL; + + dh = xzalloc(sizeof(*dh)); + dh->name = xstrdup(name); + dh->pad_length = pad_length; + dh->algo = hmac_algo; + dh->algo.base.name = asprintf("hmac(%s)", name); + dh->algo.base.driver_name = asprintf("hmac(%s)-generic", name); + + return digest_algo_register(&dh->algo); +} + +static int digest_hmac_initcall(void) +{ + if (IS_ENABLED(CONFIG_MD5)) + digest_hmac_register("md5", 64); + if (IS_ENABLED(CONFIG_SHA1)) + digest_hmac_register("sha1", 64); + if (IS_ENABLED(CONFIG_SHA224)) + digest_hmac_register("sha224", 64); + if (IS_ENABLED(CONFIG_SHA256)) + digest_hmac_register("sha256", 64); + if (IS_ENABLED(CONFIG_SHA384)) + digest_hmac_register("sha384", 128); + if (IS_ENABLED(CONFIG_SHA512)) + digest_hmac_register("sha512", 128); + + return 0; +} +crypto_initcall(digest_hmac_initcall); diff --git a/crypto/md5.c b/crypto/md5.c index 6c4ca1dd59..23892babce 100644 --- a/crypto/md5.c +++ b/crypto/md5.c @@ -28,6 +28,7 @@ #include <common.h> #include <digest.h> #include <init.h> +#include <crypto/internal.h> struct MD5Context { __u32 buf[4]; @@ -265,16 +266,9 @@ MD5Transform(__u32 buf[4], __u32 const in[16]) buf[3] += d; } -struct md5 { - struct MD5Context context; - struct digest d; -}; - static int digest_md5_init(struct digest *d) { - struct md5 *m = container_of(d, struct md5, d); - - MD5Init(&m->context); + MD5Init(d->ctx); return 0; } @@ -282,36 +276,35 @@ static int digest_md5_init(struct digest *d) static int digest_md5_update(struct digest *d, const void *data, unsigned long len) { - struct md5 *m = container_of(d, struct md5, d); - - MD5Update(&m->context, data, len); + MD5Update(d->ctx, data, len); return 0; } static int digest_md5_final(struct digest *d, unsigned char *md) { - struct md5 *m = container_of(d, struct md5, d); - - MD5Final(md, &m->context); + MD5Final(md, d->ctx); return 0; } -static struct md5 m = { - .d = { - .name = "md5", - .init = digest_md5_init, - .update = digest_md5_update, - .final = digest_md5_final, - .length = 16, - } +static struct digest_algo md5 = { + .base = { + .name = "md5", + .driver_name = "md5-generic", + .priority = 0, + }, + .init = digest_md5_init, + .update = digest_md5_update, + .final = digest_md5_final, + .digest = digest_generic_digest, + .verify = digest_generic_verify, + .length = 16, + .ctx_length = sizeof(struct MD5Context), }; static int md5_digest_register(void) { - digest_register(&m.d); - - return 0; + return digest_algo_register(&md5); } device_initcall(md5_digest_register); diff --git a/crypto/pbkdf2.c b/crypto/pbkdf2.c new file mode 100644 index 0000000000..c4ba7be5ef --- /dev/null +++ b/crypto/pbkdf2.c @@ -0,0 +1,94 @@ +/* + * (C) Copyright 2015 Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com> + * + * Under GPLv2 Only + */ + +#include <common.h> +#include <malloc.h> +#include <errno.h> +#include <crypto/pbkdf2.h> + +int pkcs5_pbkdf2_hmac(struct digest* d, + const unsigned char *pwd, size_t pwd_len, + const unsigned char *salt, size_t salt_len, + uint32_t iteration, + uint32_t key_len, unsigned char *key) +{ + int i, j, k; + unsigned char cnt[4]; + uint32_t pass_len; + unsigned char *tmpdgt; + uint32_t d_len; + int ret; + + if (!d) + return -EINVAL; + + d_len = digest_length(d); + tmpdgt = malloc(d_len); + if (!tmpdgt) + return -ENOMEM; + + i = 1; + + ret = digest_set_key(d, pwd, pwd_len); + if (ret) + goto err; + + while (key_len) { + pass_len = min(key_len, d_len); + cnt[0] = (i >> 24) & 0xff; + cnt[1] = (i >> 16) & 0xff; + cnt[2] = (i >> 8) & 0xff; + cnt[3] = i & 0xff; + ret = digest_init(d); + if (ret) + goto err; + ret = digest_update(d, salt, salt_len); + if (ret) + goto err; + ret = digest_update(d, cnt, 4); + if (ret) + goto err; + ret = digest_final(d, tmpdgt); + if (ret) + goto err; + + memcpy(key, tmpdgt, pass_len); + + for (j = 1; j < iteration; j++) { + ret = digest_digest(d, tmpdgt, d_len, tmpdgt); + if (ret) + goto err; + + for(k = 0; k < pass_len; k++) + key[k] ^= tmpdgt[k]; + } + + key_len -= pass_len; + key += pass_len; + i++; + } + + ret = 0; +err: + free(tmpdgt); + + return ret;; +} + +int pkcs5_pbkdf2_hmac_sha1(const unsigned char *pwd, size_t pwd_len, + const unsigned char *salt, size_t salt_len, + uint32_t iter, + uint32_t key_len, unsigned char *key) +{ + int ret; + struct digest* d = digest_alloc("hmac(sha1)"); + + ret = pkcs5_pbkdf2_hmac(d, pwd, pwd_len, salt, salt_len, iter, + key_len, key); + + digest_free(d); + return ret; +} diff --git a/crypto/sha1.c b/crypto/sha1.c index 58d14a8b3f..a3de2719d8 100644 --- a/crypto/sha1.c +++ b/crypto/sha1.c @@ -1,338 +1,305 @@ /* - * Heiko Schocher, DENX Software Engineering, hs@denx.de. - * based on: - * FIPS-180-1 compliant SHA-1 implementation + * Cryptographic API. * - * Copyright (C) 2003-2006 Christophe Devine + * SHA1 Secure Hash Algorithm. * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License, version 2.1 as published by the Free Software Foundation. + * Derived from cryptoapi implementation, adapted for in-place + * scatterlist interface. * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - */ -/* - * The SHA-1 standard was published by NIST in 1993. + * Copyright (c) Alan Smithee. + * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. * - * http://www.itl.nist.gov/fipspubs/fip180-1.htm */ #include <common.h> #include <digest.h> #include <init.h> #include <linux/string.h> +#include <asm/unaligned.h> #include <asm/byteorder.h> -#define SHA1_SUM_POS -0x20 -#define SHA1_SUM_LEN 20 +#include <crypto/sha.h> +#include <crypto/internal.h> -typedef struct -{ - uint32_t total[2]; /*!< number of bytes processed */ - uint32_t state[5]; /*!< intermediate digest state */ - uint8_t buffer[64]; /*!< data block being processed */ -} -sha1_context; +#define SHA_WORKSPACE_WORDS 16 -/* - * 32-bit integer manipulation macros (big endian) - */ -#define GET_UINT32_BE(n,b,i) (n) = be32_to_cpu(((uint32_t*)(b))[i / 4]) -#define PUT_UINT32_BE(n,b,i) ((uint32_t*)(b))[i / 4] = cpu_to_be32(n) - -/* - * SHA-1 context setup - */ -static void sha1_starts (sha1_context * ctx) +static int sha1_init(struct digest *desc) { - ctx->total[0] = 0; - ctx->total[1] = 0; - - ctx->state[0] = 0x67452301; - ctx->state[1] = 0xEFCDAB89; - ctx->state[2] = 0x98BADCFE; - ctx->state[3] = 0x10325476; - ctx->state[4] = 0xC3D2E1F0; -} + struct sha1_state *ctx = digest_ctx(desc); -static void sha1_process (sha1_context * ctx, uint8_t data[64]) -{ - uint32_t temp, W[16], A, B, C, D, E; - - GET_UINT32_BE (W[0], data, 0); - GET_UINT32_BE (W[1], data, 4); - GET_UINT32_BE (W[2], data, 8); - GET_UINT32_BE (W[3], data, 12); - GET_UINT32_BE (W[4], data, 16); - GET_UINT32_BE (W[5], data, 20); - GET_UINT32_BE (W[6], data, 24); - GET_UINT32_BE (W[7], data, 28); - GET_UINT32_BE (W[8], data, 32); - GET_UINT32_BE (W[9], data, 36); - GET_UINT32_BE (W[10], data, 40); - GET_UINT32_BE (W[11], data, 44); - GET_UINT32_BE (W[12], data, 48); - GET_UINT32_BE (W[13], data, 52); - GET_UINT32_BE (W[14], data, 56); - GET_UINT32_BE (W[15], data, 60); - -#define S(x,n) ((x << n) | ((x & 0xFFFFFFFF) >> (32 - n))) - -#define R(t) ( \ - temp = W[(t - 3) & 0x0F] ^ W[(t - 8) & 0x0F] ^ \ - W[(t - 14) & 0x0F] ^ W[ t & 0x0F], \ - ( W[t & 0x0F] = S(temp,1) ) \ -) - -#define P(a,b,c,d,e,x) { \ - e += S(a,5) + F(b,c,d) + K + x; b = S(b,30); \ -} + ctx->count = 0; - A = ctx->state[0]; - B = ctx->state[1]; - C = ctx->state[2]; - D = ctx->state[3]; - E = ctx->state[4]; - -#define F(x,y,z) (z ^ (x & (y ^ z))) -#define K 0x5A827999 - - P (A, B, C, D, E, W[0]); - P (E, A, B, C, D, W[1]); - P (D, E, A, B, C, W[2]); - P (C, D, E, A, B, W[3]); - P (B, C, D, E, A, W[4]); - P (A, B, C, D, E, W[5]); - P (E, A, B, C, D, W[6]); - P (D, E, A, B, C, W[7]); - P (C, D, E, A, B, W[8]); - P (B, C, D, E, A, W[9]); - P (A, B, C, D, E, W[10]); - P (E, A, B, C, D, W[11]); - P (D, E, A, B, C, W[12]); - P (C, D, E, A, B, W[13]); - P (B, C, D, E, A, W[14]); - P (A, B, C, D, E, W[15]); - P (E, A, B, C, D, R (16)); - P (D, E, A, B, C, R (17)); - P (C, D, E, A, B, R (18)); - P (B, C, D, E, A, R (19)); - -#undef K -#undef F - -#define F(x,y,z) (x ^ y ^ z) -#define K 0x6ED9EBA1 - - P (A, B, C, D, E, R (20)); - P (E, A, B, C, D, R (21)); - P (D, E, A, B, C, R (22)); - P (C, D, E, A, B, R (23)); - P (B, C, D, E, A, R (24)); - P (A, B, C, D, E, R (25)); - P (E, A, B, C, D, R (26)); - P (D, E, A, B, C, R (27)); - P (C, D, E, A, B, R (28)); - P (B, C, D, E, A, R (29)); - P (A, B, C, D, E, R (30)); - P (E, A, B, C, D, R (31)); - P (D, E, A, B, C, R (32)); - P (C, D, E, A, B, R (33)); - P (B, C, D, E, A, R (34)); - P (A, B, C, D, E, R (35)); - P (E, A, B, C, D, R (36)); - P (D, E, A, B, C, R (37)); - P (C, D, E, A, B, R (38)); - P (B, C, D, E, A, R (39)); - -#undef K -#undef F - -#define F(x,y,z) ((x & y) | (z & (x | y))) -#define K 0x8F1BBCDC - - P (A, B, C, D, E, R (40)); - P (E, A, B, C, D, R (41)); - P (D, E, A, B, C, R (42)); - P (C, D, E, A, B, R (43)); - P (B, C, D, E, A, R (44)); - P (A, B, C, D, E, R (45)); - P (E, A, B, C, D, R (46)); - P (D, E, A, B, C, R (47)); - P (C, D, E, A, B, R (48)); - P (B, C, D, E, A, R (49)); - P (A, B, C, D, E, R (50)); - P (E, A, B, C, D, R (51)); - P (D, E, A, B, C, R (52)); - P (C, D, E, A, B, R (53)); - P (B, C, D, E, A, R (54)); - P (A, B, C, D, E, R (55)); - P (E, A, B, C, D, R (56)); - P (D, E, A, B, C, R (57)); - P (C, D, E, A, B, R (58)); - P (B, C, D, E, A, R (59)); - -#undef K -#undef F - -#define F(x,y,z) (x ^ y ^ z) -#define K 0xCA62C1D6 - - P (A, B, C, D, E, R (60)); - P (E, A, B, C, D, R (61)); - P (D, E, A, B, C, R (62)); - P (C, D, E, A, B, R (63)); - P (B, C, D, E, A, R (64)); - P (A, B, C, D, E, R (65)); - P (E, A, B, C, D, R (66)); - P (D, E, A, B, C, R (67)); - P (C, D, E, A, B, R (68)); - P (B, C, D, E, A, R (69)); - P (A, B, C, D, E, R (70)); - P (E, A, B, C, D, R (71)); - P (D, E, A, B, C, R (72)); - P (C, D, E, A, B, R (73)); - P (B, C, D, E, A, R (74)); - P (A, B, C, D, E, R (75)); - P (E, A, B, C, D, R (76)); - P (D, E, A, B, C, R (77)); - P (C, D, E, A, B, R (78)); - P (B, C, D, E, A, R (79)); - -#undef K -#undef F - - ctx->state[0] += A; - ctx->state[1] += B; - ctx->state[2] += C; - ctx->state[3] += D; - ctx->state[4] += E; + ctx->state[0] = SHA1_H0; + ctx->state[1] = SHA1_H1; + ctx->state[2] = SHA1_H2; + ctx->state[3] = SHA1_H3; + ctx->state[4] = SHA1_H4; + + return 0; } /* - * SHA-1 process buffer + * If you have 32 registers or more, the compiler can (and should) + * try to change the array[] accesses into registers. However, on + * machines with less than ~25 registers, that won't really work, + * and at least gcc will make an unholy mess of it. + * + * So to avoid that mess which just slows things down, we force + * the stores to memory to actually happen (we might be better off + * with a 'W(t)=(val);asm("":"+m" (W(t))' there instead, as + * suggested by Artur Skawina - that will also make gcc unable to + * try to do the silly "optimize away loads" part because it won't + * see what the value will be). + * + * Ben Herrenschmidt reports that on PPC, the C version comes close + * to the optimized asm with this (ie on PPC you don't want that + * 'volatile', since there are lots of registers). + * + * On ARM we get the best code generation by forcing a full memory barrier + * between each SHA_ROUND, otherwise gcc happily get wild with spilling and + * the stack frame size simply explode and performance goes down the drain. */ -static void sha1_update (sha1_context * ctx, uint8_t *input, uint32_t ilen) -{ - uint32_t fill, left; - - if (ilen <= 0) - return; - - left = ctx->total[0] & 0x3F; - fill = 64 - left; - - ctx->total[0] += ilen; - ctx->total[0] &= 0xFFFFFFFF; - - if (ctx->total[0] < ilen) - ctx->total[1]++; - - if (left && ilen >= fill) { - memcpy ((void *) (ctx->buffer + left), (void *) input, fill); - sha1_process (ctx, ctx->buffer); - input += fill; - ilen -= fill; - left = 0; - } - - while (ilen >= 64) { - sha1_process (ctx, input); - input += 64; - ilen -= 64; - } - if (ilen > 0) { - memcpy ((void *) (ctx->buffer + left), (void *) input, ilen); - } -} +#ifdef CONFIG_X86 + #define setW(x, val) (*(volatile __u32 *)&W(x) = (val)) +#elif defined(CONFIG_ARM) + #define setW(x, val) do { W(x) = (val); __asm__("":::"memory"); } while (0) +#else + #define setW(x, val) (W(x) = (val)) +#endif -static uint8_t sha1_padding[64] = { - 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; +/* This "rolls" over the 512-bit array */ +#define W(x) (array[(x)&15]) /* - * SHA-1 final digest + * Where do we get the source from? The first 16 iterations get it from + * the input data, the next mix it from the 512-bit array. */ -static void sha1_finish (sha1_context * ctx, uint8_t output[20]) +#define SHA_SRC(t) get_unaligned_be32((__u32 *)data + t) +#define SHA_MIX(t) rol32(W(t+13) ^ W(t+8) ^ W(t+2) ^ W(t), 1) + +#define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \ + __u32 TEMP = input(t); setW(t, TEMP); \ + E += TEMP + rol32(A,5) + (fn) + (constant); \ + B = ror32(B, 2); } while (0) + +#define T_0_15(t, A, B, C, D, E) SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E ) +#define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E ) +#define T_20_39(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0x6ed9eba1, A, B, C, D, E ) +#define T_40_59(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, ((B&C)+(D&(B^C))) , 0x8f1bbcdc, A, B, C, D, E ) +#define T_60_79(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0xca62c1d6, A, B, C, D, E ) + +/** + * sha_transform - single block SHA1 transform + * + * @digest: 160 bit digest to update + * @data: 512 bits of data to hash + * @array: 16 words of workspace (see note) + * + * This function generates a SHA1 digest for a single 512-bit block. + * Be warned, it does not handle padding and message digest, do not + * confuse it with the full FIPS 180-1 digest algorithm for variable + * length messages. + * + * Note: If the hash is security sensitive, the caller should be sure + * to clear the workspace. This is left to the caller to avoid + * unnecessary clears between chained hashing operations. + */ +static void sha_transform(__u32 *digest, const char *data, __u32 *array) { - uint32_t last, padn; - uint32_t high, low; - uint8_t msglen[8]; - - high = (ctx->total[0] >> 29) - | (ctx->total[1] << 3); - low = (ctx->total[0] << 3); - - PUT_UINT32_BE (high, msglen, 0); - PUT_UINT32_BE (low, msglen, 4); - - last = ctx->total[0] & 0x3F; - padn = (last < 56) ? (56 - last) : (120 - last); - - sha1_update (ctx, sha1_padding, padn); - sha1_update (ctx, msglen, 8); - - PUT_UINT32_BE (ctx->state[0], output, 0); - PUT_UINT32_BE (ctx->state[1], output, 4); - PUT_UINT32_BE (ctx->state[2], output, 8); - PUT_UINT32_BE (ctx->state[3], output, 12); - PUT_UINT32_BE (ctx->state[4], output, 16); + __u32 A, B, C, D, E; + + A = digest[0]; + B = digest[1]; + C = digest[2]; + D = digest[3]; + E = digest[4]; + + /* Round 1 - iterations 0-16 take their input from 'data' */ + T_0_15( 0, A, B, C, D, E); + T_0_15( 1, E, A, B, C, D); + T_0_15( 2, D, E, A, B, C); + T_0_15( 3, C, D, E, A, B); + T_0_15( 4, B, C, D, E, A); + T_0_15( 5, A, B, C, D, E); + T_0_15( 6, E, A, B, C, D); + T_0_15( 7, D, E, A, B, C); + T_0_15( 8, C, D, E, A, B); + T_0_15( 9, B, C, D, E, A); + T_0_15(10, A, B, C, D, E); + T_0_15(11, E, A, B, C, D); + T_0_15(12, D, E, A, B, C); + T_0_15(13, C, D, E, A, B); + T_0_15(14, B, C, D, E, A); + T_0_15(15, A, B, C, D, E); + + /* Round 1 - tail. Input from 512-bit mixing array */ + T_16_19(16, E, A, B, C, D); + T_16_19(17, D, E, A, B, C); + T_16_19(18, C, D, E, A, B); + T_16_19(19, B, C, D, E, A); + + /* Round 2 */ + T_20_39(20, A, B, C, D, E); + T_20_39(21, E, A, B, C, D); + T_20_39(22, D, E, A, B, C); + T_20_39(23, C, D, E, A, B); + T_20_39(24, B, C, D, E, A); + T_20_39(25, A, B, C, D, E); + T_20_39(26, E, A, B, C, D); + T_20_39(27, D, E, A, B, C); + T_20_39(28, C, D, E, A, B); + T_20_39(29, B, C, D, E, A); + T_20_39(30, A, B, C, D, E); + T_20_39(31, E, A, B, C, D); + T_20_39(32, D, E, A, B, C); + T_20_39(33, C, D, E, A, B); + T_20_39(34, B, C, D, E, A); + T_20_39(35, A, B, C, D, E); + T_20_39(36, E, A, B, C, D); + T_20_39(37, D, E, A, B, C); + T_20_39(38, C, D, E, A, B); + T_20_39(39, B, C, D, E, A); + + /* Round 3 */ + T_40_59(40, A, B, C, D, E); + T_40_59(41, E, A, B, C, D); + T_40_59(42, D, E, A, B, C); + T_40_59(43, C, D, E, A, B); + T_40_59(44, B, C, D, E, A); + T_40_59(45, A, B, C, D, E); + T_40_59(46, E, A, B, C, D); + T_40_59(47, D, E, A, B, C); + T_40_59(48, C, D, E, A, B); + T_40_59(49, B, C, D, E, A); + T_40_59(50, A, B, C, D, E); + T_40_59(51, E, A, B, C, D); + T_40_59(52, D, E, A, B, C); + T_40_59(53, C, D, E, A, B); + T_40_59(54, B, C, D, E, A); + T_40_59(55, A, B, C, D, E); + T_40_59(56, E, A, B, C, D); + T_40_59(57, D, E, A, B, C); + T_40_59(58, C, D, E, A, B); + T_40_59(59, B, C, D, E, A); + + /* Round 4 */ + T_60_79(60, A, B, C, D, E); + T_60_79(61, E, A, B, C, D); + T_60_79(62, D, E, A, B, C); + T_60_79(63, C, D, E, A, B); + T_60_79(64, B, C, D, E, A); + T_60_79(65, A, B, C, D, E); + T_60_79(66, E, A, B, C, D); + T_60_79(67, D, E, A, B, C); + T_60_79(68, C, D, E, A, B); + T_60_79(69, B, C, D, E, A); + T_60_79(70, A, B, C, D, E); + T_60_79(71, E, A, B, C, D); + T_60_79(72, D, E, A, B, C); + T_60_79(73, C, D, E, A, B); + T_60_79(74, B, C, D, E, A); + T_60_79(75, A, B, C, D, E); + T_60_79(76, E, A, B, C, D); + T_60_79(77, D, E, A, B, C); + T_60_79(78, C, D, E, A, B); + T_60_79(79, B, C, D, E, A); + + digest[0] += A; + digest[1] += B; + digest[2] += C; + digest[3] += D; + digest[4] += E; } -struct sha1 { - sha1_context context; - struct digest d; -}; - -static int digest_sha1_init(struct digest *d) +static int sha1_update(struct digest *desc, const void *data, + unsigned long len) { - struct sha1 *m = container_of(d, struct sha1, d); - - sha1_starts(&m->context); + struct sha1_state *sctx = digest_ctx(desc); + unsigned int partial, done; + const u8 *src; + + partial = sctx->count % SHA1_BLOCK_SIZE; + sctx->count += len; + done = 0; + src = data; + + if ((partial + len) >= SHA1_BLOCK_SIZE) { + u32 temp[SHA_WORKSPACE_WORDS]; + + if (partial) { + done = -partial; + memcpy(sctx->buffer + partial, data, + done + SHA1_BLOCK_SIZE); + src = sctx->buffer; + } + + do { + sha_transform(sctx->state, src, temp); + done += SHA1_BLOCK_SIZE; + src = data + done; + } while (done + SHA1_BLOCK_SIZE <= len); + + memset(temp, 0, sizeof(temp)); + partial = 0; + } + memcpy(sctx->buffer + partial, src, len - done); return 0; } -static int digest_sha1_update(struct digest *d, const void *data, - unsigned long len) +static int sha1_final(struct digest *desc, unsigned char *md) { - struct sha1 *m = container_of(d, struct sha1, d); + struct sha1_state *sctx = digest_ctx(desc); + __be32 *dst = (__be32 *)md; + u32 i, index, padlen; + __be64 bits; + static const u8 padding[64] = { 0x80, }; - sha1_update(&m->context, (uint8_t*)data, len); + bits = cpu_to_be64(sctx->count << 3); - return 0; -} + /* Pad out to 56 mod 64 */ + index = sctx->count & 0x3f; + padlen = (index < 56) ? (56 - index) : ((64+56) - index); + sha1_update(desc, padding, padlen); -static int digest_sha1_final(struct digest *d, unsigned char *md) -{ - struct sha1 *m = container_of(d, struct sha1, d); + /* Append length */ + sha1_update(desc, (const u8 *)&bits, sizeof(bits)); + + /* Store state in digest */ + for (i = 0; i < 5; i++) + dst[i] = cpu_to_be32(sctx->state[i]); - sha1_finish(&m->context, md); + /* Wipe context */ + memset(sctx, 0, sizeof *sctx); return 0; } -static struct sha1 m = { - .d = { - .name = "sha1", - .init = digest_sha1_init, - .update = digest_sha1_update, - .final = digest_sha1_final, - .length = SHA1_SUM_LEN, - } +static struct digest_algo m = { + .base = { + .name = "sha1", + .driver_name = "sha1-generic", + .priority = 0, + }, + + .init = sha1_init, + .update = sha1_update, + .final = sha1_final, + .digest = digest_generic_digest, + .verify = digest_generic_verify, + .length = SHA1_DIGEST_SIZE, + .ctx_length = sizeof(struct sha1_state), }; static int sha1_digest_register(void) { - digest_register(&m.d); - - return 0; + return digest_algo_register(&m); } device_initcall(sha1_digest_register); diff --git a/crypto/sha2.c b/crypto/sha2.c index 00a1af3419..6ac5527028 100644 --- a/crypto/sha2.c +++ b/crypto/sha2.c @@ -1,17 +1,20 @@ /* - * FIPS-180-2 compliant SHA-256 implementation + * Cryptographic API. * - * Copyright (C) 2001-2003 Christophe Devine + * SHA-256, as specified in + * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * SHA-256 code by Jean-Luc Cooke <jlcooke@certainkey.com>. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com> + * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> + * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. * */ @@ -19,337 +22,352 @@ #include <digest.h> #include <init.h> #include <linux/string.h> +#include <asm/unaligned.h> #include <asm/byteorder.h> -#define SHA224_SUM_LEN 28 -#define SHA256_SUM_LEN 32 - -typedef struct { - uint32_t total[2]; - uint32_t state[8]; - uint8_t buffer[64]; - int is224; -} sha2_context; - -/* - * 32-bit integer manipulation macros (big endian) - */ -#define GET_UINT32_BE(n,b,i) (n) = be32_to_cpu(((uint32_t*)(b))[i / 4]) -#define PUT_UINT32_BE(n,b,i) ((uint32_t*)(b))[i / 4] = cpu_to_be32(n) +#include <crypto/sha.h> +#include <crypto/internal.h> -static void sha2_starts(sha2_context * ctx, int is224) +static inline u32 Ch(u32 x, u32 y, u32 z) { - ctx->total[0] = 0; - ctx->total[1] = 0; - -#ifdef CONFIG_SHA256 - if (is224 == 0) { - /* SHA-256 */ - ctx->state[0] = 0x6A09E667; - ctx->state[1] = 0xBB67AE85; - ctx->state[2] = 0x3C6EF372; - ctx->state[3] = 0xA54FF53A; - ctx->state[4] = 0x510E527F; - ctx->state[5] = 0x9B05688C; - ctx->state[6] = 0x1F83D9AB; - ctx->state[7] = 0x5BE0CD19; - } -#endif -#ifdef CONFIG_SHA224 - if (is224 == 1) { - /* SHA-224 */ - ctx->state[0] = 0xC1059ED8; - ctx->state[1] = 0x367CD507; - ctx->state[2] = 0x3070DD17; - ctx->state[3] = 0xF70E5939; - ctx->state[4] = 0xFFC00B31; - ctx->state[5] = 0x68581511; - ctx->state[6] = 0x64F98FA7; - ctx->state[7] = 0xBEFA4FA4; - } -#endif - - ctx->is224 = is224; + return z ^ (x & (y ^ z)); } -static void sha2_process(sha2_context * ctx, const uint8_t data[64]) +static inline u32 Maj(u32 x, u32 y, u32 z) { - uint32_t temp1, temp2; - uint32_t W[64]; - uint32_t A, B, C, D, E, F, G, H; - - GET_UINT32_BE(W[0], data, 0); - GET_UINT32_BE(W[1], data, 4); - GET_UINT32_BE(W[2], data, 8); - GET_UINT32_BE(W[3], data, 12); - GET_UINT32_BE(W[4], data, 16); - GET_UINT32_BE(W[5], data, 20); - GET_UINT32_BE(W[6], data, 24); - GET_UINT32_BE(W[7], data, 28); - GET_UINT32_BE(W[8], data, 32); - GET_UINT32_BE(W[9], data, 36); - GET_UINT32_BE(W[10], data, 40); - GET_UINT32_BE(W[11], data, 44); - GET_UINT32_BE(W[12], data, 48); - GET_UINT32_BE(W[13], data, 52); - GET_UINT32_BE(W[14], data, 56); - GET_UINT32_BE(W[15], data, 60); - -#define SHR(x,n) ((x & 0xFFFFFFFF) >> n) -#define ROTR(x,n) (SHR(x,n) | (x << (32 - n))) - -#define S0(x) (ROTR(x, 7) ^ ROTR(x,18) ^ SHR(x, 3)) -#define S1(x) (ROTR(x,17) ^ ROTR(x,19) ^ SHR(x,10)) - -#define S2(x) (ROTR(x, 2) ^ ROTR(x,13) ^ ROTR(x,22)) -#define S3(x) (ROTR(x, 6) ^ ROTR(x,11) ^ ROTR(x,25)) - -#define F0(x,y,z) ((x & y) | (z & (x | y))) -#define F1(x,y,z) (z ^ (x & (y ^ z))) - -#define R(t) \ -( \ - W[t] = S1(W[t - 2]) + W[t - 7] + \ - S0(W[t - 15]) + W[t - 16] \ -) - -#define P(a,b,c,d,e,f,g,h,x,K) { \ - temp1 = h + S3(e) + F1(e,f,g) + K + x; \ - temp2 = S2(a) + F0(a,b,c); \ - d += temp1; h = temp1 + temp2; \ + return (x & y) | (z & (x | y)); } - A = ctx->state[0]; - B = ctx->state[1]; - C = ctx->state[2]; - D = ctx->state[3]; - E = ctx->state[4]; - F = ctx->state[5]; - G = ctx->state[6]; - H = ctx->state[7]; - - P(A, B, C, D, E, F, G, H, W[0], 0x428A2F98); - P(H, A, B, C, D, E, F, G, W[1], 0x71374491); - P(G, H, A, B, C, D, E, F, W[2], 0xB5C0FBCF); - P(F, G, H, A, B, C, D, E, W[3], 0xE9B5DBA5); - P(E, F, G, H, A, B, C, D, W[4], 0x3956C25B); - P(D, E, F, G, H, A, B, C, W[5], 0x59F111F1); - P(C, D, E, F, G, H, A, B, W[6], 0x923F82A4); - P(B, C, D, E, F, G, H, A, W[7], 0xAB1C5ED5); - P(A, B, C, D, E, F, G, H, W[8], 0xD807AA98); - P(H, A, B, C, D, E, F, G, W[9], 0x12835B01); - P(G, H, A, B, C, D, E, F, W[10], 0x243185BE); - P(F, G, H, A, B, C, D, E, W[11], 0x550C7DC3); - P(E, F, G, H, A, B, C, D, W[12], 0x72BE5D74); - P(D, E, F, G, H, A, B, C, W[13], 0x80DEB1FE); - P(C, D, E, F, G, H, A, B, W[14], 0x9BDC06A7); - P(B, C, D, E, F, G, H, A, W[15], 0xC19BF174); - P(A, B, C, D, E, F, G, H, R(16), 0xE49B69C1); - P(H, A, B, C, D, E, F, G, R(17), 0xEFBE4786); - P(G, H, A, B, C, D, E, F, R(18), 0x0FC19DC6); - P(F, G, H, A, B, C, D, E, R(19), 0x240CA1CC); - P(E, F, G, H, A, B, C, D, R(20), 0x2DE92C6F); - P(D, E, F, G, H, A, B, C, R(21), 0x4A7484AA); - P(C, D, E, F, G, H, A, B, R(22), 0x5CB0A9DC); - P(B, C, D, E, F, G, H, A, R(23), 0x76F988DA); - P(A, B, C, D, E, F, G, H, R(24), 0x983E5152); - P(H, A, B, C, D, E, F, G, R(25), 0xA831C66D); - P(G, H, A, B, C, D, E, F, R(26), 0xB00327C8); - P(F, G, H, A, B, C, D, E, R(27), 0xBF597FC7); - P(E, F, G, H, A, B, C, D, R(28), 0xC6E00BF3); - P(D, E, F, G, H, A, B, C, R(29), 0xD5A79147); - P(C, D, E, F, G, H, A, B, R(30), 0x06CA6351); - P(B, C, D, E, F, G, H, A, R(31), 0x14292967); - P(A, B, C, D, E, F, G, H, R(32), 0x27B70A85); - P(H, A, B, C, D, E, F, G, R(33), 0x2E1B2138); - P(G, H, A, B, C, D, E, F, R(34), 0x4D2C6DFC); - P(F, G, H, A, B, C, D, E, R(35), 0x53380D13); - P(E, F, G, H, A, B, C, D, R(36), 0x650A7354); - P(D, E, F, G, H, A, B, C, R(37), 0x766A0ABB); - P(C, D, E, F, G, H, A, B, R(38), 0x81C2C92E); - P(B, C, D, E, F, G, H, A, R(39), 0x92722C85); - P(A, B, C, D, E, F, G, H, R(40), 0xA2BFE8A1); - P(H, A, B, C, D, E, F, G, R(41), 0xA81A664B); - P(G, H, A, B, C, D, E, F, R(42), 0xC24B8B70); - P(F, G, H, A, B, C, D, E, R(43), 0xC76C51A3); - P(E, F, G, H, A, B, C, D, R(44), 0xD192E819); - P(D, E, F, G, H, A, B, C, R(45), 0xD6990624); - P(C, D, E, F, G, H, A, B, R(46), 0xF40E3585); - P(B, C, D, E, F, G, H, A, R(47), 0x106AA070); - P(A, B, C, D, E, F, G, H, R(48), 0x19A4C116); - P(H, A, B, C, D, E, F, G, R(49), 0x1E376C08); - P(G, H, A, B, C, D, E, F, R(50), 0x2748774C); - P(F, G, H, A, B, C, D, E, R(51), 0x34B0BCB5); - P(E, F, G, H, A, B, C, D, R(52), 0x391C0CB3); - P(D, E, F, G, H, A, B, C, R(53), 0x4ED8AA4A); - P(C, D, E, F, G, H, A, B, R(54), 0x5B9CCA4F); - P(B, C, D, E, F, G, H, A, R(55), 0x682E6FF3); - P(A, B, C, D, E, F, G, H, R(56), 0x748F82EE); - P(H, A, B, C, D, E, F, G, R(57), 0x78A5636F); - P(G, H, A, B, C, D, E, F, R(58), 0x84C87814); - P(F, G, H, A, B, C, D, E, R(59), 0x8CC70208); - P(E, F, G, H, A, B, C, D, R(60), 0x90BEFFFA); - P(D, E, F, G, H, A, B, C, R(61), 0xA4506CEB); - P(C, D, E, F, G, H, A, B, R(62), 0xBEF9A3F7); - P(B, C, D, E, F, G, H, A, R(63), 0xC67178F2); - - ctx->state[0] += A; - ctx->state[1] += B; - ctx->state[2] += C; - ctx->state[3] += D; - ctx->state[4] += E; - ctx->state[5] += F; - ctx->state[6] += G; - ctx->state[7] += H; -} +#define e0(x) (ror32(x, 2) ^ ror32(x,13) ^ ror32(x,22)) +#define e1(x) (ror32(x, 6) ^ ror32(x,11) ^ ror32(x,25)) +#define s0(x) (ror32(x, 7) ^ ror32(x,18) ^ (x >> 3)) +#define s1(x) (ror32(x,17) ^ ror32(x,19) ^ (x >> 10)) -static void sha2_update(sha2_context * ctx, const uint8_t * input, size_t length) +static inline void LOAD_OP(int I, u32 *W, const u8 *input) { - size_t fill; - uint32_t left; - - if (length == 0) - return; - - left = ctx->total[0] & 0x3F; - fill = 64 - left; - - ctx->total[0] += (uint32_t)length; - ctx->total[0] &= 0xFFFFFFFF; + W[I] = get_unaligned_be32((__u32 *)input + I); +} - if (ctx->total[0] < (uint32_t)length) - ctx->total[1]++; +static inline void BLEND_OP(int I, u32 *W) +{ + W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16]; +} - if (left && length >= fill) { - memcpy((void *) (ctx->buffer + left), (void *) input, fill); - sha2_process(ctx, ctx->buffer); - length -= fill; - input += fill; - left = 0; - } +static void sha256_transform(u32 *state, const u8 *input) +{ + u32 a, b, c, d, e, f, g, h, t1, t2; + u32 W[64]; + int i; + + /* load the input */ + for (i = 0; i < 16; i++) + LOAD_OP(i, W, input); + + /* now blend */ + for (i = 16; i < 64; i++) + BLEND_OP(i, W); + + /* load the state into our registers */ + a=state[0]; b=state[1]; c=state[2]; d=state[3]; + e=state[4]; f=state[5]; g=state[6]; h=state[7]; + + /* now iterate */ + t1 = h + e1(e) + Ch(e,f,g) + 0x428a2f98 + W[ 0]; + t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2; + t1 = g + e1(d) + Ch(d,e,f) + 0x71374491 + W[ 1]; + t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2; + t1 = f + e1(c) + Ch(c,d,e) + 0xb5c0fbcf + W[ 2]; + t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2; + t1 = e + e1(b) + Ch(b,c,d) + 0xe9b5dba5 + W[ 3]; + t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2; + t1 = d + e1(a) + Ch(a,b,c) + 0x3956c25b + W[ 4]; + t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2; + t1 = c + e1(h) + Ch(h,a,b) + 0x59f111f1 + W[ 5]; + t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2; + t1 = b + e1(g) + Ch(g,h,a) + 0x923f82a4 + W[ 6]; + t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2; + t1 = a + e1(f) + Ch(f,g,h) + 0xab1c5ed5 + W[ 7]; + t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2; + + t1 = h + e1(e) + Ch(e,f,g) + 0xd807aa98 + W[ 8]; + t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2; + t1 = g + e1(d) + Ch(d,e,f) + 0x12835b01 + W[ 9]; + t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2; + t1 = f + e1(c) + Ch(c,d,e) + 0x243185be + W[10]; + t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2; + t1 = e + e1(b) + Ch(b,c,d) + 0x550c7dc3 + W[11]; + t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2; + t1 = d + e1(a) + Ch(a,b,c) + 0x72be5d74 + W[12]; + t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2; + t1 = c + e1(h) + Ch(h,a,b) + 0x80deb1fe + W[13]; + t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2; + t1 = b + e1(g) + Ch(g,h,a) + 0x9bdc06a7 + W[14]; + t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2; + t1 = a + e1(f) + Ch(f,g,h) + 0xc19bf174 + W[15]; + t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2; + + t1 = h + e1(e) + Ch(e,f,g) + 0xe49b69c1 + W[16]; + t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2; + t1 = g + e1(d) + Ch(d,e,f) + 0xefbe4786 + W[17]; + t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2; + t1 = f + e1(c) + Ch(c,d,e) + 0x0fc19dc6 + W[18]; + t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2; + t1 = e + e1(b) + Ch(b,c,d) + 0x240ca1cc + W[19]; + t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2; + t1 = d + e1(a) + Ch(a,b,c) + 0x2de92c6f + W[20]; + t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2; + t1 = c + e1(h) + Ch(h,a,b) + 0x4a7484aa + W[21]; + t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2; + t1 = b + e1(g) + Ch(g,h,a) + 0x5cb0a9dc + W[22]; + t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2; + t1 = a + e1(f) + Ch(f,g,h) + 0x76f988da + W[23]; + t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2; + + t1 = h + e1(e) + Ch(e,f,g) + 0x983e5152 + W[24]; + t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2; + t1 = g + e1(d) + Ch(d,e,f) + 0xa831c66d + W[25]; + t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2; + t1 = f + e1(c) + Ch(c,d,e) + 0xb00327c8 + W[26]; + t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2; + t1 = e + e1(b) + Ch(b,c,d) + 0xbf597fc7 + W[27]; + t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2; + t1 = d + e1(a) + Ch(a,b,c) + 0xc6e00bf3 + W[28]; + t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2; + t1 = c + e1(h) + Ch(h,a,b) + 0xd5a79147 + W[29]; + t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2; + t1 = b + e1(g) + Ch(g,h,a) + 0x06ca6351 + W[30]; + t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2; + t1 = a + e1(f) + Ch(f,g,h) + 0x14292967 + W[31]; + t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2; + + t1 = h + e1(e) + Ch(e,f,g) + 0x27b70a85 + W[32]; + t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2; + t1 = g + e1(d) + Ch(d,e,f) + 0x2e1b2138 + W[33]; + t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2; + t1 = f + e1(c) + Ch(c,d,e) + 0x4d2c6dfc + W[34]; + t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2; + t1 = e + e1(b) + Ch(b,c,d) + 0x53380d13 + W[35]; + t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2; + t1 = d + e1(a) + Ch(a,b,c) + 0x650a7354 + W[36]; + t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2; + t1 = c + e1(h) + Ch(h,a,b) + 0x766a0abb + W[37]; + t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2; + t1 = b + e1(g) + Ch(g,h,a) + 0x81c2c92e + W[38]; + t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2; + t1 = a + e1(f) + Ch(f,g,h) + 0x92722c85 + W[39]; + t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2; + + t1 = h + e1(e) + Ch(e,f,g) + 0xa2bfe8a1 + W[40]; + t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2; + t1 = g + e1(d) + Ch(d,e,f) + 0xa81a664b + W[41]; + t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2; + t1 = f + e1(c) + Ch(c,d,e) + 0xc24b8b70 + W[42]; + t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2; + t1 = e + e1(b) + Ch(b,c,d) + 0xc76c51a3 + W[43]; + t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2; + t1 = d + e1(a) + Ch(a,b,c) + 0xd192e819 + W[44]; + t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2; + t1 = c + e1(h) + Ch(h,a,b) + 0xd6990624 + W[45]; + t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2; + t1 = b + e1(g) + Ch(g,h,a) + 0xf40e3585 + W[46]; + t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2; + t1 = a + e1(f) + Ch(f,g,h) + 0x106aa070 + W[47]; + t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2; + + t1 = h + e1(e) + Ch(e,f,g) + 0x19a4c116 + W[48]; + t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2; + t1 = g + e1(d) + Ch(d,e,f) + 0x1e376c08 + W[49]; + t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2; + t1 = f + e1(c) + Ch(c,d,e) + 0x2748774c + W[50]; + t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2; + t1 = e + e1(b) + Ch(b,c,d) + 0x34b0bcb5 + W[51]; + t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2; + t1 = d + e1(a) + Ch(a,b,c) + 0x391c0cb3 + W[52]; + t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2; + t1 = c + e1(h) + Ch(h,a,b) + 0x4ed8aa4a + W[53]; + t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2; + t1 = b + e1(g) + Ch(g,h,a) + 0x5b9cca4f + W[54]; + t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2; + t1 = a + e1(f) + Ch(f,g,h) + 0x682e6ff3 + W[55]; + t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2; + + t1 = h + e1(e) + Ch(e,f,g) + 0x748f82ee + W[56]; + t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2; + t1 = g + e1(d) + Ch(d,e,f) + 0x78a5636f + W[57]; + t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2; + t1 = f + e1(c) + Ch(c,d,e) + 0x84c87814 + W[58]; + t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2; + t1 = e + e1(b) + Ch(b,c,d) + 0x8cc70208 + W[59]; + t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2; + t1 = d + e1(a) + Ch(a,b,c) + 0x90befffa + W[60]; + t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2; + t1 = c + e1(h) + Ch(h,a,b) + 0xa4506ceb + W[61]; + t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2; + t1 = b + e1(g) + Ch(g,h,a) + 0xbef9a3f7 + W[62]; + t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2; + t1 = a + e1(f) + Ch(f,g,h) + 0xc67178f2 + W[63]; + t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2; + + state[0] += a; state[1] += b; state[2] += c; state[3] += d; + state[4] += e; state[5] += f; state[6] += g; state[7] += h; + + /* clear any sensitive info... */ + a = b = c = d = e = f = g = h = t1 = t2 = 0; + memset(W, 0, 64 * sizeof(u32)); +} - while (length >= 64) { - sha2_process(ctx, input); - length -= 64; - input += 64; - } +static int sha224_init(struct digest *desc) +{ + struct sha256_state *sctx = digest_ctx(desc); + sctx->state[0] = SHA224_H0; + sctx->state[1] = SHA224_H1; + sctx->state[2] = SHA224_H2; + sctx->state[3] = SHA224_H3; + sctx->state[4] = SHA224_H4; + sctx->state[5] = SHA224_H5; + sctx->state[6] = SHA224_H6; + sctx->state[7] = SHA224_H7; + sctx->count = 0; - if (length) - memcpy((void *) (ctx->buffer + left), (void *) input, length); + return 0; } -static const uint8_t sha2_padding[64] = { - 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -static void sha2_finish(sha2_context * ctx, uint8_t digest[32]) +static int sha256_init(struct digest *desc) { - uint32_t last, padn; - uint32_t high, low; - uint8_t msglen[8]; - - high = ((ctx->total[0] >> 29) - | (ctx->total[1] << 3)); - low = (ctx->total[0] << 3); - - PUT_UINT32_BE(high, msglen, 0); - PUT_UINT32_BE(low, msglen, 4); - - last = ctx->total[0] & 0x3F; - padn = (last < 56) ? (56 - last) : (120 - last); - - sha2_update(ctx, sha2_padding, padn); - sha2_update(ctx, msglen, 8); - - PUT_UINT32_BE(ctx->state[0], digest, 0); - PUT_UINT32_BE(ctx->state[1], digest, 4); - PUT_UINT32_BE(ctx->state[2], digest, 8); - PUT_UINT32_BE(ctx->state[3], digest, 12); - PUT_UINT32_BE(ctx->state[4], digest, 16); - PUT_UINT32_BE(ctx->state[5], digest, 20); - PUT_UINT32_BE(ctx->state[6], digest, 24); - if (!ctx->is224) - PUT_UINT32_BE(ctx->state[7], digest, 28); -} + struct sha256_state *sctx = digest_ctx(desc); + sctx->state[0] = SHA256_H0; + sctx->state[1] = SHA256_H1; + sctx->state[2] = SHA256_H2; + sctx->state[3] = SHA256_H3; + sctx->state[4] = SHA256_H4; + sctx->state[5] = SHA256_H5; + sctx->state[6] = SHA256_H6; + sctx->state[7] = SHA256_H7; + sctx->count = 0; -struct sha2 { - sha2_context context; - struct digest d; -}; + return 0; +} -static int digest_sha2_update(struct digest *d, const void *data, +static int sha256_update(struct digest *desc, const void *data, unsigned long len) { - struct sha2 *m = container_of(d, struct sha2, d); - - sha2_update(&m->context, (uint8_t *)data, len); + struct sha256_state *sctx = digest_ctx(desc); + unsigned int partial, done; + const u8 *src; + + partial = sctx->count & 0x3f; + sctx->count += len; + done = 0; + src = data; + + if ((partial + len) > 63) { + if (partial) { + done = -partial; + memcpy(sctx->buf + partial, data, done + 64); + src = sctx->buf; + } + + do { + sha256_transform(sctx->state, src); + done += 64; + src = data + done; + } while (done + 63 < len); + + partial = 0; + } + memcpy(sctx->buf + partial, src, len - done); return 0; } -static int digest_sha2_final(struct digest *d, unsigned char *md) +static int sha256_final(struct digest *desc, u8 *out) { - struct sha2 *m = container_of(d, struct sha2, d); + struct sha256_state *sctx = digest_ctx(desc); + __be32 *dst = (__be32 *)out; + __be64 bits; + unsigned int index, pad_len; + int i; + static const u8 padding[64] = { 0x80, }; + + /* Save number of bits */ + bits = cpu_to_be64(sctx->count << 3); - sha2_finish(&m->context, md); + /* Pad out to 56 mod 64. */ + index = sctx->count & 0x3f; + pad_len = (index < 56) ? (56 - index) : ((64+56) - index); + sha256_update(desc, padding, pad_len); + + /* Append length (before padding) */ + sha256_update(desc, (const u8 *)&bits, sizeof(bits)); + + /* Store state in digest */ + for (i = 0; i < 8; i++) + dst[i] = cpu_to_be32(sctx->state[i]); + + /* Zeroize sensitive information. */ + memset(sctx, 0, sizeof(*sctx)); return 0; } -#ifdef CONFIG_SHA224 -static int digest_sha224_init(struct digest *d) +static int sha224_final(struct digest *desc, u8 *hash) { - struct sha2 *m = container_of(d, struct sha2, d); + u8 D[SHA256_DIGEST_SIZE]; - sha2_starts(&m->context, 1); + sha256_final(desc, D); + + memcpy(hash, D, SHA224_DIGEST_SIZE); + memset(D, 0, SHA256_DIGEST_SIZE); return 0; } -static struct sha2 m224 = { - .d = { - .name = "sha224", - .init = digest_sha224_init, - .update = digest_sha2_update, - .final = digest_sha2_final, - .length = SHA224_SUM_LEN, - } +static struct digest_algo m224 = { + .base = { + .name = "sha224", + .driver_name = "sha224-generic", + .priority = 0, + }, + + .init = sha224_init, + .update = sha256_update, + .final = sha224_final, + .digest = digest_generic_digest, + .verify = digest_generic_verify, + .length = SHA224_DIGEST_SIZE, + .ctx_length = sizeof(struct sha256_state), }; -#endif -#ifdef CONFIG_SHA256 -static int digest_sha256_init(struct digest *d) +static int sha224_digest_register(void) { - struct sha2 *m = container_of(d, struct sha2, d); + if (!IS_ENABLED(CONFIG_SHA224)) + return 0; - sha2_starts(&m->context, 0); - - return 0; + return digest_algo_register(&m224); } - -static struct sha2 m256 = { - .d = { - .name = "sha256", - .init = digest_sha256_init, - .update = digest_sha2_update, - .final = digest_sha2_final, - .length = SHA256_SUM_LEN, - } +device_initcall(sha224_digest_register); + +static struct digest_algo m256 = { + .base = { + .name = "sha256", + .driver_name = "sha256-generic", + .priority = 0, + }, + + .init = sha256_init, + .update = sha256_update, + .final = sha256_final, + .digest = digest_generic_digest, + .verify = digest_generic_verify, + .length = SHA256_DIGEST_SIZE, + .ctx_length = sizeof(struct sha256_state), }; -#endif -static int sha2_digest_register(void) +static int sha256_digest_register(void) { -#ifdef CONFIG_SHA224 - digest_register(&m224.d); -#endif -#ifdef CONFIG_SHA256 - digest_register(&m256.d); -#endif + if (!IS_ENABLED(CONFIG_SHA256)) + return 0; - return 0; + return digest_algo_register(&m256); } -device_initcall(sha2_digest_register); +device_initcall(sha256_digest_register); diff --git a/crypto/sha4.c b/crypto/sha4.c new file mode 100644 index 0000000000..187a91ef58 --- /dev/null +++ b/crypto/sha4.c @@ -0,0 +1,293 @@ +/* SHA-512 code by Jean-Luc Cooke <jlcooke@certainkey.com> + * + * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com> + * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) 2003 Kyle McMartin <kyle@debian.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + */ + +#include <common.h> +#include <digest.h> +#include <init.h> +#include <linux/string.h> +#include <asm/unaligned.h> +#include <asm/byteorder.h> + +#include <crypto/sha.h> +#include <crypto/internal.h> + +static inline u64 Ch(u64 x, u64 y, u64 z) +{ + return z ^ (x & (y ^ z)); +} + +static inline u64 Maj(u64 x, u64 y, u64 z) +{ + return (x & y) | (z & (x | y)); +} + +static const u64 sha512_K[80] = { + 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, 0xb5c0fbcfec4d3b2fULL, + 0xe9b5dba58189dbbcULL, 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, + 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, 0xd807aa98a3030242ULL, + 0x12835b0145706fbeULL, 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL, + 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, 0x9bdc06a725c71235ULL, + 0xc19bf174cf692694ULL, 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, + 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, 0x2de92c6f592b0275ULL, + 0x4a7484aa6ea6e483ULL, 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL, + 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, 0xb00327c898fb213fULL, + 0xbf597fc7beef0ee4ULL, 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, + 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, 0x27b70a8546d22ffcULL, + 0x2e1b21385c26c926ULL, 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL, + 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, 0x81c2c92e47edaee6ULL, + 0x92722c851482353bULL, 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, + 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, 0xd192e819d6ef5218ULL, + 0xd69906245565a910ULL, 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL, + 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, 0x2748774cdf8eeb99ULL, + 0x34b0bcb5e19b48a8ULL, 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, + 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, 0x748f82ee5defb2fcULL, + 0x78a5636f43172f60ULL, 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL, + 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, 0xbef9a3f7b2c67915ULL, + 0xc67178f2e372532bULL, 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, + 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, 0x06f067aa72176fbaULL, + 0x0a637dc5a2c898a6ULL, 0x113f9804bef90daeULL, 0x1b710b35131c471bULL, + 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, 0x3c9ebe0a15c9bebcULL, + 0x431d67c49c100d4cULL, 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, + 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL, +}; + +#define e0(x) (ror64(x,28) ^ ror64(x,34) ^ ror64(x,39)) +#define e1(x) (ror64(x,14) ^ ror64(x,18) ^ ror64(x,41)) +#define s0(x) (ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7)) +#define s1(x) (ror64(x,19) ^ ror64(x,61) ^ (x >> 6)) + +static inline void LOAD_OP(int I, u64 *W, const u8 *input) +{ + W[I] = get_unaligned_be64((__u64 *)input + I); +} + +static inline void BLEND_OP(int I, u64 *W) +{ + W[I & 15] += s1(W[(I-2) & 15]) + W[(I-7) & 15] + s0(W[(I-15) & 15]); +} + +static void +sha512_transform(u64 *state, const u8 *input) +{ + u64 a, b, c, d, e, f, g, h, t1, t2; + + int i; + u64 W[16]; + + /* load the state into our registers */ + a=state[0]; b=state[1]; c=state[2]; d=state[3]; + e=state[4]; f=state[5]; g=state[6]; h=state[7]; + + /* now iterate */ + for (i=0; i<80; i+=8) { + if (!(i & 8)) { + int j; + + if (i < 16) { + /* load the input */ + for (j = 0; j < 16; j++) + LOAD_OP(i + j, W, input); + } else { + for (j = 0; j < 16; j++) { + BLEND_OP(i + j, W); + } + } + } + + t1 = h + e1(e) + Ch(e,f,g) + sha512_K[i ] + W[(i & 15)]; + t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2; + t1 = g + e1(d) + Ch(d,e,f) + sha512_K[i+1] + W[(i & 15) + 1]; + t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2; + t1 = f + e1(c) + Ch(c,d,e) + sha512_K[i+2] + W[(i & 15) + 2]; + t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2; + t1 = e + e1(b) + Ch(b,c,d) + sha512_K[i+3] + W[(i & 15) + 3]; + t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2; + t1 = d + e1(a) + Ch(a,b,c) + sha512_K[i+4] + W[(i & 15) + 4]; + t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2; + t1 = c + e1(h) + Ch(h,a,b) + sha512_K[i+5] + W[(i & 15) + 5]; + t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2; + t1 = b + e1(g) + Ch(g,h,a) + sha512_K[i+6] + W[(i & 15) + 6]; + t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2; + t1 = a + e1(f) + Ch(f,g,h) + sha512_K[i+7] + W[(i & 15) + 7]; + t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2; + } + + state[0] += a; state[1] += b; state[2] += c; state[3] += d; + state[4] += e; state[5] += f; state[6] += g; state[7] += h; + + /* erase our data */ + a = b = c = d = e = f = g = h = t1 = t2 = 0; +} + +static int +sha512_init(struct digest *desc) +{ + struct sha512_state *sctx = digest_ctx(desc); + sctx->state[0] = SHA512_H0; + sctx->state[1] = SHA512_H1; + sctx->state[2] = SHA512_H2; + sctx->state[3] = SHA512_H3; + sctx->state[4] = SHA512_H4; + sctx->state[5] = SHA512_H5; + sctx->state[6] = SHA512_H6; + sctx->state[7] = SHA512_H7; + sctx->count[0] = sctx->count[1] = 0; + + return 0; +} + +static int sha384_init(struct digest *desc) +{ + struct sha512_state *sctx = digest_ctx(desc); + sctx->state[0] = SHA384_H0; + sctx->state[1] = SHA384_H1; + sctx->state[2] = SHA384_H2; + sctx->state[3] = SHA384_H3; + sctx->state[4] = SHA384_H4; + sctx->state[5] = SHA384_H5; + sctx->state[6] = SHA384_H6; + sctx->state[7] = SHA384_H7; + sctx->count[0] = sctx->count[1] = 0; + + return 0; +} + +static int sha512_update(struct digest *desc, const void *in, + unsigned long len) +{ + struct sha512_state *sctx = digest_ctx(desc); + const u8 *data = in; + + unsigned int i, index, part_len; + + /* Compute number of bytes mod 128 */ + index = sctx->count[0] & 0x7f; + + /* Update number of bytes */ + if ((sctx->count[0] += len) < len) + sctx->count[1]++; + + part_len = 128 - index; + + /* Transform as many times as possible. */ + if (len >= part_len) { + memcpy(&sctx->buf[index], data, part_len); + sha512_transform(sctx->state, sctx->buf); + + for (i = part_len; i + 127 < len; i+=128) + sha512_transform(sctx->state, &data[i]); + + index = 0; + } else { + i = 0; + } + + /* Buffer remaining input */ + memcpy(&sctx->buf[index], &data[i], len - i); + + return 0; +} + +static int sha512_final(struct digest *desc, u8 *hash) +{ + struct sha512_state *sctx = digest_ctx(desc); + static u8 padding[128] = { 0x80, }; + __be64 *dst = (__be64 *)hash; + __be64 bits[2]; + unsigned int index, pad_len; + int i; + + /* Save number of bits */ + bits[1] = cpu_to_be64(sctx->count[0] << 3); + bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); + + /* Pad out to 112 mod 128. */ + index = sctx->count[0] & 0x7f; + pad_len = (index < 112) ? (112 - index) : ((128+112) - index); + sha512_update(desc, padding, pad_len); + + /* Append length (before padding) */ + sha512_update(desc, (const u8 *)bits, sizeof(bits)); + + /* Store state in digest */ + for (i = 0; i < 8; i++) + dst[i] = cpu_to_be64(sctx->state[i]); + + /* Zeroize sensitive information. */ + memset(sctx, 0, sizeof(struct sha512_state)); + + return 0; +} + +static int sha384_final(struct digest *desc, u8 *hash) +{ + u8 D[64]; + + sha512_final(desc, D); + + memcpy(hash, D, 48); + memset(D, 0, 64); + + return 0; +} + +static struct digest_algo m384 = { + .base = { + .name = "sha384", + .driver_name = "sha384-generic", + .priority = 0, + }, + + .init = sha384_init, + .update = sha512_update, + .final = sha384_final, + .digest = digest_generic_digest, + .verify = digest_generic_verify, + .length = SHA384_DIGEST_SIZE, + .ctx_length = sizeof(struct sha512_state), +}; + + +static int sha384_digest_register(void) +{ + if (!IS_ENABLED(CONFIG_SHA384)) + return 0; + + return digest_algo_register(&m384); +} +device_initcall(sha384_digest_register); + +static struct digest_algo m512 = { + .base = { + .name = "sha512", + .driver_name = "sha512-generic", + .priority = 0, + }, + + .init = sha512_init, + .update = sha512_update, + .final = sha512_final, + .digest = digest_generic_digest, + .verify = digest_generic_verify, + .length = SHA512_DIGEST_SIZE, + .ctx_length = sizeof(struct sha512_state), +}; + +static int sha512_digest_register(void) +{ + if (!IS_ENABLED(CONFIG_SHA512)) + return 0; + + return digest_algo_register(&m512); +} +device_initcall(sha512_digest_register); diff --git a/include/asm-generic/barebox.lds.h b/include/asm-generic/barebox.lds.h index 66abff30fa..e359187d7f 100644 --- a/include/asm-generic/barebox.lds.h +++ b/include/asm-generic/barebox.lds.h @@ -33,7 +33,8 @@ KEEP(*(.initcall.8)) \ KEEP(*(.initcall.9)) \ KEEP(*(.initcall.10)) \ - KEEP(*(.initcall.11)) + KEEP(*(.initcall.11)) \ + KEEP(*(.initcall.12)) #define BAREBOX_CMDS KEEP(*(SORT_BY_NAME(.barebox_cmd*))) diff --git a/include/command.h b/include/command.h index 5d5bf53544..3aca1a9f1b 100644 --- a/include/command.h +++ b/include/command.h @@ -54,6 +54,7 @@ struct command { uint32_t group; #ifdef CONFIG_LONGHELP const char *help; /* Help message (long) */ + void (*usage)(void); #endif } #ifdef __x86_64__ @@ -115,8 +116,10 @@ static const __maybe_unused char cmd_##_name##_help[] = #ifdef CONFIG_LONGHELP #define BAREBOX_CMD_HELP(text) .help = text, +#define BAREBOX_CMD_USAGE(fn) .usage = fn, #else #define BAREBOX_CMD_HELP(text) +#define BAREBOX_CMD_USAGE(fn) #endif #define BAREBOX_CMD_GROUP(grp) .group = grp, diff --git a/include/crypto/internal.h b/include/crypto/internal.h new file mode 100644 index 0000000000..0987ccc160 --- /dev/null +++ b/include/crypto/internal.h @@ -0,0 +1,9 @@ +/* + * (C) Copyright 2015 Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com> + * + * GPL v2 only + */ + +int digest_generic_verify(struct digest *d, const unsigned char *md); +int digest_generic_digest(struct digest *d, const void *data, + unsigned int len, u8 *out); diff --git a/include/crypto/pbkdf2.h b/include/crypto/pbkdf2.h new file mode 100644 index 0000000000..fa66675261 --- /dev/null +++ b/include/crypto/pbkdf2.h @@ -0,0 +1,23 @@ +/* + * (C) Copyright 2015 Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com> + * + * Under GPLv2 Only + */ + +#ifndef __PBKDF2_H__ +#define __PBKDF2_H__ + +#include <digest.h> + +int pkcs5_pbkdf2_hmac_sha1(const unsigned char *pwd, size_t pwd_len, + const unsigned char *salt, size_t salt_len, + uint32_t iteration, + uint32_t key_len, unsigned char *buf); + +int pkcs5_pbkdf2_hmac(struct digest* d, + const unsigned char *pwd, size_t pwd_len, + const unsigned char *salt, size_t salt_len, + uint32_t iteration, + uint32_t key_len, unsigned char *key); + +#endif /* __PBKDF2_H__ */ diff --git a/include/crypto/sha.h b/include/crypto/sha.h new file mode 100644 index 0000000000..190f8a0e02 --- /dev/null +++ b/include/crypto/sha.h @@ -0,0 +1,95 @@ +/* + * Common values for SHA algorithms + */ + +#ifndef _CRYPTO_SHA_H +#define _CRYPTO_SHA_H + +#include <linux/types.h> + +#define SHA1_DIGEST_SIZE 20 +#define SHA1_BLOCK_SIZE 64 + +#define SHA224_DIGEST_SIZE 28 +#define SHA224_BLOCK_SIZE 64 + +#define SHA256_DIGEST_SIZE 32 +#define SHA256_BLOCK_SIZE 64 + +#define SHA384_DIGEST_SIZE 48 +#define SHA384_BLOCK_SIZE 128 + +#define SHA512_DIGEST_SIZE 64 +#define SHA512_BLOCK_SIZE 128 + +#define SHA1_H0 0x67452301UL +#define SHA1_H1 0xefcdab89UL +#define SHA1_H2 0x98badcfeUL +#define SHA1_H3 0x10325476UL +#define SHA1_H4 0xc3d2e1f0UL + +#define SHA224_H0 0xc1059ed8UL +#define SHA224_H1 0x367cd507UL +#define SHA224_H2 0x3070dd17UL +#define SHA224_H3 0xf70e5939UL +#define SHA224_H4 0xffc00b31UL +#define SHA224_H5 0x68581511UL +#define SHA224_H6 0x64f98fa7UL +#define SHA224_H7 0xbefa4fa4UL + +#define SHA256_H0 0x6a09e667UL +#define SHA256_H1 0xbb67ae85UL +#define SHA256_H2 0x3c6ef372UL +#define SHA256_H3 0xa54ff53aUL +#define SHA256_H4 0x510e527fUL +#define SHA256_H5 0x9b05688cUL +#define SHA256_H6 0x1f83d9abUL +#define SHA256_H7 0x5be0cd19UL + +#define SHA384_H0 0xcbbb9d5dc1059ed8ULL +#define SHA384_H1 0x629a292a367cd507ULL +#define SHA384_H2 0x9159015a3070dd17ULL +#define SHA384_H3 0x152fecd8f70e5939ULL +#define SHA384_H4 0x67332667ffc00b31ULL +#define SHA384_H5 0x8eb44a8768581511ULL +#define SHA384_H6 0xdb0c2e0d64f98fa7ULL +#define SHA384_H7 0x47b5481dbefa4fa4ULL + +#define SHA512_H0 0x6a09e667f3bcc908ULL +#define SHA512_H1 0xbb67ae8584caa73bULL +#define SHA512_H2 0x3c6ef372fe94f82bULL +#define SHA512_H3 0xa54ff53a5f1d36f1ULL +#define SHA512_H4 0x510e527fade682d1ULL +#define SHA512_H5 0x9b05688c2b3e6c1fULL +#define SHA512_H6 0x1f83d9abfb41bd6bULL +#define SHA512_H7 0x5be0cd19137e2179ULL + +struct sha1_state { + u64 count; + u32 state[SHA1_DIGEST_SIZE / 4]; + u8 buffer[SHA1_BLOCK_SIZE]; +}; + +struct sha256_state { + u64 count; + u32 state[SHA256_DIGEST_SIZE / 4]; + u8 buf[SHA256_BLOCK_SIZE]; +}; + +struct sha512_state { + u64 count[2]; + u64 state[SHA512_DIGEST_SIZE / 8]; + u8 buf[SHA512_BLOCK_SIZE]; +}; + +struct shash_desc; + +extern int crypto_sha1_update(struct shash_desc *desc, const u8 *data, + unsigned int len); + +extern int crypto_sha256_update(struct shash_desc *desc, const u8 *data, + unsigned int len); + +extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data, + unsigned int len); +#endif diff --git a/include/digest.h b/include/digest.h index 8563c10128..7f8d696eb6 100644 --- a/include/digest.h +++ b/include/digest.h @@ -21,33 +21,115 @@ #include <linux/list.h> -struct digest -{ +struct digest; + +struct crypto_alg { char *name; + char *driver_name; + int priority; +#define DIGEST_ALGO_NEED_KEY (1 << 0) + unsigned int flags; +}; + +struct digest_algo { + struct crypto_alg base; + int (*alloc)(struct digest *d); + void (*free)(struct digest *d); int (*init)(struct digest *d); int (*update)(struct digest *d, const void *data, unsigned long len); int (*final)(struct digest *d, unsigned char *md); + int (*digest)(struct digest *d, const void *data, + unsigned int len, u8 *out); + int (*set_key)(struct digest *d, const unsigned char *key, unsigned int len); + int (*verify)(struct digest *d, const unsigned char *md); unsigned int length; + unsigned int ctx_length; struct list_head list; }; +struct digest { + struct digest_algo *algo; + void *ctx; + unsigned int length; +}; + /* * digest functions */ -int digest_register(struct digest *d); -void digest_unregister(struct digest *d); +int digest_algo_register(struct digest_algo *d); +void digest_algo_unregister(struct digest_algo *d); +void digest_algo_prints(const char *prefix); -struct digest* digest_get_by_name(char* name); +struct digest *digest_alloc(const char *name); +void digest_free(struct digest *d); -int digest_file_window(struct digest *d, char *filename, +int digest_file_window(struct digest *d, const char *filename, unsigned char *hash, + unsigned char *sig, ulong start, ulong size); -int digest_file(struct digest *d, char *filename, - unsigned char *hash); -int digest_file_by_name(char *algo, char *filename, - unsigned char *hash); +int digest_file(struct digest *d, const char *filename, + unsigned char *hash, + unsigned char *sig); +int digest_file_by_name(const char *algo, const char *filename, + unsigned char *hash, + unsigned char *sig); + +static inline int digest_init(struct digest *d) +{ + return d->algo->init(d); +} + +static inline int digest_update(struct digest *d, const void *data, + unsigned long len) +{ + return d->algo->update(d, data, len); +} + +static inline int digest_final(struct digest *d, unsigned char *md) +{ + return d->algo->final(d, md); +} + +static inline int digest_digest(struct digest *d, const void *data, + unsigned int len, u8 *md) +{ + return d->algo->digest(d, data, len, md); +} + +static inline int digest_verify(struct digest *d, const unsigned char *md) +{ + return d->algo->verify(d, md); +} + +static inline int digest_length(struct digest *d) +{ + return d->length ? d->length : d->algo->length; +} + +static inline int digest_set_key(struct digest *d, const unsigned char *key, + unsigned int len) +{ + if (!d->algo->set_key) + return -ENOTSUPP; + return d->algo->set_key(d, key, len); +} + +static inline int digest_is_flags(struct digest *d, unsigned int flags) +{ + return d->algo->base.flags & flags; +} + +static inline const char *digest_name(struct digest *d) +{ + return d->algo->base.name; +} + +static inline void* digest_ctx(struct digest *d) +{ + return d->ctx; +} #endif /* __SH_ST_DEVICES_H__ */ diff --git a/include/init.h b/include/init.h index 40cea55fb1..37c7eedf67 100644 --- a/include/init.h +++ b/include/init.h @@ -37,7 +37,8 @@ typedef int (*initcall_t)(void); #define coredevice_initcall(fn) __define_initcall("8",fn,8) #define fs_initcall(fn) __define_initcall("9",fn,9) #define device_initcall(fn) __define_initcall("10",fn,10) -#define late_initcall(fn) __define_initcall("11",fn,11) +#define crypto_initcall(fn) __define_initcall("11",fn,11) +#define late_initcall(fn) __define_initcall("12",fn,12) /* section for code used very early when * - we're not running from where we linked at |