diff options
author | Siarhei Siamashka <siarhei.siamashka@gmail.com> | 2013-06-22 21:08:13 +0300 |
---|---|---|
committer | Siarhei Siamashka <siarhei.siamashka@gmail.com> | 2013-06-22 21:11:50 +0300 |
commit | e96f9c7d56b210a79f7d074cc3da12ab841f9997 (patch) | |
tree | a78f6068486c4d2811a3d790e05ac983587dcce9 | |
parent | f07834064a5e385010250303ee557b28cd33da2e (diff) | |
download | cpuburn-arm-e96f9c7d56b210a79f7d074cc3da12ab841f9997.tar.gz cpuburn-arm-e96f9c7d56b210a79f7d074cc3da12ab841f9997.tar.xz |
Initial revision of Cortex-A7 optimized cpuburn program
Stresses L1/L2 data caches, FP64 arithmetics, NEON stores with
interleaving, integer multiplications and conditional branches.
Signed-off-by: Siarhei Siamashka <siarhei.siamashka@gmail.com>
-rw-r--r-- | cpuburn-a7.S | 131 |
1 files changed, 131 insertions, 0 deletions
diff --git a/cpuburn-a7.S b/cpuburn-a7.S new file mode 100644 index 0000000..a294e11 --- /dev/null +++ b/cpuburn-a7.S @@ -0,0 +1,131 @@ +/* + * Copyright © 2013 Siarhei Siamashka <siarhei.siamashka@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * This program tries to stress ARM Cortex-A7 processor to generate as + * much heat as possible. + * WARNING: improperly cooled or otherwise flawed hardware may potentially + * overheat and fail. Use at your own risk. + * + * Compilation instructions: + * $ arm-linux-gnueabihf-gcc -o cpuburn-a7 cpuburn-a7.S + * + * See http://ssvb.github.io/2012/04/10/cpuburn-arm-cortex-a9.html + * for more details. + */ + + .syntax unified + .text + .arch armv7-a + .fpu neon + .arm + + .global main + .global sysconf + .global fork + .global malloc + +#define LOOP_UNROLL_FACTOR 120 + +.func main +.type main, %function +main: + push {r4-r12, lr} + +#ifdef __linux__ + mov r0, 84 /* _SC_NPROCESSORS_ONLN */ + bl sysconf + mov r4, r0 + cmp r4, #2 + blt 1f + bl fork /* have at least 2 cores */ + cmp r4, #4 + blt 1f + bl fork /* have at least 4 cores */ +1: +#endif + /* use returned PID as a seed for random */ + mov r2, r0 + + vmov.u8 q2, #1 + vmov.f64 d31, #5.0 + vmov.f64 d28, #7.0 + vsqrt.f64 d29, d31 + vsqrt.f64 d28, d28 + + /* + * Allocate a large memory block (4M) and initialize it by writing + * at a bunch of random locations. This is done in order to cause + * artificial fragmentation of physical memory pages and mitigate + * cache line aliasing effects. We prefer reproducible results. + */ + mov r0, #(4 * 1024 * 1024) + bl malloc + mov lr, r0 + ldr r0, =12345 + ldr r1, =1103515245 + mov ip, #65536 +1: mla r2, r2, r1, r0 + mov r3, r3, lsr #(32 - 16) + str r2, [lr, r3, lsl #6] + subs ip, ip, #1 + bne 1b + + /* Align buffer pointer up to the next page boundary */ + ldr r0, =4095 + add lr, lr, r0 + bic lr, lr, r0 + + ldr r2, =0x55555555 + ldr r3, =0xCCCCCCCC + ldr r5, =(LOOP_UNROLL_FACTOR * 10 * 16 * 4) + ldr r9, =(LOOP_UNROLL_FACTOR) + mov r8, r9 + b 1f + + .ltorg + .balign 64 + +1: +.rept 10 + vsqrt.f64 d30, d31 + pld [lr, #64] + .rept 4 + smlalne r0, r1, r2, r3 + vst2.8 {q2}, [lr, :64]! + bne 2f +2: + smlalne r7, r1, r2, r3 + bne 2f +2: + vmul.f64 d27, d28, d29 + bne 2f +2: + .endr +.endr + subs r8, #1 + bne 1b + mov r8, r9 + subs lr, lr, r5 + bne 1b +.endfunc |