diff options
Diffstat (limited to 'patches/glibc-2.23/0300-optimized-string-functions-for-NEON-from-Linaro.patch')
-rw-r--r-- | patches/glibc-2.23/0300-optimized-string-functions-for-NEON-from-Linaro.patch | 699 |
1 files changed, 0 insertions, 699 deletions
diff --git a/patches/glibc-2.23/0300-optimized-string-functions-for-NEON-from-Linaro.patch b/patches/glibc-2.23/0300-optimized-string-functions-for-NEON-from-Linaro.patch deleted file mode 100644 index f823c45..0000000 --- a/patches/glibc-2.23/0300-optimized-string-functions-for-NEON-from-Linaro.patch +++ /dev/null @@ -1,699 +0,0 @@ -From: Michael Olbrich <m.olbrich@pengutronix.de> -Date: Thu, 15 Sep 2011 16:50:56 +0200 -Subject: [PATCH] optimized string functions for NEON from Linaro - -Signed-off-by: Michael Olbrich <m.olbrich@pengutronix.de> ---- - cortex-strings/sysdeps/arm/armv7/memchr.S | 155 ++++++++++++++++++++++++++++++ - cortex-strings/sysdeps/arm/armv7/memcpy.S | 152 +++++++++++++++++++++++++++++ - cortex-strings/sysdeps/arm/armv7/memset.S | 118 +++++++++++++++++++++++ - cortex-strings/sysdeps/arm/armv7/strchr.S | 76 +++++++++++++++ - cortex-strings/sysdeps/arm/armv7/strlen.S | 150 +++++++++++++++++++++++++++++ - 5 files changed, 651 insertions(+) - create mode 100644 cortex-strings/sysdeps/arm/armv7/memchr.S - create mode 100644 cortex-strings/sysdeps/arm/armv7/memcpy.S - create mode 100644 cortex-strings/sysdeps/arm/armv7/memset.S - create mode 100644 cortex-strings/sysdeps/arm/armv7/strchr.S - create mode 100644 cortex-strings/sysdeps/arm/armv7/strlen.S - -diff --git a/cortex-strings/sysdeps/arm/armv7/memchr.S b/cortex-strings/sysdeps/arm/armv7/memchr.S -new file mode 100644 -index 000000000000..92a2d9f0967d ---- /dev/null -+++ b/cortex-strings/sysdeps/arm/armv7/memchr.S -@@ -0,0 +1,155 @@ -+/* Copyright (c) 2010-2011, Linaro Limited -+ All rights reserved. -+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. 
-+ -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. -+ -+ * Neither the name of Linaro Limited nor the names of its -+ contributors may be used to endorse or promote products derived -+ from this software without specific prior written permission. -+ -+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+ -+/* -+ Written by Dave Gilbert <david.gilbert@linaro.org> -+ -+ This memchr routine is optimised on a Cortex-A9 and should work on -+ all ARMv7 processors. It has a fast path for short sizes, and has -+ an optimised path for large data sets; the worst case is finding the -+ match early in a large data set.
-+ -+ */ -+ -+@ 2011-02-07 david.gilbert@linaro.org -+@ Extracted from local git a5b438d861 -+@ 2011-07-14 david.gilbert@linaro.org -+@ Import endianness fix from local git ea786f1b -+@ 2011-12-07 david.gilbert@linaro.org -+@ Removed unneeded cbz from align loop -+ -+ .syntax unified -+ .arch armv7-a -+ -+@ this lets us check a flag in a 00/ff byte easily in either endianness -+#ifdef __ARMEB__ -+#define CHARTSTMASK(c) 1<<(31-(c*8)) -+#else -+#define CHARTSTMASK(c) 1<<(c*8) -+#endif -+ .text -+ .thumb -+ -+@ --------------------------------------------------------------------------- -+ .thumb_func -+ .align 2 -+ .p2align 4,,15 -+ .global memchr -+ .type memchr,%function -+memchr: -+ @ r0 = start of memory to scan -+ @ r1 = character to look for -+ @ r2 = length -+ @ returns r0 = pointer to character or NULL if not found -+ and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char -+ -+ cmp r2,#16 @ If it's short don't bother with anything clever -+ blt 20f -+ -+ tst r0, #7 @ If it's already aligned skip the next bit -+ beq 10f -+ -+ @ Work up to an aligned point -+5: -+ ldrb r3, [r0],#1 -+ subs r2, r2, #1 -+ cmp r3, r1 -+ beq 50f @ If it matches exit found -+ tst r0, #7 -+ bne 5b @ If not aligned yet then do next byte -+ -+10: -+ @ At this point, we are aligned, we know we have at least 8 bytes to work with -+ push {r4,r5,r6,r7} -+ orr r1, r1, r1, lsl #8 @ expand the match byte across all four bytes of the word -+ orr r1, r1, r1, lsl #16 -+ bic r4, r2, #7 @ Number of bytes covered by whole double words (r2 rounded down to a multiple of 8) -+ mvns r7, #0 @ all F's -+ movs r3, #0 -+ -+15: -+ ldmia r0!,{r5,r6} -+ subs r4, r4, #8 -+ eor r5,r5, r1 @ Get it so that r5,r6 have 00's where the bytes match the target -+ eor r6,r6, r1 -+ uadd8 r5, r5, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 -+ sel r5, r3, r7 @ bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION -+ uadd8 r6, r6, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 -+ sel r6, r5, r7 @
chained....bytes are 00 for non-00 bytes, or ff for 00 bytes - NOTE INVERSION -+ cbnz r6, 60f -+ bne 15b @ (Flags from the subs above) If not run out of bytes then go around again -+ -+ pop {r4,r5,r6,r7} -+ and r1,r1,#0xff @ Get r1 back to a single character from the expansion above -+ and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done -+ -+20: -+ cbz r2, 40f @ 0 length or hit the end already then not found -+ -+21: @ Post aligned section, or just a short call -+ ldrb r3,[r0],#1 -+ subs r2,r2,#1 -+ eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub -+ cbz r3, 50f -+ bne 21b @ on r2 flags -+ -+40: -+ movs r0,#0 @ not found -+ bx lr -+ -+50: -+ subs r0,r0,#1 @ found -+ bx lr -+ -+60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was -+ @ r0 points to the start of the double word after the one that was tested -+ @ r5 has the 00/ff pattern for the first word, r6 has the chained value -+ cmp r5, #0 -+ itte eq -+ moveq r5, r6 @ the end is in the 2nd word -+ subeq r0,r0,#3 @ Points to 2nd byte of 2nd word -+ subne r0,r0,#7 @ or 2nd byte of 1st word -+ -+ @ r0 currently points to the 2nd byte of the word containing the hit -+ tst r5, # CHARTSTMASK(0) @ 1st character -+ bne 61f -+ adds r0,r0,#1 -+ tst r5, # CHARTSTMASK(1) @ 2nd character -+ ittt eq -+ addeq r0,r0,#1 -+ tsteq r5, # (3<<15) @ 2nd & 3rd character -+ @ If not the 3rd must be the last one -+ addeq r0,r0,#1 -+ -+61: -+ pop {r4,r5,r6,r7} -+ subs r0,r0,#1 -+ bx lr -diff --git a/cortex-strings/sysdeps/arm/armv7/memcpy.S b/cortex-strings/sysdeps/arm/armv7/memcpy.S -new file mode 100644 -index 000000000000..3be24cad2c8d ---- /dev/null -+++ b/cortex-strings/sysdeps/arm/armv7/memcpy.S -@@ -0,0 +1,152 @@ -+/* Copyright (c) 2010-2011, Linaro Limited -+ All rights reserved.
-+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. -+ -+ * Neither the name of Linaro Limited nor the names of its -+ contributors may be used to endorse or promote products derived -+ from this software without specific prior written permission. -+ -+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ -+ Written by Dave Gilbert <david.gilbert@linaro.org> -+ -+ This memcpy routine is optimised on a Cortex-A9 and should work on -+ all ARMv7 processors with NEON. 
*/ -+ -+@ 2011-09-01 david.gilbert@linaro.org -+@ Extracted from local git 2f11b436 -+ -+ .syntax unified -+ .arch armv7-a -+ -+@ this lets us check a flag in a 00/ff byte easily in either endianness -+#ifdef __ARMEB__ -+#define CHARTSTMASK(c) 1<<(31-(c*8)) -+#else -+#define CHARTSTMASK(c) 1<<(c*8) -+#endif -+ .text -+ .thumb -+ -+@ --------------------------------------------------------------------------- -+ .thumb_func -+ .align 2 -+ .p2align 4,,15 -+ .global memcpy -+ .type memcpy,%function -+memcpy: -+ @ r0 = dest -+ @ r1 = source -+ @ r2 = count -+ @ returns dest in r0 -+ @ Overlaps of source/dest not allowed according to spec -+ @ Note this routine relies on v7 misaligned loads/stores -+ pld [r1] -+ mov r12, r0 @ stash original r0 -+ cmp r2,#32 -+ blt 10f @ take the small copy case separately -+ -+ @ test for either source or destination being misaligned -+ @ (We only rely on word align) -+ tst r0,#3 -+ it eq -+ tsteq r1,#3 -+ bne 30f @ misaligned case -+ -+4: -+ @ at this point we are word (or better) aligned and have at least -+ @ 32 bytes to play with -+ -+ @ If it's a huge copy, try Neon -+ cmp r2, #128*1024 -+ bge 35f @ Sharing general non-aligned case here, aligned could be faster -+ -+ push {r3,r4,r5,r6,r7,r8,r10,r11} -+5: -+ ldmia r1!,{r3,r4,r5,r6,r7,r8,r10,r11} -+ sub r2,r2,#32 -+ pld [r1,#96] -+ cmp r2,#32 -+ stmia r0!,{r3,r4,r5,r6,r7,r8,r10,r11} -+ bge 5b -+ -+ pop {r3,r4,r5,r6,r7,r8,r10,r11} -+ @ We are now down to less than 32 bytes -+ cbz r2,15f @ quick exit for the case where we copied a multiple of 32 -+ -+10: @ small copies (not necessarily aligned); every branch to here follows a signed cmp r2,#32 that left r2 < 32 -+ cmp r2,#4 -+ blt 12f -+11: -+ sub r2,r2,#4 -+ cmp r2,#4 -+ ldr r3, [r1],#4 -+ str r3, [r0],#4 -+ bge 11b -+12: -+ tst r2,#2 -+ itt ne -+ ldrhne r3, [r1],#2 -+ strhne r3, [r0],#2 -+ -+ tst r2,#1 -+ itt ne -+ ldrbne r3, [r1],#1 -+ strbne r3, [r0],#1 -+ -+15: @ exit -+ mov r0,r12 @ restore r0 -+ bx lr -+ -+ .align 2 -+ .p2align 4,,15 -+30: @ non-aligned
- at least 32 bytes to play with -+ @ Test for co-misalignment -+ eor r3, r0, r1 -+ tst r3,#3 -+ beq 50f -+ -+ @ Use Neon for misaligned -+35: -+ vld1.8 {d0,d1,d2,d3}, [r1]! -+ sub r2,r2,#32 -+ cmp r2,#32 -+ pld [r1,#96] -+ vst1.8 {d0,d1,d2,d3}, [r0]! -+ bge 35b -+ b 10b @ fewer than 32 bytes left: finish with the core-register copy at 10. TODO: Probably a bad idea to switch from NEON to ARM core registers at this point -+ -+ .align 2 -+ .p2align 4,,15 -+50: @ Co-misaligned -+ @ At this point we've got at least 32 bytes; copy single bytes until dest is 8-byte aligned -+51: -+ ldrb r3,[r1],#1 -+ sub r2,r2,#1 -+ strb r3,[r0],#1 -+ tst r0,#7 -+ bne 51b -+ -+ cmp r2,#32 -+ blt 10b -+ b 4b -diff --git a/cortex-strings/sysdeps/arm/armv7/memset.S b/cortex-strings/sysdeps/arm/armv7/memset.S -new file mode 100644 -index 000000000000..921cb7535cc8 ---- /dev/null -+++ b/cortex-strings/sysdeps/arm/armv7/memset.S -@@ -0,0 +1,118 @@ -+/* Copyright (c) 2010-2011, Linaro Limited -+ All rights reserved. -+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. -+ -+ * Neither the name of Linaro Limited nor the names of its -+ contributors may be used to endorse or promote products derived -+ from this software without specific prior written permission. -+ -+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT -+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ -+ Written by Dave Gilbert <david.gilbert@linaro.org> -+ -+ This memset routine is optimised on a Cortex-A9 and should work on -+ all ARMv7 processors. */ -+ -+ .syntax unified -+ .arch armv7-a -+ -+@ 2011-08-30 david.gilbert@linaro.org -+@ Extracted from local git 2f11b436 -+ -+@ this lets us check a flag in a 00/ff byte easily in either endianness -+#ifdef __ARMEB__ -+#define CHARTSTMASK(c) 1<<(31-(c*8)) -+#else -+#define CHARTSTMASK(c) 1<<(c*8) -+#endif -+ .text -+ .thumb -+ -+@ --------------------------------------------------------------------------- -+ .thumb_func -+ .align 2 -+ .p2align 4,,15 -+ .global memset -+ .type memset,%function -+memset: -+ @ r0 = address -+ @ r1 = character -+ @ r2 = count -+ @ returns original address in r0 -+ -+ mov r3, r0 @ Leave r0 alone -+ cbz r2, 10f @ Exit if 0 length -+ -+ tst r0, #7 -+ beq 2f @ Already aligned -+ -+ @ Ok, so we're misaligned here -+1: -+ strb r1, [r3], #1 -+ subs r2,r2,#1 -+ tst r3, #7 -+ cbz r2, 10f @ Exit if we hit the end -+ bne 1b @ go round again if still misaligned -+ -+2: -+ @ OK, so we're aligned -+ push {r4,r5,r6,r7} -+ bics r4, r2, #15 @ if less than 16 bytes then need to finish it off -+ beq 5f -+ -+3: -+ @ POSIX says that ch is cast to an unsigned char. A uxtb is one -+ @ byte and takes two cycles, where an AND is four bytes but one -+ @ cycle.
-+ and r1, #0xFF -+ orr r1, r1, r1, lsl#8 @ Same character into all bytes -+ orr r1, r1, r1, lsl#16 -+ mov r5,r1 -+ mov r6,r1 -+ mov r7,r1 -+ -+4: -+ subs r4,r4,#16 -+ stmia r3!,{r1,r5,r6,r7} -+ bne 4b -+ and r2,r2,#15 -+ -+ @ At this point we're still aligned and we have up to 15 bytes left to write -+ @ we can avoid some of the byte-at-a time now by testing for some big chunks -+ tst r2,#8 -+ itt ne -+ subne r2,r2,#8 -+ stmiane r3!,{r1,r5} -+ -+5: -+ pop {r4,r5,r6,r7} -+ cbz r2, 10f -+ -+ @ Got to do any last < alignment bytes -+6: -+ subs r2,r2,#1 -+ strb r1,[r3],#1 -+ bne 6b -+ -+10: -+ bx lr @ goodbye -diff --git a/cortex-strings/sysdeps/arm/armv7/strchr.S b/cortex-strings/sysdeps/arm/armv7/strchr.S -new file mode 100644 -index 000000000000..8875dbfce6da ---- /dev/null -+++ b/cortex-strings/sysdeps/arm/armv7/strchr.S -@@ -0,0 +1,76 @@ -+/* Copyright (c) 2010-2011, Linaro Limited -+ All rights reserved. -+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. -+ -+ * Neither the name of Linaro Limited nor the names of its -+ contributors may be used to endorse or promote products derived -+ from this software without specific prior written permission. -+ -+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT -+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ -+ Written by Dave Gilbert <david.gilbert@linaro.org> -+ -+ A very simple strchr routine, from benchmarks on A9 it's a bit faster than -+ the current version in eglibc (2.12.1-0ubuntu14 package) -+ I don't think doing a word at a time version is worth it since a lot -+ of strchr cases are very short anyway */ -+ -+@ 2011-02-07 david.gilbert@linaro.org -+@ Extracted from local git a5b438d861 -+ -+ .syntax unified -+ .arch armv7-a -+ -+ .text -+ .thumb -+ -+@ --------------------------------------------------------------------------- -+ -+ .thumb_func -+ .align 2 -+ .p2align 4,,15 -+ .global strchr -+ .type strchr,%function -+strchr: -+ @ r0 = start of string -+ @ r1 = character to match -+ @ returns NULL for no match, or a pointer to the match -+ and r1,r1, #255 -+ -+1: -+ ldrb r2,[r0],#1 -+ cmp r2,r1 -+ cbz r2,10f -+ bne 1b -+ -+ @ We're here if it matched -+5: -+ subs r0,r0,#1 -+ bx lr -+ -+10: -+ @ We're here if we ran off the end -+ cmp r1, #0 @ Corner case - you're allowed to search for the NUL terminator and get a pointer to it -+ beq 5b @ A bit messy, if it's common we should branch at the start to a special loop -+ mov r0,#0 -+ bx lr -diff --git a/cortex-strings/sysdeps/arm/armv7/strlen.S b/cortex-strings/sysdeps/arm/armv7/strlen.S -new file mode 100644 -index 000000000000..8efa2356fdd1 ---- /dev/null -+++ b/cortex-strings/sysdeps/arm/armv7/strlen.S -@@ -0,0 +1,150 @@ -+/* Copyright (c) 2010-2011,2013 Linaro Limited -+ All rights reserved.
-+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. -+ -+ * Neither the name of Linaro Limited nor the names of its -+ contributors may be used to endorse or promote products derived -+ from this software without specific prior written permission. -+ -+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+ -+/* -+ Assumes: -+ ARMv6T2, AArch32 -+ -+ */ -+ -+ .macro def_fn f p2align=0 -+ .text -+ .p2align \p2align -+ .global \f -+ .type \f, %function -+\f: -+ .endm -+ -+#ifdef __ARMEB__ -+#define S2LO lsl -+#define S2HI lsr -+#else -+#define S2LO lsr -+#define S2HI lsl -+#endif -+ -+ /* This code requires Thumb. */ -+ .thumb -+ .syntax unified -+ -+/* Parameters and result. */ -+#define srcin r0 -+#define result r0 -+ -+/* Internal variables. 
*/ -+#define src r1 -+#define data1a r2 -+#define data1b r3 -+#define const_m1 r12 -+#define const_0 r4 -+#define tmp1 r4 /* Overlaps const_0 */ -+#define tmp2 r5 -+ -+def_fn strlen p2align=6 -+ pld [srcin, #0] -+ strd r4, r5, [sp, #-8]! -+ bic src, srcin, #7 -+ mvn const_m1, #0 -+ ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */ -+ pld [src, #32] -+ bne.w .Lmisaligned8 -+ mov const_0, #0 -+ mov result, #-8 -+.Lloop_aligned: -+ /* Bytes 0-7. */ -+ ldrd data1a, data1b, [src] -+ pld [src, #64] -+ add result, result, #8 -+.Lstart_realigned: -+ uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */ -+ sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */ -+ uadd8 data1b, data1b, const_m1 -+ sel data1b, data1a, const_m1 /* Only used if d1a == 0. */ -+ cbnz data1b, .Lnull_found -+ -+ /* Bytes 8-15. */ -+ ldrd data1a, data1b, [src, #8] -+ uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */ -+ add result, result, #8 -+ sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */ -+ uadd8 data1b, data1b, const_m1 -+ sel data1b, data1a, const_m1 /* Only used if d1a == 0. */ -+ cbnz data1b, .Lnull_found -+ -+ /* Bytes 16-23. */ -+ ldrd data1a, data1b, [src, #16] -+ uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */ -+ add result, result, #8 -+ sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */ -+ uadd8 data1b, data1b, const_m1 -+ sel data1b, data1a, const_m1 /* Only used if d1a == 0. */ -+ cbnz data1b, .Lnull_found -+ -+ /* Bytes 24-31. */ -+ ldrd data1a, data1b, [src, #24] -+ add src, src, #32 -+ uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */ -+ add result, result, #8 -+ sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */ -+ uadd8 data1b, data1b, const_m1 -+ sel data1b, data1a, const_m1 /* Only used if d1a == 0. 
*/ -+ cmp data1b, #0 -+ beq .Lloop_aligned -+ -+.Lnull_found: -+ cmp data1a, #0 -+ itt eq -+ addeq result, result, #4 -+ moveq data1a, data1b -+#ifndef __ARMEB__ -+ rev data1a, data1a -+#endif -+ clz data1a, data1a -+ ldrd r4, r5, [sp], #8 -+ add result, result, data1a, lsr #3 /* Bits -> Bytes. */ -+ bx lr -+ -+.Lmisaligned8: -+ ldrd data1a, data1b, [src] -+ and tmp2, tmp1, #3 -+ rsb result, tmp1, #0 -+ lsl tmp2, tmp2, #3 /* Bytes -> bits. */ -+ tst tmp1, #4 -+ pld [src, #64] -+ S2HI tmp2, const_m1, tmp2 -+ orn data1a, data1a, tmp2 -+ itt ne -+ ornne data1b, data1b, tmp2 -+ movne data1a, const_m1 -+ mov const_0, #0 -+ b .Lstart_realigned -+ .size strlen, . - strlen -+ |