From 0aa9b52efa75b4c8bbdc12845434e25e457ea57b Mon Sep 17 00:00:00 2001 From: Christopher Ferris Date: Wed, 10 Jul 2013 14:31:03 -0700 Subject: [PATCH] Add new optimized strlen for arm. This optimized version is primarily targeted at cortex-a15. Tested on all nexus devices using the system/extras/libc_test strlen test. Tested alignments from 1 to 32 that are powers of 2. Tested that strlen does not cross page boundaries at all alignments. Speed improvements listed below: cortex-a15 - Sizes >= 32 bytes, ~75% improvement. - Sizes >= 1024 bytes, ~250% improvement. cortex-a9 - Sizes >= 32 bytes, ~75% improvement. - Sizes >= 1024 bytes, ~85% improvement. krait - Sizes >= 32 bytes, ~95% improvement. - Sizes >= 1024 bytes, ~160% improvement. Merge from internal master. (cherry-picked from 2fc071797743b88a9a47427d46baed7c7b24f4d2) Change-Id: I1ceceb4e745fd68e9d946f96d1d42e0cdaff6ccf --- libc/arch-arm/arm.mk | 1 - libc/arch-arm/cortex-a15/bionic/strlen.S | 151 ++++++++++++++++++++ libc/arch-arm/cortex-a15/cortex-a15.mk | 1 + libc/arch-arm/cortex-a9/cortex-a9.mk | 2 + libc/arch-arm/{ => generic}/bionic/strlen.c | 0 libc/arch-arm/generic/generic.mk | 1 + libc/arch-arm/krait/krait.mk | 2 + 7 files changed, 157 insertions(+), 1 deletion(-) create mode 100644 libc/arch-arm/cortex-a15/bionic/strlen.S rename libc/arch-arm/{ => generic}/bionic/strlen.c (100%) diff --git a/libc/arch-arm/arm.mk b/libc/arch-arm/arm.mk index b308b0520..1a2185f8f 100644 --- a/libc/arch-arm/arm.mk +++ b/libc/arch-arm/arm.mk @@ -15,7 +15,6 @@ _LIBC_ARCH_COMMON_SRC_FILES := \ arch-arm/bionic/setjmp.S \ arch-arm/bionic/sigsetjmp.S \ arch-arm/bionic/strcpy.S \ - arch-arm/bionic/strlen.c.arm \ arch-arm/bionic/syscall.S \ arch-arm/bionic/tgkill.S \ arch-arm/bionic/tkill.S \ diff --git a/libc/arch-arm/cortex-a15/bionic/strlen.S b/libc/arch-arm/cortex-a15/bionic/strlen.S new file mode 100644 index 000000000..d5b8ba422 --- /dev/null +++ b/libc/arch-arm/cortex-a15/bionic/strlen.S @@ -0,0 +1,151 @@ +/* + * Copyright 
(C) 2013 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * Copyright (c) 2013 ARM Ltd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+
+    .syntax unified
+
+    .thumb
+    .thumb_func
+// size_t strlen(const char* s): r0 = s on entry, r0 = length on return; uses r1-r3, ip and the flags.
+ENTRY(strlen)
+    pld     [r0, #0]                    // Prefetch the string itself; r1 is not set up yet.
+    mov     r1, r0                      // r1 = read cursor; r0 keeps the start for the final subtract.
+
+    rsb     r3, r0, #0
+    ands    r3, r3, #7                  // r3 = (-s) & 7 = bytes needed to reach 8-byte alignment.
+    beq     .L_mainloop                 // Already double-word aligned.
+
+    // Align to a double word (64 bits), checking everything consumed on the way.
+    ands    ip, r3, #1                  // Bit 0 set: one byte to consume.
+    beq     .L_align_to_32
+
+    ldrb    r2, [r1], #1
+    cmp     r2, #0
+    beq     .L_update_count_and_return
+
+.L_align_to_32:
+    ands    ip, r3, #2                  // Bit 1 set: two more bytes to consume.
+    beq     .L_align_to_64
+
+    ldrb    r2, [r1], #1
+    cmp     r2, #0
+    beq     .L_update_count_and_return
+    ldrb    r2, [r1], #1
+    cmp     r2, #0
+    beq     .L_update_count_and_return
+
+.L_align_to_64:
+    ands    ip, r3, #4                  // Bit 2 set: one aligned word to consume.
+    beq     .L_mainloop
+    ldr     r3, [r1], #4                // r3 is reused as data; the alignment count is no longer needed.
+
+    sub     ip, r3, #0x01010101         // ip = (w - 0x01010101) & ~w & 0x80808080 is non-zero
+    bic     ip, ip, r3                  // iff w contains a zero byte; the lowest set 0x80 bit
+    ands    ip, ip, #0x80808080         // marks the first such byte.
+    bne     .L_zero_in_second_register  // r1 already points just past this word.
+
+.L_mainloop:
+    ldrd    r2, r3, [r1], #8            // Two words per iteration; r1 ends up past both.
+
+    pld     [r1, #64]                   // Prefetch 64 bytes ahead of the cursor.
+
+    sub     ip, r2, #0x01010101         // Same zero-byte test on the first word.
+    bic     ip, ip, r2
+    ands    ip, ip, #0x80808080
+    bne     .L_zero_in_first_register
+
+    sub     ip, r3, #0x01010101         // Same zero-byte test on the second word.
+    bic     ip, ip, r3
+    ands    ip, ip, #0x80808080
+    bne     .L_zero_in_second_register
+    b       .L_mainloop
+
+.L_zero_in_first_register:
+    sub     r1, r1, #4                  // Make r1 point just past the word that holds the zero.
+
+.L_zero_in_second_register:
+    sub     r0, r1, r0                  // r0 = offset of the end of that word; corrected below.
+
+    // Check for zero in byte 0 (lowest address, given little-endian loads).
+    ands    r1, ip, #0x80
+    beq     .L_check_byte1
+
+    sub     r0, r0, #4
+    bx      lr
+
+.L_check_byte1:
+    // Check for zero in byte 1.
+    ands    r1, ip, #0x8000
+    beq     .L_check_byte2
+
+    sub     r0, r0, #3
+    bx      lr
+
+.L_check_byte2:
+    // Check for zero in byte 2.
+    ands    r1, ip, #0x800000
+    beq     .L_return                   // Otherwise the zero is in byte 3.
+
+    sub     r0, r0, #2
+    bx      lr
+
+.L_update_count_and_return:
+    sub     r0, r1, r0                  // r1 was post-incremented past the zero byte.
+
+.L_return:
+    sub     r0, r0, #1
+    bx      lr
+END(strlen)
diff --git a/libc/arch-arm/cortex-a15/cortex-a15.mk b/libc/arch-arm/cortex-a15/cortex-a15.mk
index d8193f8c2..0904e6bca 100644
--- a/libc/arch-arm/cortex-a15/cortex-a15.mk
+++ b/libc/arch-arm/cortex-a15/cortex-a15.mk
@@ -1,5 +1,6 @@
 $(call libc-add-cpu-variant-src,MEMCPY,arch-arm/cortex-a15/bionic/memcpy.S)
 $(call libc-add-cpu-variant-src,MEMSET,arch-arm/cortex-a15/bionic/memset.S)
 $(call libc-add-cpu-variant-src,STRCMP,arch-arm/cortex-a15/bionic/strcmp.S)
+$(call libc-add-cpu-variant-src,STRLEN,arch-arm/cortex-a15/bionic/strlen.S)
 
 include bionic/libc/arch-arm/generic/generic.mk
diff --git a/libc/arch-arm/cortex-a9/cortex-a9.mk b/libc/arch-arm/cortex-a9/cortex-a9.mk
index 48629330a..5c684ed49 100644
--- a/libc/arch-arm/cortex-a9/cortex-a9.mk
+++ b/libc/arch-arm/cortex-a9/cortex-a9.mk
@@ -1,5 +1,7 @@
 $(call libc-add-cpu-variant-src,MEMCPY,arch-arm/cortex-a9/bionic/memcpy.S)
 $(call libc-add-cpu-variant-src,MEMSET,arch-arm/cortex-a9/bionic/memset.S)
 $(call libc-add-cpu-variant-src,STRCMP,arch-arm/cortex-a9/bionic/strcmp.S)
+# Use cortex-a15 version of strlen.
+$(call libc-add-cpu-variant-src,STRLEN,arch-arm/cortex-a15/bionic/strlen.S) include bionic/libc/arch-arm/generic/generic.mk diff --git a/libc/arch-arm/bionic/strlen.c b/libc/arch-arm/generic/bionic/strlen.c similarity index 100% rename from libc/arch-arm/bionic/strlen.c rename to libc/arch-arm/generic/bionic/strlen.c diff --git a/libc/arch-arm/generic/generic.mk b/libc/arch-arm/generic/generic.mk index 358b1e6f6..18cad9da6 100644 --- a/libc/arch-arm/generic/generic.mk +++ b/libc/arch-arm/generic/generic.mk @@ -1,3 +1,4 @@ $(call libc-add-cpu-variant-src,MEMCPY,arch-arm/generic/bionic/memcpy.S) $(call libc-add-cpu-variant-src,MEMSET,arch-arm/generic/bionic/memset.S) $(call libc-add-cpu-variant-src,STRCMP,arch-arm/generic/bionic/strcmp.S) +$(call libc-add-cpu-variant-src,STRLEN,arch-arm/generic/bionic/strlen.c) diff --git a/libc/arch-arm/krait/krait.mk b/libc/arch-arm/krait/krait.mk index 4847f86a8..288afbb61 100644 --- a/libc/arch-arm/krait/krait.mk +++ b/libc/arch-arm/krait/krait.mk @@ -1,5 +1,7 @@ $(call libc-add-cpu-variant-src,MEMCPY,arch-arm/krait/bionic/memcpy.S) $(call libc-add-cpu-variant-src,MEMSET,arch-arm/krait/bionic/memset.S) $(call libc-add-cpu-variant-src,STRCMP,arch-arm/krait/bionic/strcmp.S) +# Use cortex-a15 version of strlen. +$(call libc-add-cpu-variant-src,STRLEN,arch-arm/cortex-a15/bionic/strlen.S) include bionic/libc/arch-arm/generic/generic.mk