am 2fbc9dda: Merge "bionic/x86: Optimization for string routines"

* commit '2fbc9dda345486005569be4ec586c6aae9d51650':
  bionic/x86: Optimization for string routines
This commit is contained in:
Elliott Hughes 2013-06-07 17:47:33 -07:00 committed by Android Git Automerger
commit 6bcf71c9ac
39 changed files with 13747 additions and 631 deletions

View file

@ -63,17 +63,11 @@ libc_common_src_files := \
stdlib/strtoumax.c \ stdlib/strtoumax.c \
stdlib/tolower_.c \ stdlib/tolower_.c \
stdlib/toupper_.c \ stdlib/toupper_.c \
string/index.c \
string/strcasecmp.c \ string/strcasecmp.c \
string/strcat.c \
string/strcspn.c \ string/strcspn.c \
string/strdup.c \ string/strdup.c \
string/strlcat.c \
string/strlcpy.c \
string/strncat.c \
string/strncpy.c \
string/strpbrk.c \ string/strpbrk.c \
string/strrchr.c \ string/__strrchr_chk.c \
string/strsep.c \ string/strsep.c \
string/strspn.c \ string/strspn.c \
string/strstr.c \ string/strstr.c \
@ -119,9 +113,7 @@ libc_common_src_files := \
bionic/ldexp.c \ bionic/ldexp.c \
bionic/lseek64.c \ bionic/lseek64.c \
bionic/md5.c \ bionic/md5.c \
bionic/memchr.c \
bionic/memmem.c \ bionic/memmem.c \
bionic/memrchr.c \
bionic/memswap.c \ bionic/memswap.c \
bionic/mmap.c \ bionic/mmap.c \
bionic/openat.c \ bionic/openat.c \
@ -157,7 +149,6 @@ libc_common_src_files := \
bionic/sleep.c \ bionic/sleep.c \
bionic/statfs.c \ bionic/statfs.c \
bionic/strndup.c \ bionic/strndup.c \
bionic/strnlen.c \
bionic/strntoimax.c \ bionic/strntoimax.c \
bionic/strntoumax.c \ bionic/strntoumax.c \
bionic/strtotimeval.c \ bionic/strtotimeval.c \
@ -237,7 +228,7 @@ libc_bionic_src_files := \
bionic/signalfd.cpp \ bionic/signalfd.cpp \
bionic/sigwait.cpp \ bionic/sigwait.cpp \
bionic/__strcat_chk.cpp \ bionic/__strcat_chk.cpp \
bionic/strchr.cpp \ bionic/__strchr_chk.cpp \
bionic/__strcpy_chk.cpp \ bionic/__strcpy_chk.cpp \
bionic/strerror.cpp \ bionic/strerror.cpp \
bionic/strerror_r.cpp \ bionic/strerror_r.cpp \
@ -293,27 +284,20 @@ libc_upstream_freebsd_src_files := \
upstream-freebsd/lib/libc/string/wcpcpy.c \ upstream-freebsd/lib/libc/string/wcpcpy.c \
upstream-freebsd/lib/libc/string/wcpncpy.c \ upstream-freebsd/lib/libc/string/wcpncpy.c \
upstream-freebsd/lib/libc/string/wcscasecmp.c \ upstream-freebsd/lib/libc/string/wcscasecmp.c \
upstream-freebsd/lib/libc/string/wcscat.c \
upstream-freebsd/lib/libc/string/wcschr.c \
upstream-freebsd/lib/libc/string/wcscmp.c \
upstream-freebsd/lib/libc/string/wcscpy.c \
upstream-freebsd/lib/libc/string/wcscspn.c \ upstream-freebsd/lib/libc/string/wcscspn.c \
upstream-freebsd/lib/libc/string/wcsdup.c \ upstream-freebsd/lib/libc/string/wcsdup.c \
upstream-freebsd/lib/libc/string/wcslcat.c \ upstream-freebsd/lib/libc/string/wcslcat.c \
upstream-freebsd/lib/libc/string/wcslcpy.c \ upstream-freebsd/lib/libc/string/wcslcpy.c \
upstream-freebsd/lib/libc/string/wcslen.c \
upstream-freebsd/lib/libc/string/wcsncasecmp.c \ upstream-freebsd/lib/libc/string/wcsncasecmp.c \
upstream-freebsd/lib/libc/string/wcsncat.c \ upstream-freebsd/lib/libc/string/wcsncat.c \
upstream-freebsd/lib/libc/string/wcsncmp.c \ upstream-freebsd/lib/libc/string/wcsncmp.c \
upstream-freebsd/lib/libc/string/wcsncpy.c \ upstream-freebsd/lib/libc/string/wcsncpy.c \
upstream-freebsd/lib/libc/string/wcsnlen.c \ upstream-freebsd/lib/libc/string/wcsnlen.c \
upstream-freebsd/lib/libc/string/wcspbrk.c \ upstream-freebsd/lib/libc/string/wcspbrk.c \
upstream-freebsd/lib/libc/string/wcsrchr.c \
upstream-freebsd/lib/libc/string/wcsspn.c \ upstream-freebsd/lib/libc/string/wcsspn.c \
upstream-freebsd/lib/libc/string/wcsstr.c \ upstream-freebsd/lib/libc/string/wcsstr.c \
upstream-freebsd/lib/libc/string/wcstok.c \ upstream-freebsd/lib/libc/string/wcstok.c \
upstream-freebsd/lib/libc/string/wmemchr.c \ upstream-freebsd/lib/libc/string/wmemchr.c \
upstream-freebsd/lib/libc/string/wmemcmp.c \
upstream-freebsd/lib/libc/string/wmemcpy.c \ upstream-freebsd/lib/libc/string/wmemcpy.c \
upstream-freebsd/lib/libc/string/wmemmove.c \ upstream-freebsd/lib/libc/string/wmemmove.c \
upstream-freebsd/lib/libc/string/wmemset.c \ upstream-freebsd/lib/libc/string/wmemset.c \
@ -369,6 +353,24 @@ libc_common_src_files += \
bionic/memmove.c.arm \ bionic/memmove.c.arm \
string/bcopy.c \ string/bcopy.c \
string/strncmp.c \ string/strncmp.c \
string/strcat.c \
string/strncat.c \
string/strncpy.c \
bionic/strchr.cpp \
string/strrchr.c \
bionic/memchr.c \
bionic/memrchr.c \
string/index.c \
bionic/strnlen.c \
string/strlcat.c \
string/strlcpy.c \
upstream-freebsd/lib/libc/string/wcschr.c \
upstream-freebsd/lib/libc/string/wcsrchr.c \
upstream-freebsd/lib/libc/string/wcscmp.c \
upstream-freebsd/lib/libc/string/wcscpy.c \
upstream-freebsd/lib/libc/string/wmemcmp.c \
upstream-freebsd/lib/libc/string/wcslen.c \
upstream-freebsd/lib/libc/string/wcscat.c
# These files need to be arm so that gdbserver # These files need to be arm so that gdbserver
# can set breakpoints in them without messing # can set breakpoints in them without messing
@ -392,7 +394,6 @@ libc_common_src_files += \
bionic/pthread-rwlocks.c \ bionic/pthread-rwlocks.c \
bionic/pthread-timers.c \ bionic/pthread-timers.c \
bionic/ptrace.c \ bionic/ptrace.c \
string/strcpy.c \
libc_static_common_src_files += \ libc_static_common_src_files += \
bionic/pthread.c \ bionic/pthread.c \
@ -407,7 +408,25 @@ libc_common_src_files += \
string/bcopy.c \ string/bcopy.c \
string/strcmp.c \ string/strcmp.c \
string/strcpy.c \ string/strcpy.c \
string/strncmp.c string/strncmp.c \
string/strcat.c \
string/strncat.c \
string/strncpy.c \
bionic/strchr.cpp \
string/strrchr.c \
bionic/memchr.c \
bionic/memrchr.c \
string/index.c \
bionic/strnlen.c \
string/strlcat.c \
string/strlcpy.c \
upstream-freebsd/lib/libc/string/wcschr.c \
upstream-freebsd/lib/libc/string/wcsrchr.c \
upstream-freebsd/lib/libc/string/wcscmp.c \
upstream-freebsd/lib/libc/string/wcscpy.c \
upstream-freebsd/lib/libc/string/wmemcmp.c \
upstream-freebsd/lib/libc/string/wcslen.c \
upstream-freebsd/lib/libc/string/wcscat.c
libc_common_src_files += \ libc_common_src_files += \
bionic/pthread-atfork.c \ bionic/pthread-atfork.c \

View file

@ -1,43 +0,0 @@
/*
Copyright (c) 2010, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * bzero entry-point selection.
 * With USE_SSE2 defined, sse2-memset5-atom.S is included with its
 * sse2_memset5_atom symbol renamed to bzero and with USE_AS_BZERO defined;
 * otherwise the implementation in bzero.S is included instead.
 * NOTE(review): USE_AS_BZERO presumably switches the included code to the
 * bzero argument layout, and the "#undef __i686" presumably keeps an
 * "__i686..." symbol name from being macro-expanded -- confirm both in the
 * included files.
 */
#if defined(USE_SSE2)
# include "cache_wrapper.S"
# undef __i686
# define USE_AS_BZERO
# define sse2_memset5_atom bzero
# include "sse2-memset5-atom.S"
#else
# include "bzero.S"
#endif

View file

@ -1,43 +0,0 @@
/*
Copyright (c) 2010, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * memcpy entry-point selection.
 * With USE_SSSE3 defined, ssse3-memcpy5.S is included with MEMCPY set to
 * memcpy; otherwise the implementation in memcpy.S is included instead.
 * Note that USE_AS_MEMMOVE is defined here even though the symbol is memcpy.
 * NOTE(review): MEMCPY is presumably the name the included file gives its
 * entry point, and USE_AS_MEMMOVE presumably enables overlap-safe copying --
 * confirm in ssse3-memcpy5.S.
 */
#if defined(USE_SSSE3)
# include "cache_wrapper.S"
# undef __i686
# define MEMCPY memcpy
# define USE_AS_MEMMOVE
# include "ssse3-memcpy5.S"
#else
# include "memcpy.S"
#endif

View file

@ -1,43 +0,0 @@
/*
Copyright (c) 2010, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * memmove entry-point selection.
 * With USE_SSSE3 defined, ssse3-memcpy5.S is included with MEMCPY set to
 * memmove and USE_AS_MEMMOVE defined; otherwise the implementation in
 * memmove.S is included instead.
 * NOTE(review): MEMCPY is presumably the name the included file gives its
 * entry point -- confirm in ssse3-memcpy5.S.
 */
#if defined(USE_SSSE3)
# include "cache_wrapper.S"
# undef __i686
# define MEMCPY memmove
# define USE_AS_MEMMOVE
# include "ssse3-memcpy5.S"
#else
# include "memmove.S"
#endif

View file

@ -1,42 +0,0 @@
/*
Copyright (c) 2010, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * memset entry-point selection.
 * With USE_SSE2 defined, sse2-memset5-atom.S is included with its
 * sse2_memset5_atom symbol renamed to memset; otherwise the implementation
 * in memset.S is included instead.
 */
#if defined(USE_SSE2)
# include "cache_wrapper.S"
# undef __i686
# define sse2_memset5_atom memset
# include "sse2-memset5-atom.S"
#else
# include "memset.S"
#endif

View file

@ -28,15 +28,6 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#if defined(USE_SSSE3) #define USE_AS_BZERO
#define MEMSET bzero
# define USE_AS_STRNCMP #include "sse2-memset-atom.S"
# define ssse3_strcmp_latest strncmp
# include "ssse3-strcmp-latest.S"
#else
# include "strncmp.S"
#endif

View file

@ -0,0 +1,32 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * Build sse2-strchr-atom.S with every "strchr" token replaced by "index".
 * NOTE(review): this presumably makes the included file emit its entry point
 * under the name index -- confirm that the file names its symbol strchr.
 */
#define strchr index
#include "sse2-strchr-atom.S"

View file

@ -0,0 +1,556 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* L(label): spell a label with a ".L" prefix. */
#ifndef L
# define L(label) .L##label
#endif
/* Thin wrappers around the assembler's .cfi_* unwind directives.  Each is
   only defined when an earlier definition is not already in effect. */
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
/* ENTRY(name): declare a global function symbol, align it with .p2align 4,
   emit the label and open a CFI region. */
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
/* END(name): close the CFI region and record the symbol size. */
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
/* CFI bookkeeping for a 4-byte push / pop of REG. */
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
/* pushl / popl paired with the matching CFI annotation. */
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
/* Prologue: %edi is the only register saved by this routine. */
#define ENTRANCE PUSH (%edi);
/* Offset of the first argument from %esp after ENTRANCE has run: 4 bytes for
   the %edi pushed by ENTRANCE plus 4 more (presumably the return address). */
#define PARMS 8
/* Epilogue: restore %edi and return.  The trailing CFI_PUSH re-establishes
   the "%edi is saved" unwind state for the code that follows the ret. */
#define RETURN POP (%edi); ret; CFI_PUSH (%edi);
/* Stack offsets of the three arguments: buffer pointer, byte to search for,
   and byte count (as loaded at the top of memchr). */
#define STR1 PARMS
#define STR2 STR1+4
#define LEN STR2+4
.text
/*
 * memchr: scan LEN bytes starting at STR1 for the low byte of STR2.
 * Returns in %eax the address of the first matching byte, or 0 when there is
 * no match inside the given length.
 *
 * Register roles in this routine:
 *   %edi  current scan pointer (saved/restored via ENTRANCE/RETURN)
 *   %edx  remaining-length counter
 *   %ecx  misalignment / offset scratch
 *   %xmm1 the search byte replicated into all 16 lanes
 *   %eax  pmovmskb result: bit i set means byte i of the vector matched
 */
ENTRY (memchr)
ENTRANCE
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
mov LEN(%esp), %edx
/* Zero length: nothing to search. */
test %edx, %edx
jz L(return_null)
/* The two punpcklbw plus the pshufd $0 below replicate the low byte of
   %xmm1 into all 16 bytes; the integer instructions are interleaved. */
punpcklbw %xmm1, %xmm1
mov %ecx, %edi
punpcklbw %xmm1, %xmm1
and $63, %ecx
pshufd $0, %xmm1, %xmm1
/* (pointer & 63) > 48 means a 16-byte load from the pointer would extend
   past the next 64-byte boundary; that case takes the aligned-load path at
   L(crosscache) (presumably 64 is the cache-line size, per the label). */
cmp $48, %ecx
ja L(crosscache)
/* First 16 bytes, unaligned load.  A hit here is still validated against
   the length in L(match_case2_prolog). */
movdqu (%edi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(match_case2_prolog)
/* Length <= 16 and no hit: done. */
sub $16, %edx
jbe L(return_null)
/* Advance to the next 16-byte aligned address and add the pointer's low
   four bits back to the counter, since the aligned address is that many
   bytes short of pointer + 16. */
lea 16(%edi), %edi
and $15, %ecx
and $-16, %edi
add %ecx, %edx
/* 64 or fewer bytes left: finish in L(exit_loop), which checks lengths. */
sub $64, %edx
jbe L(exit_loop)
jmp L(loop_prolog)
.p2align 4
/* Start is within the last 15 bytes before a 64-byte boundary: load the
   enclosing aligned 16-byte vector and shift out the mask bits belonging to
   bytes that precede the start pointer. */
L(crosscache):
and $15, %ecx
and $-16, %edi
movdqa (%edi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
sar %cl, %eax
test %eax, %eax
/* L(match_case2_prolog1) adds %ecx back to %edi before decoding the mask. */
jnz L(match_case2_prolog1)
/* Remaining = length - 16 + misalignment.
   NOTE(review): jle is a signed test; for lengths above 0x7fffffff the sum
   looks negative and this would return NULL after one vector -- confirm
   whether such lengths need to be supported. */
lea -16(%edx), %edx
add %ecx, %edx
jle L(return_null)
lea 16(%edi), %edi
sub $64, %edx
jbe L(exit_loop)
.p2align 4
/* Two unrolled 64-byte blocks, each checked as four aligned 16-byte vectors.
   Every path into a block comes from a "sub $64, %edx" whose jbe was not
   taken, so no per-vector length check is made here.  %ecx holds the offset
   (0/16/32/48) of the vector being tested, which L(match_case1) adds to
   %edi. */
L(loop_prolog):
movdqa (%edi), %xmm0
pcmpeqb %xmm1, %xmm0
xor %ecx, %ecx
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(match_case1)
movdqa 16(%edi), %xmm2
pcmpeqb %xmm1, %xmm2
lea 16(%ecx), %ecx
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(match_case1)
movdqa 32(%edi), %xmm3
pcmpeqb %xmm1, %xmm3
lea 16(%ecx), %ecx
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(match_case1)
movdqa 48(%edi), %xmm4
pcmpeqb %xmm1, %xmm4
lea 16(%ecx), %ecx
pmovmskb %xmm4, %eax
test %eax, %eax
jnz L(match_case1)
/* First block clean: step to the second block. */
lea 64(%edi), %edi
sub $64, %edx
jbe L(exit_loop)
movdqa (%edi), %xmm0
pcmpeqb %xmm1, %xmm0
xor %ecx, %ecx
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(match_case1)
movdqa 16(%edi), %xmm2
pcmpeqb %xmm1, %xmm2
lea 16(%ecx), %ecx
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(match_case1)
movdqa 32(%edi), %xmm3
pcmpeqb %xmm1, %xmm3
lea 16(%ecx), %ecx
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(match_case1)
movdqa 48(%edi), %xmm4
pcmpeqb %xmm1, %xmm4
lea 16(%ecx), %ecx
pmovmskb %xmm4, %eax
test %eax, %eax
jnz L(match_case1)
/* Second block clean: advance, then round %edi down to a 64-byte boundary
   and add the bytes stepped back over to the remaining-length counter. */
lea 64(%edi), %edi
mov %edi, %ecx
and $-64, %edi
and $63, %ecx
add %ecx, %edx
.p2align 4
/* Main loop: one 64-byte block per iteration, exiting to L(exit_loop) when
   64 or fewer bytes remain. */
L(align64_loop):
sub $64, %edx
jbe L(exit_loop)
movdqa (%edi), %xmm0
movdqa 16(%edi), %xmm2
movdqa 32(%edi), %xmm3
movdqa 48(%edi), %xmm4
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm1, %xmm2
pcmpeqb %xmm1, %xmm3
pcmpeqb %xmm1, %xmm4
/* Merge the four compare results into %xmm4 so a single pmovmskb/test
   covers the block.  This overwrites %xmm3 and %xmm4; %xmm0 and %xmm2 keep
   their individual results. */
pmaxub %xmm0, %xmm3
pmaxub %xmm2, %xmm4
pmaxub %xmm3, %xmm4
add $64, %edi
pmovmskb %xmm4, %eax
test %eax, %eax
jz L(align64_loop)
/* Hit somewhere in the block just scanned: step back and find which vector,
   tracking its offset in %ecx. */
sub $64, %edi
pmovmskb %xmm0, %eax
xor %ecx, %ecx
test %eax, %eax
jnz L(match_case1)
pmovmskb %xmm2, %eax
lea 16(%ecx), %ecx
test %eax, %eax
jnz L(match_case1)
/* %xmm3 was overwritten by the merge above, so recompute it from memory. */
movdqa 32(%edi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
lea 16(%ecx), %ecx
test %eax, %eax
jnz L(match_case1)
/* Last vector: the compare overwrites the pattern register %xmm1, and the
   code falls straight into L(match_case1) without testing the mask. */
pcmpeqb 48(%edi), %xmm1
pmovmskb %xmm1, %eax
lea 16(%ecx), %ecx
.p2align 4
/* Decode a match mask with no length check.
   In: %edi = block base, %ecx = vector offset, %ax = 16-bit match mask.
   Out: %eax = %edi + %ecx + index of the lowest set bit tested below.
   The mask is narrowed by byte, then by nibble, then bit by bit; the last
   position in each nibble is reached by fall-through without its own test. */
L(match_case1):
add %ecx, %edi
/* Low byte empty: the match is in bytes 8..15. */
test %al, %al
jz L(match_case1_high)
/* Low nibble empty: the match is in bytes 4..7. */
mov %al, %cl
and $15, %cl
jz L(match_case1_8)
test $0x01, %al
jnz L(exit_case1_1)
test $0x02, %al
jnz L(exit_case1_2)
test $0x04, %al
jnz L(exit_case1_3)
lea 3(%edi), %eax
RETURN
.p2align 4
/* Bytes 4..7. */
L(match_case1_8):
test $0x10, %al
jnz L(exit_case1_5)
test $0x20, %al
jnz L(exit_case1_6)
test $0x40, %al
jnz L(exit_case1_7)
lea 7(%edi), %eax
RETURN
.p2align 4
/* Bytes 8..15: same decoding applied to %ah. */
L(match_case1_high):
mov %ah, %ch
and $15, %ch
jz L(match_case1_high_8)
test $0x01, %ah
jnz L(exit_case1_9)
test $0x02, %ah
jnz L(exit_case1_10)
test $0x04, %ah
jnz L(exit_case1_11)
lea 11(%edi), %eax
RETURN
.p2align 4
/* Bytes 12..15. */
L(match_case1_high_8):
test $0x10, %ah
jnz L(exit_case1_13)
test $0x20, %ah
jnz L(exit_case1_14)
test $0x40, %ah
jnz L(exit_case1_15)
lea 15(%edi), %eax
RETURN
.p2align 4
/* Tail: 64 or fewer bytes remain at %edi.  The add undoes the "sub $64" that
   preceded every jump here, so %edx is the remaining byte count again.
   Vectors are checked in order; a hit goes to L(match_case2), which validates
   the position against the remaining length, and the scan stops as soon as
   the remaining count does not reach the next vector. */
L(exit_loop):
add $64, %edx
movdqa (%edi), %xmm0
pcmpeqb %xmm1, %xmm0
xor %ecx, %ecx
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(match_case2)
cmp $16, %edx
jbe L(return_null)
movdqa 16(%edi), %xmm2
pcmpeqb %xmm1, %xmm2
lea 16(%ecx), %ecx
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(match_case2)
cmp $32, %edx
jbe L(return_null)
movdqa 32(%edi), %xmm3
pcmpeqb %xmm1, %xmm3
lea 16(%ecx), %ecx
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(match_case2)
cmp $48, %edx
jbe L(return_null)
/* Last vector: the compare overwrites the pattern register %xmm1. */
pcmpeqb 48(%edi), %xmm1
lea 16(%ecx), %ecx
pmovmskb %xmm1, %eax
test %eax, %eax
jnz L(match_case2)
/* No match anywhere in the tail. */
xor %eax, %eax
RETURN
.p2align 4
/* Return stubs for L(match_case1): L(exit_case1_N) returns %edi + (N - 1),
   i.e. the address of the N-th byte of the matching vector.  There are no
   stubs for N = 4, 8, 12 and 16; those positions are returned inline by the
   dispatch code. */
L(exit_case1_1):
mov %edi, %eax
RETURN
.p2align 4
L(exit_case1_2):
lea 1(%edi), %eax
RETURN
.p2align 4
L(exit_case1_3):
lea 2(%edi), %eax
RETURN
.p2align 4
L(exit_case1_5):
lea 4(%edi), %eax
RETURN
.p2align 4
L(exit_case1_6):
lea 5(%edi), %eax
RETURN
.p2align 4
L(exit_case1_7):
lea 6(%edi), %eax
RETURN
.p2align 4
L(exit_case1_9):
lea 8(%edi), %eax
RETURN
.p2align 4
L(exit_case1_10):
lea 9(%edi), %eax
RETURN
.p2align 4
L(exit_case1_11):
lea 10(%edi), %eax
RETURN
.p2align 4
L(exit_case1_13):
lea 12(%edi), %eax
RETURN
.p2align 4
L(exit_case1_14):
lea 13(%edi), %eax
RETURN
.p2align 4
L(exit_case1_15):
lea 14(%edi), %eax
RETURN
.p2align 4
/* Decode a match mask WITH a length check.  Three entry points:
     L(match_case2)         %edx still counts from the block base: subtract
                            the vector offset %ecx, then fall through.
     L(match_case2_prolog1) add %ecx to %edi, then fall through.
     L(match_case2_prolog)  %edi and %edx already refer to the vector start.
   The mask in %ax is narrowed as in L(match_case1), but each result at
   index i > 0 is preceded by "sub $(i+1), %edx; jb L(return_null)", so a
   match at or beyond the remaining length yields NULL. */
L(match_case2):
sub %ecx, %edx
L(match_case2_prolog1):
add %ecx, %edi
L(match_case2_prolog):
/* Low byte empty: the match is in bytes 8..15. */
test %al, %al
jz L(match_case2_high)
/* Low nibble empty: the match is in bytes 4..7. */
mov %al, %cl
and $15, %cl
jz L(match_case2_8)
test $0x01, %al
jnz L(exit_case2_1)
test $0x02, %al
jnz L(exit_case2_2)
test $0x04, %al
jnz L(exit_case2_3)
sub $4, %edx
jb L(return_null)
lea 3(%edi), %eax
RETURN
.p2align 4
/* Bytes 4..7. */
L(match_case2_8):
test $0x10, %al
jnz L(exit_case2_5)
test $0x20, %al
jnz L(exit_case2_6)
test $0x40, %al
jnz L(exit_case2_7)
sub $8, %edx
jb L(return_null)
lea 7(%edi), %eax
RETURN
.p2align 4
/* Bytes 8..15: same decoding applied to %ah. */
L(match_case2_high):
mov %ah, %ch
and $15, %ch
jz L(match_case2_high_8)
test $0x01, %ah
jnz L(exit_case2_9)
test $0x02, %ah
jnz L(exit_case2_10)
test $0x04, %ah
jnz L(exit_case2_11)
sub $12, %edx
jb L(return_null)
lea 11(%edi), %eax
RETURN
.p2align 4
/* Bytes 12..15. */
L(match_case2_high_8):
test $0x10, %ah
jnz L(exit_case2_13)
test $0x20, %ah
jnz L(exit_case2_14)
test $0x40, %ah
jnz L(exit_case2_15)
sub $16, %edx
jb L(return_null)
lea 15(%edi), %eax
RETURN
.p2align 4
/* Length-checked return stubs for L(match_case2*): L(exit_case2_N) returns
   %edi + (N - 1) provided at least N bytes remain in %edx, otherwise NULL.
   L(exit_case2_1) performs no check of its own. */
L(exit_case2_1):
mov %edi, %eax
RETURN
.p2align 4
L(exit_case2_2):
sub $2, %edx
jb L(return_null)
lea 1(%edi), %eax
RETURN
.p2align 4
L(exit_case2_3):
sub $3, %edx
jb L(return_null)
lea 2(%edi), %eax
RETURN
.p2align 4
L(exit_case2_5):
sub $5, %edx
jb L(return_null)
lea 4(%edi), %eax
RETURN
.p2align 4
L(exit_case2_6):
sub $6, %edx
jb L(return_null)
lea 5(%edi), %eax
RETURN
.p2align 4
L(exit_case2_7):
sub $7, %edx
jb L(return_null)
lea 6(%edi), %eax
RETURN
.p2align 4
L(exit_case2_9):
sub $9, %edx
jb L(return_null)
lea 8(%edi), %eax
RETURN
.p2align 4
L(exit_case2_10):
sub $10, %edx
jb L(return_null)
lea 9(%edi), %eax
RETURN
.p2align 4
L(exit_case2_11):
sub $11, %edx
jb L(return_null)
lea 10(%edi), %eax
RETURN
.p2align 4
L(exit_case2_13):
sub $13, %edx
jb L(return_null)
lea 12(%edi), %eax
RETURN
.p2align 4
L(exit_case2_14):
sub $14, %edx
jb L(return_null)
lea 13(%edi), %eax
RETURN
.p2align 4
L(exit_case2_15):
sub $15, %edx
jb L(return_null)
lea 14(%edi), %eax
RETURN
.p2align 4
/* Common "not found" exit: return 0. */
L(return_null):
xor %eax, %eax
RETURN
END (memchr)

View file

@ -0,0 +1,778 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* L(label): spell a label with a ".L" prefix. */
#ifndef L
# define L(label) .L##label
#endif
/* Thin wrappers around the assembler's .cfi_* unwind directives.  Each is
   only defined when an earlier definition is not already in effect. */
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
/* ENTRY(name): declare a global function symbol, align it with .p2align 4,
   emit the label and open a CFI region. */
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
/* END(name): close the CFI region and record the symbol size. */
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
/* CFI bookkeeping for a 4-byte push / pop of REG. */
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
/* pushl / popl paired with the matching CFI annotation. */
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
/* Offset of the first argument from %esp at function entry; nothing is
   pushed before the arguments are read (4 bytes, presumably the return
   address). */
#define PARMS 4
/* Stack offsets of the three arguments: buffer pointer, byte to search for,
   and byte count (as loaded at the top of memrchr). */
#define STR1 PARMS
#define STR2 STR1+4
#define LEN STR2+4
.text
/*
 * memrchr: scan the LEN bytes starting at STR1 backwards for the low byte of
 * STR2.  Returns in %eax the address of the last matching byte, or 0 when
 * there is none.
 *
 * Register roles in the main path:
 *   %ecx  base address of the vector / 64-byte block being examined
 *   %edx  remaining-length counter
 *   %xmm1 the search byte replicated into all 16 lanes
 *   %eax  pmovmskb result: bit i set means byte i of the vector matched
 * No callee-saved register is used except in the short-length path, which
 * saves and restores %edi itself.
 */
ENTRY (memrchr)
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
mov LEN(%esp), %edx
/* Zero length: nothing to search. */
test %edx, %edx
jz L(return_null)
/* Lengths of 16 or less are handled separately. */
sub $16, %edx
jbe L(length_less16)
/* The two punpcklbw plus the pshufd $0 replicate the low byte of %xmm1 into
   all 16 bytes; meanwhile %ecx becomes start + length - 16, the address of
   the final 16 bytes, which are checked with an unaligned load. */
punpcklbw %xmm1, %xmm1
add %edx, %ecx
punpcklbw %xmm1, %xmm1
movdqu (%ecx), %xmm0
pshufd $0, %xmm1, %xmm1
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(exit_dispatch)
/* Step back 64 bytes.  If that address is not 16-byte aligned, round it up
   to the next 16-byte boundary and grow the counter by the same number of
   bytes (16 minus the misalignment in %eax). */
sub $64, %ecx
mov %ecx, %eax
and $15, %eax
jz L(loop_prolog)
add $16, %ecx
add $16, %edx
and $-16, %ecx
sub %eax, %edx
.p2align 4
/* From here on %ecx is 16-byte aligned.  Two unrolled 64-byte blocks, each
   examined from its highest vector (offset 48) down to offset 0 so that the
   last occurrence is found first.  A block is only scanned this way after
   "sub $64, %edx" left a result above zero; otherwise L(exit_loop) takes
   over. */
L(loop_prolog):
sub $64, %edx
jbe L(exit_loop)
movdqa 48(%ecx), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches48)
movdqa 32(%ecx), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches32)
movdqa 16(%ecx), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches16)
movdqa (%ecx), %xmm4
pcmpeqb %xmm1, %xmm4
pmovmskb %xmm4, %eax
test %eax, %eax
jnz L(exit_dispatch)
/* First block clean: step back to the second block. */
sub $64, %ecx
sub $64, %edx
jbe L(exit_loop)
movdqa 48(%ecx), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches48)
movdqa 32(%ecx), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches32)
movdqa 16(%ecx), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches16)
movdqa (%ecx), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(exit_dispatch)
/* Second block clean.  If %ecx is not 64-byte aligned, round it up to the
   next 64-byte boundary and grow the counter by the same number of bytes
   (64 minus the misalignment in %eax) before entering the main loop. */
mov %ecx, %eax
and $63, %eax
test %eax, %eax
jz L(align64_loop)
add $64, %ecx
add $64, %edx
and $-64, %ecx
sub %eax, %edx
.p2align 4
/* Main loop: step back one 64-byte block per iteration, exiting to
   L(exit_loop) when 64 or fewer bytes remain. */
L(align64_loop):
sub $64, %ecx
sub $64, %edx
jbe L(exit_loop)
movdqa (%ecx), %xmm0
movdqa 16(%ecx), %xmm2
movdqa 32(%ecx), %xmm3
movdqa 48(%ecx), %xmm4
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm1, %xmm2
pcmpeqb %xmm1, %xmm3
pcmpeqb %xmm1, %xmm4
/* Merge the four compare results into %xmm2 so a single pmovmskb/test
   covers the block.  This overwrites %xmm0 and %xmm2; %xmm3 and %xmm4 keep
   their individual results. */
pmaxub %xmm3, %xmm0
pmaxub %xmm4, %xmm2
pmaxub %xmm0, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jz L(align64_loop)
/* Hit somewhere in this block: test vectors from the highest address down. */
pmovmskb %xmm4, %eax
test %eax, %eax
jnz L(matches48)
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
/* %xmm2 and %xmm0 were overwritten by the merge, so both low vectors are
   recomputed from memory; the second compare overwrites the pattern
   register %xmm1. */
movdqa 16(%ecx), %xmm2
pcmpeqb %xmm1, %xmm2
pcmpeqb (%ecx), %xmm1
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
/* Only the vector at offset 0 is left; decode its mask inline, highest bit
   first, using %dl as scratch. */
pmovmskb %xmm1, %eax
test %ah, %ah
jnz L(exit_dispatch_high)
mov %al, %dl
and $15 << 4, %dl
jnz L(exit_dispatch_8)
test $0x08, %al
jnz L(exit_4)
test $0x04, %al
jnz L(exit_3)
test $0x02, %al
jnz L(exit_2)
mov %ecx, %eax
ret
.p2align 4
/* Tail: the add undoes the "sub $64, %edx" that preceded every jump here.
   The resulting count decides how many of the block's vectors are examined
   (highest address first) and which hits need validating: hits routed to
   the L(matchesNN_1) labels are checked against the remaining count, hits
   routed to L(matches48)/L(matches32) are not. */
L(exit_loop):
add $64, %edx
cmp $32, %edx
jbe L(exit_loop_32)
/* More than 32 bytes remain: offsets 48 and 32 need no validation. */
movdqa 48(%ecx), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches48)
movdqa 32(%ecx), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches32)
movdqa 16(%ecx), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches16_1)
/* 48 or fewer bytes remain: the vector at offset 0 is not examined. */
cmp $48, %edx
jbe L(return_null)
pcmpeqb (%ecx), %xmm1
pmovmskb %xmm1, %eax
test %eax, %eax
jnz L(matches0_1)
xor %eax, %eax
ret
.p2align 4
/* 32 or fewer bytes remain: only offsets 48 and 32 can be reached, and both
   hits are validated. */
L(exit_loop_32):
movdqa 48(%ecx), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches48_1)
cmp $16, %edx
jbe L(return_null)
pcmpeqb 32(%ecx), %xmm1
pmovmskb %xmm1, %eax
test %eax, %eax
jnz L(matches32_1)
xor %eax, %eax
ret
.p2align 4
/* Unchecked mask decoding.  L(matches16), L(matches32) and L(matches48)
   first add the vector offset to %ecx; L(exit_dispatch) expects %ecx to be
   the vector address already.  The 16-bit mask in %ax is then narrowed by
   byte, by nibble, and finally bit by bit from the highest bit downwards,
   so the result is %ecx plus the index of the highest set bit.  %dl/%dh are
   used as scratch.  The lowest position in each nibble is reached by
   fall-through without its own test. */
L(matches16):
lea 16(%ecx), %ecx
test %ah, %ah
jnz L(exit_dispatch_high)
mov %al, %dl
and $15 << 4, %dl
jnz L(exit_dispatch_8)
test $0x08, %al
jnz L(exit_4)
test $0x04, %al
jnz L(exit_3)
test $0x02, %al
jnz L(exit_2)
mov %ecx, %eax
ret
.p2align 4
L(matches32):
lea 32(%ecx), %ecx
test %ah, %ah
jnz L(exit_dispatch_high)
mov %al, %dl
and $15 << 4, %dl
jnz L(exit_dispatch_8)
test $0x08, %al
jnz L(exit_4)
test $0x04, %al
jnz L(exit_3)
test $0x02, %al
jnz L(exit_2)
mov %ecx, %eax
ret
.p2align 4
/* Adjusts %ecx and falls through into L(exit_dispatch). */
L(matches48):
lea 48(%ecx), %ecx
.p2align 4
L(exit_dispatch):
/* High byte non-empty: the last match is in bytes 8..15. */
test %ah, %ah
jnz L(exit_dispatch_high)
/* High nibble of %al non-empty: the last match is in bytes 4..7. */
mov %al, %dl
and $15 << 4, %dl
jnz L(exit_dispatch_8)
test $0x08, %al
jnz L(exit_4)
test $0x04, %al
jnz L(exit_3)
test $0x02, %al
jnz L(exit_2)
mov %ecx, %eax
ret
.p2align 4
/* Bytes 4..7. */
L(exit_dispatch_8):
test $0x80, %al
jnz L(exit_8)
test $0x40, %al
jnz L(exit_7)
test $0x20, %al
jnz L(exit_6)
lea 4(%ecx), %eax
ret
.p2align 4
/* Bytes 8..15: same decoding applied to %ah. */
L(exit_dispatch_high):
mov %ah, %dh
and $15 << 4, %dh
jnz L(exit_dispatch_high_8)
test $0x08, %ah
jnz L(exit_12)
test $0x04, %ah
jnz L(exit_11)
test $0x02, %ah
jnz L(exit_10)
lea 8(%ecx), %eax
ret
.p2align 4
/* Bytes 12..15. */
L(exit_dispatch_high_8):
test $0x80, %ah
jnz L(exit_16)
test $0x40, %ah
jnz L(exit_15)
test $0x20, %ah
jnz L(exit_14)
lea 12(%ecx), %eax
ret
.p2align 4
/* Return stubs for the unchecked dispatch: L(exit_N) returns %ecx + (N - 1),
   i.e. the address of the N-th byte of the matching vector.  There are no
   stubs for N = 1, 5, 9 and 13; those positions are returned inline by the
   dispatch code. */
L(exit_2):
lea 1(%ecx), %eax
ret
.p2align 4
L(exit_3):
lea 2(%ecx), %eax
ret
.p2align 4
L(exit_4):
lea 3(%ecx), %eax
ret
.p2align 4
L(exit_6):
lea 5(%ecx), %eax
ret
.p2align 4
L(exit_7):
lea 6(%ecx), %eax
ret
.p2align 4
L(exit_8):
lea 7(%ecx), %eax
ret
.p2align 4
L(exit_10):
lea 9(%ecx), %eax
ret
.p2align 4
L(exit_11):
lea 10(%ecx), %eax
ret
.p2align 4
L(exit_12):
lea 11(%ecx), %eax
ret
.p2align 4
L(exit_14):
lea 13(%ecx), %eax
ret
.p2align 4
L(exit_15):
lea 14(%ecx), %eax
ret
.p2align 4
L(exit_16):
lea 15(%ecx), %eax
ret
.p2align 4
/* Length-checked mask decoding for the tail block.  Each L(matchesNN_1)
   entry rebases %edx to (remaining count - 64 + NN) and %ecx to the vector
   address.  The mask is then decoded highest bit first as in
   L(exit_dispatch); the candidate at index i is returned only if
   "add $i, %edx" does not go negative (jl), otherwise NULL.  Because %edx is
   still needed, the scratch copies of the mask bytes go into %ah/%al
   rather than %dl/%dh. */
L(matches0_1):
lea -64(%edx), %edx
test %ah, %ah
jnz L(exit_dispatch_1_high)
mov %al, %ah
and $15 << 4, %ah
jnz L(exit_dispatch_1_8)
test $0x08, %al
jnz L(exit_1_4)
test $0x04, %al
jnz L(exit_1_3)
test $0x02, %al
jnz L(exit_1_2)
add $0, %edx
jl L(return_null)
mov %ecx, %eax
ret
.p2align 4
L(matches16_1):
lea -48(%edx), %edx
lea 16(%ecx), %ecx
test %ah, %ah
jnz L(exit_dispatch_1_high)
mov %al, %ah
and $15 << 4, %ah
jnz L(exit_dispatch_1_8)
test $0x08, %al
jnz L(exit_1_4)
test $0x04, %al
jnz L(exit_1_3)
test $0x02, %al
jnz L(exit_1_2)
add $0, %edx
jl L(return_null)
mov %ecx, %eax
ret
.p2align 4
L(matches32_1):
lea -32(%edx), %edx
lea 32(%ecx), %ecx
test %ah, %ah
jnz L(exit_dispatch_1_high)
mov %al, %ah
and $15 << 4, %ah
jnz L(exit_dispatch_1_8)
test $0x08, %al
jnz L(exit_1_4)
test $0x04, %al
jnz L(exit_1_3)
test $0x02, %al
jnz L(exit_1_2)
add $0, %edx
jl L(return_null)
mov %ecx, %eax
ret
.p2align 4
/* Adjusts %edx/%ecx and falls through into L(exit_dispatch_1). */
L(matches48_1):
lea -16(%edx), %edx
lea 48(%ecx), %ecx
.p2align 4
L(exit_dispatch_1):
/* High byte non-empty: the last match is in bytes 8..15. */
test %ah, %ah
jnz L(exit_dispatch_1_high)
/* %ah is zero here, so it is free to hold a scratch copy of %al. */
mov %al, %ah
and $15 << 4, %ah
jnz L(exit_dispatch_1_8)
test $0x08, %al
jnz L(exit_1_4)
test $0x04, %al
jnz L(exit_1_3)
test $0x02, %al
jnz L(exit_1_2)
/* "add $0" only sets the flags from %edx for the sign test. */
add $0, %edx
jl L(return_null)
mov %ecx, %eax
ret
.p2align 4
/* Bytes 4..7. */
L(exit_dispatch_1_8):
test $0x80, %al
jnz L(exit_1_8)
test $0x40, %al
jnz L(exit_1_7)
test $0x20, %al
jnz L(exit_1_6)
add $4, %edx
jl L(return_null)
lea 4(%ecx), %eax
ret
.p2align 4
/* Bytes 8..15: only %ah is tested from here on, so %al serves as scratch. */
L(exit_dispatch_1_high):
mov %ah, %al
and $15 << 4, %al
jnz L(exit_dispatch_1_high_8)
test $0x08, %ah
jnz L(exit_1_12)
test $0x04, %ah
jnz L(exit_1_11)
test $0x02, %ah
jnz L(exit_1_10)
add $8, %edx
jl L(return_null)
lea 8(%ecx), %eax
ret
.p2align 4
/* Bytes 12..15. */
L(exit_dispatch_1_high_8):
test $0x80, %ah
jnz L(exit_1_16)
test $0x40, %ah
jnz L(exit_1_15)
test $0x20, %ah
jnz L(exit_1_14)
add $12, %edx
jl L(return_null)
lea 12(%ecx), %eax
ret
.p2align 4
/* Length-checked return stubs: L(exit_1_N) adds N - 1 to %edx and returns
   NULL if the result is negative, otherwise returns %ecx + (N - 1). */
L(exit_1_2):
add $1, %edx
jl L(return_null)
lea 1(%ecx), %eax
ret
.p2align 4
L(exit_1_3):
add $2, %edx
jl L(return_null)
lea 2(%ecx), %eax
ret
.p2align 4
L(exit_1_4):
add $3, %edx
jl L(return_null)
lea 3(%ecx), %eax
ret
.p2align 4
L(exit_1_6):
add $5, %edx
jl L(return_null)
lea 5(%ecx), %eax
ret
.p2align 4
L(exit_1_7):
add $6, %edx
jl L(return_null)
lea 6(%ecx), %eax
ret
.p2align 4
L(exit_1_8):
add $7, %edx
jl L(return_null)
lea 7(%ecx), %eax
ret
.p2align 4
L(exit_1_10):
add $9, %edx
jl L(return_null)
lea 9(%ecx), %eax
ret
.p2align 4
L(exit_1_11):
add $10, %edx
jl L(return_null)
lea 10(%ecx), %eax
ret
.p2align 4
L(exit_1_12):
add $11, %edx
jl L(return_null)
lea 11(%ecx), %eax
ret
.p2align 4
L(exit_1_14):
add $13, %edx
jl L(return_null)
lea 13(%ecx), %eax
ret
.p2align 4
L(exit_1_15):
add $14, %edx
jl L(return_null)
lea 14(%ecx), %eax
ret
.p2align 4
L(exit_1_16):
add $15, %edx
jl L(return_null)
lea 15(%ecx), %eax
ret
.p2align 4
/* "Not found" exit for paths that have not pushed %edi: return 0. */
L(return_null):
xor %eax, %eax
ret
.p2align 4
/* Short buffer (length <= 16) whose start is 16-byte aligned.
   In: %eax = start, %edx = length, %xmm1 = replicated byte.
   The match mask is trimmed to its low "length" bits, then decoded by
   L(exit_dispatch) with %ecx = start. */
L(length_less16_offset0):
mov %dl, %cl
pcmpeqb (%eax), %xmm1
/* %edx = (1 << length) - 1 */
mov $1, %edx
sal %cl, %edx
sub $1, %edx
mov %eax, %ecx
pmovmskb %xmm1, %eax
and %edx, %eax
test %eax, %eax
jnz L(exit_dispatch)
xor %eax, %eax
ret
.p2align 4
/* Short buffer entry (length <= 16).  Restores the length in %edx (the
   caller had subtracted 16), replicates the search byte, and splits on the
   start pointer's alignment. */
L(length_less16):
punpcklbw %xmm1, %xmm1
add $16, %edx
punpcklbw %xmm1, %xmm1
mov %ecx, %eax
pshufd $0, %xmm1, %xmm1
and $15, %ecx
jz L(length_less16_offset0)
/* Unaligned start: %edi is needed as a scratch register from here on. */
PUSH (%edi)
/* %dh = misalignment + length - 16.  Above zero means the buffer extends
   into the next aligned 16-byte vector. */
mov %cl, %dh
add %dl, %dh
and $-16, %eax
sub $16, %dh
ja L(length_less16_part2)
/* Buffer lies inside one aligned vector: shift out the bits for bytes
   before the start, keep the low "length" bits, and take the highest set
   bit with bsr. */
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edi
sar %cl, %edi
add %ecx, %eax
mov %dl, %cl
mov $1, %edx
sal %cl, %edx
sub $1, %edx
and %edx, %edi
test %edi, %edi
jz L(ret_null)
bsr %edi, %edi
add %edi, %eax
POP (%edi)
ret
/* Re-establish the "%edi is saved" unwind state for the code below. */
CFI_PUSH (%edi)
.p2align 4
/* Buffer spans two aligned vectors.  The upper vector is checked first,
   with its mask trimmed to the %dh bytes that belong to the buffer; only if
   it has no hit is the lower vector checked, with the bits below the start
   shifted out. */
L(length_less16_part2):
movdqa 16(%eax), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %edi
/* Keep the misalignment in %ch while %cl is used as the shift count. */
mov %cl, %ch
mov %dh, %cl
mov $1, %edx
sal %cl, %edx
sub $1, %edx
and %edx, %edi
test %edi, %edi
jnz L(length_less16_part2_return)
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edi
mov %ch, %cl
sar %cl, %edi
test %edi, %edi
jz L(ret_null)
bsr %edi, %edi
add %edi, %eax
/* Clear %ch so that %ecx is just the misalignment before adding it. */
xor %ch, %ch
add %ecx, %eax
POP (%edi)
ret
CFI_PUSH (%edi)
.p2align 4
/* Hit in the upper vector: aligned base + 16 + index of the highest set bit. */
L(length_less16_part2_return):
bsr %edi, %edi
lea 16(%eax, %edi), %eax
POP (%edi)
ret
CFI_PUSH (%edi)
.p2align 4
/* "Not found" exit for the paths that pushed %edi. */
L(ret_null):
xor %eax, %eax
POP (%edi)
ret
END (memrchr)

View file

@ -28,6 +28,9 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include "cache.h"
#undef __i686
#ifndef L #ifndef L
# define L(label) .L##label # define L(label) .L##label
#endif #endif
@ -136,9 +139,13 @@ __i686.get_pc_thunk.bx:
jmp *TABLE(,%ecx,4) jmp *TABLE(,%ecx,4)
#endif #endif
#ifndef MEMSET
# define MEMSET memset
#endif
.section .text.sse2,"ax",@progbits .section .text.sse2,"ax",@progbits
ALIGN (4) ALIGN (4)
ENTRY (sse2_memset5_atom) ENTRY (MEMSET)
ENTRANCE ENTRANCE
movl LEN(%esp), %ecx movl LEN(%esp), %ecx
@ -911,4 +918,4 @@ L(aligned_16_15bytes):
SETRTNVAL SETRTNVAL
RETURN_END RETURN_END
END (sse2_memset5_atom) END (MEMSET)

View file

@ -0,0 +1,391 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Assembler helper macros for this file.  Each of the first group is only
   defined when the including environment has not already provided it. */
/* L(label): spell a label with a ".L" prefix. */
#ifndef L
# define L(label) .L##label
#endif
/* Thin wrappers mapping cfi_* names onto the assembler's .cfi_* directives. */
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
/* ENTRY(name): declare NAME as a global function symbol, align it with
   .p2align 4, emit the label and open a CFI region. */
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
/* END(name): close the CFI region and record the symbol size. */
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
/* CFI_PUSH / CFI_POP: the CFI bookkeeping for a 4-byte push / pop of REG,
   without the push or pop instruction itself. */
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
/* PUSH / POP: pushl / popl plus the matching CFI bookkeeping. */
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
/* PARMS: offset from %esp to the first stack argument once ENTRANCE has run
   (return address plus the %edi pushed by ENTRANCE). */
#define PARMS 8
/* ENTRANCE: function prologue, saves %edi. */
#define ENTRANCE PUSH(%edi)
/* RETURN: restore %edi and return.  The CFI_PUSH after the ret emits only
   directives; it puts the CFI state back to "%edi pushed" for whatever code
   follows the ret in the file. */
#define RETURN POP (%edi); ret; CFI_PUSH (%edi);
/* Offsets of the two stack arguments relative to %esp after ENTRANCE. */
#define STR1 PARMS
#define STR2 STR1+4
.text
/* strchr: scan a NUL-terminated string 16 bytes at a time with SSE2 for the
   first byte equal to the character argument.
   Stack arguments (read after ENTRANCE has pushed %edi):
     STR1(%esp) - string pointer, STR2(%esp) - character to find.
   Every exit path loads %eax with either an address computed from %edi or
   zero (L(return_null)) before RETURN.
   Register use:
     %edi  - address that lane 0 of the current masks corresponds to
     %xmm1 - the search byte replicated into all 16 lanes
     %xmm2 - all zero before each compare, so pcmpeqb leaves the NUL mask in it
     %eax  - bit mask of lanes equal to the search byte
     %edx  - bit mask of lanes equal to NUL  */
ENTRY (strchr)
ENTRANCE
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
pxor %xmm2, %xmm2
mov %ecx, %edi
/* Two punpcklbw steps copy the low byte of %xmm1 into its low four bytes;
   the pshufd below then copies that dword into all four dword lanes. */
punpcklbw %xmm1, %xmm1
punpcklbw %xmm1, %xmm1
/* ECX has OFFSET. */
and $15, %ecx
pshufd $0, %xmm1, %xmm1
/* The flags tested here are still those of the "and" above: a zero OFFSET
   means the pointer is already 16-byte aligned. */
je L(loop)
/* Handle unaligned string. */
/* Round %edi down to the enclosing aligned block so movdqa can load it. */
and $-16, %edi
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
pcmpeqb %xmm1, %xmm0
/* Find where NULL is. */
pmovmskb %xmm2, %edx
/* Check if there is a match. */
pmovmskb %xmm0, %eax
/* Remove the leading bytes. */
/* Shifting both masks right by OFFSET drops the lanes that lie before the
   start of the string, so bit 0 now corresponds to the original pointer. */
sarl %cl, %edx
sarl %cl, %eax
test %eax, %eax
jz L(unaligned_no_match)
/* Rebase %edi to the original pointer so it agrees with the shifted masks. */
add %ecx, %edi
test %edx, %edx
/* Match without a NUL in the block: the first match is the answer. */
jz L(match_case1)
/* Match and NUL both present: their order has to be checked. */
jmp L(match_case2)
.p2align 4
/* The first (unaligned) block held no match at or after the start of the
   string.  %edx is the NUL mask already shifted by OFFSET and %edi is still
   the rounded-down block address. */
L(unaligned_no_match):
test %edx, %edx
/* The string ends inside this block and nothing matched. */
jne L(return_null)
/* %xmm2 holds the unshifted compare result and may contain set lanes from
   bytes before the start of the string; clear it before the loop reuses it
   as the zero operand. */
pxor %xmm2, %xmm2
/* Step to the next aligned block and fall through into L(loop). */
add $16, %edi
.p2align 4
/* Loop start on aligned string. */
/* Main loop over aligned 16-byte blocks, unrolled four times.  Each step
   loads the block at %edi, builds the NUL mask in %edx and the match mask in
   %eax, leaves through L(matches) if any lane matched, leaves through
   L(return_null) if a NUL was seen without a match, and otherwise advances
   %edi by 16.  %xmm2 is not re-zeroed between steps: execution only reaches
   the next step when %edx is zero, i.e. the compare result left in %xmm2 is
   all zero again. */
L(loop):
/* Step 1 of 4. */
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
test %edx, %edx
jnz L(return_null)
add $16, %edi
/* Step 2 of 4. */
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
test %edx, %edx
jnz L(return_null)
add $16, %edi
/* Step 3 of 4. */
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
test %edx, %edx
jnz L(return_null)
add $16, %edi
/* Step 4 of 4. */
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
test %edx, %edx
jnz L(return_null)
add $16, %edi
jmp L(loop)
/* Reached from the loop with a non-zero match mask in %eax, the NUL mask in
   %edx and %edi at lane 0 of the block. */
L(matches):
/* There is a match. First find where NULL is. */
test %edx, %edx
/* No NUL in this block: only the position of the first match matters. */
jz L(match_case1)
.p2align 4
/* Both a match and a NUL are present in the block.  Lanes are examined from
   lowest to highest, testing the match bit of a lane before its NUL bit; a
   NUL found in a lower lane than the first match gives a NULL result. */
L(match_case2):
test %al, %al
/* No match in lanes 0-7; continue with lanes 8-15. */
jz L(match_higth_case2)
mov %al, %cl
and $15, %cl
/* A match lies in lanes 0-3. */
jnz L(match_case2_4)
/* First match is in lanes 4-7; any NUL in lanes 0-3 precedes it. */
mov %dl, %ch
and $15, %ch
jnz L(return_null)
test $0x10, %al
jnz L(Exit5)
test $0x10, %dl
jnz L(return_null)
test $0x20, %al
jnz L(Exit6)
test $0x20, %dl
jnz L(return_null)
test $0x40, %al
jnz L(Exit7)
test $0x40, %dl
jnz L(return_null)
/* Lanes 4-6 hold neither a match nor a NUL, and %al is non-zero with a
   clear low nibble, so the match is in lane 7. */
lea 7(%edi), %eax
RETURN
.p2align 4
/* Match-and-NUL case where the match mask has a bit set in lanes 0-3.
   Lanes 0-2 are tested in order, match bit before NUL bit. */
L(match_case2_4):
test $0x01, %al
jnz L(Exit1)
test $0x01, %dl
jnz L(return_null)
test $0x02, %al
jnz L(Exit2)
test $0x02, %dl
jnz L(return_null)
test $0x04, %al
jnz L(Exit3)
test $0x04, %dl
jnz L(return_null)
/* Lanes 0-2 are clear in both masks; the low nibble of %al is non-zero, so
   the match is in lane 3. */
lea 3(%edi), %eax
RETURN
.p2align 4
/* Match-and-NUL case where %al is zero, i.e. there is no match in lanes 0-7.
   The same scheme as L(match_case2) is applied to lanes 8-15 using %ah
   (match bits) and %dh (NUL bits). */
L(match_higth_case2):
/* Any NUL in lanes 0-7 precedes every possible match. */
test %dl, %dl
jnz L(return_null)
mov %ah, %cl
and $15, %cl
/* A match lies in lanes 8-11. */
jnz L(match_case2_12)
/* First match is in lanes 12-15; any NUL in lanes 8-11 precedes it. */
mov %dh, %ch
and $15, %ch
jnz L(return_null)
test $0x10, %ah
jnz L(Exit13)
test $0x10, %dh
jnz L(return_null)
test $0x20, %ah
jnz L(Exit14)
test $0x20, %dh
jnz L(return_null)
test $0x40, %ah
jnz L(Exit15)
test $0x40, %dh
jnz L(return_null)
/* Lanes 12-14 are clear in both masks; the remaining candidate is lane 15. */
lea 15(%edi), %eax
RETURN
.p2align 4
/* Match-and-NUL case where the match mask has a bit set in lanes 8-11.
   Lanes 8-10 are tested in order, match bit (%ah) before NUL bit (%dh). */
L(match_case2_12):
test $0x01, %ah
jnz L(Exit9)
test $0x01, %dh
jnz L(return_null)
test $0x02, %ah
jnz L(Exit10)
test $0x02, %dh
jnz L(return_null)
test $0x04, %ah
jnz L(Exit11)
test $0x04, %dh
jnz L(return_null)
/* Lanes 8-10 are clear in both masks; the low nibble of %ah is non-zero, so
   the match is in lane 11. */
lea 11(%edi), %eax
RETURN
.p2align 4
/* Match without any NUL in the block: return the address of the lowest set
   bit of the match mask in %eax.  Lanes 0-7 are handled here. */
L(match_case1):
test %al, %al
/* Nothing in lanes 0-7; look at lanes 8-15. */
jz L(match_higth_case1)
test $0x01, %al
jnz L(Exit1)
test $0x02, %al
jnz L(Exit2)
test $0x04, %al
jnz L(Exit3)
test $0x08, %al
jnz L(Exit4)
test $0x10, %al
jnz L(Exit5)
test $0x20, %al
jnz L(Exit6)
test $0x40, %al
jnz L(Exit7)
/* %al is non-zero and bits 0-6 are clear, so the match is in lane 7. */
lea 7(%edi), %eax
RETURN
.p2align 4
/* Match without any NUL in the block, with no match in lanes 0-7: return the
   address of the lowest set bit of %ah (lanes 8-15). */
L(match_higth_case1):
test $0x01, %ah
jnz L(Exit9)
test $0x02, %ah
jnz L(Exit10)
test $0x04, %ah
jnz L(Exit11)
test $0x08, %ah
jnz L(Exit12)
test $0x10, %ah
jnz L(Exit13)
test $0x20, %ah
jnz L(Exit14)
test $0x40, %ah
jnz L(Exit15)
/* Bits 0-6 of %ah are clear; the remaining candidate is lane 15. */
lea 15(%edi), %eax
RETURN
.p2align 4
/* Return stubs.  L(ExitN) returns the address of lane N-1, i.e. %edi + (N-1).
   There is no Exit8 or Exit16 stub: lanes 7 and 15 are returned by the
   fall-through lea at the end of the match_* blocks. */
L(Exit1):
lea (%edi), %eax
RETURN
.p2align 4
L(Exit2):
lea 1(%edi), %eax
RETURN
.p2align 4
L(Exit3):
lea 2(%edi), %eax
RETURN
.p2align 4
L(Exit4):
lea 3(%edi), %eax
RETURN
.p2align 4
L(Exit5):
lea 4(%edi), %eax
RETURN
.p2align 4
L(Exit6):
lea 5(%edi), %eax
RETURN
.p2align 4
L(Exit7):
lea 6(%edi), %eax
RETURN
.p2align 4
L(Exit9):
lea 8(%edi), %eax
RETURN
.p2align 4
L(Exit10):
lea 9(%edi), %eax
RETURN
.p2align 4
L(Exit11):
lea 10(%edi), %eax
RETURN
.p2align 4
L(Exit12):
lea 11(%edi), %eax
RETURN
.p2align 4
L(Exit13):
lea 12(%edi), %eax
RETURN
.p2align 4
L(Exit14):
lea 13(%edi), %eax
RETURN
.p2align 4
L(Exit15):
lea 14(%edi), %eax
RETURN
.p2align 4
/* Not-found exit: the terminating NUL was reached before any match.
   Returns zero in %eax. */
L(return_null):
xor %eax, %eax
RETURN
/* Closes the CFI region and sets the symbol size of strchr. */
END (strchr)

View file

@ -1,71 +1,112 @@
#define STRLEN sse2_strlen_atom /*
Copyright (c) 2011, Intel Corporation
All rights reserved.
#ifndef L Redistribution and use in source and binary forms, with or without
# define L(label) .L##label modification, are permitted provided that the following conditions are met:
#endif
#ifndef cfi_startproc * Redistributions of source code must retain the above copyright notice,
# define cfi_startproc .cfi_startproc * this list of conditions and the following disclaimer.
#endif
#ifndef cfi_endproc * Redistributions in binary form must reproduce the above copyright notice,
# define cfi_endproc .cfi_endproc * this list of conditions and the following disclaimer in the documentation
#endif * and/or other materials provided with the distribution.
#ifndef cfi_rel_offset * Neither the name of Intel Corporation nor the names of its contributors
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off * may be used to endorse or promote products derived from this software
#endif * without specific prior written permission.
#ifndef cfi_restore THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# define cfi_restore(reg) .cfi_restore reg ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
#endif WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef cfi_adjust_cfa_offset #ifndef USE_AS_STRCAT
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
#ifndef cfi_remember_state # ifndef STRLEN
# define cfi_remember_state .cfi_remember_state # define STRLEN strlen
#endif # endif
#ifndef cfi_restore_state # ifndef L
# define cfi_restore_state .cfi_restore_state # define L(label) .L##label
#endif # endif
#ifndef ENTRY # ifndef cfi_startproc
# define ENTRY(name) \ # define cfi_startproc .cfi_startproc
.type name, @function; \ # endif
.globl name; \
.p2align 4; \ # ifndef cfi_endproc
name: \ # define cfi_endproc .cfi_endproc
# endif
/* A callee-saved register is required only for strnlen. */
# ifdef USE_AS_STRNLEN
# ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
# endif
# ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
# endif
# ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
# endif
# endif
# ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc cfi_startproc
#endif # endif
#ifndef END # ifndef END
# define END(name) \ # define END(name) \
cfi_endproc; \ cfi_endproc; \
.size name, .-name .size name, .-name
#endif # endif
#define CFI_PUSH(REG) \ # define PARMS 4
cfi_adjust_cfa_offset (4); \ # define STR PARMS
cfi_rel_offset (REG, 0) # define RETURN ret
#define CFI_POP(REG) \ # ifdef USE_AS_STRNLEN
cfi_adjust_cfa_offset (-4); \ # define LEN PARMS + 8
cfi_restore (REG) # define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define PUSH(REG) pushl REG; CFI_PUSH (REG) # define CFI_POP(REG) \
#define POP(REG) popl REG; CFI_POP (REG) cfi_adjust_cfa_offset (-4); \
#define PARMS 4 cfi_restore (REG)
#define STR PARMS
#define ENTRANCE # define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define RETURN ret # define POP(REG) popl REG; CFI_POP (REG)
# undef RETURN
# define RETURN POP (%edi); ret; CFI_PUSH(%edi);
# endif
.text .text
ENTRY (STRLEN) ENTRY (STRLEN)
ENTRANCE
mov STR(%esp), %edx mov STR(%esp), %edx
# ifdef USE_AS_STRNLEN
PUSH (%edi)
movl LEN(%esp), %edi
sub $4, %edi
jbe L(len_less4_prolog)
# endif
#endif
xor %eax, %eax xor %eax, %eax
cmpb $0, (%edx) cmpb $0, (%edx)
jz L(exit_tail0) jz L(exit_tail0)
@ -75,6 +116,12 @@ ENTRY (STRLEN)
jz L(exit_tail2) jz L(exit_tail2)
cmpb $0, 3(%edx) cmpb $0, 3(%edx)
jz L(exit_tail3) jz L(exit_tail3)
#ifdef USE_AS_STRNLEN
sub $4, %edi
jbe L(len_less8_prolog)
#endif
cmpb $0, 4(%edx) cmpb $0, 4(%edx)
jz L(exit_tail4) jz L(exit_tail4)
cmpb $0, 5(%edx) cmpb $0, 5(%edx)
@ -83,6 +130,12 @@ ENTRY (STRLEN)
jz L(exit_tail6) jz L(exit_tail6)
cmpb $0, 7(%edx) cmpb $0, 7(%edx)
jz L(exit_tail7) jz L(exit_tail7)
#ifdef USE_AS_STRNLEN
sub $4, %edi
jbe L(len_less12_prolog)
#endif
cmpb $0, 8(%edx) cmpb $0, 8(%edx)
jz L(exit_tail8) jz L(exit_tail8)
cmpb $0, 9(%edx) cmpb $0, 9(%edx)
@ -91,6 +144,12 @@ ENTRY (STRLEN)
jz L(exit_tail10) jz L(exit_tail10)
cmpb $0, 11(%edx) cmpb $0, 11(%edx)
jz L(exit_tail11) jz L(exit_tail11)
#ifdef USE_AS_STRNLEN
sub $4, %edi
jbe L(len_less16_prolog)
#endif
cmpb $0, 12(%edx) cmpb $0, 12(%edx)
jz L(exit_tail12) jz L(exit_tail12)
cmpb $0, 13(%edx) cmpb $0, 13(%edx)
@ -99,211 +158,531 @@ ENTRY (STRLEN)
jz L(exit_tail14) jz L(exit_tail14)
cmpb $0, 15(%edx) cmpb $0, 15(%edx)
jz L(exit_tail15) jz L(exit_tail15)
pxor %xmm0, %xmm0 pxor %xmm0, %xmm0
mov %edx, %eax lea 16(%edx), %eax
mov %edx, %ecx mov %eax, %ecx
and $-16, %eax and $-16, %eax
add $16, %ecx
add $16, %eax #ifdef USE_AS_STRNLEN
and $15, %edx
add %edx, %edi
sub $64, %edi
jbe L(len_less64)
#endif
pcmpeqb (%eax), %xmm0 pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1 pxor %xmm1, %xmm1
test %edx, %edx
lea 16(%eax), %eax lea 16(%eax), %eax
test %edx, %edx
jnz L(exit) jnz L(exit)
pcmpeqb (%eax), %xmm1 pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx pmovmskb %xmm1, %edx
pxor %xmm2, %xmm2 pxor %xmm2, %xmm2
test %edx, %edx
lea 16(%eax), %eax lea 16(%eax), %eax
test %edx, %edx
jnz L(exit) jnz L(exit)
pcmpeqb (%eax), %xmm2 pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx pmovmskb %xmm2, %edx
pxor %xmm3, %xmm3 pxor %xmm3, %xmm3
test %edx, %edx
lea 16(%eax), %eax lea 16(%eax), %eax
test %edx, %edx
jnz L(exit) jnz L(exit)
pcmpeqb (%eax), %xmm3 pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx pmovmskb %xmm3, %edx
test %edx, %edx
lea 16(%eax), %eax lea 16(%eax), %eax
test %edx, %edx
jnz L(exit) jnz L(exit)
#ifdef USE_AS_STRNLEN
sub $64, %edi
jbe L(len_less64)
#endif
pcmpeqb (%eax), %xmm0 pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%eax), %eax lea 16(%eax), %eax
test %edx, %edx
jnz L(exit) jnz L(exit)
pcmpeqb (%eax), %xmm1 pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%eax), %eax lea 16(%eax), %eax
test %edx, %edx
jnz L(exit) jnz L(exit)
pcmpeqb (%eax), %xmm2 pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%eax), %eax lea 16(%eax), %eax
test %edx, %edx
jnz L(exit) jnz L(exit)
pcmpeqb (%eax), %xmm3 pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx pmovmskb %xmm3, %edx
test %edx, %edx
lea 16(%eax), %eax lea 16(%eax), %eax
test %edx, %edx
jnz L(exit) jnz L(exit)
#ifdef USE_AS_STRNLEN
sub $64, %edi
jbe L(len_less64)
#endif
pcmpeqb (%eax), %xmm0 pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%eax), %eax lea 16(%eax), %eax
test %edx, %edx
jnz L(exit) jnz L(exit)
pcmpeqb (%eax), %xmm1 pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%eax), %eax lea 16(%eax), %eax
test %edx, %edx
jnz L(exit) jnz L(exit)
pcmpeqb (%eax), %xmm2 pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%eax), %eax lea 16(%eax), %eax
test %edx, %edx
jnz L(exit) jnz L(exit)
pcmpeqb (%eax), %xmm3 pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx pmovmskb %xmm3, %edx
test %edx, %edx
lea 16(%eax), %eax lea 16(%eax), %eax
test %edx, %edx
jnz L(exit) jnz L(exit)
#ifdef USE_AS_STRNLEN
sub $64, %edi
jbe L(len_less64)
#endif
pcmpeqb (%eax), %xmm0 pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%eax), %eax lea 16(%eax), %eax
test %edx, %edx
jnz L(exit) jnz L(exit)
pcmpeqb (%eax), %xmm1 pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%eax), %eax lea 16(%eax), %eax
test %edx, %edx
jnz L(exit) jnz L(exit)
pcmpeqb (%eax), %xmm2 pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%eax), %eax lea 16(%eax), %eax
test %edx, %edx
jnz L(exit) jnz L(exit)
pcmpeqb (%eax), %xmm3 pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx pmovmskb %xmm3, %edx
test %edx, %edx
lea 16(%eax), %eax lea 16(%eax), %eax
test %edx, %edx
jnz L(exit) jnz L(exit)
#ifdef USE_AS_STRNLEN
mov %eax, %edx
and $63, %edx
add %edx, %edi
#endif
and $-0x40, %eax and $-0x40, %eax
PUSH (%esi)
PUSH (%edi) .p2align 4
PUSH (%ebx) L(aligned_64_loop):
PUSH (%ebp) #ifdef USE_AS_STRNLEN
xor %ebp, %ebp sub $64, %edi
L(aligned_64): jbe L(len_less64)
pcmpeqb (%eax), %xmm0 #endif
pcmpeqb 16(%eax), %xmm1 movaps (%eax), %xmm0
pcmpeqb 32(%eax), %xmm2 movaps 16(%eax), %xmm1
pcmpeqb 48(%eax), %xmm3 movaps 32(%eax), %xmm2
pmovmskb %xmm0, %edx movaps 48(%eax), %xmm6
pmovmskb %xmm1, %esi pminub %xmm1, %xmm0
pmovmskb %xmm2, %edi pminub %xmm6, %xmm2
pmovmskb %xmm3, %ebx pminub %xmm0, %xmm2
or %edx, %ebp pcmpeqb %xmm3, %xmm2
or %esi, %ebp pmovmskb %xmm2, %edx
or %edi, %ebp
or %ebx, %ebp
lea 64(%eax), %eax lea 64(%eax), %eax
jz L(aligned_64)
L(48leave):
test %edx, %edx test %edx, %edx
jnz L(aligned_64_exit_16) jz L(aligned_64_loop)
test %esi, %esi
jnz L(aligned_64_exit_32) pcmpeqb -64(%eax), %xmm3
test %edi, %edi pmovmskb %xmm3, %edx
jnz L(aligned_64_exit_48) lea 48(%ecx), %ecx
mov %ebx, %edx test %edx, %edx
lea (%eax), %eax jnz L(exit)
jmp L(aligned_64_exit)
L(aligned_64_exit_48): pcmpeqb %xmm1, %xmm3
lea -16(%eax), %eax pmovmskb %xmm3, %edx
mov %edi, %edx lea -16(%ecx), %ecx
jmp L(aligned_64_exit) test %edx, %edx
L(aligned_64_exit_32): jnz L(exit)
lea -32(%eax), %eax
mov %esi, %edx pcmpeqb -32(%eax), %xmm3
jmp L(aligned_64_exit) pmovmskb %xmm3, %edx
L(aligned_64_exit_16): lea -16(%ecx), %ecx
lea -48(%eax), %eax test %edx, %edx
L(aligned_64_exit): jnz L(exit)
POP (%ebp)
POP (%ebx) pcmpeqb %xmm6, %xmm3
POP (%edi) pmovmskb %xmm3, %edx
POP (%esi) lea -16(%ecx), %ecx
L(exit): L(exit):
sub %ecx, %eax sub %ecx, %eax
test %dl, %dl test %dl, %dl
jz L(exit_high) jz L(exit_high)
mov %dl, %cl
and $15, %cl
jz L(exit_8)
test $0x01, %dl test $0x01, %dl
jnz L(exit_tail0) jnz L(exit_tail0)
test $0x02, %dl test $0x02, %dl
jnz L(exit_tail1) jnz L(exit_tail1)
test $0x04, %dl test $0x04, %dl
jnz L(exit_tail2) jnz L(exit_tail2)
add $3, %eax
RETURN
test $0x08, %dl .p2align 4
jnz L(exit_tail3) L(exit_8):
test $0x10, %dl test $0x10, %dl
jnz L(exit_tail4) jnz L(exit_tail4)
test $0x20, %dl test $0x20, %dl
jnz L(exit_tail5) jnz L(exit_tail5)
test $0x40, %dl test $0x40, %dl
jnz L(exit_tail6) jnz L(exit_tail6)
add $7, %eax add $7, %eax
RETURN
.p2align 4
L(exit_high):
mov %dh, %ch
and $15, %ch
jz L(exit_high_8)
test $0x01, %dh
jnz L(exit_tail8)
test $0x02, %dh
jnz L(exit_tail9)
test $0x04, %dh
jnz L(exit_tail10)
add $11, %eax
RETURN
.p2align 4
L(exit_high_8):
test $0x10, %dh
jnz L(exit_tail12)
test $0x20, %dh
jnz L(exit_tail13)
test $0x40, %dh
jnz L(exit_tail14)
add $15, %eax
L(exit_tail0): L(exit_tail0):
RETURN RETURN
L(exit_high): #ifdef USE_AS_STRNLEN
add $8, %eax
test $0x01, %dh
jnz L(exit_tail0)
test $0x02, %dh .p2align 4
jnz L(exit_tail1) L(len_less64):
pxor %xmm0, %xmm0
add $64, %edi
test $0x04, %dh pcmpeqb (%eax), %xmm0
jnz L(exit_tail2) pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
lea 16(%eax), %eax
test %edx, %edx
jnz L(strnlen_exit)
test $0x08, %dh sub $16, %edi
jnz L(exit_tail3) jbe L(return_start_len)
test $0x10, %dh pcmpeqb (%eax), %xmm1
jnz L(exit_tail4) pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(strnlen_exit)
test $0x20, %dh sub $16, %edi
jnz L(exit_tail5) jbe L(return_start_len)
test $0x40, %dh pcmpeqb (%eax), %xmm0
jnz L(exit_tail6) pmovmskb %xmm0, %edx
add $7, %eax lea 16(%eax), %eax
test %edx, %edx
jnz L(strnlen_exit)
sub $16, %edi
jbe L(return_start_len)
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(strnlen_exit)
#ifndef USE_AS_STRLCAT
movl LEN(%esp), %eax
RETURN RETURN
#else
jmp L(return_start_len)
#endif
.p2align 4
L(strnlen_exit):
sub %ecx, %eax
test %dl, %dl
jz L(strnlen_exit_high)
mov %dl, %cl
and $15, %cl
jz L(strnlen_exit_8)
test $0x01, %dl
jnz L(exit_tail0)
test $0x02, %dl
jnz L(strnlen_exit_tail1)
test $0x04, %dl
jnz L(strnlen_exit_tail2)
sub $4, %edi
jb L(return_start_len)
lea 3(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_8):
test $0x10, %dl
jnz L(strnlen_exit_tail4)
test $0x20, %dl
jnz L(strnlen_exit_tail5)
test $0x40, %dl
jnz L(strnlen_exit_tail6)
sub $8, %edi
jb L(return_start_len)
lea 7(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_high):
mov %dh, %ch
and $15, %ch
jz L(strnlen_exit_high_8)
test $0x01, %dh
jnz L(strnlen_exit_tail8)
test $0x02, %dh
jnz L(strnlen_exit_tail9)
test $0x04, %dh
jnz L(strnlen_exit_tail10)
sub $12, %edi
jb L(return_start_len)
lea 11(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_high_8):
test $0x10, %dh
jnz L(strnlen_exit_tail12)
test $0x20, %dh
jnz L(strnlen_exit_tail13)
test $0x40, %dh
jnz L(strnlen_exit_tail14)
sub $16, %edi
jb L(return_start_len)
lea 15(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail1):
sub $2, %edi
jb L(return_start_len)
lea 1(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail2):
sub $3, %edi
jb L(return_start_len)
lea 2(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail4):
sub $5, %edi
jb L(return_start_len)
lea 4(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail5):
sub $6, %edi
jb L(return_start_len)
lea 5(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail6):
sub $7, %edi
jb L(return_start_len)
lea 6(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail8):
sub $9, %edi
jb L(return_start_len)
lea 8(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail9):
sub $10, %edi
jb L(return_start_len)
lea 9(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail10):
sub $11, %edi
jb L(return_start_len)
lea 10(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail12):
sub $13, %edi
jb L(return_start_len)
lea 12(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail13):
sub $14, %edi
jb L(return_start_len)
lea 13(%eax), %eax
RETURN
.p2align 4
L(strnlen_exit_tail14):
sub $15, %edi
jb L(return_start_len)
lea 14(%eax), %eax
RETURN
#ifndef USE_AS_STRLCAT
.p2align 4
L(return_start_len):
movl LEN(%esp), %eax
RETURN
#endif
/* for prolog only */
.p2align 4
L(len_less4_prolog):
xor %eax, %eax
add $4, %edi
jz L(exit_tail0)
cmpb $0, (%edx)
jz L(exit_tail0)
cmp $1, %edi
je L(exit_tail1)
cmpb $0, 1(%edx)
jz L(exit_tail1)
cmp $2, %edi
je L(exit_tail2)
cmpb $0, 2(%edx)
jz L(exit_tail2)
cmp $3, %edi
je L(exit_tail3)
cmpb $0, 3(%edx)
jz L(exit_tail3)
mov %edi, %eax
RETURN
.p2align 4
L(len_less8_prolog):
add $4, %edi
cmpb $0, 4(%edx)
jz L(exit_tail4)
cmp $1, %edi
je L(exit_tail5)
cmpb $0, 5(%edx)
jz L(exit_tail5)
cmp $2, %edi
je L(exit_tail6)
cmpb $0, 6(%edx)
jz L(exit_tail6)
cmp $3, %edi
je L(exit_tail7)
cmpb $0, 7(%edx)
jz L(exit_tail7)
mov $8, %eax
RETURN
.p2align 4
L(len_less12_prolog):
add $4, %edi
cmpb $0, 8(%edx)
jz L(exit_tail8)
cmp $1, %edi
je L(exit_tail9)
cmpb $0, 9(%edx)
jz L(exit_tail9)
cmp $2, %edi
je L(exit_tail10)
cmpb $0, 10(%edx)
jz L(exit_tail10)
cmp $3, %edi
je L(exit_tail11)
cmpb $0, 11(%edx)
jz L(exit_tail11)
mov $12, %eax
RETURN
.p2align 4
L(len_less16_prolog):
add $4, %edi
cmpb $0, 12(%edx)
jz L(exit_tail12)
cmp $1, %edi
je L(exit_tail13)
cmpb $0, 13(%edx)
jz L(exit_tail13)
cmp $2, %edi
je L(exit_tail14)
cmpb $0, 14(%edx)
jz L(exit_tail14)
cmp $3, %edi
je L(exit_tail15)
cmpb $0, 15(%edx)
jz L(exit_tail15)
mov $16, %eax
RETURN
#endif
.p2align 4 .p2align 4
L(exit_tail1): L(exit_tail1):
@ -364,6 +743,7 @@ L(exit_tail14):
L(exit_tail15): L(exit_tail15):
add $15, %eax add $15, %eax
ret #ifndef USE_AS_STRCAT
RETURN
END (STRLEN) END (STRLEN)
#endif

View file

@ -0,0 +1,33 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Build strnlen by including the shared strlen source.  USE_AS_STRNLEN and
   STRLEN are defined before the #include so the included file sees them.
   NOTE(review): presumably sse2-strlen-atom.S names its entry point with
   STRLEN and enables its length-bounded paths under USE_AS_STRNLEN --
   confirm against that file. */
#define USE_AS_STRNLEN 1
#define STRLEN strnlen
#include "sse2-strlen-atom.S"

View file

@ -0,0 +1,753 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Assembler helper macros for this file.  Each of the first group is only
   defined when the including environment has not already provided it. */
/* L(label): spell a label with a ".L" prefix. */
#ifndef L
# define L(label) .L##label
#endif
/* Thin wrappers mapping cfi_* names onto the assembler's .cfi_* directives. */
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
/* ENTRY(name): declare NAME as a global function symbol, align it with
   .p2align 4, emit the label and open a CFI region. */
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
/* END(name): close the CFI region and record the symbol size. */
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
/* CFI_PUSH / CFI_POP: the CFI bookkeeping for a 4-byte push / pop of REG,
   without the push or pop instruction itself. */
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
/* PUSH / POP: pushl / popl plus the matching CFI bookkeeping. */
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
/* PARMS: offset from %esp to the first stack argument once ENTRANCE has run
   (return address plus the %edi pushed by ENTRANCE). */
#define PARMS 8
/* ENTRANCE: function prologue, saves %edi. */
#define ENTRANCE PUSH(%edi);
/* RETURN: restore %edi and return.  The CFI_PUSH after the ret emits only
   directives; it puts the CFI state back to "%edi pushed" for whatever code
   follows the ret in the file. */
#define RETURN POP (%edi); ret; CFI_PUSH (%edi);
/* Offsets of the two stack arguments relative to %esp after ENTRANCE. */
#define STR1 PARMS
#define STR2 STR1+4
.text
/* strrchr: scan the string at STR1 for the last byte equal to the low byte
   of STR2.  The result is returned in %eax: the address of that byte, or 0
   when no such byte occurs at or before the terminating NUL.

   Register roles used throughout the routine:
     %edi   scan pointer (address just past the 16-byte block last examined)
     %xmm1  the searched byte replicated into all 16 lanes
     %xmm2  zero before each NUL compare; receives the NUL-compare result
     %ebx   match mask of the most recent block that contained a match
            (0 when none has been seen yet)
     %esi   scan-pointer value to use together with the mask saved in %ebx  */
ENTRY (strrchr)
ENTRANCE
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
pxor %xmm2, %xmm2
mov %ecx, %edi
/* Two byte-unpacks plus the pshufd below replicate the low byte of %xmm1
   into every byte lane. */
punpcklbw %xmm1, %xmm1
punpcklbw %xmm1, %xmm1
/* ECX has OFFSET. */
and $63, %ecx
pshufd $0, %xmm1, %xmm1
/* An offset above 48 within the 64-byte block means a 16-byte load from the
   string start would extend past that block; use the aligned-load path. */
cmp $48, %ecx
ja L(crosscache)
/* unaligned string. */
movdqu (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
pcmpeqb %xmm1, %xmm0
/* Find where NULL is. */
pmovmskb %xmm2, %ecx
/* Check if there is a match. */
pmovmskb %xmm0, %eax
add $16, %edi
test %eax, %eax
jnz L(unaligned_match1)
/* No match in the first 16 bytes; a NUL among them means the result is 0. */
test %ecx, %ecx
jnz L(return_null)
/* Round the scan pointer down to a 16-byte boundary so the loop can use
   aligned loads; no match has been recorded yet (%ebx = 0). */
and $-16, %edi
PUSH (%esi)
PUSH (%ebx)
xor %ebx, %ebx
jmp L(loop)
/* CFI only (no instructions): the code that follows in the file is entered
   without %esi and %ebx on the stack. */
CFI_POP (%esi)
CFI_POP (%ebx)
.p2align 4
/* The first (unaligned) 16 bytes contain a match: %eax is the match mask,
   %ecx the NUL mask of the same bytes, %edi already points 16 bytes past
   the string start. */
L(unaligned_match1):
test %ecx, %ecx
jnz L(prolog_find_zero_1)
/* No NUL yet: remember this match (%ebx/%esi) and continue with aligned
   loads from the 16-byte boundary at or below %edi. */
PUSH (%esi)
PUSH (%ebx)
mov %eax, %ebx
mov %edi, %esi
and $-16, %edi
jmp L(loop)
/* CFI only: the following code is entered without %esi and %ebx pushed. */
CFI_POP (%esi)
CFI_POP (%ebx)
.p2align 4
/* Start of string lies in the last 15 bytes of its 64-byte block.  Load the
   enclosing aligned 16-byte block instead and discard the mask bits that
   belong to bytes before the string start.  %ecx becomes the byte offset of
   the string inside that aligned block. */
L(crosscache):
/* Handle unaligned string. */
and $15, %ecx
and $-16, %edi
/* %xmm3 is used for the NUL compare here so that %xmm2 stays zero for the
   main loop. */
pxor %xmm3, %xmm3
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm3
pcmpeqb %xmm1, %xmm0
/* Find where NULL is. */
pmovmskb %xmm3, %edx
/* Check if there is a match. */
pmovmskb %xmm0, %eax
/* Remove the leading bytes. */
shr %cl, %edx
shr %cl, %eax
add $16, %edi
test %eax, %eax
jnz L(unaligned_match)
/* No match in the first block; a NUL there means the result is 0. */
test %edx, %edx
jnz L(return_null)
/* Enter the aligned loop with no match recorded (%ebx = 0). */
PUSH (%esi)
PUSH (%ebx)
xor %ebx, %ebx
jmp L(loop)
/* CFI only: the following code is entered without %esi and %ebx pushed. */
CFI_POP (%esi)
CFI_POP (%ebx)
.p2align 4
/* Reached from L(crosscache) with a match in the first block: %eax is the
   shifted match mask, %edx the shifted NUL mask, %ecx the start offset. */
L(unaligned_match):
test %edx, %edx
jnz L(prolog_find_zero)
PUSH (%esi)
PUSH (%ebx)
/* Record the match.  Because the mask was shifted right by %ecx, the saved
   pointer is advanced by %ecx so that "pointer - 16 + bit index" in
   L(match_case1) still addresses the matching byte.  Execution then falls
   through into L(loop). */
mov %eax, %ebx
lea (%edi, %ecx), %esi
/* Loop start on aligned string. */
/* Main scan, unrolled four times.  Each step loads one aligned 16-byte
   block, advances %edi past it, and builds the NUL mask (%ecx) and the match
   mask (%eax).  If either is non-zero control goes to L(matches); note that
   %ecx holds the OR of both masks at that point, while %eax is still the
   pure match mask.  %xmm2 must be zero on entry and stays zero as long as no
   NUL byte has been seen. */
.p2align 4
L(loop):
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %ecx
pmovmskb %xmm0, %eax
or %eax, %ecx
jnz L(matches)
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %ecx
pmovmskb %xmm0, %eax
or %eax, %ecx
jnz L(matches)
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %ecx
pmovmskb %xmm0, %eax
or %eax, %ecx
jnz L(matches)
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm2, %ecx
pmovmskb %xmm0, %eax
or %eax, %ecx
jz L(loop)
/* The block just scanned contains a match, a NUL, or both.  %eax is the
   match mask; %edi points just past the block. */
L(matches):
test %eax, %eax
jnz L(match)
/* No match in this block, so it contains the NUL: the answer is the most
   recently saved match, if any. */
L(return_value):
test %ebx, %ebx
jz L(return_null_1)
/* Reload the saved mask/pointer pair and let L(match_case1) pick the
   highest set bit. */
mov %ebx, %eax
mov %esi, %edi
POP (%ebx)
POP (%esi)
jmp L(match_case1)
/* CFI only: the following code is entered with %esi and %ebx pushed.
   NOTE(review): the registers were pushed as %esi then %ebx, but they are
   re-declared here as %ebx then %esi, which appears to swap their recorded
   save slots in the unwind info (the executed code is unaffected) -
   confirm. */
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
/* NUL reached and no match was ever recorded: return 0. */
L(return_null_1):
POP (%ebx)
POP (%esi)
xor %eax, %eax
RETURN
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
/* The block contains a match.  Recompute the pure NUL mask (%ecx was ORed
   with the match mask in the loop). */
L(match):
pmovmskb %xmm2, %ecx
test %ecx, %ecx
jnz L(find_zero)
/* No NUL in this block: this becomes the latest saved match; keep going. */
mov %eax, %ebx
mov %edi, %esi
jmp L(loop)
.p2align 4
/* The current block holds both a match (%eax, non-zero) and a NUL (%ecx,
   non-zero).  Find the lowest set bit of %ecx and clear every bit of %eax
   above it, so that only matches at or before the terminator remain.  If
   nothing remains, fall back to the saved match via L(return_value);
   otherwise drop the saved state and let L(match_case1) pick the last
   remaining match.

   The immediates of the form "1 << N - 1" rely on the GNU assembler's
   operator precedence, in which << binds tighter than -, i.e. they mean
   (1 << N) - 1: a mask of the N low bits. */
L(find_zero):
test %cl, %cl
jz L(find_zero_high)
mov %cl, %dl
and $15, %dl
jz L(find_zero_8)
/* NUL is among bytes 0..3. */
test $0x01, %cl
jnz L(FindZeroExit1)
test $0x02, %cl
jnz L(FindZeroExit2)
test $0x04, %cl
jnz L(FindZeroExit3)
/* NUL is byte 3: keep match bits 0..3. */
and $1 << 4 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
/* CFI only: the following code is entered with %esi and %ebx pushed. */
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
/* NUL is among bytes 4..7. */
L(find_zero_8):
test $0x10, %cl
jnz L(FindZeroExit5)
test $0x20, %cl
jnz L(FindZeroExit6)
test $0x40, %cl
jnz L(FindZeroExit7)
/* NUL is byte 7: keep match bits 0..7. */
and $1 << 8 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
/* No NUL in bytes 0..7; examine the high byte of the mask (bytes 8..15). */
L(find_zero_high):
mov %ch, %dh
and $15, %dh
jz L(find_zero_high_8)
/* NUL is among bytes 8..11. */
test $0x01, %ch
jnz L(FindZeroExit9)
test $0x02, %ch
jnz L(FindZeroExit10)
test $0x04, %ch
jnz L(FindZeroExit11)
/* NUL is byte 11: keep match bits 0..11. */
and $1 << 12 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
/* NUL is among bytes 12..15. */
L(find_zero_high_8):
test $0x10, %ch
jnz L(FindZeroExit13)
test $0x20, %ch
jnz L(FindZeroExit14)
test $0x40, %ch
jnz L(FindZeroExit15)
/* NUL is byte 15: keep all 16 match bits. */
and $1 << 16 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
/* L(FindZeroExitN): the terminator is byte N-1 of the current block.  Keep
   only match bits 0..N-1 (the immediates mean (1 << N) - 1).  If no match
   survives, use the previously saved match via L(return_value); otherwise
   discard the saved state (pop %ebx/%esi) and resolve the surviving mask in
   L(match_case1).  The CFI_PUSH pairs after each jmp emit no instructions;
   they re-declare %ebx/%esi as pushed for the code that follows. */
L(FindZeroExit1):
and $1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit2):
and $1 << 2 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit3):
and $1 << 3 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit5):
and $1 << 5 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit6):
and $1 << 6 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit7):
and $1 << 7 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit9):
and $1 << 9 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit10):
and $1 << 10 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit11):
and $1 << 11 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit13):
and $1 << 13 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit14):
and $1 << 14 - 1, %eax
jz L(return_value)
POP (%ebx)
POP (%esi)
jmp L(match_case1)
CFI_PUSH (%ebx)
CFI_PUSH (%esi)
.p2align 4
L(FindZeroExit15):
and $1 << 15 - 1, %eax
jz L(return_value)
/* Last case: no jmp needed, execution falls through into
   L(match_case1). */
POP (%ebx)
POP (%esi)
.p2align 4
/* Resolve a non-zero 16-bit match mask in %eax to an address.  %edi points
   just past the 16-byte block the mask describes, so bit i corresponds to
   address %edi - 16 + i.  The highest set bit (the last match) wins.  Only
   %edi is on the stack at this point. */
L(match_case1):
test %ah, %ah
jnz L(match_case1_high)
mov %al, %dl
and $15 << 4, %dl
jnz L(match_case1_8)
/* Highest match is among bytes 0..3. */
test $0x08, %al
jnz L(Exit4)
test $0x04, %al
jnz L(Exit3)
test $0x02, %al
jnz L(Exit2)
/* Only bit 0 is left: byte 0. */
lea -16(%edi), %eax
RETURN
.p2align 4
/* Highest match is among bytes 4..7. */
L(match_case1_8):
test $0x80, %al
jnz L(Exit8)
test $0x40, %al
jnz L(Exit7)
test $0x20, %al
jnz L(Exit6)
/* Only bit 4 is left in the high nibble: byte 4. */
lea -12(%edi), %eax
RETURN
.p2align 4
/* Highest match is among bytes 8..15. */
L(match_case1_high):
mov %ah, %dh
and $15 << 4, %dh
jnz L(match_case1_high_8)
test $0x08, %ah
jnz L(Exit12)
test $0x04, %ah
jnz L(Exit11)
test $0x02, %ah
jnz L(Exit10)
/* Byte 8. */
lea -8(%edi), %eax
RETURN
.p2align 4
/* Highest match is among bytes 12..15. */
L(match_case1_high_8):
test $0x80, %ah
jnz L(Exit16)
test $0x40, %ah
jnz L(Exit15)
test $0x20, %ah
jnz L(Exit14)
/* Byte 12. */
lea -4(%edi), %eax
RETURN
.p2align 4
/* L(ExitK): return the address of byte K-1 of the 16-byte block that ends
   at %edi, i.e. %edi - 16 + (K - 1).  Bytes 0, 4, 8 and 12 are returned
   inline by L(match_case1) and its siblings, so there is no Exit1, Exit5,
   Exit9 or Exit13. */
L(Exit2):
lea -15(%edi), %eax
RETURN
.p2align 4
L(Exit3):
lea -14(%edi), %eax
RETURN
.p2align 4
L(Exit4):
lea -13(%edi), %eax
RETURN
.p2align 4
L(Exit6):
lea -11(%edi), %eax
RETURN
.p2align 4
L(Exit7):
lea -10(%edi), %eax
RETURN
.p2align 4
L(Exit8):
lea -9(%edi), %eax
RETURN
.p2align 4
L(Exit10):
lea -7(%edi), %eax
RETURN
.p2align 4
L(Exit11):
lea -6(%edi), %eax
RETURN
.p2align 4
L(Exit12):
lea -5(%edi), %eax
RETURN
.p2align 4
L(Exit14):
lea -3(%edi), %eax
RETURN
.p2align 4
L(Exit15):
lea -2(%edi), %eax
RETURN
.p2align 4
L(Exit16):
lea -1(%edi), %eax
RETURN
/* Return NULL. */
/* Used by the first-block paths, where only %edi has been pushed. */
.p2align 4
L(return_null):
xor %eax, %eax
RETURN
.p2align 4
/* First-block counterpart of L(find_zero): the very first block contains
   both a match (%eax) and a NUL, nothing has been saved yet and %esi/%ebx
   are not on the stack.  Entered from the L(crosscache) path with the
   shifted NUL mask in %edx and the start offset in %ecx: advance %edi by
   that offset so the shifted mask bits map to the right addresses, then
   move the NUL mask into %ecx. */
L(prolog_find_zero):
add %ecx, %edi
mov %edx, %ecx
/* Entry used by L(unaligned_match1), where %ecx already is the NUL mask.
   Locate the lowest set bit of %ecx, keep only the match bits at or below
   it (the immediates mean (1 << N) - 1), and return 0 if none remain. */
L(prolog_find_zero_1):
test %cl, %cl
jz L(prolog_find_zero_high)
mov %cl, %dl
and $15, %dl
jz L(prolog_find_zero_8)
test $0x01, %cl
jnz L(PrologFindZeroExit1)
test $0x02, %cl
jnz L(PrologFindZeroExit2)
test $0x04, %cl
jnz L(PrologFindZeroExit3)
/* NUL is byte 3. */
and $1 << 4 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
/* NUL is among bytes 4..7. */
L(prolog_find_zero_8):
test $0x10, %cl
jnz L(PrologFindZeroExit5)
test $0x20, %cl
jnz L(PrologFindZeroExit6)
test $0x40, %cl
jnz L(PrologFindZeroExit7)
/* NUL is byte 7. */
and $1 << 8 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
/* NUL is among bytes 8..15. */
L(prolog_find_zero_high):
mov %ch, %dh
and $15, %dh
jz L(prolog_find_zero_high_8)
test $0x01, %ch
jnz L(PrologFindZeroExit9)
test $0x02, %ch
jnz L(PrologFindZeroExit10)
test $0x04, %ch
jnz L(PrologFindZeroExit11)
/* NUL is byte 11. */
and $1 << 12 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
/* NUL is among bytes 12..15. */
L(prolog_find_zero_high_8):
test $0x10, %ch
jnz L(PrologFindZeroExit13)
test $0x20, %ch
jnz L(PrologFindZeroExit14)
test $0x40, %ch
jnz L(PrologFindZeroExit15)
/* NUL is byte 15. */
and $1 << 16 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
/* L(PrologFindZeroExitN): the terminator is byte N-1 of the first block.
   Keep match bits 0..N-1 (mask (1 << N) - 1).  A surviving match is resolved
   by L(match_case1); otherwise the result is 0.  No saved match exists on
   these paths and %esi/%ebx were never pushed, so there is nothing to pop
   beyond what RETURN handles. */
L(PrologFindZeroExit1):
and $1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit2):
and $1 << 2 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit3):
and $1 << 3 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit5):
and $1 << 5 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit6):
and $1 << 6 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit7):
and $1 << 7 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit9):
and $1 << 9 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit10):
and $1 << 10 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit11):
and $1 << 11 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit13):
and $1 << 13 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit14):
and $1 << 14 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
.p2align 4
L(PrologFindZeroExit15):
and $1 << 15 - 1, %eax
jnz L(match_case1)
xor %eax, %eax
RETURN
END (strrchr)

View file

@ -0,0 +1,267 @@
/*
Copyright (c) 2011 Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef L
# define L(label) .L##label
#endif
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
#define PARMS 4
#define STR1 PARMS
#define STR2 STR1+4
.text
/* wcschr: scan the string of 32-bit elements at STR1 for the first element
   equal to STR2.  The result is returned in %eax: the address of that
   element, or 0 when the zero terminator comes first.

   Register roles:
     %ecx   address of the 16-byte block the masks refer to
     %xmm1  the searched element replicated into all four dword lanes
     %xmm2  zero before each compare; receives the zero-element compare
     %eax   match mask, %edx zero-element mask (from pmovmskb; each dword
            lane contributes four adjacent bits, so dword k is bits 4k..4k+3) */
ENTRY (wcschr)
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
mov %ecx, %eax
/* The two dword unpacks replicate the low dword of %xmm1 into all lanes. */
punpckldq %xmm1, %xmm1
pxor %xmm2, %xmm2
punpckldq %xmm1, %xmm1
/* An offset above 48 within the 64-byte block means a 16-byte load from the
   string start would extend past that block; use the aligned-load path. */
and $63, %eax
cmp $48, %eax
ja L(cross_cache)
movdqu (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
or %eax, %edx
jnz L(matches)
/* Nothing in the first 16 bytes.  Round down to a 16-byte boundary; L(loop)
   adds 16 before its first load. */
and $-16, %ecx
jmp L(loop)
.p2align 4
/* Aligned first load.  %edi temporarily holds the aligned block address and
   %ecx the byte offset of the string within it; both masks are shifted right
   by that offset to drop the bytes before the string start. */
L(cross_cache):
PUSH (%edi)
mov %ecx, %edi
mov %eax, %ecx
and $-16, %edi
and $15, %ecx
movdqa (%edi), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
sarl %cl, %edx
sarl %cl, %eax
test %eax, %eax
jz L(unaligned_no_match)
/* Match present: %ecx becomes the original string address again, which is
   the address the shifted masks are relative to. */
add %edi, %ecx
POP (%edi)
test %edx, %edx
jz L(match_case1)
/* Both a match and a zero element: same decision sequence as
   L(match_case2). */
test %al, %al
jz L(match_higth_case2)
test $15, %al
jnz L(match_case2_4)
test $15, %dl
jnz L(return_null)
lea 4(%ecx), %eax
ret
/* CFI only: the following code is entered with %edi pushed. */
CFI_PUSH (%edi)
.p2align 4
L(unaligned_no_match):
mov %edi, %ecx
POP (%edi)
test %edx, %edx
jnz L(return_null)
/* The unshifted compare may have flagged zero elements located before the
   string start; clear %xmm2 so the loop starts from a clean state. */
pxor %xmm2, %xmm2
/* Loop start on aligned string. */
/* Unrolled four times: advance %ecx by 16, load the aligned block, build the
   zero-element mask (%edx) and match mask (%eax), leave when either is
   non-zero. */
.p2align 4
L(loop):
add $16, %ecx
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
or %eax, %edx
jnz L(matches)
add $16, %ecx
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
or %eax, %edx
jnz L(matches)
add $16, %ecx
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
or %eax, %edx
jnz L(matches)
add $16, %ecx
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm2, %edx
pmovmskb %xmm0, %eax
or %eax, %edx
jz L(loop)
.p2align 4
/* The block at %ecx holds a match, a zero element, or both.  %edx was ORed
   with the match mask, so the pure zero-element mask is recomputed first. */
L(matches):
pmovmskb %xmm2, %edx
test %eax, %eax
jz L(return_null)
test %edx, %edx
jz L(match_case1)
.p2align 4
/* Match and zero element in the same block: return the first match unless a
   zero element sits in an earlier dword. */
L(match_case2):
test %al, %al
jz L(match_higth_case2)
test $15, %al
jnz L(match_case2_4)
/* First match is dword 1; a zero in dword 0 precedes it. */
test $15, %dl
jnz L(return_null)
lea 4(%ecx), %eax
ret
.p2align 4
/* First match is dword 0. */
L(match_case2_4):
mov %ecx, %eax
ret
.p2align 4
/* First match is dword 2 or 3; any zero in dwords 0..1 precedes it. */
L(match_higth_case2):
test %dl, %dl
jnz L(return_null)
test $15, %ah
jnz L(match_case2_12)
/* First match is dword 3; a zero in dword 2 precedes it. */
test $15, %dh
jnz L(return_null)
lea 12(%ecx), %eax
ret
.p2align 4
/* First match is dword 2. */
L(match_case2_12):
lea 8(%ecx), %eax
ret
.p2align 4
/* Match with no zero element in the block: return the lowest matching
   dword. */
L(match_case1):
test %al, %al
jz L(match_higth_case1)
test $0x01, %al
jnz L(exit0)
lea 4(%ecx), %eax
ret
.p2align 4
L(match_higth_case1):
test $0x01, %ah
jnz L(exit3)
lea 12(%ecx), %eax
ret
.p2align 4
/* Dword 0. */
L(exit0):
mov %ecx, %eax
ret
.p2align 4
/* Dword 2. */
L(exit3):
lea 8(%ecx), %eax
ret
.p2align 4
L(return_null):
xor %eax, %eax
ret
END (wcschr)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,306 @@
/*
Copyright (c) 2011 Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* wcslen: return in %eax the number of 32-bit elements that precede the
   first zero element of the string whose address is in %edx.

   Built stand-alone, this file defines the wcslen entry point and loads
   %edx from the stack argument.  When USE_AS_WCSCAT is defined the file is
   meant to be textually included: the includer provides L, RETURN and the
   entry point, loads %edx itself, and execution falls out of the bottom of
   this file after L(exit_tail7) has set %eax. */
#ifndef USE_AS_WCSCAT
# ifndef L
# define L(label) .L##label
# endif
# ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
# endif
# ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
# endif
# ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
# endif
# ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
# endif
# define PARMS 4
# define STR PARMS
# define RETURN ret
.text
ENTRY (wcslen)
mov STR(%esp), %edx
#endif
/* Probe the first eight elements one at a time.  The compares carry an
   explicit l suffix: with an immediate and a memory operand there is no
   register to imply the operand size, and the elements are 32 bits wide. */
cmpl $0, (%edx)
jz L(exit_tail0)
cmpl $0, 4(%edx)
jz L(exit_tail1)
cmpl $0, 8(%edx)
jz L(exit_tail2)
cmpl $0, 12(%edx)
jz L(exit_tail3)
cmpl $0, 16(%edx)
jz L(exit_tail4)
cmpl $0, 20(%edx)
jz L(exit_tail5)
cmpl $0, 24(%edx)
jz L(exit_tail6)
cmpl $0, 28(%edx)
jz L(exit_tail7)
/* Switch to aligned 16-byte compares.  %eax is the 16-byte boundary at or
   below string + 32, so some already-probed elements may be rescanned.
   %ecx = string + 16 is the bias L(exit) subtracts: every compare below is
   followed by "lea 16(%eax), %eax", so at L(exit) %eax - %ecx is the byte
   offset of the block in which the zero was found. */
pxor %xmm0, %xmm0
lea 32(%edx), %eax
lea -16(%eax), %ecx
and $-16, %eax
pcmpeqd (%eax), %xmm0
pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm1
pmovmskb %xmm1, %edx
pxor %xmm2, %xmm2
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm2
pmovmskb %xmm2, %edx
pxor %xmm3, %xmm3
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm3
pmovmskb %xmm3, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
/* From here on %xmm0..%xmm3 are reused without clearing: each is still
   zero because its previous compare found no zero element. */
pcmpeqd (%eax), %xmm0
pmovmskb %xmm0, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm1
pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm2
pmovmskb %xmm2, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm3
pmovmskb %xmm3, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm0
pmovmskb %xmm0, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm1
pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm2
pmovmskb %xmm2, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm3
pmovmskb %xmm3, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm0
pmovmskb %xmm0, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm1
pmovmskb %xmm1, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm2
pmovmskb %xmm2, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
pcmpeqd (%eax), %xmm3
pmovmskb %xmm3, %edx
lea 16(%eax), %eax
test %edx, %edx
jnz L(exit)
/* Round down to a 64-byte boundary for the four-blocks-per-iteration loop
   (this may step back over blocks that were already checked). */
and $-0x40, %eax
.p2align 4
/* Each iteration loads four aligned blocks and takes their byte-wise
   minimum.  A zero dword in any block yields a zero dword in the minimum,
   but bytes taken from different blocks can also combine into a zero dword,
   so a hit here is only a candidate and is re-verified below.  %xmm3 is
   zero on entry and is the comparison reference. */
L(aligned_64_loop):
movaps (%eax), %xmm0
movaps 16(%eax), %xmm1
movaps 32(%eax), %xmm2
movaps 48(%eax), %xmm6
pminub %xmm1, %xmm0
pminub %xmm6, %xmm2
pminub %xmm0, %xmm2
pcmpeqd %xmm3, %xmm2
pmovmskb %xmm2, %edx
lea 64(%eax), %eax
test %edx, %edx
jz L(aligned_64_loop)
/* Candidate hit: re-check the four blocks individually.  %eax is already
   64 past the first block, so %ecx is raised by 48 and then lowered by 16
   per block to keep the subtraction in L(exit) yielding the offset of the
   block being tested. */
pcmpeqd -64(%eax), %xmm3
pmovmskb %xmm3, %edx
lea 48(%ecx), %ecx
test %edx, %edx
jnz L(exit)
pcmpeqd %xmm1, %xmm3
pmovmskb %xmm3, %edx
lea -16(%ecx), %ecx
test %edx, %edx
jnz L(exit)
pcmpeqd -32(%eax), %xmm3
pmovmskb %xmm3, %edx
lea -16(%ecx), %ecx
test %edx, %edx
jnz L(exit)
pcmpeqd %xmm6, %xmm3
pmovmskb %xmm3, %edx
lea -16(%ecx), %ecx
test %edx, %edx
jnz L(exit)
/* False positive: %ecx is back at its original value and %xmm3 is still
   zero, so simply continue. */
jmp L(aligned_64_loop)
.p2align 4
/* %edx is the zero-element mask of one block (dword k sets bits 4k..4k+3).
   Convert the block offset to an element count and add the index of the
   first zero dword within the block. */
L(exit):
sub %ecx, %eax
shr $2, %eax
test %dl, %dl
jz L(exit_high)
mov %dl, %cl
and $15, %cl
jz L(exit_1)
/* Zero is dword 0 of the block. */
RETURN
.p2align 4
L(exit_high):
mov %dh, %ch
and $15, %ch
jz L(exit_3)
/* Zero is dword 2 of the block. */
add $2, %eax
RETURN
.p2align 4
/* Zero is dword 1 of the block. */
L(exit_1):
add $1, %eax
RETURN
.p2align 4
/* Zero is dword 3 of the block. */
L(exit_3):
add $3, %eax
RETURN
.p2align 4
/* L(exit_tailN): the zero element was found by the Nth scalar probe, so the
   length is N. */
L(exit_tail0):
xor %eax, %eax
RETURN
.p2align 4
L(exit_tail1):
mov $1, %eax
RETURN
.p2align 4
L(exit_tail2):
mov $2, %eax
RETURN
.p2align 4
L(exit_tail3):
mov $3, %eax
RETURN
.p2align 4
L(exit_tail4):
mov $4, %eax
RETURN
.p2align 4
L(exit_tail5):
mov $5, %eax
RETURN
.p2align 4
L(exit_tail6):
mov $6, %eax
RETURN
.p2align 4
L(exit_tail7):
mov $7, %eax
#ifndef USE_AS_WCSCAT
RETURN
END (wcslen)
#endif

View file

@ -0,0 +1,402 @@
/*
Copyright (c) 2011 Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef L
# define L(label) .L##label
#endif
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
/* Offset from %esp to the first argument once ENTRANCE has pushed %edi
   (one pushed register on top of the return address). */
#define PARMS 8
/* Prologue: save %edi together with the matching CFI annotations. */
#define ENTRANCE PUSH(%edi);
/* Epilogue: restore %edi and return.  The trailing CFI_PUSH emits no
   instruction; it re-declares %edi as pushed for whatever code follows the
   ret in the file. */
#define RETURN POP(%edi); ret; CFI_PUSH(%edi);
/* %esp-relative offsets of the two arguments. */
#define STR1 PARMS
#define STR2 STR1+4
.text
/* wcsrchr: scan the string of 32-bit elements at STR1 for the last element
   equal to STR2.  The result is returned in %eax: the address of that
   element, or 0 when none occurs at or before the zero terminator.

   Register roles used throughout the routine:
     %edi   scan pointer (address just past the 16-byte block last examined)
     %xmm1  the searched element replicated into all four dword lanes
     %xmm2  zero before each compare; receives the zero-element compare
     %edx   match mask of the most recent block that contained a match
            (0 when none has been seen yet)
     %esi   scan-pointer value to use together with the mask saved in %edx
   Masks come from pmovmskb, so dword k of a block sets bits 4k..4k+3. */
ENTRY (wcsrchr)
ENTRANCE
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
mov %ecx, %edi
/* The two dword unpacks replicate the low dword of %xmm1 into all lanes. */
punpckldq %xmm1, %xmm1
pxor %xmm2, %xmm2
punpckldq %xmm1, %xmm1
/* ECX has OFFSET. */
and $63, %ecx
/* An offset above 48 within the 64-byte block means a 16-byte load from the
   string start would extend past that block; use the aligned-load path. */
cmp $48, %ecx
ja L(crosscache)
/* unaligned string. */
movdqu (%edi), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
/* Find where NULL is. */
pmovmskb %xmm2, %ecx
/* Check if there is a match. */
pmovmskb %xmm0, %eax
add $16, %edi
test %eax, %eax
jnz L(unaligned_match1)
/* No match in the first 16 bytes; a zero element there means result 0. */
test %ecx, %ecx
jnz L(return_null)
/* Round down to a 16-byte boundary for aligned loads; no match recorded
   yet (%edx = 0). */
and $-16, %edi
PUSH (%esi)
xor %edx, %edx
jmp L(loop)
/* CFI only: the following code is entered without %esi pushed. */
CFI_POP (%esi)
.p2align 4
/* The first (unaligned) 16 bytes contain a match: %eax is the match mask,
   %ecx the zero-element mask, %edi already points 16 bytes past the start. */
L(unaligned_match1):
test %ecx, %ecx
jnz L(prolog_find_zero_1)
PUSH (%esi)
/* Save current match */
mov %eax, %edx
mov %edi, %esi
/* Continue with aligned loads from the 16-byte boundary at or below %edi. */
and $-16, %edi
jmp L(loop)
/* CFI only: the following code is entered without %esi pushed. */
CFI_POP (%esi)
.p2align 4
/* Start of string lies in the last 15 bytes of its 64-byte block.  Load the
   enclosing aligned 16-byte block instead and discard the mask bits that
   belong to bytes before the string start.  %ecx becomes the byte offset of
   the string inside that aligned block. */
L(crosscache):
/* Handle unaligned string. */
and $15, %ecx
and $-16, %edi
/* %xmm3 is used for the zero compare here so that %xmm2 stays zero for the
   main loop. */
pxor %xmm3, %xmm3
movdqa (%edi), %xmm0
pcmpeqd %xmm0, %xmm3
pcmpeqd %xmm1, %xmm0
/* Find where NULL is. */
pmovmskb %xmm3, %edx
/* Check if there is a match. */
pmovmskb %xmm0, %eax
/* Remove the leading bytes. */
shr %cl, %edx
shr %cl, %eax
add $16, %edi
test %eax, %eax
jnz L(unaligned_match)
/* No match in the first block; a zero element there means result 0. */
test %edx, %edx
jnz L(return_null)
/* Enter the aligned loop with no match recorded (%edx = 0). */
PUSH (%esi)
xor %edx, %edx
jmp L(loop)
/* CFI only: the following code is entered without %esi pushed. */
CFI_POP (%esi)
.p2align 4
/* Reached from L(crosscache) with a match in the first block: %eax is the
   shifted match mask, %edx the shifted zero mask, %ecx the start offset. */
L(unaligned_match):
test %edx, %edx
jnz L(prolog_find_zero)
PUSH (%esi)
/* Record the match.  Because the mask was shifted right by %ecx, the saved
   pointer is advanced by %ecx so that later "pointer - 16 + offset"
   arithmetic still addresses the matching element.  Falls through into
   L(loop). */
mov %eax, %edx
lea (%edi, %ecx), %esi
/* Loop start on aligned string. */
/* Main scan, unrolled four times.  Each step loads one aligned 16-byte
   block, advances %edi past it, and builds the zero mask (%ecx) and the
   match mask (%eax).  If either is non-zero control goes to L(matches);
   %ecx holds the OR of both masks at that point, %eax the pure match mask.
   %xmm2 must be zero on entry and stays zero until a zero element is seen. */
.p2align 4
L(loop):
movdqa (%edi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %edi
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm2, %ecx
pmovmskb %xmm0, %eax
or %eax, %ecx
jnz L(matches)
movdqa (%edi), %xmm3
pcmpeqd %xmm3, %xmm2
add $16, %edi
pcmpeqd %xmm1, %xmm3
pmovmskb %xmm2, %ecx
pmovmskb %xmm3, %eax
or %eax, %ecx
jnz L(matches)
movdqa (%edi), %xmm4
pcmpeqd %xmm4, %xmm2
add $16, %edi
pcmpeqd %xmm1, %xmm4
pmovmskb %xmm2, %ecx
pmovmskb %xmm4, %eax
or %eax, %ecx
jnz L(matches)
movdqa (%edi), %xmm5
pcmpeqd %xmm5, %xmm2
add $16, %edi
pcmpeqd %xmm1, %xmm5
pmovmskb %xmm2, %ecx
pmovmskb %xmm5, %eax
or %eax, %ecx
jz L(loop)
.p2align 4
/* The block just scanned contains a match, a zero element, or both.  %eax is
   the match mask; %edi points just past the block. */
L(matches):
test %eax, %eax
jnz L(match)
/* No match in this block, so it contains the terminator: the answer is the
   most recently saved match, if any. */
L(return_value):
test %edx, %edx
jz L(return_null_1)
/* Reload the saved mask/pointer pair and return the highest matching
   dword of that block. */
mov %edx, %eax
mov %esi, %edi
POP (%esi)
test %ah, %ah
jnz L(match_third_or_fourth_wchar)
test $15 << 4, %al
jnz L(match_second_wchar)
/* Only dword 0 matched. */
lea -16(%edi), %eax
RETURN
/* CFI only: the following code is entered with %esi pushed. */
CFI_PUSH (%esi)
.p2align 4
/* Terminator reached and no match was ever recorded: return 0. */
L(return_null_1):
POP (%esi)
xor %eax, %eax
RETURN
CFI_PUSH (%esi)
.p2align 4
/* The block contains a match.  Recompute the pure zero mask (%ecx was ORed
   with the match mask in the loop). */
L(match):
pmovmskb %xmm2, %ecx
test %ecx, %ecx
jnz L(find_zero)
/* save match info */
mov %eax, %edx
mov %edi, %esi
jmp L(loop)
.p2align 4
/* The current block holds both a match (%eax, non-zero) and a zero element
   (%ecx, non-zero).  Determine which dword holds the first zero, discard
   match bits belonging to later dwords, and return the last remaining match;
   if none remains, fall back to the saved match via L(return_value).

   The immediates "1 << N - 1" rely on the GNU assembler's operator
   precedence, in which << binds tighter than -, i.e. they mean
   (1 << N) - 1.  Keeping a single bit of the terminating dword is enough,
   since a matching dword sets all four of its mask bits. */
L(find_zero):
test %cl, %cl
jz L(find_zero_in_third_or_fourth_wchar)
test $15, %cl
jz L(find_zero_in_second_wchar)
/* Zero is dword 0: only a match in dword 0 counts. */
and $1, %eax
jz L(return_value)
POP (%esi)
lea -16(%edi), %eax
RETURN
/* CFI only: the following code is entered with %esi pushed. */
CFI_PUSH (%esi)
.p2align 4
/* Zero is dword 1: keep dword 0 and the low bit of dword 1. */
L(find_zero_in_second_wchar):
and $1 << 5 - 1, %eax
jz L(return_value)
POP (%esi)
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%edi), %eax
RETURN
CFI_PUSH (%esi)
.p2align 4
L(find_zero_in_third_or_fourth_wchar):
test $15, %ch
jz L(find_zero_in_fourth_wchar)
/* Zero is dword 2: keep dwords 0..1 and the low bit of dword 2. */
and $1 << 9 - 1, %eax
jz L(return_value)
POP (%esi)
test %ah, %ah
jnz L(match_third_wchar)
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%edi), %eax
RETURN
CFI_PUSH (%esi)
.p2align 4
/* Zero is dword 3: every match in the block is valid, so no masking is
   needed (%eax is known to be non-zero here). */
L(find_zero_in_fourth_wchar):
POP (%esi)
test %ah, %ah
jnz L(match_third_or_fourth_wchar)
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%edi), %eax
RETURN
CFI_PUSH (%esi)
.p2align 4
/* Return stubs.  %edi points just past the 16-byte block that contains the
   match, so dword k of that block is at %edi - 16 + 4k.  Only %edi is on
   the stack when these are reached. */
L(match_second_wchar):
lea -12(%edi), %eax
RETURN
.p2align 4
/* %ah is non-zero: pick dword 3 if its bits are set, else dword 2. */
L(match_third_or_fourth_wchar):
test $15 << 4, %ah
jnz L(match_fourth_wchar)
lea -8(%edi), %eax
RETURN
.p2align 4
L(match_third_wchar):
lea -8(%edi), %eax
RETURN
.p2align 4
L(match_fourth_wchar):
lea -4(%edi), %eax
RETURN
.p2align 4
L(return_null):
xor %eax, %eax
RETURN
.p2align 4
/* First-block counterpart of L(find_zero): the very first block contains
   both a match (%eax) and a zero element, nothing has been saved yet and
   %esi is not on the stack, so a failed search returns 0 directly.  Entered
   from the L(crosscache) path with the shifted zero mask in %edx and the
   start offset in %ecx: advance %edi by that offset so the shifted mask bits
   map to the right addresses, then move the zero mask into %ecx. */
L(prolog_find_zero):
add %ecx, %edi
mov %edx, %ecx
/* Entry used by L(unaligned_match1), where %ecx already is the zero mask.
   The immediates "1 << N - 1" mean (1 << N) - 1. */
L(prolog_find_zero_1):
test %cl, %cl
jz L(prolog_find_zero_in_third_or_fourth_wchar)
test $15, %cl
jz L(prolog_find_zero_in_second_wchar)
/* Zero is dword 0: only a match in dword 0 counts. */
and $1, %eax
jz L(return_null)
lea -16(%edi), %eax
RETURN
.p2align 4
/* Zero is dword 1: keep dword 0 and the low bit of dword 1. */
L(prolog_find_zero_in_second_wchar):
and $1 << 5 - 1, %eax
jz L(return_null)
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%edi), %eax
RETURN
.p2align 4
L(prolog_find_zero_in_third_or_fourth_wchar):
test $15, %ch
jz L(prolog_find_zero_in_fourth_wchar)
/* Zero is dword 2: keep dwords 0..1 and the low bit of dword 2. */
and $1 << 9 - 1, %eax
jz L(return_null)
test %ah, %ah
jnz L(match_third_wchar)
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%edi), %eax
RETURN
.p2align 4
/* Zero is dword 3: every match in the block is valid (%eax is non-zero on
   both paths that lead here). */
L(prolog_find_zero_in_fourth_wchar):
test %ah, %ah
jnz L(match_third_or_fourth_wchar)
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%edi), %eax
RETURN
END (wcsrchr)

View file

@ -28,13 +28,8 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#if defined(USE_SSE2)
# define sse2_strlen_atom strlen #define MEMCPY bcopy
# include "sse2-strlen-atom.S" #define USE_AS_MEMMOVE
#define USE_AS_BCOPY
#else #include "ssse3-memcpy-atom.S"
# include "strlen.S"
#endif

View file

@ -28,8 +28,11 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include "cache.h"
#undef __i686
#ifndef MEMCPY #ifndef MEMCPY
# define MEMCPY ssse3_memcpy5 # define MEMCPY memcpy
#endif #endif
#ifndef L #ifndef L

View file

@ -28,13 +28,7 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#if defined(USE_SSSE3)
# define ssse3_strcmp_latest strcmp #define MEMCPY memmove
# include "ssse3-strcmp-latest.S" #define USE_AS_MEMMOVE
#include "ssse3-memcpy-atom.S"
#else
# include "strcmp.S"
#endif

View file

@ -0,0 +1,620 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef L
# define L(label) .L##label
#endif
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
#ifndef cfi_remember_state
# define cfi_remember_state .cfi_remember_state
#endif
#ifndef cfi_restore_state
# define cfi_restore_state .cfi_restore_state
#endif
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
#ifndef STRCAT
# define STRCAT strcat
#endif
#define PARMS 4
#define STR1 PARMS+4
#define STR2 STR1+4
#ifdef USE_AS_STRNCAT
# define LEN STR2+8
#endif
#define USE_AS_STRCAT
/* strcat / strncat entry point.
   Register roles established below:
     %edi = dst (kept as the return value), %ecx = src,
     %edx = write position in dst, %ebx = n (strncat build only). */
.section .text.ssse3,"ax",@progbits
ENTRY (STRCAT)
PUSH (%edi)
mov STR1(%esp), %edi
mov %edi, %edx
/* The strlen body is included inline; RETURN makes it continue at
   L(StrcpyAtom) instead of returning to the caller.
   NOTE(review): presumably it leaves the length of the string at %edx in
   %eax -- confirm against sse2-strlen-atom.S. */
#define RETURN jmp L(StrcpyAtom)
#include "sse2-strlen-atom.S"
L(StrcpyAtom):
mov STR2(%esp), %ecx
/* %edx = dst + %eax: where the appended bytes are written. */
lea (%edi, %eax), %edx
#ifdef USE_AS_STRNCAT
PUSH (%ebx)
mov LEN(%esp), %ebx
/* n == 0: nothing to append. n <= 8: handled by the short bounded path. */
test %ebx, %ebx
jz L(StrncatExit0)
cmp $8, %ebx
jbe L(StrncpyExit8Bytes)
#endif
/* Probe the first bytes of src one by one; a NUL at offset k jumps to
   L(Exit<k+1>), which copies k+1 bytes (terminator included). */
cmpb $0, (%ecx)
jz L(Exit1)
cmpb $0, 1(%ecx)
jz L(Exit2)
cmpb $0, 2(%ecx)
jz L(Exit3)
cmpb $0, 3(%ecx)
jz L(Exit4)
cmpb $0, 4(%ecx)
jz L(Exit5)
cmpb $0, 5(%ecx)
jz L(Exit6)
cmpb $0, 6(%ecx)
jz L(Exit7)
cmpb $0, 7(%ecx)
jz L(Exit8)
cmpb $0, 8(%ecx)
jz L(Exit9)
#ifdef USE_AS_STRNCAT
/* Bytes 0..8 are non-NUL and n > 8; n < 16 takes the bounded path. */
cmp $16, %ebx
jb L(StrncpyExit15Bytes)
#endif
cmpb $0, 9(%ecx)
jz L(Exit10)
cmpb $0, 10(%ecx)
jz L(Exit11)
cmpb $0, 11(%ecx)
jz L(Exit12)
cmpb $0, 12(%ecx)
jz L(Exit13)
cmpb $0, 13(%ecx)
jz L(Exit14)
cmpb $0, 14(%ecx)
jz L(Exit15)
cmpb $0, 15(%ecx)
jz L(Exit16)
#ifdef USE_AS_STRNCAT
/* First 16 bytes are non-NUL; n == 16 copies exactly those and terminates. */
cmp $16, %ebx
je L(StrncatExit16)
/* RETURN1: restore the saved registers and return; the trailing CFI_PUSHes
   re-establish the unwind state for the code that textually follows. */
# define RETURN1 POP (%ebx); POP (%edi); ret; \
CFI_PUSH (%ebx); CFI_PUSH (%edi)
/* NOTE(review): presumably selects the length-bounded code in the included
   strcpy body -- confirm in ssse3-strcpy-atom.S. */
# define USE_AS_STRNCPY
#else
# define RETURN1 POP(%edi); ret; CFI_PUSH(%edi)
#endif
/* Bulk copy for sources of 16 bytes or more continues in the included file. */
#include "ssse3-strcpy-atom.S"
.p2align 4
/* Final copy of 1..16 bytes.
   In: %esi = offset added to both %edx (dst) and %ecx (src); %eax = bit mask
   in which bit k marks a NUL at byte k of the block (NOTE(review): presumably
   produced by pmovmskb in the included strcpy code -- confirm there).
   The saved %esi is popped here; each exit returns %edi. */
L(CopyFrom1To16Bytes):
add %esi, %edx
add %esi, %ecx
POP (%esi)
/* No NUL in the low 8 bytes: look at the high half of the mask. */
test %al, %al
jz L(ExitHigh)
test $0x01, %al
jnz L(Exit1)
test $0x02, %al
jnz L(Exit2)
test $0x04, %al
jnz L(Exit3)
test $0x08, %al
jnz L(Exit4)
test $0x10, %al
jnz L(Exit5)
test $0x20, %al
jnz L(Exit6)
test $0x40, %al
jnz L(Exit7)
/* Only bit 7 can be set here: copy 8 bytes. */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN1
.p2align 4
/* Same dispatch for bytes 8..15 using the high mask byte. */
L(ExitHigh):
test $0x01, %ah
jnz L(Exit9)
test $0x02, %ah
jnz L(Exit10)
test $0x04, %ah
jnz L(Exit11)
test $0x08, %ah
jnz L(Exit12)
test $0x10, %ah
jnz L(Exit13)
test $0x20, %ah
jnz L(Exit14)
test $0x40, %ah
jnz L(Exit15)
/* Fall-through: copy the full 16 bytes. */
movlpd (%ecx), %xmm0
movlpd 8(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 8(%edx)
movl %edi, %eax
RETURN1
/* Fixed-size tails. L(ExitN) copies exactly N bytes from (%ecx) to (%edx)
   and returns %edi. L(StrncatExitN) first stores %bh at N(%edx) and then
   falls into L(ExitN).
   NOTE(review): %bh is presumably zero on every path that reaches a
   StrncatExit label (remaining count in %ebx below 256), making that store
   the NUL terminator -- confirm against all callers.
   Sizes that are not a power of two use two overlapping loads/stores. */
.p2align 4
L(StrncatExit1):
movb %bh, 1(%edx)
L(Exit1):
movb (%ecx), %al
movb %al, (%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit2):
movb %bh, 2(%edx)
L(Exit2):
movw (%ecx), %ax
movw %ax, (%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit3):
movb %bh, 3(%edx)
L(Exit3):
movw (%ecx), %ax
movw %ax, (%edx)
movb 2(%ecx), %al
movb %al, 2(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit4):
movb %bh, 4(%edx)
L(Exit4):
movl (%ecx), %eax
movl %eax, (%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit5):
movb %bh, 5(%edx)
L(Exit5):
movl (%ecx), %eax
movl %eax, (%edx)
movb 4(%ecx), %al
movb %al, 4(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit6):
movb %bh, 6(%edx)
L(Exit6):
movl (%ecx), %eax
movl %eax, (%edx)
movw 4(%ecx), %ax
movw %ax, 4(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit7):
movb %bh, 7(%edx)
L(Exit7):
/* 7 bytes as two 4-byte moves overlapping at byte 3. */
movl (%ecx), %eax
movl %eax, (%edx)
movl 3(%ecx), %eax
movl %eax, 3(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit8):
movb %bh, 8(%edx)
L(Exit8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit9):
movb %bh, 9(%edx)
L(Exit9):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movb 8(%ecx), %al
movb %al, 8(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit10):
movb %bh, 10(%edx)
L(Exit10):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movw 8(%ecx), %ax
movw %ax, 8(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit11):
movb %bh, 11(%edx)
L(Exit11):
/* 11 bytes: 8 bytes plus a 4-byte move overlapping at byte 7. */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl 7(%ecx), %eax
movl %eax, 7(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit12):
movb %bh, 12(%edx)
L(Exit12):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
movl %eax, 8(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit13):
movb %bh, 13(%edx)
L(Exit13):
/* 13..15 bytes: two 8-byte moves, the second starting at N-8. */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 5(%ecx), %xmm0
movlpd %xmm0, 5(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit14):
movb %bh, 14(%edx)
L(Exit14):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 6(%ecx), %xmm0
movlpd %xmm0, 6(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit15):
movb %bh, 15(%edx)
L(Exit15):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 7(%ecx), %xmm0
movlpd %xmm0, 7(%edx)
movl %edi, %eax
RETURN1
.p2align 4
L(StrncatExit16):
movb %bh, 16(%edx)
L(Exit16):
movlpd (%ecx), %xmm0
movlpd 8(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 8(%edx)
movl %edi, %eax
RETURN1
/* Everything from here to the matching #endif is the length-bounded
   (strncat) tail handling. */
#ifdef USE_AS_STRNCPY
CFI_PUSH(%esi)
.p2align 4
/* Case 2: the NUL mask in %eax is non-zero and the remaining count may also
   end inside this 16-byte block; whichever comes first decides the exit.
   NOTE(review): the `add $16` presumably undoes a subtraction done by the
   caller in the included strcpy code -- confirm there. */
L(CopyFrom1To16BytesCase2):
add $16, %ebx
add %esi, %ecx
/* %esi temporarily holds the final dst pointer so %edx can be used as
   scratch for the flag computation below. */
lea (%esi, %edx), %esi
/* %dh = sign-derived bit of (count - 9) OR'ed with the low mask byte.
   ZF ends up set only when count >= 9 and no NUL lies in bytes 0..7;
   lea and pop below do not modify the flags. */
lea -9(%ebx), %edx
and $1<<7, %dh
or %al, %dh
lea (%esi), %edx
POP (%esi)
jz L(ExitHighCase2)
/* Low half: for each position test "NUL here" first, then "count ends here". */
test $0x01, %al
jnz L(Exit1)
cmp $1, %ebx
je L(StrncatExit1)
test $0x02, %al
jnz L(Exit2)
cmp $2, %ebx
je L(StrncatExit2)
test $0x04, %al
jnz L(Exit3)
cmp $3, %ebx
je L(StrncatExit3)
test $0x08, %al
jnz L(Exit4)
cmp $4, %ebx
je L(StrncatExit4)
test $0x10, %al
jnz L(Exit5)
cmp $5, %ebx
je L(StrncatExit5)
test $0x20, %al
jnz L(Exit6)
cmp $6, %ebx
je L(StrncatExit6)
test $0x40, %al
jnz L(Exit7)
cmp $7, %ebx
je L(StrncatExit7)
/* Copy 8 bytes, then place the terminator: cmpb sets CF when the byte at
   offset 7 is already NUL, and `sbb $-1` advances %eax by one only when it
   is not, so the zero lands at offset 7 or 8 respectively. */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
lea 7(%edx), %eax
cmpb $1, (%eax)
sbb $-1, %eax
xor %cl, %cl
movb %cl, (%eax)
movl %edi, %eax
RETURN1
.p2align 4
/* High half of Case 2: same interleaved NUL / count tests for bytes 8..15. */
L(ExitHighCase2):
test $0x01, %ah
jnz L(Exit9)
cmp $9, %ebx
je L(StrncatExit9)
test $0x02, %ah
jnz L(Exit10)
cmp $10, %ebx
je L(StrncatExit10)
test $0x04, %ah
jnz L(Exit11)
cmp $11, %ebx
je L(StrncatExit11)
test $0x8, %ah
jnz L(Exit12)
cmp $12, %ebx
je L(StrncatExit12)
test $0x10, %ah
jnz L(Exit13)
cmp $13, %ebx
je L(StrncatExit13)
test $0x20, %ah
jnz L(Exit14)
cmp $14, %ebx
je L(StrncatExit14)
test $0x40, %ah
jnz L(Exit15)
cmp $15, %ebx
je L(StrncatExit15)
/* Fall-through: copy all 16 bytes. No separate terminator store is done
   here; with a non-zero mask and bits 0..14 clear, bit 15 (byte 15 is NUL)
   is the only one left. */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 8(%ecx), %xmm1
movlpd %xmm1, 8(%edx)
movl %edi, %eax
RETURN1
CFI_PUSH(%esi)
/* Dispatcher: a non-zero NUL mask means Case 2, otherwise fall into Case 3. */
L(CopyFrom1To16BytesCase2OrCase3):
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
.p2align 4
/* Case 3: no NUL in this block, so the copy is bounded by the remaining
   count alone. Jump to the StrncatExit tail matching the count; each of
   those writes the terminator after the copied bytes. */
L(CopyFrom1To16BytesCase3):
add $16, %ebx
add %esi, %edx
add %esi, %ecx
POP (%esi)
cmp $8, %ebx
ja L(ExitHighCase3)
cmp $1, %ebx
je L(StrncatExit1)
cmp $2, %ebx
je L(StrncatExit2)
cmp $3, %ebx
je L(StrncatExit3)
cmp $4, %ebx
je L(StrncatExit4)
cmp $5, %ebx
je L(StrncatExit5)
cmp $6, %ebx
je L(StrncatExit6)
cmp $7, %ebx
je L(StrncatExit7)
/* Fall-through (count not matched above, at most 8): copy 8 bytes and store
   %bh after them. */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movb %bh, 8(%edx)
movl %edi, %eax
RETURN1
.p2align 4
/* Counts above 8. */
L(ExitHighCase3):
cmp $9, %ebx
je L(StrncatExit9)
cmp $10, %ebx
je L(StrncatExit10)
cmp $11, %ebx
je L(StrncatExit11)
cmp $12, %ebx
je L(StrncatExit12)
cmp $13, %ebx
je L(StrncatExit13)
cmp $14, %ebx
je L(StrncatExit14)
cmp $15, %ebx
je L(StrncatExit15)
/* Fall-through: copy 16 bytes and store %bh after them. */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 8(%ecx), %xmm1
movlpd %xmm1, 8(%edx)
movb %bh, 16(%edx)
movl %edi, %eax
RETURN1
.p2align 4
/* n == 0: leave dst untouched and return it. */
L(StrncatExit0):
movl %edi, %eax
RETURN1
.p2align 4
/* Reached from the entry code when bytes 0..8 of src are non-NUL and
   8 < n < 16. Alternates "count ends here" and "NUL here" tests for
   positions 9..14. */
L(StrncpyExit15Bytes):
cmp $9, %ebx
je L(StrncatExit9)
cmpb $0, 9(%ecx)
jz L(Exit10)
cmp $10, %ebx
je L(StrncatExit10)
cmpb $0, 10(%ecx)
jz L(Exit11)
cmp $11, %ebx
je L(StrncatExit11)
cmpb $0, 11(%ecx)
jz L(Exit12)
cmp $12, %ebx
je L(StrncatExit12)
cmpb $0, 12(%ecx)
jz L(Exit13)
cmp $13, %ebx
je L(StrncatExit13)
cmpb $0, 13(%ecx)
jz L(Exit14)
cmp $14, %ebx
je L(StrncatExit14)
/* Remaining case: copy 15 bytes with two overlapping 8-byte moves. The
   terminator goes at offset 14 if that byte is already NUL (CF set by cmpb,
   so sbb leaves %eax unchanged), otherwise at offset 15. */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 7(%ecx), %xmm0
movlpd %xmm0, 7(%edx)
lea 14(%edx), %eax
cmpb $1, (%eax)
sbb $-1, %eax
movb %bh, (%eax)
movl %edi, %eax
RETURN1
.p2align 4
/* Reached from the entry code when 0 < n <= 8. For each position test
   "NUL here" first, then "count ends here". */
L(StrncpyExit8Bytes):
cmpb $0, (%ecx)
jz L(Exit1)
cmp $1, %ebx
je L(StrncatExit1)
cmpb $0, 1(%ecx)
jz L(Exit2)
cmp $2, %ebx
je L(StrncatExit2)
cmpb $0, 2(%ecx)
jz L(Exit3)
cmp $3, %ebx
je L(StrncatExit3)
cmpb $0, 3(%ecx)
jz L(Exit4)
cmp $4, %ebx
je L(StrncatExit4)
cmpb $0, 4(%ecx)
jz L(Exit5)
cmp $5, %ebx
je L(StrncatExit5)
cmpb $0, 5(%ecx)
jz L(Exit6)
cmp $6, %ebx
je L(StrncatExit6)
cmpb $0, 6(%ecx)
jz L(Exit7)
cmp $7, %ebx
je L(StrncatExit7)
/* Remaining case (n == 8): copy 8 bytes; the terminator goes at offset 7 if
   that byte is already NUL, otherwise at offset 8. */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
lea 7(%edx), %eax
cmpb $1, (%eax)
sbb $-1, %eax
movb %bh, (%eax)
movl %edi, %eax
RETURN1
#endif
END (STRCAT)

View file

@ -107,8 +107,12 @@ name: \
sub %esi, %ebp sub %esi, %ebp
#endif #endif
#ifndef STRCMP
# define STRCMP strcmp
#endif
.section .text.ssse3,"ax",@progbits .section .text.ssse3,"ax",@progbits
ENTRY (ssse3_strcmp_latest) ENTRY (STRCMP)
#ifdef USE_AS_STRNCMP #ifdef USE_AS_STRNCMP
PUSH (%ebp) PUSH (%ebp)
#endif #endif
@ -2271,4 +2275,4 @@ L(less16bytes_sncmp):
ret ret
#endif #endif
END (ssse3_strcmp_latest) END (STRCMP)

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,34 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Build strncat from the shared strcat source: STRCAT names the exported
   entry point and USE_AS_STRNCAT enables the length-bounded code paths in
   the included file. */
#define STRCAT strncat
#define USE_AS_STRNCAT
#include "ssse3-strcat-atom.S"

View file

@ -28,13 +28,8 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#if defined(USE_SSSE3)
# define MEMCMP memcmp #define USE_AS_STRNCMP
# include "ssse3-memcmp3-new.S" #define STRCMP strncmp
#include "ssse3-strcmp-atom.S"
#else
# include "memcmp.S"
#endif

View file

@ -0,0 +1,33 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Build strncpy from the shared strcpy source.
   NOTE(review): STRCPY presumably names the exported entry point and
   USE_AS_STRNCPY presumably enables the length-bounded paths -- confirm in
   ssse3-strcpy-atom.S. */
#define USE_AS_STRNCPY
#define STRCPY strncpy
#include "ssse3-strcpy-atom.S"

View file

@ -1,5 +1,5 @@
/* /*
Copyright (c) 2010, Intel Corporation Copyright (c) 2011 Intel Corporation
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -28,18 +28,87 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#ifndef L
#if defined(USE_SSSE3) # define L(label) .L##label
# include "cache_wrapper.S"
# undef __i686
# define MEMCPY bcopy
# define USE_AS_MEMMOVE
# define USE_AS_BCOPY
# include "ssse3-memcpy5.S"
#else
# include "bcopy.S"
#endif #endif
/* Fallback definitions of the CFI helper macros. Each one is only defined
   here when the including environment has not already provided it. */
#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif
#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif
#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif
#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif
#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif
/* ENTRY(name): declare NAME as a global function symbol aligned to 2^4
   bytes and open its CFI region. */
#ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
#endif
/* END(name): close the CFI region and record the size of NAME. */
#ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
#endif
/* CFI bookkeeping that mirrors a 4-byte push / pop of REG. */
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
/* PUSH/POP: the instruction together with its matching CFI annotation. */
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
/* Stack offsets of the arguments as seen after the PUSH (%edi) in the
   wcscat prologue, hence the extra 4 on top of PARMS. */
#define PARMS 4
#define STR1 PARMS+4
#define STR2 STR1+4
/* Tells the wcscpy source included further down to omit its own macro
   definitions, entry sequence and END, so only its copy body is emitted. */
#define USE_AS_WCSCAT
/* wcscat entry point.
   Register roles established below:
     %edi = dst (kept as the return value), %ecx = src,
     %edx = write position in dst. */
.text
ENTRY (wcscat)
PUSH (%edi)
mov STR1(%esp), %edi
mov %edi, %edx
/* The wcslen body is included inline; RETURN makes it continue at
   L(WcscpyAtom) instead of returning to the caller.
   NOTE(review): presumably it leaves the length of the string at %edx,
   counted in wide characters, in %eax -- confirm against sse2-wcslen-atom.S. */
#define RETURN jmp L(WcscpyAtom)
#include "sse2-wcslen-atom.S"
L(WcscpyAtom):
/* Scale %eax by 4 (bytes per wide character) to get a byte offset. */
shl $2, %eax
mov STR2(%esp), %ecx
lea (%edi, %eax), %edx
/* Probe the first four wide characters of src. The compares carry an
   explicit `l` suffix: with an immediate and a memory operand there is no
   register to infer the operand size from, and a 32-bit compare is what a
   4-byte wide character requires. */
cmpl $0, (%ecx)
jz L(Exit4)
cmpl $0, 4(%ecx)
jz L(Exit8)
cmpl $0, 8(%ecx)
jz L(Exit12)
cmpl $0, 12(%ecx)
jz L(Exit16)
/* From here on RETURN really returns: restore %edi and ret. The trailing
   CFI_PUSH re-establishes the unwind state for the code that follows. */
#undef RETURN
#define RETURN POP(%edi); ret; CFI_PUSH(%edi)
/* Bulk copy for sources of four or more wide characters. */
#include "ssse3-wcscpy-atom.S"
END (wcscat)

View file

@ -0,0 +1,652 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Stand-alone build only: when this file is included with USE_AS_WCSCAT
   defined, the includer supplies the macros and the entry sequence, and the
   code starts at the PUSH (%esi) after the #endif. */
#ifndef USE_AS_WCSCAT
# ifndef L
# define L(label) .L##label
# endif
# ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
# endif
# ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
# endif
# ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
# endif
# ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
# endif
# ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
# endif
/* ENTRY(name): declare NAME as a global function symbol aligned to 2^4
   bytes and open its CFI region. */
# ifndef ENTRY
# define ENTRY(name) \
.type name, @function; \
.globl name; \
.p2align 4; \
name: \
cfi_startproc
# endif
/* END(name): close the CFI region and record the size of NAME. */
# ifndef END
# define END(name) \
cfi_endproc; \
.size name, .-name
# endif
/* CFI bookkeeping that mirrors a 4-byte push / pop of REG. */
# define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
# define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG)
/* Arguments are read before anything is pushed, so STR1 sits directly at
   PARMS. */
# define PARMS 4
/* RETURN: restore %edi and ret; the trailing CFI_PUSH re-establishes the
   unwind state for the code that textually follows. */
# define RETURN POP (%edi); ret; CFI_PUSH (%edi)
# define STR1 PARMS
# define STR2 STR1+4
# define LEN STR2+4
.text
/* wcscpy entry point: %edx = dst, %ecx = src. */
ENTRY (wcscpy)
mov STR1(%esp), %edx
mov STR2(%esp), %ecx
/* Probe the first four wide characters. A terminator among them is handled
   by the ExitTail* stubs, which run before %edi has been saved. The compares
   carry an explicit `l` suffix: with an immediate and a memory operand there
   is no register to infer the operand size from, and a 32-bit compare is
   what a 4-byte wide character requires. */
cmpl $0, (%ecx)
jz L(ExitTail4)
cmpl $0, 4(%ecx)
jz L(ExitTail8)
cmpl $0, 8(%ecx)
jz L(ExitTail12)
cmpl $0, 12(%ecx)
jz L(ExitTail16)
/* Long string: save %edi and keep the original dst in it as return value. */
PUSH (%edi)
mov %edx, %edi
#endif
/* Common body (also used by wcscat). On entry %ecx = src, %edx = write
   position, %edi = value to return, and the first 16 source bytes are known
   to contain no wide NUL. */
PUSH (%esi)
/* %esi = first 16-byte boundary strictly above src. */
lea 16(%ecx), %esi
and $-16, %esi
pxor %xmm0, %xmm0
/* Look for a zero dword in the aligned block at %esi ... */
pcmpeqd (%esi), %xmm0
/* ... while copying the first 16 bytes with unaligned moves. */
movdqu (%ecx), %xmm1
movdqu %xmm1, (%edx)
pmovmskb %xmm0, %eax
/* %esi becomes the byte offset from src to that aligned block. */
sub %ecx, %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
/* Advance dst to its next 16-byte boundary and move src by the same amount. */
mov %edx, %eax
lea 16(%edx), %edx
and $-16, %edx
sub %edx, %eax
sub %eax, %ecx
/* Dispatch on src's offset within a 16-byte block. `mov $0` (rather than a
   flag-clobbering xor) keeps the ZF produced by the `and` for the jz.
   Assumes the offset is a multiple of 4 -- anything that is not 0, 4 or 8
   is treated as 12. */
mov %ecx, %eax
and $0xf, %eax
mov $0, %esi
jz L(Align16Both)
cmp $4, %eax
je L(Shl4)
cmp $8, %eax
je L(Shl8)
jmp L(Shl12)
/* src and dst are both 16-byte aligned. Unrolled copy: each step loads the
   next block, stores the previous one, and tests the new block for a zero
   dword. %esi is the running byte offset handed to L(CopyFrom1To16Bytes);
   %xmm0 stays all-zero as long as no match has been found. */
L(Align16Both):
movaps (%ecx), %xmm1
movaps 16(%ecx), %xmm2
movaps %xmm1, (%edx)
pcmpeqd %xmm2, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps 16(%ecx, %esi), %xmm3
movaps %xmm2, (%edx, %esi)
pcmpeqd %xmm3, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps 16(%ecx, %esi), %xmm4
movaps %xmm3, (%edx, %esi)
pcmpeqd %xmm4, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps 16(%ecx, %esi), %xmm1
movaps %xmm4, (%edx, %esi)
pcmpeqd %xmm1, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps 16(%ecx, %esi), %xmm2
movaps %xmm1, (%edx, %esi)
pcmpeqd %xmm2, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps 16(%ecx, %esi), %xmm3
movaps %xmm2, (%edx, %esi)
pcmpeqd %xmm3, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps %xmm3, (%edx, %esi)
/* Round src down to a 64-byte boundary and shift dst by the same distance
   before entering the 64-byte loop. %esi = -0x40 because the loop advances
   both pointers before it tests for a terminator. */
mov %ecx, %eax
lea 16(%ecx, %esi), %ecx
and $-0x40, %ecx
sub %ecx, %eax
sub %eax, %edx
mov $-0x40, %esi
/* Main loop: 64 bytes per iteration. The four blocks are folded with a
   byte-wise minimum and the result is tested once for a zero dword. A real
   zero dword always survives the fold; the fold can also report a zero that
   no single block contains, which L(Aligned64Leave) sorts out. */
L(Aligned64Loop):
movaps (%ecx), %xmm2
movaps 32(%ecx), %xmm3
movaps %xmm2, %xmm4
movaps 16(%ecx), %xmm5
movaps %xmm3, %xmm6
movaps 48(%ecx), %xmm7
pminub %xmm5, %xmm2
pminub %xmm7, %xmm3
pminub %xmm2, %xmm3
lea 64(%edx), %edx
pcmpeqd %xmm0, %xmm3
lea 64(%ecx), %ecx
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(Aligned64Leave)
/* No terminator: store the four saved blocks (pointers already advanced). */
movaps %xmm4, -64(%edx)
movaps %xmm5, -48(%edx)
movaps %xmm6, -32(%edx)
movaps %xmm7, -16(%edx)
jmp L(Aligned64Loop)
/* Re-test the four blocks one at a time, storing each block that precedes
   the terminator. %esi starts at -0x40 and steps by 16 so that
   L(CopyFrom1To16Bytes) lands on the matching block. */
L(Aligned64Leave):
pcmpeqd %xmm4, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
pcmpeqd %xmm5, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm4, -64(%edx)
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
pcmpeqd %xmm6, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm5, -48(%edx)
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
movaps %xmm6, -32(%edx)
pcmpeqd %xmm7, %xmm0
pmovmskb %xmm0, %eax
lea 16(%esi), %esi
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
/* None of the four blocks holds a zero dword: the folded test was a false
   alarm, so finish the stores and resume the loop. */
mov $-0x40, %esi
movaps %xmm7, -16(%edx)
jmp L(Aligned64Loop)
.p2align 4
/* src is 4 bytes past a 16-byte boundary while dst is aligned. All loads are
   aligned (at -4, 12, 28, ... from %ecx) and `palignr $4` stitches two
   neighbouring blocks into the 16 bytes that start at %ecx. */
L(Shl4):
movaps -4(%ecx), %xmm1
movaps 12(%ecx), %xmm2
/* Also the re-entry point from the 64-byte loop below: checks one block at a
   time so the exact block holding the terminator is found. */
L(Shl4Start):
pcmpeqd %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm1
test %eax, %eax
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
test %eax, %eax
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
movaps %xmm2, (%edx)
/* Re-base: round the aligned load address down to a 64-byte boundary, pull
   dst back by the same distance, and restore the 12-byte bias on %ecx. */
lea 28(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -12(%ecx), %ecx
sub %eax, %edx
movaps -4(%ecx), %xmm1
/* 64 bytes per iteration: fold the four loaded blocks with a byte-wise
   minimum and test once for a zero dword. On a hit, go back to L(Shl4Start)
   to process block by block. */
L(Shl4LoopStart):
movaps 12(%ecx), %xmm2
movaps 28(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 44(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 60(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqd %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $4, %xmm4, %xmm5
palignr $4, %xmm3, %xmm4
test %eax, %eax
jnz L(Shl4Start)
palignr $4, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $4, %xmm1, %xmm2
/* The last loaded block becomes the "previous" block of the next round. */
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl4LoopStart)
/* A terminator lies in the aligned block at 12(%ecx). Copy the 12 bytes in
   front of that block, step both pointers onto it, then dispatch on the
   mask in %eax (four mask bits per wide character). */
L(Shl4LoopExit):
movlpd (%ecx), %xmm0
movl 8(%ecx), %esi
movlpd %xmm0, (%edx)
movl %esi, 8(%edx)
POP (%esi)
add $12, %edx
add $12, %ecx
test %al, %al
jz L(ExitHigh)
test $0x01, %al
jnz L(Exit4)
/* Terminator is the second wide character: copy 8 bytes. */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN
/* Unwind state for the code below: %esi is still on the stack here. */
CFI_PUSH (%esi)
.p2align 4
/* src is 8 bytes past a 16-byte boundary while dst is aligned. All loads are
   aligned (at -8, 8, 24, ... from %ecx) and `palignr $8` stitches two
   neighbouring blocks into the 16 bytes that start at %ecx. */
L(Shl8):
movaps -8(%ecx), %xmm1
movaps 8(%ecx), %xmm2
/* Also the re-entry point from the 64-byte loop below: checks one block at a
   time so the exact block holding the terminator is found. */
L(Shl8Start):
pcmpeqd %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm1
test %eax, %eax
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
test %eax, %eax
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
movaps %xmm2, (%edx)
/* Re-base: round the aligned load address down to a 64-byte boundary, pull
   dst back by the same distance, and restore the 8-byte bias on %ecx. */
lea 24(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -8(%ecx), %ecx
sub %eax, %edx
movaps -8(%ecx), %xmm1
/* 64 bytes per iteration: fold the four loaded blocks with a byte-wise
   minimum and test once for a zero dword. On a hit, go back to L(Shl8Start)
   to process block by block. */
L(Shl8LoopStart):
movaps 8(%ecx), %xmm2
movaps 24(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 40(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 56(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqd %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $8, %xmm4, %xmm5
palignr $8, %xmm3, %xmm4
test %eax, %eax
jnz L(Shl8Start)
palignr $8, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $8, %xmm1, %xmm2
/* The last loaded block becomes the "previous" block of the next round. */
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl8LoopStart)
/* A terminator lies in the aligned block at 8(%ecx). Copy the 8 bytes in
   front of that block, step both pointers onto it, then dispatch on the
   mask in %eax (four mask bits per wide character). */
L(Shl8LoopExit):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
POP (%esi)
add $8, %edx
add $8, %ecx
test %al, %al
jz L(ExitHigh)
test $0x01, %al
jnz L(Exit4)
/* Terminator is the second wide character: copy 8 bytes. */
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN
/* Unwind state for the code below: %esi is still on the stack here. */
CFI_PUSH (%esi)
.p2align 4
/* src is 12 bytes past a 16-byte boundary while dst is aligned. All loads
   are aligned (at -12, 4, 20, ... from %ecx) and `palignr $12` stitches two
   neighbouring blocks into the 16 bytes that start at %ecx. */
L(Shl12):
movaps -12(%ecx), %xmm1
movaps 4(%ecx), %xmm2
/* Also the re-entry point from the 64-byte loop below: checks one block at a
   time so the exact block holding the terminator is found. */
L(Shl12Start):
pcmpeqd %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm1
test %eax, %eax
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
test %eax, %eax
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
pcmpeqd %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
test %eax, %eax
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
movaps %xmm2, (%edx)
/* Re-base: round the aligned load address down to a 64-byte boundary, pull
   dst back by the same distance, and restore the 4-byte bias on %ecx. */
lea 20(%ecx), %ecx
lea 16(%edx), %edx
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -4(%ecx), %ecx
sub %eax, %edx
movaps -12(%ecx), %xmm1
/* 64 bytes per iteration: fold the four loaded blocks with a byte-wise
   minimum and test once for a zero dword. On a hit, go back to
   L(Shl12Start) to process block by block. */
L(Shl12LoopStart):
movaps 4(%ecx), %xmm2
movaps 20(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 36(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 52(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqd %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $12, %xmm4, %xmm5
palignr $12, %xmm3, %xmm4
test %eax, %eax
jnz L(Shl12Start)
palignr $12, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $12, %xmm1, %xmm2
/* The last loaded block becomes the "previous" block of the next round. */
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl12LoopStart)
/* A terminator lies in the aligned block at 4(%ecx). Copy the 4 bytes in
   front of that block and fall through into L(CopyFrom1To16Bytes) with an
   offset of 4 in %esi. */
L(Shl12LoopExit):
movl (%ecx), %esi
movl %esi, (%edx)
mov $4, %esi
.p2align 4
/* Final copy of 1..4 wide characters.
   In: %esi = byte offset added to both %edx (dst) and %ecx (src);
       %eax = pmovmskb result of a dword compare, i.e. four mask bits per
       wide character. The saved %esi is popped here. */
L(CopyFrom1To16Bytes):
add %esi, %edx
add %esi, %ecx
POP (%esi)
/* Low mask byte clear: the terminator is the 3rd or 4th wide character. */
test %al, %al
jz L(ExitHigh)
test $0x01, %al
jnz L(Exit4)
/* Terminator is the 2nd wide character: copy 8 bytes. */
L(Exit8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN
.p2align 4
L(ExitHigh):
test $0x01, %ah
jnz L(Exit12)
/* Terminator is the 4th wide character: copy 16 bytes. */
L(Exit16):
movdqu (%ecx), %xmm0
movdqu %xmm0, (%edx)
movl %edi, %eax
RETURN
.p2align 4
/* Terminator is the 1st wide character: copy 4 bytes. */
L(Exit4):
movl (%ecx), %eax
movl %eax, (%edx)
movl %edi, %eax
RETURN
.p2align 4
/* Terminator is the 3rd wide character: copy 12 bytes. */
L(Exit12):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
movl %eax, 8(%edx)
movl %edi, %eax
RETURN
/* The ExitTail* stubs are reached from the wcscpy entry sequence before
   %edi has been pushed: they return %edx (dst) with a plain ret. */
CFI_POP (%edi)
.p2align 4
L(ExitTail4):
movl (%ecx), %eax
movl %eax, (%edx)
movl %edx, %eax
ret
.p2align 4
L(ExitTail8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edx, %eax
ret
.p2align 4
L(ExitTail12):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
movl %eax, 8(%edx)
movl %edx, %eax
ret
.p2align 4
L(ExitTail16):
movdqu (%ecx), %xmm0
movdqu %xmm0, (%edx)
movl %edx, %eax
ret
/* When included with USE_AS_WCSCAT the includer emits its own END. */
#ifndef USE_AS_WCSCAT
END (wcscpy)
#endif

View file

@ -0,0 +1,33 @@
/*
Copyright (c) 2011, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Build wmemcmp from the shared memcmp source.
   NOTE(review): MEMCMP presumably names the exported entry point and
   USE_AS_WMEMCMP presumably switches the comparison to wide characters --
   confirm in ssse3-memcmp-atom.S. */
#define MEMCMP wmemcmp
#define USE_AS_WMEMCMP 1
#include "ssse3-memcmp-atom.S"

View file

@ -10,16 +10,75 @@ _LIBC_ARCH_COMMON_SRC_FILES := \
arch-x86/bionic/sigsetjmp.S \ arch-x86/bionic/sigsetjmp.S \
arch-x86/bionic/syscall.S \ arch-x86/bionic/syscall.S \
arch-x86/bionic/vfork.S \ arch-x86/bionic/vfork.S \
arch-x86/string/bcopy_wrapper.S \ arch-x86/string/ffs.S
arch-x86/string/bzero_wrapper.S \
arch-x86/string/ffs.S \ ifeq ($(ARCH_X86_HAVE_SSSE3),true)
arch-x86/string/memcmp_wrapper.S \ _LIBC_ARCH_COMMON_SRC_FILES += \
arch-x86/string/memcpy_wrapper.S \ arch-x86/string/ssse3-memcpy-atom.S \
arch-x86/string/memmove_wrapper.S \ arch-x86/string/ssse3-memmove-atom.S \
arch-x86/string/memset_wrapper.S \ arch-x86/string/ssse3-bcopy-atom.S \
arch-x86/string/strcmp_wrapper.S \ arch-x86/string/ssse3-strncat-atom.S \
arch-x86/string/strlen_wrapper.S \ arch-x86/string/ssse3-strncpy-atom.S \
arch-x86/string/strncmp_wrapper.S \ arch-x86/string/ssse3-strlcat-atom.S \
arch-x86/string/ssse3-strlcpy-atom.S \
arch-x86/string/ssse3-strcmp-atom.S \
arch-x86/string/ssse3-strncmp-atom.S \
arch-x86/string/ssse3-strcat-atom.S \
arch-x86/string/ssse3-strcpy-atom.S \
arch-x86/string/ssse3-memcmp-atom.S \
arch-x86/string/ssse3-wmemcmp-atom.S \
arch-x86/string/ssse3-wcscat-atom.S \
arch-x86/string/ssse3-wcscpy-atom.S
else
_LIBC_ARCH_COMMON_SRC_FILES += \
arch-x86/string/memcpy.S \
arch-x86/string/memmove.S \
arch-x86/string/bcopy.S \
arch-x86/string/strcmp.S \
arch-x86/string/strncmp.S \
arch-x86/string/strcat.S \
arch-x86/string/memcmp.S \
string/strcpy.c \
string/strncat.c \
string/strncpy.c \
string/strlcat.c \
string/strlcpy.c \
upstream-freebsd/lib/libc/string/wcscpy.c \
upstream-freebsd/lib/libc/string/wcscat.c \
upstream-freebsd/lib/libc/string/wmemcmp.c
endif
ifeq ($(ARCH_X86_HAVE_SSE2),true)
_LIBC_ARCH_COMMON_SRC_FILES += \
arch-x86/string/sse2-memset-atom.S \
arch-x86/string/sse2-bzero-atom.S \
arch-x86/string/sse2-memchr-atom.S \
arch-x86/string/sse2-memrchr-atom.S \
arch-x86/string/sse2-strchr-atom.S \
arch-x86/string/sse2-strrchr-atom.S \
arch-x86/string/sse2-index-atom.S \
arch-x86/string/sse2-strlen-atom.S \
arch-x86/string/sse2-strnlen-atom.S \
arch-x86/string/sse2-wcschr-atom.S \
arch-x86/string/sse2-wcsrchr-atom.S \
arch-x86/string/sse2-wcslen-atom.S \
arch-x86/string/sse2-wcscmp-atom.S
else
_LIBC_ARCH_COMMON_SRC_FILES += \
arch-x86/string/memset.S \
arch-x86/string/strlen.S \
arch-x86/string/bzero.S \
bionic/memrchr.c \
bionic/memchr.c \
string/strchr.cpp \
string/strrchr.c \
string/index.c \
bionic/strnlen.c \
upstream-freebsd/lib/libc/string/wcschr.c \
upstream-freebsd/lib/libc/string/wcsrchr.c \
upstream-freebsd/lib/libc/string/wcslen.c \
upstream-freebsd/lib/libc/string/wcscmp.c
endif
_LIBC_ARCH_STATIC_SRC_FILES := \ _LIBC_ARCH_STATIC_SRC_FILES := \
bionic/dl_iterate_phdr_static.c \ bionic/dl_iterate_phdr_static.c \

View file

@ -0,0 +1,46 @@
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <string.h>
#include "libc_logging.h"
// Bounds-checked strchr: scans from p for the first byte equal to
// static_cast<char>(ch), with s_len as the number of bytes that may be read.
// Before each byte is examined, a remaining budget of zero is reported via
// __fortify_chk_fail. Returns a pointer to the match (the terminating NUL
// itself matches when ch is 0), or NULL if the NUL is reached first.
extern "C" char* __strchr_chk(const char* p, int ch, size_t s_len) {
  const char wanted = static_cast<char>(ch);
  const char* cursor = p;
  size_t budget = s_len;

  while (true) {
    if (__predict_false(budget == 0)) {
      __fortify_chk_fail("read beyond buffer", 0);
    }

    const char current = *cursor;
    // The match test comes first so that searching for '\0' finds the
    // terminator instead of reporting "not found".
    if (current == wanted) {
      return const_cast<char*>(cursor);
    }
    if (current == '\0') {
      return NULL;
    }

    ++cursor;
    --budget;
  }
}

View file

@ -28,22 +28,6 @@
*/ */
#include <string.h> #include <string.h>
#include "libc_logging.h"
extern "C" char* __strchr_chk(const char* p, int ch, size_t s_len) {
for (;; ++p, s_len--) {
if (__predict_false(s_len == 0)) {
__fortify_chk_fail("read beyond buffer", 0);
}
if (*p == static_cast<char>(ch)) {
return const_cast<char*>(p);
}
if (*p == '\0') {
return NULL;
}
}
/* NOTREACHED */
}
extern "C" char* strchr(const char* p, int ch) { extern "C" char* strchr(const char* p, int ch) {
return __strchr_chk(p, ch, __BIONIC_FORTIFY_UNKNOWN_SIZE); return __strchr_chk(p, ch, __BIONIC_FORTIFY_UNKNOWN_SIZE);

View file

@ -0,0 +1,48 @@
/* $OpenBSD: rindex.c,v 1.6 2005/08/08 08:05:37 espie Exp $ */
/*
* Copyright (c) 1988 Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <string.h>
#include "libc_logging.h"
/*
 * Bounds-checked strrchr().
 *
 * Walks the string at p up to and including its terminating '\0',
 * remembering the most recent byte equal to ch converted to char, and
 * returns that position (NULL if no byte matched). The terminator is
 * compared too, so searching for '\0' returns a pointer to the terminator.
 *
 * s_len is the number of bytes the caller allows to be examined; it is
 * tested before every read. If it reaches zero before the terminator is
 * seen, __fortify_chk_fail("strrchr read beyond buffer", 0) is called.
 * NOTE(review): that helper presumably never returns -- confirm against
 * libc_logging.h.
 */
char *
__strrchr_chk(const char *p, int ch, size_t s_len)
{
	const char wanted = (char) ch;
	char *last_match = NULL;

	while (1) {
		/* Check the budget first so the read below stays in bounds. */
		if (s_len == 0) {
			__fortify_chk_fail("strrchr read beyond buffer", 0);
		}

		if (*p == wanted) {
			last_match = (char *)p;
		}
		if (*p == '\0') {
			return last_match;
		}

		p++;
		s_len--;
	}
	/* NOTREACHED */
}

View file

@ -29,23 +29,6 @@
*/ */
#include <string.h> #include <string.h>
#include "libc_logging.h"
/*
 * Bounds-checked strrchr().
 *
 * Walks the string at p up to and including its terminating '\0',
 * remembering the most recent byte equal to ch converted to char, and
 * returns that position (NULL if no byte matched). The terminator is
 * compared too, so searching for '\0' returns a pointer to the terminator.
 *
 * s_len is the number of bytes the caller allows to be examined; it is
 * tested before every read. If it reaches zero before the terminator is
 * seen, __fortify_chk_fail("strrchr read beyond buffer", 0) is called.
 * NOTE(review): that helper presumably never returns -- confirm against
 * libc_logging.h.
 */
char *
__strrchr_chk(const char *p, int ch, size_t s_len)
{
	const char wanted = (char) ch;
	char *last_match = NULL;

	while (1) {
		/* Check the budget first so the read below stays in bounds. */
		if (s_len == 0) {
			__fortify_chk_fail("strrchr read beyond buffer", 0);
		}

		if (*p == wanted) {
			last_match = (char *)p;
		}
		if (*p == '\0') {
			return last_match;
		}

		p++;
		s_len--;
	}
	/* NOTREACHED */
}
char * char *
strrchr(const char *p, int ch) strrchr(const char *p, int ch)