am 2fbc9dda: Merge "bionic/x86: Optimization for string routines"
* commit '2fbc9dda345486005569be4ec586c6aae9d51650': bionic/x86: Optimization for string routines
This commit is contained in:
commit
6bcf71c9ac
39 changed files with 13747 additions and 631 deletions
|
@ -63,17 +63,11 @@ libc_common_src_files := \
|
|||
stdlib/strtoumax.c \
|
||||
stdlib/tolower_.c \
|
||||
stdlib/toupper_.c \
|
||||
string/index.c \
|
||||
string/strcasecmp.c \
|
||||
string/strcat.c \
|
||||
string/strcspn.c \
|
||||
string/strdup.c \
|
||||
string/strlcat.c \
|
||||
string/strlcpy.c \
|
||||
string/strncat.c \
|
||||
string/strncpy.c \
|
||||
string/strpbrk.c \
|
||||
string/strrchr.c \
|
||||
string/__strrchr_chk.c \
|
||||
string/strsep.c \
|
||||
string/strspn.c \
|
||||
string/strstr.c \
|
||||
|
@ -119,9 +113,7 @@ libc_common_src_files := \
|
|||
bionic/ldexp.c \
|
||||
bionic/lseek64.c \
|
||||
bionic/md5.c \
|
||||
bionic/memchr.c \
|
||||
bionic/memmem.c \
|
||||
bionic/memrchr.c \
|
||||
bionic/memswap.c \
|
||||
bionic/mmap.c \
|
||||
bionic/openat.c \
|
||||
|
@ -157,7 +149,6 @@ libc_common_src_files := \
|
|||
bionic/sleep.c \
|
||||
bionic/statfs.c \
|
||||
bionic/strndup.c \
|
||||
bionic/strnlen.c \
|
||||
bionic/strntoimax.c \
|
||||
bionic/strntoumax.c \
|
||||
bionic/strtotimeval.c \
|
||||
|
@ -237,7 +228,7 @@ libc_bionic_src_files := \
|
|||
bionic/signalfd.cpp \
|
||||
bionic/sigwait.cpp \
|
||||
bionic/__strcat_chk.cpp \
|
||||
bionic/strchr.cpp \
|
||||
bionic/__strchr_chk.cpp \
|
||||
bionic/__strcpy_chk.cpp \
|
||||
bionic/strerror.cpp \
|
||||
bionic/strerror_r.cpp \
|
||||
|
@ -293,27 +284,20 @@ libc_upstream_freebsd_src_files := \
|
|||
upstream-freebsd/lib/libc/string/wcpcpy.c \
|
||||
upstream-freebsd/lib/libc/string/wcpncpy.c \
|
||||
upstream-freebsd/lib/libc/string/wcscasecmp.c \
|
||||
upstream-freebsd/lib/libc/string/wcscat.c \
|
||||
upstream-freebsd/lib/libc/string/wcschr.c \
|
||||
upstream-freebsd/lib/libc/string/wcscmp.c \
|
||||
upstream-freebsd/lib/libc/string/wcscpy.c \
|
||||
upstream-freebsd/lib/libc/string/wcscspn.c \
|
||||
upstream-freebsd/lib/libc/string/wcsdup.c \
|
||||
upstream-freebsd/lib/libc/string/wcslcat.c \
|
||||
upstream-freebsd/lib/libc/string/wcslcpy.c \
|
||||
upstream-freebsd/lib/libc/string/wcslen.c \
|
||||
upstream-freebsd/lib/libc/string/wcsncasecmp.c \
|
||||
upstream-freebsd/lib/libc/string/wcsncat.c \
|
||||
upstream-freebsd/lib/libc/string/wcsncmp.c \
|
||||
upstream-freebsd/lib/libc/string/wcsncpy.c \
|
||||
upstream-freebsd/lib/libc/string/wcsnlen.c \
|
||||
upstream-freebsd/lib/libc/string/wcspbrk.c \
|
||||
upstream-freebsd/lib/libc/string/wcsrchr.c \
|
||||
upstream-freebsd/lib/libc/string/wcsspn.c \
|
||||
upstream-freebsd/lib/libc/string/wcsstr.c \
|
||||
upstream-freebsd/lib/libc/string/wcstok.c \
|
||||
upstream-freebsd/lib/libc/string/wmemchr.c \
|
||||
upstream-freebsd/lib/libc/string/wmemcmp.c \
|
||||
upstream-freebsd/lib/libc/string/wmemcpy.c \
|
||||
upstream-freebsd/lib/libc/string/wmemmove.c \
|
||||
upstream-freebsd/lib/libc/string/wmemset.c \
|
||||
|
@ -369,6 +353,24 @@ libc_common_src_files += \
|
|||
bionic/memmove.c.arm \
|
||||
string/bcopy.c \
|
||||
string/strncmp.c \
|
||||
string/strcat.c \
|
||||
string/strncat.c \
|
||||
string/strncpy.c \
|
||||
bionic/strchr.cpp \
|
||||
string/strrchr.c \
|
||||
bionic/memchr.c \
|
||||
bionic/memrchr.c \
|
||||
string/index.c \
|
||||
bionic/strnlen.c \
|
||||
string/strlcat.c \
|
||||
string/strlcpy.c \
|
||||
upstream-freebsd/lib/libc/string/wcschr.c \
|
||||
upstream-freebsd/lib/libc/string/wcsrchr.c \
|
||||
upstream-freebsd/lib/libc/string/wcscmp.c \
|
||||
upstream-freebsd/lib/libc/string/wcscpy.c \
|
||||
upstream-freebsd/lib/libc/string/wmemcmp.c \
|
||||
upstream-freebsd/lib/libc/string/wcslen.c \
|
||||
upstream-freebsd/lib/libc/string/wcscat.c
|
||||
|
||||
# These files need to be arm so that gdbserver
|
||||
# can set breakpoints in them without messing
|
||||
|
@ -392,7 +394,6 @@ libc_common_src_files += \
|
|||
bionic/pthread-rwlocks.c \
|
||||
bionic/pthread-timers.c \
|
||||
bionic/ptrace.c \
|
||||
string/strcpy.c \
|
||||
|
||||
libc_static_common_src_files += \
|
||||
bionic/pthread.c \
|
||||
|
@ -407,7 +408,25 @@ libc_common_src_files += \
|
|||
string/bcopy.c \
|
||||
string/strcmp.c \
|
||||
string/strcpy.c \
|
||||
string/strncmp.c
|
||||
string/strncmp.c \
|
||||
string/strcat.c \
|
||||
string/strncat.c \
|
||||
string/strncpy.c \
|
||||
bionic/strchr.cpp \
|
||||
string/strrchr.c \
|
||||
bionic/memchr.c \
|
||||
bionic/memrchr.c \
|
||||
string/index.c \
|
||||
bionic/strnlen.c \
|
||||
string/strlcat.c \
|
||||
string/strlcpy.c \
|
||||
upstream-freebsd/lib/libc/string/wcschr.c \
|
||||
upstream-freebsd/lib/libc/string/wcsrchr.c \
|
||||
upstream-freebsd/lib/libc/string/wcscmp.c \
|
||||
upstream-freebsd/lib/libc/string/wcscpy.c \
|
||||
upstream-freebsd/lib/libc/string/wmemcmp.c \
|
||||
upstream-freebsd/lib/libc/string/wcslen.c \
|
||||
upstream-freebsd/lib/libc/string/wcscat.c
|
||||
|
||||
libc_common_src_files += \
|
||||
bionic/pthread-atfork.c \
|
||||
|
|
|
@ -1,43 +0,0 @@
|
|||
/*
|
||||
Copyright (c) 2010, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#if defined(USE_SSE2)
|
||||
|
||||
# include "cache_wrapper.S"
|
||||
# undef __i686
|
||||
# define USE_AS_BZERO
|
||||
# define sse2_memset5_atom bzero
|
||||
# include "sse2-memset5-atom.S"
|
||||
|
||||
#else
|
||||
|
||||
# include "bzero.S"
|
||||
|
||||
#endif
|
|
@ -1,43 +0,0 @@
|
|||
/*
|
||||
Copyright (c) 2010, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#if defined(USE_SSSE3)
|
||||
|
||||
# include "cache_wrapper.S"
|
||||
# undef __i686
|
||||
# define MEMCPY memcpy
|
||||
# define USE_AS_MEMMOVE
|
||||
# include "ssse3-memcpy5.S"
|
||||
|
||||
#else
|
||||
|
||||
# include "memcpy.S"
|
||||
|
||||
#endif
|
|
@ -1,43 +0,0 @@
|
|||
/*
|
||||
Copyright (c) 2010, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#if defined(USE_SSSE3)
|
||||
|
||||
# include "cache_wrapper.S"
|
||||
# undef __i686
|
||||
# define MEMCPY memmove
|
||||
# define USE_AS_MEMMOVE
|
||||
# include "ssse3-memcpy5.S"
|
||||
|
||||
#else
|
||||
|
||||
# include "memmove.S"
|
||||
|
||||
#endif
|
|
@ -1,42 +0,0 @@
|
|||
/*
|
||||
Copyright (c) 2010, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#if defined(USE_SSE2)
|
||||
|
||||
# include "cache_wrapper.S"
|
||||
# undef __i686
|
||||
# define sse2_memset5_atom memset
|
||||
# include "sse2-memset5-atom.S"
|
||||
|
||||
#else
|
||||
|
||||
# include "memset.S"
|
||||
|
||||
#endif
|
|
@ -28,15 +28,6 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#if defined(USE_SSSE3)
|
||||
|
||||
# define USE_AS_STRNCMP
|
||||
# define ssse3_strcmp_latest strncmp
|
||||
# include "ssse3-strcmp-latest.S"
|
||||
|
||||
#else
|
||||
|
||||
# include "strncmp.S"
|
||||
|
||||
#endif
|
||||
|
||||
/* bzero wrapper: sets two configuration macros and then pulls in the shared
   implementation from sse2-memset-atom.S.
   NOTE(review): presumably USE_AS_BZERO selects the two-argument bzero
   calling layout and MEMSET is the symbol name the included file emits --
   confirm against sse2-memset-atom.S, which is not visible here. */
#define USE_AS_BZERO
|
||||
#define MEMSET bzero
|
||||
#include "sse2-memset-atom.S"
|
32
libc/arch-x86/string/sse2-index-atom.S
Normal file
32
libc/arch-x86/string/sse2-index-atom.S
Normal file
|
@ -0,0 +1,32 @@
|
|||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* index wrapper: the preprocessor rewrites every `strchr` token to `index`
   before the shared implementation is included.
   NOTE(review): presumably sse2-strchr-atom.S declares its entry point as
   strchr, so this emits the same code under the symbol name index --
   confirm against that file, which is not visible here. */
#define strchr index
|
||||
#include "sse2-strchr-atom.S"
|
556
libc/arch-x86/string/sse2-memchr-atom.S
Normal file
556
libc/arch-x86/string/sse2-memchr-atom.S
Normal file
|
@ -0,0 +1,556 @@
|
|||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Helper macros for the memchr implementation below.  Each generic helper
   is wrapped in #ifndef so a definition supplied earlier takes precedence. */

/* L(label): prefixes a label name with ".L". */
#ifndef L
|
||||
# define L(label) .L##label
|
||||
#endif
|
||||
|
||||
/* The cfi_* macros below forward to the assembler's .cfi_* directives. */
#ifndef cfi_startproc
|
||||
# define cfi_startproc .cfi_startproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_endproc
|
||||
# define cfi_endproc .cfi_endproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_rel_offset
|
||||
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
|
||||
#endif
|
||||
|
||||
#ifndef cfi_restore
|
||||
# define cfi_restore(reg) .cfi_restore reg
|
||||
#endif
|
||||
|
||||
#ifndef cfi_adjust_cfa_offset
|
||||
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
|
||||
#endif
|
||||
|
||||
/* ENTRY(name): declares `name` as a global function symbol, aligns it to
   16 bytes (.p2align 4), places the label and opens the CFI region. */
#ifndef ENTRY
|
||||
# define ENTRY(name) \
|
||||
.type name, @function; \
|
||||
.globl name; \
|
||||
.p2align 4; \
|
||||
name: \
|
||||
cfi_startproc
|
||||
#endif
|
||||
|
||||
/* END(name): closes the CFI region and records the symbol size. */
#ifndef END
|
||||
# define END(name) \
|
||||
cfi_endproc; \
|
||||
.size name, .-name
|
||||
#endif
|
||||
|
||||
/* CFI_PUSH(REG): unwind bookkeeping for a 4-byte push of REG -- the CFA
   offset grows by 4 and REG is recorded as saved at offset 0. */
#define CFI_PUSH(REG) \
|
||||
cfi_adjust_cfa_offset (4); \
|
||||
cfi_rel_offset (REG, 0)
|
||||
|
||||
/* CFI_POP(REG): inverse of CFI_PUSH -- the CFA offset shrinks by 4 and REG
   is marked as restored. */
#define CFI_POP(REG) \
|
||||
cfi_adjust_cfa_offset (-4); \
|
||||
cfi_restore (REG)
|
||||
|
||||
/* PUSH/POP: the actual pushl/popl paired with the matching CFI update. */
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
||||
#define POP(REG) popl REG; CFI_POP (REG)
|
||||
|
||||
/* ENTRANCE saves %edi, the only register this file pushes.  PARMS is the
   %esp-relative offset of the first argument once ENTRANCE has run
   (presumably 4 bytes of return address plus the 4-byte saved %edi).
   RETURN restores %edi and returns; the trailing CFI_PUSH re-establishes the
   "%edi is pushed" unwind state for the code that follows the ret. */
#define ENTRANCE PUSH (%edi);
|
||||
#define PARMS 8
|
||||
#define RETURN POP (%edi); ret; CFI_PUSH (%edi);
|
||||
|
||||
/* %esp-relative offsets of the three arguments, 4 bytes apart: the entry
   code loads STR1 into %ecx, STR2 into %xmm1 and LEN into %edx. */
#define STR1 PARMS
|
||||
#define STR2 STR1+4
|
||||
#define LEN STR2+4
|
||||
|
||||
.text
|
||||
/* memchr entry.  Loads the buffer pointer into %ecx (copied to %edi), the
   byte to search for into %xmm1 and the length into %edx.  The result is
   returned in %eax: the address of the first matching byte, or 0 when no
   match lies within the length. */
ENTRY (memchr)
|
||||
ENTRANCE
|
||||
mov STR1(%esp), %ecx
|
||||
movd STR2(%esp), %xmm1
|
||||
mov LEN(%esp), %edx
|
||||
/* Zero length: nothing to search. */
test %edx, %edx
|
||||
jz L(return_null)
|
||||
|
||||
/* The two punpcklbw plus the pshufd below replicate the low byte of %xmm1
   into all 16 byte lanes. */
punpcklbw %xmm1, %xmm1
|
||||
mov %ecx, %edi
|
||||
punpcklbw %xmm1, %xmm1
|
||||
|
||||
/* %ecx = pointer offset within its 64-byte block.  If it is above 48 an
   unaligned 16-byte load would run past the block, so take the aligned
   path at L(crosscache) instead. */
and $63, %ecx
|
||||
pshufd $0, %xmm1, %xmm1
|
||||
cmp $48, %ecx
|
||||
ja L(crosscache)
|
||||
|
||||
/* Compare the first 16 bytes with one unaligned load; %eax gets one mask
   bit per byte.  A hit is bounds-checked against %edx in match_case2. */
movdqu (%edi), %xmm0
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pmovmskb %xmm0, %eax
|
||||
test %eax, %eax
|
||||
jnz L(match_case2_prolog)
|
||||
|
||||
/* No hit in the first 16 bytes: if the length was <= 16 we are done. */
sub $16, %edx
|
||||
jbe L(return_null)
|
||||
/* Move %edi to the next 16-byte boundary and add the misalignment back to
   %edx so that %edx counts the bytes remaining from the new %edi. */
lea 16(%edi), %edi
|
||||
and $15, %ecx
|
||||
and $-16, %edi
|
||||
add %ecx, %edx
|
||||
/* %edx is kept biased by -64 while scanning 64-byte groups; L(exit_loop)
   adds the 64 back when 64 or fewer bytes remain. */
sub $64, %edx
|
||||
jbe L(exit_loop)
|
||||
jmp L(loop_prolog)
|
||||
|
||||
.p2align 4
|
||||
/* Start of buffer lies in the last 15 bytes of a 64-byte block.  Round %edi
   down to a 16-byte boundary, compare that aligned chunk and shift the match
   mask right by the misalignment (%cl) so bit 0 corresponds to the first
   byte of the caller's buffer. */
L(crosscache):
|
||||
and $15, %ecx
|
||||
and $-16, %edi
|
||||
movdqa (%edi), %xmm0
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pmovmskb %xmm0, %eax
|
||||
sar %cl, %eax
|
||||
test %eax, %eax
|
||||
|
||||
/* A hit is bounds-checked against the original length in match_case2. */
jnz L(match_case2_prolog1)
|
||||
/* %ecx is in 1..15 here.  Compute the bytes remaining after this chunk as
   "edx - (16 - ecx)" with an unsigned test.  The previous form,
   "(edx - 16) + ecx" followed by a signed jle, treated any length of
   0x80000000 or more (e.g. memchr (p, c, SIZE_MAX)) as negative and
   returned NULL without searching.  %ecx is not needed afterwards: both
   L(loop_prolog) and L(exit_loop) zero it before use. */
neg %ecx
|
||||
add $16, %ecx
|
||||
|
||||
sub %ecx, %edx
|
||||
jbe L(return_null)
|
||||
lea 16(%edi), %edi
|
||||
/* Keep %edx biased by -64 for the 64-byte scanning code that follows. */
sub $64, %edx
|
||||
jbe L(exit_loop)
|
||||
|
||||
.p2align 4
|
||||
/* Scans two groups of 64 bytes, 16 bytes at a time, from the 16-byte aligned
   %edi.  More than 64 bytes remain on entry to each group, so hits go to
   match_case1, which does no length check.  %ecx tracks the offset of the
   16-byte chunk being tested (0, 16, 32, 48) for match_case1 to add to %edi. */
L(loop_prolog):
|
||||
movdqa (%edi), %xmm0
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
xor %ecx, %ecx
|
||||
pmovmskb %xmm0, %eax
|
||||
test %eax, %eax
|
||||
jnz L(match_case1)
|
||||
|
||||
movdqa 16(%edi), %xmm2
|
||||
pcmpeqb %xmm1, %xmm2
|
||||
lea 16(%ecx), %ecx
|
||||
pmovmskb %xmm2, %eax
|
||||
test %eax, %eax
|
||||
jnz L(match_case1)
|
||||
|
||||
movdqa 32(%edi), %xmm3
|
||||
pcmpeqb %xmm1, %xmm3
|
||||
lea 16(%ecx), %ecx
|
||||
pmovmskb %xmm3, %eax
|
||||
test %eax, %eax
|
||||
jnz L(match_case1)
|
||||
|
||||
movdqa 48(%edi), %xmm4
|
||||
pcmpeqb %xmm1, %xmm4
|
||||
lea 16(%ecx), %ecx
|
||||
pmovmskb %xmm4, %eax
|
||||
test %eax, %eax
|
||||
jnz L(match_case1)
|
||||
|
||||
/* First group had no hit: advance, and leave for the bounded tail code if
   64 or fewer bytes remain. */
lea 64(%edi), %edi
|
||||
sub $64, %edx
|
||||
jbe L(exit_loop)
|
||||
|
||||
/* Second group of 64 bytes, same pattern as the first. */
movdqa (%edi), %xmm0
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
xor %ecx, %ecx
|
||||
pmovmskb %xmm0, %eax
|
||||
test %eax, %eax
|
||||
jnz L(match_case1)
|
||||
|
||||
movdqa 16(%edi), %xmm2
|
||||
pcmpeqb %xmm1, %xmm2
|
||||
lea 16(%ecx), %ecx
|
||||
pmovmskb %xmm2, %eax
|
||||
test %eax, %eax
|
||||
jnz L(match_case1)
|
||||
|
||||
movdqa 32(%edi), %xmm3
|
||||
pcmpeqb %xmm1, %xmm3
|
||||
lea 16(%ecx), %ecx
|
||||
pmovmskb %xmm3, %eax
|
||||
test %eax, %eax
|
||||
jnz L(match_case1)
|
||||
|
||||
movdqa 48(%edi), %xmm4
|
||||
pcmpeqb %xmm1, %xmm4
|
||||
lea 16(%ecx), %ecx
|
||||
pmovmskb %xmm4, %eax
|
||||
test %eax, %eax
|
||||
jnz L(match_case1)
|
||||
|
||||
/* Advance past the second group, then round %edi down to a 64-byte boundary
   for the main loop.  The bytes stepped back over (%ecx = %edi & 63) are
   added to %edx so it still counts from the new %edi. */
lea 64(%edi), %edi
|
||||
mov %edi, %ecx
|
||||
and $-64, %edi
|
||||
and $63, %ecx
|
||||
add %ecx, %edx
|
||||
|
||||
.p2align 4
|
||||
/* Main loop: tests 64 bytes per iteration from the 64-byte aligned %edi.
   Leaves for L(exit_loop) as soon as 64 or fewer bytes remain. */
L(align64_loop):
|
||||
sub $64, %edx
|
||||
jbe L(exit_loop)
|
||||
movdqa (%edi), %xmm0
|
||||
movdqa 16(%edi), %xmm2
|
||||
movdqa 32(%edi), %xmm3
|
||||
movdqa 48(%edi), %xmm4
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm1, %xmm2
|
||||
pcmpeqb %xmm1, %xmm3
|
||||
pcmpeqb %xmm1, %xmm4
|
||||
|
||||
/* Each compare result byte is 0x00 or 0xFF, so the unsigned byte maximum
   merges the four results; %xmm4 is non-zero iff any chunk had a hit.
   This overwrites %xmm3 and %xmm4 but leaves %xmm0 and %xmm2 intact. */
pmaxub %xmm0, %xmm3
|
||||
pmaxub %xmm2, %xmm4
|
||||
pmaxub %xmm3, %xmm4
|
||||
add $64, %edi
|
||||
pmovmskb %xmm4, %eax
|
||||
|
||||
test %eax, %eax
|
||||
jz L(align64_loop)
|
||||
|
||||
/* Hit somewhere in the 64 bytes just tested: step %edi back to them and
   find the first chunk containing it.  %ecx is the chunk offset that
   match_case1 adds to %edi. */
sub $64, %edi
|
||||
|
||||
pmovmskb %xmm0, %eax
|
||||
xor %ecx, %ecx
|
||||
test %eax, %eax
|
||||
jnz L(match_case1)
|
||||
|
||||
pmovmskb %xmm2, %eax
|
||||
lea 16(%ecx), %ecx
|
||||
test %eax, %eax
|
||||
jnz L(match_case1)
|
||||
|
||||
/* %xmm3 was overwritten by pmaxub above, so the third chunk is reloaded
   and compared again. */
movdqa 32(%edi), %xmm3
|
||||
pcmpeqb %xmm1, %xmm3
|
||||
pmovmskb %xmm3, %eax
|
||||
lea 16(%ecx), %ecx
|
||||
test %eax, %eax
|
||||
jnz L(match_case1)
|
||||
|
||||
/* Only the fourth chunk is left, so the hit must be there.  The pattern
   register %xmm1 is reused as the destination because it is no longer
   needed; execution falls through into match_case1. */
pcmpeqb 48(%edi), %xmm1
|
||||
pmovmskb %xmm1, %eax
|
||||
lea 16(%ecx), %ecx
|
||||
|
||||
.p2align 4
|
||||
/* Hit in a 16-byte chunk that lies wholly inside the buffer (no length
   check).  On entry %edi is the group base, %ecx the chunk offset and %eax
   the 16-bit match mask.  Finds the lowest set mask bit and returns
   %edi + its index.  %al covers bytes 0-7, %ah bytes 8-15; each is split
   into its low and high nibble.  In every nibble the fourth bit is not
   tested: if the other three are clear it must be the one set. */
L(match_case1):
|
||||
add %ecx, %edi
|
||||
test %al, %al
|
||||
jz L(match_case1_high)
|
||||
mov %al, %cl
|
||||
and $15, %cl
|
||||
jz L(match_case1_8)
|
||||
test $0x01, %al
|
||||
jnz L(exit_case1_1)
|
||||
test $0x02, %al
|
||||
jnz L(exit_case1_2)
|
||||
test $0x04, %al
|
||||
jnz L(exit_case1_3)
|
||||
lea 3(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
/* Low nibble of %al is clear: first hit is in bytes 4-7. */
L(match_case1_8):
|
||||
test $0x10, %al
|
||||
jnz L(exit_case1_5)
|
||||
test $0x20, %al
|
||||
jnz L(exit_case1_6)
|
||||
test $0x40, %al
|
||||
jnz L(exit_case1_7)
|
||||
lea 7(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
/* %al is clear: first hit is in bytes 8-15, described by %ah. */
L(match_case1_high):
|
||||
mov %ah, %ch
|
||||
and $15, %ch
|
||||
jz L(match_case1_high_8)
|
||||
test $0x01, %ah
|
||||
jnz L(exit_case1_9)
|
||||
test $0x02, %ah
|
||||
jnz L(exit_case1_10)
|
||||
test $0x04, %ah
|
||||
jnz L(exit_case1_11)
|
||||
lea 11(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
/* Low nibble of %ah is clear: first hit is in bytes 12-15. */
L(match_case1_high_8):
|
||||
test $0x10, %ah
|
||||
jnz L(exit_case1_13)
|
||||
test $0x20, %ah
|
||||
jnz L(exit_case1_14)
|
||||
test $0x40, %ah
|
||||
jnz L(exit_case1_15)
|
||||
lea 15(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
/* Tail: at most 64 bytes remain from the 16-byte aligned %edi.  Undo the -64
   bias so %edx is the true remaining count, then test up to four 16-byte
   chunks.  A hit goes to match_case2, which checks it against the length;
   after each chunk without a hit, stop if the buffer ends inside it.  %ecx
   is the offset of the chunk being tested. */
L(exit_loop):
|
||||
add $64, %edx
|
||||
|
||||
movdqa (%edi), %xmm0
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
xor %ecx, %ecx
|
||||
pmovmskb %xmm0, %eax
|
||||
test %eax, %eax
|
||||
jnz L(match_case2)
|
||||
cmp $16, %edx
|
||||
jbe L(return_null)
|
||||
|
||||
movdqa 16(%edi), %xmm2
|
||||
pcmpeqb %xmm1, %xmm2
|
||||
lea 16(%ecx), %ecx
|
||||
pmovmskb %xmm2, %eax
|
||||
test %eax, %eax
|
||||
jnz L(match_case2)
|
||||
cmp $32, %edx
|
||||
jbe L(return_null)
|
||||
|
||||
movdqa 32(%edi), %xmm3
|
||||
pcmpeqb %xmm1, %xmm3
|
||||
lea 16(%ecx), %ecx
|
||||
pmovmskb %xmm3, %eax
|
||||
test %eax, %eax
|
||||
jnz L(match_case2)
|
||||
cmp $48, %edx
|
||||
jbe L(return_null)
|
||||
|
||||
/* Last chunk: the pattern register is reused as the compare destination
   because it is not needed again. */
pcmpeqb 48(%edi), %xmm1
|
||||
lea 16(%ecx), %ecx
|
||||
pmovmskb %xmm1, %eax
|
||||
test %eax, %eax
|
||||
jnz L(match_case2)
|
||||
|
||||
/* No hit anywhere in the tail: return 0. */
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
/* Return stubs for match_case1.  exit_case1_N returns the address of the
   N-th byte of the chunk (1-based), i.e. %edi + N - 1.  N = 4, 8, 12 and 16
   have no stub; match_case1 returns those inline. */
L(exit_case1_1):
|
||||
mov %edi, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case1_2):
|
||||
lea 1(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case1_3):
|
||||
lea 2(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case1_5):
|
||||
lea 4(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case1_6):
|
||||
lea 5(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case1_7):
|
||||
lea 6(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case1_9):
|
||||
lea 8(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case1_10):
|
||||
lea 9(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case1_11):
|
||||
lea 10(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case1_13):
|
||||
lea 12(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case1_14):
|
||||
lea 13(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case1_15):
|
||||
lea 14(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
/* Hit in a 16-byte chunk that may extend past the end of the buffer, so the
   hit position is checked against the remaining length in %edx.
   Three entry points:
     match_case2         - from L(exit_loop): %ecx is the chunk offset; it is
                           removed from %edx and added to %edi.
     match_case2_prolog1 - from L(crosscache): %ecx is the misalignment;
                           adding it to the rounded-down %edi restores the
                           original pointer, and %edx is the full length.
     match_case2_prolog  - from the entry code: %edi and %edx are the
                           original pointer and length.
   The mask in %eax is decoded as in match_case1.  Before returning byte
   index k the code subtracts k + 1 from %edx and returns 0 on borrow. */
L(match_case2):
|
||||
sub %ecx, %edx
|
||||
L(match_case2_prolog1):
|
||||
add %ecx, %edi
|
||||
L(match_case2_prolog):
|
||||
test %al, %al
|
||||
jz L(match_case2_high)
|
||||
mov %al, %cl
|
||||
and $15, %cl
|
||||
jz L(match_case2_8)
|
||||
test $0x01, %al
|
||||
jnz L(exit_case2_1)
|
||||
test $0x02, %al
|
||||
jnz L(exit_case2_2)
|
||||
test $0x04, %al
|
||||
jnz L(exit_case2_3)
|
||||
sub $4, %edx
|
||||
jb L(return_null)
|
||||
lea 3(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
/* Low nibble of %al is clear: first hit is in bytes 4-7. */
L(match_case2_8):
|
||||
test $0x10, %al
|
||||
jnz L(exit_case2_5)
|
||||
test $0x20, %al
|
||||
jnz L(exit_case2_6)
|
||||
test $0x40, %al
|
||||
jnz L(exit_case2_7)
|
||||
sub $8, %edx
|
||||
jb L(return_null)
|
||||
lea 7(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
/* %al is clear: first hit is in bytes 8-15, described by %ah. */
L(match_case2_high):
|
||||
mov %ah, %ch
|
||||
and $15, %ch
|
||||
jz L(match_case2_high_8)
|
||||
test $0x01, %ah
|
||||
jnz L(exit_case2_9)
|
||||
test $0x02, %ah
|
||||
jnz L(exit_case2_10)
|
||||
test $0x04, %ah
|
||||
jnz L(exit_case2_11)
|
||||
sub $12, %edx
|
||||
jb L(return_null)
|
||||
lea 11(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
/* Low nibble of %ah is clear: first hit is in bytes 12-15. */
L(match_case2_high_8):
|
||||
test $0x10, %ah
|
||||
jnz L(exit_case2_13)
|
||||
test $0x20, %ah
|
||||
jnz L(exit_case2_14)
|
||||
test $0x40, %ah
|
||||
jnz L(exit_case2_15)
|
||||
sub $16, %edx
|
||||
jb L(return_null)
|
||||
lea 15(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
/* Length-checked return stubs for match_case2.  exit_case2_N handles a hit
   at the N-th byte of the chunk (1-based): it subtracts N from the remaining
   length in %edx and returns 0 on borrow (the hit lies past the end of the
   buffer), otherwise returns %edi + N - 1.
   exit_case2_1 has no check.  NOTE(review): this relies on every path into
   match_case2 having at least one byte remaining -- confirm if callers
   change. */
L(exit_case2_1):
|
||||
mov %edi, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case2_2):
|
||||
sub $2, %edx
|
||||
jb L(return_null)
|
||||
lea 1(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case2_3):
|
||||
sub $3, %edx
|
||||
jb L(return_null)
|
||||
lea 2(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case2_5):
|
||||
sub $5, %edx
|
||||
jb L(return_null)
|
||||
lea 4(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case2_6):
|
||||
sub $6, %edx
|
||||
jb L(return_null)
|
||||
lea 5(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case2_7):
|
||||
sub $7, %edx
|
||||
jb L(return_null)
|
||||
lea 6(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case2_9):
|
||||
sub $9, %edx
|
||||
jb L(return_null)
|
||||
lea 8(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case2_10):
|
||||
sub $10, %edx
|
||||
jb L(return_null)
|
||||
lea 9(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case2_11):
|
||||
sub $11, %edx
|
||||
jb L(return_null)
|
||||
lea 10(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case2_13):
|
||||
sub $13, %edx
|
||||
jb L(return_null)
|
||||
lea 12(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case2_14):
|
||||
sub $14, %edx
|
||||
jb L(return_null)
|
||||
lea 13(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_case2_15):
|
||||
sub $15, %edx
|
||||
jb L(return_null)
|
||||
lea 14(%edi), %eax
|
||||
RETURN
|
||||
.p2align 4
|
||||
/* Shared "not found" exit: returns 0 in %eax after restoring %edi. */
L(return_null):
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
END (memchr)
|
778
libc/arch-x86/string/sse2-memrchr-atom.S
Normal file
778
libc/arch-x86/string/sse2-memrchr-atom.S
Normal file
|
@ -0,0 +1,778 @@
|
|||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef L
|
||||
# define L(label) .L##label
|
||||
#endif
|
||||
|
||||
#ifndef cfi_startproc
|
||||
# define cfi_startproc .cfi_startproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_endproc
|
||||
# define cfi_endproc .cfi_endproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_rel_offset
|
||||
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
|
||||
#endif
|
||||
|
||||
#ifndef cfi_restore
|
||||
# define cfi_restore(reg) .cfi_restore reg
|
||||
#endif
|
||||
|
||||
#ifndef cfi_adjust_cfa_offset
|
||||
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
|
||||
#endif
|
||||
|
||||
#ifndef ENTRY
|
||||
# define ENTRY(name) \
|
||||
.type name, @function; \
|
||||
.globl name; \
|
||||
.p2align 4; \
|
||||
name: \
|
||||
cfi_startproc
|
||||
#endif
|
||||
|
||||
#ifndef END
|
||||
# define END(name) \
|
||||
cfi_endproc; \
|
||||
.size name, .-name
|
||||
#endif
|
||||
|
||||
#define CFI_PUSH(REG) \
|
||||
cfi_adjust_cfa_offset (4); \
|
||||
cfi_rel_offset (REG, 0)
|
||||
|
||||
#define CFI_POP(REG) \
|
||||
cfi_adjust_cfa_offset (-4); \
|
||||
cfi_restore (REG)
|
||||
|
||||
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
||||
#define POP(REG) popl REG; CFI_POP (REG)
|
||||
|
||||
/* %esp-relative offsets of memrchr's three arguments, 4 bytes apart.  The
   entry code loads them straight after ENTRY without pushing any register:
   STR1 into %ecx, STR2 into %xmm1 and LEN into %edx.  PARMS is 4 rather
   than 0, presumably to skip the return address at the top of the stack. */
#define PARMS 4
|
||||
#define STR1 PARMS
|
||||
#define STR2 STR1+4
|
||||
#define LEN STR2+4
|
||||
|
||||
.text
|
||||
/* memrchr entry: find the last occurrence of a byte in a buffer, scanning
   backwards from the end in 16-byte SSE2 chunks.
   Register use from here on: %ecx = current chunk address, %edx = remaining
   byte count (biased, see below), %xmm1 = search byte replicated 16 times,
   %eax = match bit mask / return value.  */
ENTRY (memrchr)
|
||||
/* %ecx = buffer pointer.  */
mov STR1(%esp), %ecx
|
||||
/* Low dword of %xmm1 = byte value to search for.  */
movd STR2(%esp), %xmm1
|
||||
/* %edx = length.  */
mov LEN(%esp), %edx
|
||||
|
||||
/* Zero length: nothing to search, return NULL.  */
test %edx, %edx
|
||||
jz L(return_null)
|
||||
/* %edx = length - 16; lengths of 16 or less take the short path.  */
sub $16, %edx
|
||||
jbe L(length_less16)
|
||||
|
||||
/* Two punpcklbw plus the pshufd below replicate the low byte of %xmm1 into
   all 16 byte lanes.  The integer add is interleaved between them.  */
punpcklbw %xmm1, %xmm1
|
||||
/* %ecx = buffer + length - 16, i.e. the last 16 bytes of the buffer.  */
add %edx, %ecx
|
||||
punpcklbw %xmm1, %xmm1
|
||||
|
||||
/* Unaligned load of the last 16 bytes.  */
movdqu (%ecx), %xmm0
|
||||
pshufd $0, %xmm1, %xmm1
|
||||
/* Each byte lane of %xmm0 becomes 0xFF where it equals the search byte.  */
pcmpeqb %xmm1, %xmm0
|
||||
|
||||
/* Bit i of %eax is set when byte i of the chunk matched.  */
pmovmskb %xmm0, %eax
|
||||
test %eax, %eax
|
||||
jnz L(exit_dispatch)
|
||||
|
||||
/* Step back 64 bytes so that offsets 0, 16, 32 and 48 from %ecx address the
   four chunks below the one just checked.  */
sub $64, %ecx
|
||||
/* %eax = misalignment of %ecx within a 16-byte block.  */
mov %ecx, %eax
|
||||
and $15, %eax
|
||||
jz L(loop_prolog)
|
||||
|
||||
/* Not aligned: round %ecx up to the next 16-byte boundary (so the aligned
   loads below are legal) and grow the remaining count in %edx by the same
   amount (16 - misalignment).  */
add $16, %ecx
|
||||
add $16, %edx
|
||||
and $-16, %ecx
|
||||
sub %eax, %edx
|
||||
|
||||
.p2align 4
|
||||
/* Loop start on aligned string. */
|
||||
L(loop_prolog):
|
||||
sub $64, %edx
|
||||
jbe L(exit_loop)
|
||||
|
||||
movdqa 48(%ecx), %xmm0
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pmovmskb %xmm0, %eax
|
||||
test %eax, %eax
|
||||
jnz L(matches48)
|
||||
|
||||
movdqa 32(%ecx), %xmm2
|
||||
pcmpeqb %xmm1, %xmm2
|
||||
pmovmskb %xmm2, %eax
|
||||
test %eax, %eax
|
||||
jnz L(matches32)
|
||||
|
||||
movdqa 16(%ecx), %xmm3
|
||||
pcmpeqb %xmm1, %xmm3
|
||||
pmovmskb %xmm3, %eax
|
||||
test %eax, %eax
|
||||
jnz L(matches16)
|
||||
|
||||
movdqa (%ecx), %xmm4
|
||||
pcmpeqb %xmm1, %xmm4
|
||||
pmovmskb %xmm4, %eax
|
||||
test %eax, %eax
|
||||
jnz L(exit_dispatch)
|
||||
|
||||
sub $64, %ecx
|
||||
sub $64, %edx
|
||||
jbe L(exit_loop)
|
||||
|
||||
movdqa 48(%ecx), %xmm0
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pmovmskb %xmm0, %eax
|
||||
test %eax, %eax
|
||||
jnz L(matches48)
|
||||
|
||||
movdqa 32(%ecx), %xmm2
|
||||
pcmpeqb %xmm1, %xmm2
|
||||
pmovmskb %xmm2, %eax
|
||||
test %eax, %eax
|
||||
jnz L(matches32)
|
||||
|
||||
movdqa 16(%ecx), %xmm3
|
||||
pcmpeqb %xmm1, %xmm3
|
||||
pmovmskb %xmm3, %eax
|
||||
test %eax, %eax
|
||||
jnz L(matches16)
|
||||
|
||||
movdqa (%ecx), %xmm3
|
||||
pcmpeqb %xmm1, %xmm3
|
||||
pmovmskb %xmm3, %eax
|
||||
test %eax, %eax
|
||||
jnz L(exit_dispatch)
|
||||
|
||||
mov %ecx, %eax
|
||||
and $63, %eax
|
||||
test %eax, %eax
|
||||
jz L(align64_loop)
|
||||
|
||||
add $64, %ecx
|
||||
add $64, %edx
|
||||
and $-64, %ecx
|
||||
sub %eax, %edx
|
||||
|
||||
.p2align 4
|
||||
/* Main backward loop: each iteration moves %ecx down 64 bytes and tests four
   aligned 16-byte chunks at once.  Leaves through L(exit_loop) when 64 or
   fewer bytes remain, otherwise returns the address of the last match.  */
L(align64_loop):
|
||||
sub $64, %ecx
|
||||
sub $64, %edx
|
||||
/* 64 or fewer bytes left: the tail code handles the partial block.  */
jbe L(exit_loop)
|
||||
|
||||
movdqa (%ecx), %xmm0
|
||||
movdqa 16(%ecx), %xmm2
|
||||
movdqa 32(%ecx), %xmm3
|
||||
movdqa 48(%ecx), %xmm4
|
||||
|
||||
/* Compare all four chunks against the replicated search byte.  */
pcmpeqb %xmm1, %xmm0
|
||||
pcmpeqb %xmm1, %xmm2
|
||||
pcmpeqb %xmm1, %xmm3
|
||||
pcmpeqb %xmm1, %xmm4
|
||||
|
||||
/* Lanes are 0x00 or 0xFF, so an unsigned byte max acts as a bitwise OR.
   %xmm2 ends up non-zero if any of the four chunks contained a match.
   %xmm3 and %xmm4 are only read here and keep their individual results.  */
pmaxub %xmm3, %xmm0
|
||||
pmaxub %xmm4, %xmm2
|
||||
pmaxub %xmm0, %xmm2
|
||||
pmovmskb %xmm2, %eax
|
||||
|
||||
test %eax, %eax
|
||||
jz L(align64_loop)
|
||||
|
||||
/* A match exists somewhere in these 64 bytes.  Check the chunks from the
   highest address down, because the last occurrence is wanted.  */
pmovmskb %xmm4, %eax
|
||||
test %eax, %eax
|
||||
jnz L(matches48)
|
||||
|
||||
pmovmskb %xmm3, %eax
|
||||
test %eax, %eax
|
||||
jnz L(matches32)
|
||||
|
||||
/* %xmm2 and %xmm0 were overwritten by the merge above, so the chunk at
   offset 16 is reloaded and compared again.  */
movdqa 16(%ecx), %xmm2
|
||||
|
||||
pcmpeqb %xmm1, %xmm2
|
||||
/* Chunk 0 is compared directly into %xmm1; the search pattern is no longer
   needed because every path from here returns.  */
pcmpeqb (%ecx), %xmm1
|
||||
|
||||
pmovmskb %xmm2, %eax
|
||||
test %eax, %eax
|
||||
jnz L(matches16)
|
||||
|
||||
/* The match must be in chunk 0.  What follows is the same highest-set-bit
   dispatch as L(exit_dispatch), inlined: %ah covers bytes 8-15 and %al
   bytes 0-7.  */
pmovmskb %xmm1, %eax
|
||||
test %ah, %ah
|
||||
jnz L(exit_dispatch_high)
|
||||
/* Any of bits 4-7 set?  (%dl is scratch; the count in %edx is dead.)  */
mov %al, %dl
|
||||
and $15 << 4, %dl
|
||||
jnz L(exit_dispatch_8)
|
||||
test $0x08, %al
|
||||
jnz L(exit_4)
|
||||
test $0x04, %al
|
||||
jnz L(exit_3)
|
||||
test $0x02, %al
|
||||
jnz L(exit_2)
|
||||
/* Only bit 0 is left: the match is the first byte of the chunk.  */
mov %ecx, %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_loop):
|
||||
add $64, %edx
|
||||
cmp $32, %edx
|
||||
jbe L(exit_loop_32)
|
||||
|
||||
movdqa 48(%ecx), %xmm0
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pmovmskb %xmm0, %eax
|
||||
test %eax, %eax
|
||||
jnz L(matches48)
|
||||
|
||||
movdqa 32(%ecx), %xmm2
|
||||
pcmpeqb %xmm1, %xmm2
|
||||
pmovmskb %xmm2, %eax
|
||||
test %eax, %eax
|
||||
jnz L(matches32)
|
||||
|
||||
movdqa 16(%ecx), %xmm3
|
||||
pcmpeqb %xmm1, %xmm3
|
||||
pmovmskb %xmm3, %eax
|
||||
test %eax, %eax
|
||||
jnz L(matches16_1)
|
||||
cmp $48, %edx
|
||||
jbe L(return_null)
|
||||
|
||||
pcmpeqb (%ecx), %xmm1
|
||||
pmovmskb %xmm1, %eax
|
||||
test %eax, %eax
|
||||
jnz L(matches0_1)
|
||||
xor %eax, %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_loop_32):
|
||||
movdqa 48(%ecx), %xmm0
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pmovmskb %xmm0, %eax
|
||||
test %eax, %eax
|
||||
jnz L(matches48_1)
|
||||
cmp $16, %edx
|
||||
jbe L(return_null)
|
||||
|
||||
pcmpeqb 32(%ecx), %xmm1
|
||||
pmovmskb %xmm1, %eax
|
||||
test %eax, %eax
|
||||
jnz L(matches32_1)
|
||||
xor %eax, %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(matches16):
|
||||
lea 16(%ecx), %ecx
|
||||
test %ah, %ah
|
||||
jnz L(exit_dispatch_high)
|
||||
mov %al, %dl
|
||||
and $15 << 4, %dl
|
||||
jnz L(exit_dispatch_8)
|
||||
test $0x08, %al
|
||||
jnz L(exit_4)
|
||||
test $0x04, %al
|
||||
jnz L(exit_3)
|
||||
test $0x02, %al
|
||||
jnz L(exit_2)
|
||||
mov %ecx, %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(matches32):
|
||||
lea 32(%ecx), %ecx
|
||||
test %ah, %ah
|
||||
jnz L(exit_dispatch_high)
|
||||
mov %al, %dl
|
||||
and $15 << 4, %dl
|
||||
jnz L(exit_dispatch_8)
|
||||
test $0x08, %al
|
||||
jnz L(exit_4)
|
||||
test $0x04, %al
|
||||
jnz L(exit_3)
|
||||
test $0x02, %al
|
||||
jnz L(exit_2)
|
||||
mov %ecx, %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(matches48):
|
||||
lea 48(%ecx), %ecx
|
||||
|
||||
.p2align 4
|
||||
/* Return the address of the last matching byte in one 16-byte chunk.
   In: %eax = match mask from pmovmskb (every jump here is guarded by a
   non-zero test), %ecx = address of the chunk.
   Out: %eax = %ecx + index of the highest set bit of the mask.
   The mask is examined from the most significant bit downwards.  */
L(exit_dispatch):
|
||||
/* Bits 8-15 (bytes 8-15 of the chunk) are handled separately.  */
test %ah, %ah
|
||||
jnz L(exit_dispatch_high)
|
||||
/* Any of bits 4-7 set?  %dl is used as scratch.  */
mov %al, %dl
|
||||
and $15 << 4, %dl
|
||||
jnz L(exit_dispatch_8)
|
||||
test $0x08, %al
|
||||
jnz L(exit_4)
|
||||
test $0x04, %al
|
||||
jnz L(exit_3)
|
||||
test $0x02, %al
|
||||
jnz L(exit_2)
|
||||
/* Only bit 0 remains: the match is the first byte of the chunk.  */
mov %ecx, %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_dispatch_8):
|
||||
test $0x80, %al
|
||||
jnz L(exit_8)
|
||||
test $0x40, %al
|
||||
jnz L(exit_7)
|
||||
test $0x20, %al
|
||||
jnz L(exit_6)
|
||||
lea 4(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_dispatch_high):
|
||||
mov %ah, %dh
|
||||
and $15 << 4, %dh
|
||||
jnz L(exit_dispatch_high_8)
|
||||
test $0x08, %ah
|
||||
jnz L(exit_12)
|
||||
test $0x04, %ah
|
||||
jnz L(exit_11)
|
||||
test $0x02, %ah
|
||||
jnz L(exit_10)
|
||||
lea 8(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_dispatch_high_8):
|
||||
test $0x80, %ah
|
||||
jnz L(exit_16)
|
||||
test $0x40, %ah
|
||||
jnz L(exit_15)
|
||||
test $0x20, %ah
|
||||
jnz L(exit_14)
|
||||
lea 12(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_2):
|
||||
lea 1(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_3):
|
||||
lea 2(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_4):
|
||||
lea 3(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_6):
|
||||
lea 5(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_7):
|
||||
lea 6(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_8):
|
||||
lea 7(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_10):
|
||||
lea 9(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_11):
|
||||
lea 10(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_12):
|
||||
lea 11(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_14):
|
||||
lea 13(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_15):
|
||||
lea 14(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_16):
|
||||
lea 15(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(matches0_1):
|
||||
lea -64(%edx), %edx
|
||||
|
||||
test %ah, %ah
|
||||
jnz L(exit_dispatch_1_high)
|
||||
mov %al, %ah
|
||||
and $15 << 4, %ah
|
||||
jnz L(exit_dispatch_1_8)
|
||||
test $0x08, %al
|
||||
jnz L(exit_1_4)
|
||||
test $0x04, %al
|
||||
jnz L(exit_1_3)
|
||||
test $0x02, %al
|
||||
jnz L(exit_1_2)
|
||||
|
||||
add $0, %edx
|
||||
jl L(return_null)
|
||||
mov %ecx, %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(matches16_1):
|
||||
lea -48(%edx), %edx
|
||||
lea 16(%ecx), %ecx
|
||||
|
||||
test %ah, %ah
|
||||
jnz L(exit_dispatch_1_high)
|
||||
mov %al, %ah
|
||||
and $15 << 4, %ah
|
||||
jnz L(exit_dispatch_1_8)
|
||||
test $0x08, %al
|
||||
jnz L(exit_1_4)
|
||||
test $0x04, %al
|
||||
jnz L(exit_1_3)
|
||||
test $0x02, %al
|
||||
jnz L(exit_1_2)
|
||||
|
||||
add $0, %edx
|
||||
jl L(return_null)
|
||||
mov %ecx, %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(matches32_1):
|
||||
lea -32(%edx), %edx
|
||||
lea 32(%ecx), %ecx
|
||||
|
||||
test %ah, %ah
|
||||
jnz L(exit_dispatch_1_high)
|
||||
mov %al, %ah
|
||||
and $15 << 4, %ah
|
||||
jnz L(exit_dispatch_1_8)
|
||||
test $0x08, %al
|
||||
jnz L(exit_1_4)
|
||||
test $0x04, %al
|
||||
jnz L(exit_1_3)
|
||||
test $0x02, %al
|
||||
jnz L(exit_1_2)
|
||||
|
||||
add $0, %edx
|
||||
jl L(return_null)
|
||||
mov %ecx, %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(matches48_1):
|
||||
lea -16(%edx), %edx
|
||||
lea 48(%ecx), %ecx
|
||||
|
||||
.p2align 4
|
||||
L(exit_dispatch_1):
|
||||
test %ah, %ah
|
||||
jnz L(exit_dispatch_1_high)
|
||||
mov %al, %ah
|
||||
and $15 << 4, %ah
|
||||
jnz L(exit_dispatch_1_8)
|
||||
test $0x08, %al
|
||||
jnz L(exit_1_4)
|
||||
test $0x04, %al
|
||||
jnz L(exit_1_3)
|
||||
test $0x02, %al
|
||||
jnz L(exit_1_2)
|
||||
|
||||
add $0, %edx
|
||||
jl L(return_null)
|
||||
mov %ecx, %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_dispatch_1_8):
|
||||
test $0x80, %al
|
||||
jnz L(exit_1_8)
|
||||
test $0x40, %al
|
||||
jnz L(exit_1_7)
|
||||
test $0x20, %al
|
||||
jnz L(exit_1_6)
|
||||
|
||||
add $4, %edx
|
||||
jl L(return_null)
|
||||
lea 4(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_dispatch_1_high):
|
||||
mov %ah, %al
|
||||
and $15 << 4, %al
|
||||
jnz L(exit_dispatch_1_high_8)
|
||||
test $0x08, %ah
|
||||
jnz L(exit_1_12)
|
||||
test $0x04, %ah
|
||||
jnz L(exit_1_11)
|
||||
test $0x02, %ah
|
||||
jnz L(exit_1_10)
|
||||
|
||||
add $8, %edx
|
||||
jl L(return_null)
|
||||
lea 8(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_dispatch_1_high_8):
|
||||
test $0x80, %ah
|
||||
jnz L(exit_1_16)
|
||||
test $0x40, %ah
|
||||
jnz L(exit_1_15)
|
||||
test $0x20, %ah
|
||||
jnz L(exit_1_14)
|
||||
|
||||
add $12, %edx
|
||||
jl L(return_null)
|
||||
lea 12(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_1_2):
|
||||
add $1, %edx
|
||||
jl L(return_null)
|
||||
lea 1(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_1_3):
|
||||
add $2, %edx
|
||||
jl L(return_null)
|
||||
lea 2(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_1_4):
|
||||
add $3, %edx
|
||||
jl L(return_null)
|
||||
lea 3(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_1_6):
|
||||
add $5, %edx
|
||||
jl L(return_null)
|
||||
lea 5(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_1_7):
|
||||
add $6, %edx
|
||||
jl L(return_null)
|
||||
lea 6(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_1_8):
|
||||
add $7, %edx
|
||||
jl L(return_null)
|
||||
lea 7(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_1_10):
|
||||
add $9, %edx
|
||||
jl L(return_null)
|
||||
lea 9(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_1_11):
|
||||
add $10, %edx
|
||||
jl L(return_null)
|
||||
lea 10(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_1_12):
|
||||
add $11, %edx
|
||||
jl L(return_null)
|
||||
lea 11(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_1_14):
|
||||
add $13, %edx
|
||||
jl L(return_null)
|
||||
lea 13(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_1_15):
|
||||
add $14, %edx
|
||||
jl L(return_null)
|
||||
lea 14(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit_1_16):
|
||||
add $15, %edx
|
||||
jl L(return_null)
|
||||
lea 15(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(return_null):
|
||||
xor %eax, %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
/* Short buffer (length 1..16) that starts on a 16-byte boundary.
   In: %eax = buffer address, %edx = length, %xmm1 = replicated search byte.
   One aligned 16-byte compare is done and matches past the end of the
   buffer are masked off.  */
L(length_less16_offset0):
|
||||
/* %cl = length, used as the shift count below.  */
mov %dl, %cl
|
||||
pcmpeqb (%eax), %xmm1
|
||||
|
||||
/* %edx = (1 << length) - 1: one bit per byte that lies inside the buffer.  */
mov $1, %edx
|
||||
sal %cl, %edx
|
||||
sub $1, %edx
|
||||
|
||||
/* L(exit_dispatch) expects the chunk address in %ecx.  */
mov %eax, %ecx
|
||||
pmovmskb %xmm1, %eax
|
||||
|
||||
/* Discard matches beyond the end of the buffer.  */
and %edx, %eax
|
||||
test %eax, %eax
|
||||
jnz L(exit_dispatch)
|
||||
|
||||
/* No match inside the buffer: return NULL.  */
xor %eax, %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
/* Short buffer path, entered from the top of memrchr when length <= 16.
   In: %ecx = buffer address, %edx = length - 16, low byte of %xmm1 = search
   byte (not yet replicated).
   The buffer is examined through aligned 16-byte loads; a second load is
   needed only when the buffer crosses a 16-byte boundary.  */
L(length_less16):
|
||||
/* Replicate the search byte into all 16 lanes (two punpcklbw + pshufd).  */
punpcklbw %xmm1, %xmm1
|
||||
/* Undo the earlier "sub $16": %edx = length again (1..16).  */
add $16, %edx
|
||||
punpcklbw %xmm1, %xmm1
|
||||
|
||||
/* %eax = buffer address; %ecx becomes its offset within a 16-byte block.  */
mov %ecx, %eax
|
||||
pshufd $0, %xmm1, %xmm1
|
||||
|
||||
/* pshufd leaves the flags alone, so jz tests the result of the and.  */
and $15, %ecx
|
||||
jz L(length_less16_offset0)
|
||||
|
||||
/* %edi is used as a scratch mask register below.  */
PUSH (%edi)
|
||||
|
||||
/* %dh = offset + length.  %eax is rounded down to the aligned block.  */
mov %cl, %dh
|
||||
add %dl, %dh
|
||||
and $-16, %eax
|
||||
|
||||
/* offset + length > 16: the buffer spills into the next aligned block.  */
sub $16, %dh
|
||||
ja L(length_less16_part2)
|
||||
|
||||
/* Whole buffer lies inside one aligned block.  */
pcmpeqb (%eax), %xmm1
|
||||
pmovmskb %xmm1, %edi
|
||||
|
||||
/* Shift out the bytes before the buffer so bit 0 corresponds to the first
   buffer byte, and make %eax point at the buffer start again.  */
sar %cl, %edi
|
||||
add %ecx, %eax
|
||||
/* %cl = length for the mask computation.  */
mov %dl, %cl
|
||||
|
||||
/* %edx = (1 << length) - 1.  */
mov $1, %edx
|
||||
sal %cl, %edx
|
||||
sub $1, %edx
|
||||
|
||||
/* Keep only matches that fall inside the buffer.  */
and %edx, %edi
|
||||
test %edi, %edi
|
||||
jz L(ret_null)
|
||||
|
||||
/* Index of the highest set bit = offset of the last match.  */
bsr %edi, %edi
|
||||
add %edi, %eax
|
||||
POP (%edi)
|
||||
ret
|
||||
|
||||
/* Unwind info only: the code that follows is reached with %edi still on the
   stack, so the state changed by the POP above is re-established.  */
CFI_PUSH (%edi)
|
||||
|
||||
.p2align 4
|
||||
L(length_less16_part2):
|
||||
movdqa 16(%eax), %xmm2
|
||||
pcmpeqb %xmm1, %xmm2
|
||||
pmovmskb %xmm2, %edi
|
||||
|
||||
mov %cl, %ch
|
||||
|
||||
mov %dh, %cl
|
||||
mov $1, %edx
|
||||
sal %cl, %edx
|
||||
sub $1, %edx
|
||||
|
||||
and %edx, %edi
|
||||
|
||||
test %edi, %edi
|
||||
jnz L(length_less16_part2_return)
|
||||
|
||||
pcmpeqb (%eax), %xmm1
|
||||
pmovmskb %xmm1, %edi
|
||||
|
||||
mov %ch, %cl
|
||||
sar %cl, %edi
|
||||
test %edi, %edi
|
||||
jz L(ret_null)
|
||||
|
||||
bsr %edi, %edi
|
||||
add %edi, %eax
|
||||
xor %ch, %ch
|
||||
add %ecx, %eax
|
||||
POP (%edi)
|
||||
ret
|
||||
|
||||
CFI_PUSH (%edi)
|
||||
|
||||
.p2align 4
|
||||
L(length_less16_part2_return):
|
||||
bsr %edi, %edi
|
||||
lea 16(%eax, %edi), %eax
|
||||
POP (%edi)
|
||||
ret
|
||||
|
||||
CFI_PUSH (%edi)
|
||||
|
||||
.p2align 4
|
||||
L(ret_null):
|
||||
xor %eax, %eax
|
||||
POP (%edi)
|
||||
ret
|
||||
|
||||
END (memrchr)
|
|
@ -28,6 +28,9 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "cache.h"
|
||||
#undef __i686
|
||||
|
||||
#ifndef L
|
||||
# define L(label) .L##label
|
||||
#endif
|
||||
|
@ -136,9 +139,13 @@ __i686.get_pc_thunk.bx:
|
|||
jmp *TABLE(,%ecx,4)
|
||||
#endif
|
||||
|
||||
#ifndef MEMSET
|
||||
# define MEMSET memset
|
||||
#endif
|
||||
|
||||
.section .text.sse2,"ax",@progbits
|
||||
ALIGN (4)
|
||||
ENTRY (sse2_memset5_atom)
|
||||
ENTRY (MEMSET)
|
||||
ENTRANCE
|
||||
|
||||
movl LEN(%esp), %ecx
|
||||
|
@ -911,4 +918,4 @@ L(aligned_16_15bytes):
|
|||
SETRTNVAL
|
||||
RETURN_END
|
||||
|
||||
END (sse2_memset5_atom)
|
||||
END (MEMSET)
|
391
libc/arch-x86/string/sse2-strchr-atom.S
Normal file
391
libc/arch-x86/string/sse2-strchr-atom.S
Normal file
|
@ -0,0 +1,391 @@
|
|||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef L
|
||||
# define L(label) .L##label
|
||||
#endif
|
||||
|
||||
#ifndef cfi_startproc
|
||||
# define cfi_startproc .cfi_startproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_endproc
|
||||
# define cfi_endproc .cfi_endproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_rel_offset
|
||||
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
|
||||
#endif
|
||||
|
||||
#ifndef cfi_restore
|
||||
# define cfi_restore(reg) .cfi_restore reg
|
||||
#endif
|
||||
|
||||
#ifndef cfi_adjust_cfa_offset
|
||||
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
|
||||
#endif
|
||||
|
||||
#ifndef ENTRY
|
||||
# define ENTRY(name) \
|
||||
.type name, @function; \
|
||||
.globl name; \
|
||||
.p2align 4; \
|
||||
name: \
|
||||
cfi_startproc
|
||||
#endif
|
||||
|
||||
#ifndef END
|
||||
# define END(name) \
|
||||
cfi_endproc; \
|
||||
.size name, .-name
|
||||
#endif
|
||||
|
||||
#define CFI_PUSH(REG) \
|
||||
cfi_adjust_cfa_offset (4); \
|
||||
cfi_rel_offset (REG, 0)
|
||||
|
||||
#define CFI_POP(REG) \
|
||||
cfi_adjust_cfa_offset (-4); \
|
||||
cfi_restore (REG)
|
||||
|
||||
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
||||
#define POP(REG) popl REG; CFI_POP (REG)
|
||||
|
||||
/* Offset of the first argument from %esp once ENTRANCE has run: 8 rather
   than 4 because ENTRANCE pushes %edi before the arguments are read.  */
#define PARMS 8
|
||||
/* Function prologue: save %edi, which strchr uses as its string pointer.  */
#define ENTRANCE PUSH(%edi)
|
||||
/* Function epilogue: restore %edi and return.  The trailing CFI_PUSH emits
   no instruction; it re-establishes the unwind state for the code placed
   after the ret, which still runs with %edi on the stack.  */
#define RETURN POP (%edi); ret; CFI_PUSH (%edi);
|
||||
|
||||
|
||||
/* STR1 is the string pointer; STR2 is the character to search for (it is
   loaded into %xmm1 and replicated).  */
#define STR1 PARMS
|
||||
#define STR2 STR1+4
|
||||
|
||||
.text
|
||||
/* strchr entry: find the first occurrence of a character in a NUL-terminated
   string using aligned 16-byte SSE2 loads.
   Register use: %edi = current 16-byte block (or string position),
   %xmm1 = search character replicated 16 times, %xmm2 = zero / NUL compare
   result, %eax = match mask, %edx = NUL mask.  */
ENTRY (strchr)
|
||||
|
||||
ENTRANCE
|
||||
/* %ecx = string pointer, low dword of %xmm1 = search character.  */
mov STR1(%esp), %ecx
|
||||
movd STR2(%esp), %xmm1
|
||||
|
||||
/* %xmm2 = 0, used to detect the NUL terminator.  */
pxor %xmm2, %xmm2
|
||||
mov %ecx, %edi
|
||||
/* Two punpcklbw plus the pshufd below replicate the search character into
   all 16 byte lanes of %xmm1.  */
punpcklbw %xmm1, %xmm1
|
||||
punpcklbw %xmm1, %xmm1
|
||||
/* %ecx = offset of the string within its 16-byte block.  */
and $15, %ecx
|
||||
pshufd $0, %xmm1, %xmm1
|
||||
/* pshufd does not touch the flags: this tests the result of the and.
   Offset 0 means the string is already aligned.  */
je L(loop)
|
||||
|
||||
/* Unaligned string: load the enclosing aligned block and ignore the bytes
   that precede the string.  */
and $-16, %edi
|
||||
movdqa (%edi), %xmm0
|
||||
pcmpeqb %xmm0, %xmm2
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
/* %edx = mask of NUL terminator bytes in the block.  */
pmovmskb %xmm2, %edx
|
||||
/* %eax = mask of bytes equal to the search character.  */
pmovmskb %xmm0, %eax
|
||||
/* Drop the bits for bytes before the string start, so that bit 0 of both
   masks corresponds to the first character of the string.  */
sarl %cl, %edx
|
||||
sarl %cl, %eax
|
||||
test %eax, %eax
|
||||
jz L(unaligned_no_match)
|
||||
/* There is a match.  Point %edi back at the string start so that bit i of
   the masks corresponds to address %edi + i.  */
add %ecx, %edi
|
||||
/* No NUL in this block: the first match is the answer.  Otherwise the
   match only counts if it comes before the NUL.  */
test %edx, %edx
|
||||
jz L(match_case1)
|
||||
jmp L(match_case2)
|
||||
|
||||
.p2align 4
|
||||
L(unaligned_no_match):
|
||||
test %edx, %edx
|
||||
jne L(return_null)
|
||||
|
||||
pxor %xmm2, %xmm2
|
||||
add $16, %edi
|
||||
|
||||
.p2align 4
|
||||
/* Loop start on aligned string. */
|
||||
L(loop):
|
||||
movdqa (%edi), %xmm0
|
||||
pcmpeqb %xmm0, %xmm2
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pmovmskb %xmm2, %edx
|
||||
pmovmskb %xmm0, %eax
|
||||
test %eax, %eax
|
||||
jnz L(matches)
|
||||
test %edx, %edx
|
||||
jnz L(return_null)
|
||||
add $16, %edi
|
||||
|
||||
movdqa (%edi), %xmm0
|
||||
pcmpeqb %xmm0, %xmm2
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pmovmskb %xmm2, %edx
|
||||
pmovmskb %xmm0, %eax
|
||||
test %eax, %eax
|
||||
jnz L(matches)
|
||||
test %edx, %edx
|
||||
jnz L(return_null)
|
||||
add $16, %edi
|
||||
|
||||
movdqa (%edi), %xmm0
|
||||
pcmpeqb %xmm0, %xmm2
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pmovmskb %xmm2, %edx
|
||||
pmovmskb %xmm0, %eax
|
||||
test %eax, %eax
|
||||
jnz L(matches)
|
||||
test %edx, %edx
|
||||
jnz L(return_null)
|
||||
add $16, %edi
|
||||
|
||||
movdqa (%edi), %xmm0
|
||||
pcmpeqb %xmm0, %xmm2
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pmovmskb %xmm2, %edx
|
||||
pmovmskb %xmm0, %eax
|
||||
test %eax, %eax
|
||||
jnz L(matches)
|
||||
test %edx, %edx
|
||||
jnz L(return_null)
|
||||
add $16, %edi
|
||||
jmp L(loop)
|
||||
|
||||
L(matches):
|
||||
/* There is a match. First find where NULL is. */
|
||||
test %edx, %edx
|
||||
jz L(match_case1)
|
||||
|
||||
.p2align 4
|
||||
/* The current block contains both a match and a NUL terminator.
   In: %eax = match mask, %edx = NUL mask (both non-zero), %edi = address
   corresponding to bit 0.
   Returns the address of the first match, or NULL if a NUL terminator comes
   before it.  At each position the match bit is tested before the NUL bit,
   so when the search character is itself NUL the terminator's address is
   returned.  */
L(match_case2):
|
||||
/* No match in bytes 0-7: continue with the upper half of the masks.  */
test %al, %al
|
||||
jz L(match_higth_case2)
|
||||
|
||||
/* A match in bytes 0-3 is resolved bit by bit in L(match_case2_4).  */
mov %al, %cl
|
||||
and $15, %cl
|
||||
jnz L(match_case2_4)
|
||||
|
||||
/* First match is in bytes 4-7; a NUL in bytes 0-3 precedes it.  */
mov %dl, %ch
|
||||
and $15, %ch
|
||||
jnz L(return_null)
|
||||
|
||||
/* Walk bytes 4, 5 and 6 in order: match first, then NUL.  */
test $0x10, %al
|
||||
jnz L(Exit5)
|
||||
test $0x10, %dl
|
||||
jnz L(return_null)
|
||||
test $0x20, %al
|
||||
jnz L(Exit6)
|
||||
test $0x20, %dl
|
||||
jnz L(return_null)
|
||||
test $0x40, %al
|
||||
jnz L(Exit7)
|
||||
test $0x40, %dl
|
||||
jnz L(return_null)
|
||||
/* %al is non-zero and bits 0-6 are clear, so the match is byte 7.  */
lea 7(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(match_case2_4):
|
||||
test $0x01, %al
|
||||
jnz L(Exit1)
|
||||
test $0x01, %dl
|
||||
jnz L(return_null)
|
||||
test $0x02, %al
|
||||
jnz L(Exit2)
|
||||
test $0x02, %dl
|
||||
jnz L(return_null)
|
||||
test $0x04, %al
|
||||
jnz L(Exit3)
|
||||
test $0x04, %dl
|
||||
jnz L(return_null)
|
||||
lea 3(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(match_higth_case2):
|
||||
test %dl, %dl
|
||||
jnz L(return_null)
|
||||
|
||||
mov %ah, %cl
|
||||
and $15, %cl
|
||||
jnz L(match_case2_12)
|
||||
|
||||
mov %dh, %ch
|
||||
and $15, %ch
|
||||
jnz L(return_null)
|
||||
|
||||
test $0x10, %ah
|
||||
jnz L(Exit13)
|
||||
test $0x10, %dh
|
||||
jnz L(return_null)
|
||||
test $0x20, %ah
|
||||
jnz L(Exit14)
|
||||
test $0x20, %dh
|
||||
jnz L(return_null)
|
||||
test $0x40, %ah
|
||||
jnz L(Exit15)
|
||||
test $0x40, %dh
|
||||
jnz L(return_null)
|
||||
lea 15(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(match_case2_12):
|
||||
test $0x01, %ah
|
||||
jnz L(Exit9)
|
||||
test $0x01, %dh
|
||||
jnz L(return_null)
|
||||
test $0x02, %ah
|
||||
jnz L(Exit10)
|
||||
test $0x02, %dh
|
||||
jnz L(return_null)
|
||||
test $0x04, %ah
|
||||
jnz L(Exit11)
|
||||
test $0x04, %dh
|
||||
jnz L(return_null)
|
||||
lea 11(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(match_case1):
|
||||
test %al, %al
|
||||
jz L(match_higth_case1)
|
||||
|
||||
test $0x01, %al
|
||||
jnz L(Exit1)
|
||||
test $0x02, %al
|
||||
jnz L(Exit2)
|
||||
test $0x04, %al
|
||||
jnz L(Exit3)
|
||||
test $0x08, %al
|
||||
jnz L(Exit4)
|
||||
test $0x10, %al
|
||||
jnz L(Exit5)
|
||||
test $0x20, %al
|
||||
jnz L(Exit6)
|
||||
test $0x40, %al
|
||||
jnz L(Exit7)
|
||||
lea 7(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(match_higth_case1):
|
||||
test $0x01, %ah
|
||||
jnz L(Exit9)
|
||||
test $0x02, %ah
|
||||
jnz L(Exit10)
|
||||
test $0x04, %ah
|
||||
jnz L(Exit11)
|
||||
test $0x08, %ah
|
||||
jnz L(Exit12)
|
||||
test $0x10, %ah
|
||||
jnz L(Exit13)
|
||||
test $0x20, %ah
|
||||
jnz L(Exit14)
|
||||
test $0x40, %ah
|
||||
jnz L(Exit15)
|
||||
lea 15(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit1):
|
||||
lea (%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit2):
|
||||
lea 1(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit3):
|
||||
lea 2(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit4):
|
||||
lea 3(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit5):
|
||||
lea 4(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit6):
|
||||
lea 5(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit7):
|
||||
lea 6(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit9):
|
||||
lea 8(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit10):
|
||||
lea 9(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit11):
|
||||
lea 10(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit12):
|
||||
lea 11(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit13):
|
||||
lea 12(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit14):
|
||||
lea 13(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit15):
|
||||
lea 14(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(return_null):
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
END (strchr)
|
|
@ -1,71 +1,112 @@
|
|||
#define STRLEN sse2_strlen_atom
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
#ifndef L
|
||||
# define L(label) .L##label
|
||||
#endif
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
#ifndef cfi_startproc
|
||||
# define cfi_startproc .cfi_startproc
|
||||
#endif
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
#ifndef cfi_endproc
|
||||
# define cfi_endproc .cfi_endproc
|
||||
#endif
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
#ifndef cfi_rel_offset
|
||||
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
|
||||
#endif
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
#ifndef cfi_restore
|
||||
# define cfi_restore(reg) .cfi_restore reg
|
||||
#endif
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef cfi_adjust_cfa_offset
|
||||
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
|
||||
#endif
|
||||
#ifndef USE_AS_STRCAT
|
||||
|
||||
#ifndef cfi_remember_state
|
||||
# define cfi_remember_state .cfi_remember_state
|
||||
#endif
|
||||
# ifndef STRLEN
|
||||
# define STRLEN strlen
|
||||
# endif
|
||||
|
||||
#ifndef cfi_restore_state
|
||||
# define cfi_restore_state .cfi_restore_state
|
||||
#endif
|
||||
# ifndef L
|
||||
# define L(label) .L##label
|
||||
# endif
|
||||
|
||||
#ifndef ENTRY
|
||||
# define ENTRY(name) \
|
||||
.type name, @function; \
|
||||
.globl name; \
|
||||
.p2align 4; \
|
||||
name: \
|
||||
# ifndef cfi_startproc
|
||||
# define cfi_startproc .cfi_startproc
|
||||
# endif
|
||||
|
||||
# ifndef cfi_endproc
|
||||
# define cfi_endproc .cfi_endproc
|
||||
# endif
|
||||
|
||||
/* A callee-saved register is required only for strnlen. */
|
||||
|
||||
# ifdef USE_AS_STRNLEN
|
||||
# ifndef cfi_rel_offset
|
||||
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
|
||||
# endif
|
||||
|
||||
# ifndef cfi_restore
|
||||
# define cfi_restore(reg) .cfi_restore reg
|
||||
# endif
|
||||
|
||||
# ifndef cfi_adjust_cfa_offset
|
||||
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
|
||||
# endif
|
||||
# endif
|
||||
|
||||
# ifndef ENTRY
|
||||
# define ENTRY(name) \
|
||||
.type name, @function; \
|
||||
.globl name; \
|
||||
.p2align 4; \
|
||||
name: \
|
||||
cfi_startproc
|
||||
#endif
|
||||
# endif
|
||||
|
||||
#ifndef END
|
||||
# define END(name) \
|
||||
cfi_endproc; \
|
||||
# ifndef END
|
||||
# define END(name) \
|
||||
cfi_endproc; \
|
||||
.size name, .-name
|
||||
#endif
|
||||
# endif
|
||||
|
||||
#define CFI_PUSH(REG) \
|
||||
cfi_adjust_cfa_offset (4); \
|
||||
cfi_rel_offset (REG, 0)
|
||||
# define PARMS 4
|
||||
# define STR PARMS
|
||||
# define RETURN ret
|
||||
|
||||
#define CFI_POP(REG) \
|
||||
cfi_adjust_cfa_offset (-4); \
|
||||
cfi_restore (REG)
|
||||
# ifdef USE_AS_STRNLEN
|
||||
# define LEN PARMS + 8
|
||||
# define CFI_PUSH(REG) \
|
||||
cfi_adjust_cfa_offset (4); \
|
||||
cfi_rel_offset (REG, 0)
|
||||
|
||||
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
||||
#define POP(REG) popl REG; CFI_POP (REG)
|
||||
#define PARMS 4
|
||||
#define STR PARMS
|
||||
#define ENTRANCE
|
||||
#define RETURN ret
|
||||
# define CFI_POP(REG) \
|
||||
cfi_adjust_cfa_offset (-4); \
|
||||
cfi_restore (REG)
|
||||
|
||||
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
||||
# define POP(REG) popl REG; CFI_POP (REG)
|
||||
# undef RETURN
|
||||
# define RETURN POP (%edi); ret; CFI_PUSH(%edi);
|
||||
# endif
|
||||
|
||||
.text
|
||||
ENTRY (STRLEN)
|
||||
ENTRANCE
|
||||
mov STR(%esp), %edx
|
||||
# ifdef USE_AS_STRNLEN
|
||||
PUSH (%edi)
|
||||
movl LEN(%esp), %edi
|
||||
sub $4, %edi
|
||||
jbe L(len_less4_prolog)
|
||||
# endif
|
||||
#endif
|
||||
xor %eax, %eax
|
||||
cmpb $0, (%edx)
|
||||
jz L(exit_tail0)
|
||||
|
@ -75,6 +116,12 @@ ENTRY (STRLEN)
|
|||
jz L(exit_tail2)
|
||||
cmpb $0, 3(%edx)
|
||||
jz L(exit_tail3)
|
||||
|
||||
#ifdef USE_AS_STRNLEN
|
||||
sub $4, %edi
|
||||
jbe L(len_less8_prolog)
|
||||
#endif
|
||||
|
||||
cmpb $0, 4(%edx)
|
||||
jz L(exit_tail4)
|
||||
cmpb $0, 5(%edx)
|
||||
|
@ -83,6 +130,12 @@ ENTRY (STRLEN)
|
|||
jz L(exit_tail6)
|
||||
cmpb $0, 7(%edx)
|
||||
jz L(exit_tail7)
|
||||
|
||||
#ifdef USE_AS_STRNLEN
|
||||
sub $4, %edi
|
||||
jbe L(len_less12_prolog)
|
||||
#endif
|
||||
|
||||
cmpb $0, 8(%edx)
|
||||
jz L(exit_tail8)
|
||||
cmpb $0, 9(%edx)
|
||||
|
@ -91,6 +144,12 @@ ENTRY (STRLEN)
|
|||
jz L(exit_tail10)
|
||||
cmpb $0, 11(%edx)
|
||||
jz L(exit_tail11)
|
||||
|
||||
#ifdef USE_AS_STRNLEN
|
||||
sub $4, %edi
|
||||
jbe L(len_less16_prolog)
|
||||
#endif
|
||||
|
||||
cmpb $0, 12(%edx)
|
||||
jz L(exit_tail12)
|
||||
cmpb $0, 13(%edx)
|
||||
|
@ -99,211 +158,531 @@ ENTRY (STRLEN)
|
|||
jz L(exit_tail14)
|
||||
cmpb $0, 15(%edx)
|
||||
jz L(exit_tail15)
|
||||
|
||||
pxor %xmm0, %xmm0
|
||||
mov %edx, %eax
|
||||
mov %edx, %ecx
|
||||
lea 16(%edx), %eax
|
||||
mov %eax, %ecx
|
||||
and $-16, %eax
|
||||
add $16, %ecx
|
||||
add $16, %eax
|
||||
|
||||
#ifdef USE_AS_STRNLEN
|
||||
and $15, %edx
|
||||
add %edx, %edi
|
||||
sub $64, %edi
|
||||
jbe L(len_less64)
|
||||
#endif
|
||||
|
||||
pcmpeqb (%eax), %xmm0
|
||||
pmovmskb %xmm0, %edx
|
||||
pxor %xmm1, %xmm1
|
||||
test %edx, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqb (%eax), %xmm1
|
||||
pmovmskb %xmm1, %edx
|
||||
pxor %xmm2, %xmm2
|
||||
test %edx, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
|
||||
pcmpeqb (%eax), %xmm2
|
||||
pmovmskb %xmm2, %edx
|
||||
pxor %xmm3, %xmm3
|
||||
test %edx, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqb (%eax), %xmm3
|
||||
pmovmskb %xmm3, %edx
|
||||
test %edx, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
#ifdef USE_AS_STRNLEN
|
||||
sub $64, %edi
|
||||
jbe L(len_less64)
|
||||
#endif
|
||||
|
||||
pcmpeqb (%eax), %xmm0
|
||||
pmovmskb %xmm0, %edx
|
||||
test %edx, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqb (%eax), %xmm1
|
||||
pmovmskb %xmm1, %edx
|
||||
test %edx, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqb (%eax), %xmm2
|
||||
pmovmskb %xmm2, %edx
|
||||
test %edx, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqb (%eax), %xmm3
|
||||
pmovmskb %xmm3, %edx
|
||||
test %edx, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
#ifdef USE_AS_STRNLEN
|
||||
sub $64, %edi
|
||||
jbe L(len_less64)
|
||||
#endif
|
||||
|
||||
pcmpeqb (%eax), %xmm0
|
||||
pmovmskb %xmm0, %edx
|
||||
test %edx, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqb (%eax), %xmm1
|
||||
pmovmskb %xmm1, %edx
|
||||
test %edx, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqb (%eax), %xmm2
|
||||
pmovmskb %xmm2, %edx
|
||||
test %edx, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqb (%eax), %xmm3
|
||||
pmovmskb %xmm3, %edx
|
||||
test %edx, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
#ifdef USE_AS_STRNLEN
|
||||
sub $64, %edi
|
||||
jbe L(len_less64)
|
||||
#endif
|
||||
|
||||
pcmpeqb (%eax), %xmm0
|
||||
pmovmskb %xmm0, %edx
|
||||
test %edx, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqb (%eax), %xmm1
|
||||
pmovmskb %xmm1, %edx
|
||||
test %edx, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqb (%eax), %xmm2
|
||||
pmovmskb %xmm2, %edx
|
||||
test %edx, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqb (%eax), %xmm3
|
||||
pmovmskb %xmm3, %edx
|
||||
test %edx, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
#ifdef USE_AS_STRNLEN
|
||||
mov %eax, %edx
|
||||
and $63, %edx
|
||||
add %edx, %edi
|
||||
#endif
|
||||
|
||||
and $-0x40, %eax
|
||||
PUSH (%esi)
|
||||
PUSH (%edi)
|
||||
PUSH (%ebx)
|
||||
PUSH (%ebp)
|
||||
xor %ebp, %ebp
|
||||
L(aligned_64):
|
||||
pcmpeqb (%eax), %xmm0
|
||||
pcmpeqb 16(%eax), %xmm1
|
||||
pcmpeqb 32(%eax), %xmm2
|
||||
pcmpeqb 48(%eax), %xmm3
|
||||
pmovmskb %xmm0, %edx
|
||||
pmovmskb %xmm1, %esi
|
||||
pmovmskb %xmm2, %edi
|
||||
pmovmskb %xmm3, %ebx
|
||||
or %edx, %ebp
|
||||
or %esi, %ebp
|
||||
or %edi, %ebp
|
||||
or %ebx, %ebp
|
||||
|
||||
.p2align 4
|
||||
L(aligned_64_loop):
|
||||
#ifdef USE_AS_STRNLEN
|
||||
sub $64, %edi
|
||||
jbe L(len_less64)
|
||||
#endif
|
||||
movaps (%eax), %xmm0
|
||||
movaps 16(%eax), %xmm1
|
||||
movaps 32(%eax), %xmm2
|
||||
movaps 48(%eax), %xmm6
|
||||
pminub %xmm1, %xmm0
|
||||
pminub %xmm6, %xmm2
|
||||
pminub %xmm0, %xmm2
|
||||
pcmpeqb %xmm3, %xmm2
|
||||
pmovmskb %xmm2, %edx
|
||||
lea 64(%eax), %eax
|
||||
jz L(aligned_64)
|
||||
L(48leave):
|
||||
test %edx, %edx
|
||||
jnz L(aligned_64_exit_16)
|
||||
test %esi, %esi
|
||||
jnz L(aligned_64_exit_32)
|
||||
test %edi, %edi
|
||||
jnz L(aligned_64_exit_48)
|
||||
mov %ebx, %edx
|
||||
lea (%eax), %eax
|
||||
jmp L(aligned_64_exit)
|
||||
L(aligned_64_exit_48):
|
||||
lea -16(%eax), %eax
|
||||
mov %edi, %edx
|
||||
jmp L(aligned_64_exit)
|
||||
L(aligned_64_exit_32):
|
||||
lea -32(%eax), %eax
|
||||
mov %esi, %edx
|
||||
jmp L(aligned_64_exit)
|
||||
L(aligned_64_exit_16):
|
||||
lea -48(%eax), %eax
|
||||
L(aligned_64_exit):
|
||||
POP (%ebp)
|
||||
POP (%ebx)
|
||||
POP (%edi)
|
||||
POP (%esi)
|
||||
jz L(aligned_64_loop)
|
||||
|
||||
pcmpeqb -64(%eax), %xmm3
|
||||
pmovmskb %xmm3, %edx
|
||||
lea 48(%ecx), %ecx
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqb %xmm1, %xmm3
|
||||
pmovmskb %xmm3, %edx
|
||||
lea -16(%ecx), %ecx
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqb -32(%eax), %xmm3
|
||||
pmovmskb %xmm3, %edx
|
||||
lea -16(%ecx), %ecx
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqb %xmm6, %xmm3
|
||||
pmovmskb %xmm3, %edx
|
||||
lea -16(%ecx), %ecx
|
||||
L(exit):
|
||||
sub %ecx, %eax
|
||||
test %dl, %dl
|
||||
jz L(exit_high)
|
||||
|
||||
mov %dl, %cl
|
||||
and $15, %cl
|
||||
jz L(exit_8)
|
||||
test $0x01, %dl
|
||||
jnz L(exit_tail0)
|
||||
|
||||
test $0x02, %dl
|
||||
jnz L(exit_tail1)
|
||||
|
||||
test $0x04, %dl
|
||||
jnz L(exit_tail2)
|
||||
add $3, %eax
|
||||
RETURN
|
||||
|
||||
test $0x08, %dl
|
||||
jnz L(exit_tail3)
|
||||
|
||||
.p2align 4
|
||||
L(exit_8):
|
||||
test $0x10, %dl
|
||||
jnz L(exit_tail4)
|
||||
|
||||
test $0x20, %dl
|
||||
jnz L(exit_tail5)
|
||||
|
||||
test $0x40, %dl
|
||||
jnz L(exit_tail6)
|
||||
add $7, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_high):
|
||||
mov %dh, %ch
|
||||
and $15, %ch
|
||||
jz L(exit_high_8)
|
||||
test $0x01, %dh
|
||||
jnz L(exit_tail8)
|
||||
test $0x02, %dh
|
||||
jnz L(exit_tail9)
|
||||
test $0x04, %dh
|
||||
jnz L(exit_tail10)
|
||||
add $11, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_high_8):
|
||||
test $0x10, %dh
|
||||
jnz L(exit_tail12)
|
||||
test $0x20, %dh
|
||||
jnz L(exit_tail13)
|
||||
test $0x40, %dh
|
||||
jnz L(exit_tail14)
|
||||
add $15, %eax
|
||||
L(exit_tail0):
|
||||
RETURN
|
||||
|
||||
L(exit_high):
|
||||
add $8, %eax
|
||||
test $0x01, %dh
|
||||
jnz L(exit_tail0)
|
||||
#ifdef USE_AS_STRNLEN
|
||||
|
||||
test $0x02, %dh
|
||||
jnz L(exit_tail1)
|
||||
.p2align 4
|
||||
L(len_less64):
|
||||
pxor %xmm0, %xmm0
|
||||
add $64, %edi
|
||||
|
||||
test $0x04, %dh
|
||||
jnz L(exit_tail2)
|
||||
pcmpeqb (%eax), %xmm0
|
||||
pmovmskb %xmm0, %edx
|
||||
pxor %xmm1, %xmm1
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(strnlen_exit)
|
||||
|
||||
test $0x08, %dh
|
||||
jnz L(exit_tail3)
|
||||
sub $16, %edi
|
||||
jbe L(return_start_len)
|
||||
|
||||
test $0x10, %dh
|
||||
jnz L(exit_tail4)
|
||||
pcmpeqb (%eax), %xmm1
|
||||
pmovmskb %xmm1, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(strnlen_exit)
|
||||
|
||||
test $0x20, %dh
|
||||
jnz L(exit_tail5)
|
||||
sub $16, %edi
|
||||
jbe L(return_start_len)
|
||||
|
||||
test $0x40, %dh
|
||||
jnz L(exit_tail6)
|
||||
add $7, %eax
|
||||
pcmpeqb (%eax), %xmm0
|
||||
pmovmskb %xmm0, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(strnlen_exit)
|
||||
|
||||
sub $16, %edi
|
||||
jbe L(return_start_len)
|
||||
|
||||
pcmpeqb (%eax), %xmm1
|
||||
pmovmskb %xmm1, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(strnlen_exit)
|
||||
|
||||
#ifndef USE_AS_STRLCAT
|
||||
movl LEN(%esp), %eax
|
||||
RETURN
|
||||
#else
|
||||
jmp L(return_start_len)
|
||||
#endif
|
||||
|
||||
.p2align 4
|
||||
L(strnlen_exit):
|
||||
sub %ecx, %eax
|
||||
|
||||
test %dl, %dl
|
||||
jz L(strnlen_exit_high)
|
||||
mov %dl, %cl
|
||||
and $15, %cl
|
||||
jz L(strnlen_exit_8)
|
||||
test $0x01, %dl
|
||||
jnz L(exit_tail0)
|
||||
test $0x02, %dl
|
||||
jnz L(strnlen_exit_tail1)
|
||||
test $0x04, %dl
|
||||
jnz L(strnlen_exit_tail2)
|
||||
sub $4, %edi
|
||||
jb L(return_start_len)
|
||||
lea 3(%eax), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(strnlen_exit_8):
|
||||
test $0x10, %dl
|
||||
jnz L(strnlen_exit_tail4)
|
||||
test $0x20, %dl
|
||||
jnz L(strnlen_exit_tail5)
|
||||
test $0x40, %dl
|
||||
jnz L(strnlen_exit_tail6)
|
||||
sub $8, %edi
|
||||
jb L(return_start_len)
|
||||
lea 7(%eax), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(strnlen_exit_high):
|
||||
mov %dh, %ch
|
||||
and $15, %ch
|
||||
jz L(strnlen_exit_high_8)
|
||||
test $0x01, %dh
|
||||
jnz L(strnlen_exit_tail8)
|
||||
test $0x02, %dh
|
||||
jnz L(strnlen_exit_tail9)
|
||||
test $0x04, %dh
|
||||
jnz L(strnlen_exit_tail10)
|
||||
sub $12, %edi
|
||||
jb L(return_start_len)
|
||||
lea 11(%eax), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(strnlen_exit_high_8):
|
||||
test $0x10, %dh
|
||||
jnz L(strnlen_exit_tail12)
|
||||
test $0x20, %dh
|
||||
jnz L(strnlen_exit_tail13)
|
||||
test $0x40, %dh
|
||||
jnz L(strnlen_exit_tail14)
|
||||
sub $16, %edi
|
||||
jb L(return_start_len)
|
||||
lea 15(%eax), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(strnlen_exit_tail1):
|
||||
sub $2, %edi
|
||||
jb L(return_start_len)
|
||||
lea 1(%eax), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(strnlen_exit_tail2):
|
||||
sub $3, %edi
|
||||
jb L(return_start_len)
|
||||
lea 2(%eax), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(strnlen_exit_tail4):
|
||||
sub $5, %edi
|
||||
jb L(return_start_len)
|
||||
lea 4(%eax), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(strnlen_exit_tail5):
|
||||
sub $6, %edi
|
||||
jb L(return_start_len)
|
||||
lea 5(%eax), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(strnlen_exit_tail6):
|
||||
sub $7, %edi
|
||||
jb L(return_start_len)
|
||||
lea 6(%eax), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(strnlen_exit_tail8):
|
||||
sub $9, %edi
|
||||
jb L(return_start_len)
|
||||
lea 8(%eax), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(strnlen_exit_tail9):
|
||||
sub $10, %edi
|
||||
jb L(return_start_len)
|
||||
lea 9(%eax), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(strnlen_exit_tail10):
|
||||
sub $11, %edi
|
||||
jb L(return_start_len)
|
||||
lea 10(%eax), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(strnlen_exit_tail12):
|
||||
sub $13, %edi
|
||||
jb L(return_start_len)
|
||||
lea 12(%eax), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(strnlen_exit_tail13):
|
||||
sub $14, %edi
|
||||
jb L(return_start_len)
|
||||
lea 13(%eax), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(strnlen_exit_tail14):
|
||||
sub $15, %edi
|
||||
jb L(return_start_len)
|
||||
lea 14(%eax), %eax
|
||||
RETURN
|
||||
|
||||
#ifndef USE_AS_STRLCAT
|
||||
.p2align 4
|
||||
L(return_start_len):
|
||||
movl LEN(%esp), %eax
|
||||
RETURN
|
||||
#endif
|
||||
|
||||
/* for prolog only */
|
||||
|
||||
.p2align 4
|
||||
L(len_less4_prolog):
|
||||
xor %eax, %eax
|
||||
|
||||
add $4, %edi
|
||||
jz L(exit_tail0)
|
||||
|
||||
cmpb $0, (%edx)
|
||||
jz L(exit_tail0)
|
||||
cmp $1, %edi
|
||||
je L(exit_tail1)
|
||||
|
||||
cmpb $0, 1(%edx)
|
||||
jz L(exit_tail1)
|
||||
cmp $2, %edi
|
||||
je L(exit_tail2)
|
||||
|
||||
cmpb $0, 2(%edx)
|
||||
jz L(exit_tail2)
|
||||
cmp $3, %edi
|
||||
je L(exit_tail3)
|
||||
|
||||
cmpb $0, 3(%edx)
|
||||
jz L(exit_tail3)
|
||||
mov %edi, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(len_less8_prolog):
|
||||
add $4, %edi
|
||||
|
||||
cmpb $0, 4(%edx)
|
||||
jz L(exit_tail4)
|
||||
cmp $1, %edi
|
||||
je L(exit_tail5)
|
||||
|
||||
cmpb $0, 5(%edx)
|
||||
jz L(exit_tail5)
|
||||
cmp $2, %edi
|
||||
je L(exit_tail6)
|
||||
|
||||
cmpb $0, 6(%edx)
|
||||
jz L(exit_tail6)
|
||||
cmp $3, %edi
|
||||
je L(exit_tail7)
|
||||
|
||||
cmpb $0, 7(%edx)
|
||||
jz L(exit_tail7)
|
||||
mov $8, %eax
|
||||
RETURN
|
||||
|
||||
|
||||
.p2align 4
|
||||
L(len_less12_prolog):
|
||||
add $4, %edi
|
||||
|
||||
cmpb $0, 8(%edx)
|
||||
jz L(exit_tail8)
|
||||
cmp $1, %edi
|
||||
je L(exit_tail9)
|
||||
|
||||
cmpb $0, 9(%edx)
|
||||
jz L(exit_tail9)
|
||||
cmp $2, %edi
|
||||
je L(exit_tail10)
|
||||
|
||||
cmpb $0, 10(%edx)
|
||||
jz L(exit_tail10)
|
||||
cmp $3, %edi
|
||||
je L(exit_tail11)
|
||||
|
||||
cmpb $0, 11(%edx)
|
||||
jz L(exit_tail11)
|
||||
mov $12, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(len_less16_prolog):
|
||||
add $4, %edi
|
||||
|
||||
cmpb $0, 12(%edx)
|
||||
jz L(exit_tail12)
|
||||
cmp $1, %edi
|
||||
je L(exit_tail13)
|
||||
|
||||
cmpb $0, 13(%edx)
|
||||
jz L(exit_tail13)
|
||||
cmp $2, %edi
|
||||
je L(exit_tail14)
|
||||
|
||||
cmpb $0, 14(%edx)
|
||||
jz L(exit_tail14)
|
||||
cmp $3, %edi
|
||||
je L(exit_tail15)
|
||||
|
||||
cmpb $0, 15(%edx)
|
||||
jz L(exit_tail15)
|
||||
mov $16, %eax
|
||||
RETURN
|
||||
#endif
|
||||
|
||||
.p2align 4
|
||||
L(exit_tail1):
|
||||
|
@ -364,6 +743,7 @@ L(exit_tail14):
|
|||
|
||||
L(exit_tail15):
|
||||
add $15, %eax
|
||||
ret
|
||||
|
||||
#ifndef USE_AS_STRCAT
|
||||
RETURN
|
||||
END (STRLEN)
|
||||
#endif
|
||||
|
|
33
libc/arch-x86/string/sse2-strnlen-atom.S
Normal file
33
libc/arch-x86/string/sse2-strnlen-atom.S
Normal file
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Thin wrapper: set USE_AS_STRNLEN and rename the STRLEN entry point to
   strnlen, then include the shared sse2-strlen-atom.S source so that its
   USE_AS_STRNLEN-conditional code is assembled under the name strnlen.  */
#define USE_AS_STRNLEN 1
|
||||
#define STRLEN strnlen
|
||||
#include "sse2-strlen-atom.S"
|
753
libc/arch-x86/string/sse2-strrchr-atom.S
Normal file
753
libc/arch-x86/string/sse2-strrchr-atom.S
Normal file
|
@ -0,0 +1,753 @@
|
|||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Helper macros for this file.  The ones wrapped in #ifndef are fallbacks
   that are only defined when nothing earlier has already defined them.  */
/* L(label): paste ".L" in front of the given label name.  */
#ifndef L
|
||||
# define L(label) .L##label
|
||||
#endif
|
||||
|
||||
/* cfi_*: thin wrappers that expand to the assembler's .cfi_* directives.  */
#ifndef cfi_startproc
|
||||
# define cfi_startproc .cfi_startproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_endproc
|
||||
# define cfi_endproc .cfi_endproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_rel_offset
|
||||
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
|
||||
#endif
|
||||
|
||||
#ifndef cfi_restore
|
||||
# define cfi_restore(reg) .cfi_restore reg
|
||||
#endif
|
||||
|
||||
#ifndef cfi_adjust_cfa_offset
|
||||
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
|
||||
#endif
|
||||
|
||||
/* ENTRY(name): declare NAME as a global function symbol, align it to
   16 bytes, emit its label and open a CFI region.  */
#ifndef ENTRY
|
||||
# define ENTRY(name) \
|
||||
.type name, @function; \
|
||||
.globl name; \
|
||||
.p2align 4; \
|
||||
name: \
|
||||
cfi_startproc
|
||||
#endif
|
||||
|
||||
/* END(name): close the CFI region and record the size of NAME.  */
#ifndef END
|
||||
# define END(name) \
|
||||
cfi_endproc; \
|
||||
.size name, .-name
|
||||
#endif
|
||||
|
||||
/* CFI_PUSH / CFI_POP: unwind annotations matching a 4-byte push / pop of
   REG.  They emit directives only, no instructions.  */
#define CFI_PUSH(REG) \
|
||||
cfi_adjust_cfa_offset (4); \
|
||||
cfi_rel_offset (REG, 0)
|
||||
|
||||
#define CFI_POP(REG) \
|
||||
cfi_adjust_cfa_offset (-4); \
|
||||
cfi_restore (REG)
|
||||
|
||||
/* PUSH / POP: the real pushl / popl together with its unwind annotation.  */
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
||||
#define POP(REG) popl REG; CFI_POP (REG)
|
||||
|
||||
/* PARMS is the %esp-relative offset of the first argument once ENTRANCE
   has pushed %edi: 4 bytes of saved %edi plus the 4-byte return address.
   RETURN restores %edi and returns; its trailing CFI_PUSH re-declares %edi
   as saved for whatever code follows the macro in the file.  */
#define PARMS 8
|
||||
#define ENTRANCE PUSH(%edi);
|
||||
#define RETURN POP (%edi); ret; CFI_PUSH (%edi);
|
||||
|
||||
/* Stack offsets of the two arguments: STR1 = string pointer, STR2 = the
   value to search for (as they are loaded at function entry).  */
#define STR1 PARMS
|
||||
#define STR2 STR1+4
|
||||
|
||||
.text
|
||||
/* strrchr (x86-32, SSE2): find the last occurrence of a byte in a
   NUL-terminated string.  The string pointer is loaded from STR1(%esp) and
   the byte to look for from STR2(%esp).  The result is left in %eax: the
   address of the last matching byte, or 0 if there is no match up to and
   including the terminating NUL.

   Register roles in the code below:
     %edi   scan pointer; in the main loop it points 16 bytes past the
            start of the chunk whose masks are being examined.
     %xmm1  the search byte replicated into all 16 byte lanes.
     %xmm2  all-zero reference for NUL detection.  Because the NUL compare
            writes its result back into %xmm2, the register stays zero
            exactly as long as no NUL has been seen; every path that
            continues scanning does so only when its NUL mask was zero.
     %eax   match bitmask of the current chunk (bit i = byte i matches).
     %ecx   NUL bitmask of the current chunk (in the main loop it is
            temporarily OR-ed with %eax for the exit test).
     %ebx   match bitmask of the most recent chunk that contained a match
            but no NUL (0 while no such chunk has been seen).
     %esi   the %edi value that belongs to %ebx.
   %esi and %ebx are pushed only on the paths that enter the main loop.

   NOTE(review): the immediates written as "1 << N - 1" are meant as
   (1 << N) - 1, i.e. the low N bits.  That relies on the assembler giving
   shifts a higher precedence than subtraction, as GNU as documents;
   confirm before assembling with a different tool.  */
ENTRY (strrchr)
|
||||
|
||||
ENTRANCE
|
||||
mov STR1(%esp), %ecx
|
||||
movd STR2(%esp), %xmm1
|
||||
|
||||
pxor %xmm2, %xmm2
|
||||
mov %ecx, %edi
|
||||
/* Two byte-unpacks plus the pshufd below copy the low byte of %xmm1 into
   all 16 lanes.  */
punpcklbw %xmm1, %xmm1
|
||||
punpcklbw %xmm1, %xmm1
|
||||
/* ECX has OFFSET. */
|
||||
and $63, %ecx
|
||||
pshufd $0, %xmm1, %xmm1
|
||||
/* With an offset above 48 inside a 64-byte block, a 16-byte load from the
   string start would run past that block; use the aligned path instead.  */
cmp $48, %ecx
|
||||
ja L(crosscache)
|
||||
|
||||
/* unaligned string. */
|
||||
movdqu (%edi), %xmm0
|
||||
pcmpeqb %xmm0, %xmm2
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
/* Find where NULL is. */
|
||||
pmovmskb %xmm2, %ecx
|
||||
/* Check if there is a match. */
|
||||
pmovmskb %xmm0, %eax
|
||||
add $16, %edi
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(unaligned_match1)
|
||||
|
||||
/* No match in the first 16 bytes; a NUL among them means no match at all.  */
test %ecx, %ecx
|
||||
jnz L(return_null)
|
||||
|
||||
/* Round the advanced pointer down to a 16-byte boundary so the loop can
   use aligned loads.  */
and $-16, %edi
|
||||
|
||||
PUSH (%esi)
|
||||
PUSH (%ebx)
|
||||
|
||||
/* No match recorded yet.  */
xor %ebx, %ebx
|
||||
jmp L(loop)
|
||||
|
||||
/* Unwind annotations only: the label below is reached from before the two
   pushes above.  */
CFI_POP (%esi)
|
||||
CFI_POP (%ebx)
|
||||
|
||||
.p2align 4
|
||||
/* Match found in the first (unaligned) 16 bytes.  If a NUL is there too,
   resolve inside this chunk; otherwise remember the match and scan on.  */
L(unaligned_match1):
|
||||
test %ecx, %ecx
|
||||
jnz L(prolog_find_zero_1)
|
||||
|
||||
PUSH (%esi)
|
||||
PUSH (%ebx)
|
||||
|
||||
mov %eax, %ebx
|
||||
mov %edi, %esi
|
||||
and $-16, %edi
|
||||
jmp L(loop)
|
||||
|
||||
CFI_POP (%esi)
|
||||
CFI_POP (%ebx)
|
||||
|
||||
.p2align 4
|
||||
L(crosscache):
|
||||
/* Handle unaligned string. */
|
||||
/* %ecx = offset of the string inside its 16-byte chunk; load that whole
   aligned chunk.  The NUL compare goes into %xmm3, so %xmm2 is left
   untouched by bytes that precede the string.  */
and $15, %ecx
|
||||
and $-16, %edi
|
||||
pxor %xmm3, %xmm3
|
||||
movdqa (%edi), %xmm0
|
||||
pcmpeqb %xmm0, %xmm3
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
/* Find where NULL is. */
|
||||
pmovmskb %xmm3, %edx
|
||||
/* Check if there is a match. */
|
||||
pmovmskb %xmm0, %eax
|
||||
/* Remove the leading bytes. */
|
||||
shr %cl, %edx
|
||||
shr %cl, %eax
|
||||
add $16, %edi
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(unaligned_match)
|
||||
|
||||
test %edx, %edx
|
||||
jnz L(return_null)
|
||||
|
||||
PUSH (%esi)
|
||||
PUSH (%ebx)
|
||||
|
||||
xor %ebx, %ebx
|
||||
jmp L(loop)
|
||||
|
||||
CFI_POP (%esi)
|
||||
CFI_POP (%ebx)
|
||||
|
||||
.p2align 4
|
||||
/* Match in the first chunk of the aligned-load path (%edx = NUL mask).  */
L(unaligned_match):
|
||||
test %edx, %edx
|
||||
jnz L(prolog_find_zero)
|
||||
|
||||
PUSH (%esi)
|
||||
PUSH (%ebx)
|
||||
|
||||
/* Record the match.  The mask was shifted right by %cl, so the recorded
   pointer is advanced by the same offset: bit i of %ebx then corresponds
   to the byte at %esi - 16 + i.  Execution falls through into the loop.  */
mov %eax, %ebx
|
||||
lea (%edi, %ecx), %esi
|
||||
|
||||
/* Loop start on aligned string. */
|
||||
.p2align 4
|
||||
/* Unrolled four times.  Each step compares one aligned 16-byte chunk with
   zero (result in %xmm2) and with the search byte (result in %xmm0) and
   leaves the loop as soon as either mask is non-zero.  */
L(loop):
|
||||
movdqa (%edi), %xmm0
|
||||
pcmpeqb %xmm0, %xmm2
|
||||
add $16, %edi
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pmovmskb %xmm2, %ecx
|
||||
pmovmskb %xmm0, %eax
|
||||
or %eax, %ecx
|
||||
jnz L(matches)
|
||||
|
||||
movdqa (%edi), %xmm0
|
||||
pcmpeqb %xmm0, %xmm2
|
||||
add $16, %edi
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pmovmskb %xmm2, %ecx
|
||||
pmovmskb %xmm0, %eax
|
||||
or %eax, %ecx
|
||||
jnz L(matches)
|
||||
|
||||
movdqa (%edi), %xmm0
|
||||
pcmpeqb %xmm0, %xmm2
|
||||
add $16, %edi
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pmovmskb %xmm2, %ecx
|
||||
pmovmskb %xmm0, %eax
|
||||
or %eax, %ecx
|
||||
jnz L(matches)
|
||||
|
||||
movdqa (%edi), %xmm0
|
||||
pcmpeqb %xmm0, %xmm2
|
||||
add $16, %edi
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pmovmskb %xmm2, %ecx
|
||||
pmovmskb %xmm0, %eax
|
||||
or %eax, %ecx
|
||||
jz L(loop)
|
||||
|
||||
/* The chunk just examined holds a match and/or a NUL; %eax is still the
   match mask.  With no match in it, a NUL must have been found, so control
   falls into L(return_value).  */
L(matches):
|
||||
test %eax, %eax
|
||||
jnz L(match)
|
||||
/* End of string with no usable match in the final chunk: fall back to the
   recorded match (%ebx / %esi), or return 0 when there is none.  */
L(return_value):
|
||||
test %ebx, %ebx
|
||||
jz L(return_null_1)
|
||||
mov %ebx, %eax
|
||||
mov %esi, %edi
|
||||
|
||||
POP (%ebx)
|
||||
POP (%esi)
|
||||
|
||||
jmp L(match_case1)
|
||||
|
||||
/* Unwind annotations only: the code below still has %ebx/%esi pushed.  */
CFI_PUSH (%ebx)
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
L(return_null_1):
|
||||
POP (%ebx)
|
||||
POP (%esi)
|
||||
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
CFI_PUSH (%ebx)
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
/* The chunk contains a match.  If it also contains a NUL, work out which
   matches lie at or before it; otherwise record this chunk as the latest
   match and keep scanning.  */
L(match):
|
||||
pmovmskb %xmm2, %ecx
|
||||
test %ecx, %ecx
|
||||
jnz L(find_zero)
|
||||
mov %eax, %ebx
|
||||
mov %edi, %esi
|
||||
jmp L(loop)
|
||||
|
||||
.p2align 4
|
||||
/* %ecx = non-zero NUL mask, %eax = match mask of the same chunk.  Locate
   the lowest set bit of %ecx (first NUL at byte index k) one nibble at a
   time, then keep only match bits 0..k.  If none survive, use the recorded
   match via L(return_value); otherwise drop the saved registers and let
   L(match_case1) pick the highest surviving bit.  The first three bits of
   each nibble branch to a FindZeroExitN label; the fourth is handled
   inline by the fall-through code.  */
L(find_zero):
|
||||
test %cl, %cl
|
||||
jz L(find_zero_high)
|
||||
mov %cl, %dl
|
||||
and $15, %dl
|
||||
jz L(find_zero_8)
|
||||
test $0x01, %cl
|
||||
jnz L(FindZeroExit1)
|
||||
test $0x02, %cl
|
||||
jnz L(FindZeroExit2)
|
||||
test $0x04, %cl
|
||||
jnz L(FindZeroExit3)
|
||||
and $1 << 4 - 1, %eax
|
||||
jz L(return_value)
|
||||
|
||||
POP (%ebx)
|
||||
POP (%esi)
|
||||
jmp L(match_case1)
|
||||
|
||||
CFI_PUSH (%ebx)
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
/* First NUL is in bytes 4..7.  */
L(find_zero_8):
|
||||
test $0x10, %cl
|
||||
jnz L(FindZeroExit5)
|
||||
test $0x20, %cl
|
||||
jnz L(FindZeroExit6)
|
||||
test $0x40, %cl
|
||||
jnz L(FindZeroExit7)
|
||||
and $1 << 8 - 1, %eax
|
||||
jz L(return_value)
|
||||
|
||||
POP (%ebx)
|
||||
POP (%esi)
|
||||
jmp L(match_case1)
|
||||
|
||||
CFI_PUSH (%ebx)
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
/* Low byte of the NUL mask is zero: first NUL is in bytes 8..15.  */
L(find_zero_high):
|
||||
mov %ch, %dh
|
||||
and $15, %dh
|
||||
jz L(find_zero_high_8)
|
||||
test $0x01, %ch
|
||||
jnz L(FindZeroExit9)
|
||||
test $0x02, %ch
|
||||
jnz L(FindZeroExit10)
|
||||
test $0x04, %ch
|
||||
jnz L(FindZeroExit11)
|
||||
and $1 << 12 - 1, %eax
|
||||
jz L(return_value)
|
||||
|
||||
POP (%ebx)
|
||||
POP (%esi)
|
||||
jmp L(match_case1)
|
||||
|
||||
CFI_PUSH (%ebx)
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
/* First NUL is in bytes 12..15.  */
L(find_zero_high_8):
|
||||
test $0x10, %ch
|
||||
jnz L(FindZeroExit13)
|
||||
test $0x20, %ch
|
||||
jnz L(FindZeroExit14)
|
||||
test $0x40, %ch
|
||||
jnz L(FindZeroExit15)
|
||||
and $1 << 16 - 1, %eax
|
||||
jz L(return_value)
|
||||
|
||||
POP (%ebx)
|
||||
POP (%esi)
|
||||
jmp L(match_case1)
|
||||
|
||||
CFI_PUSH (%ebx)
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
/* FindZeroExitN: the first NUL is byte N-1 of the chunk.  Keep the low N
   match bits, then either fall back to the recorded match or resolve the
   position inside this chunk.  */
L(FindZeroExit1):
|
||||
and $1, %eax
|
||||
jz L(return_value)
|
||||
|
||||
POP (%ebx)
|
||||
POP (%esi)
|
||||
jmp L(match_case1)
|
||||
|
||||
CFI_PUSH (%ebx)
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
L(FindZeroExit2):
|
||||
and $1 << 2 - 1, %eax
|
||||
jz L(return_value)
|
||||
|
||||
POP (%ebx)
|
||||
POP (%esi)
|
||||
jmp L(match_case1)
|
||||
|
||||
CFI_PUSH (%ebx)
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
L(FindZeroExit3):
|
||||
and $1 << 3 - 1, %eax
|
||||
jz L(return_value)
|
||||
|
||||
POP (%ebx)
|
||||
POP (%esi)
|
||||
jmp L(match_case1)
|
||||
|
||||
CFI_PUSH (%ebx)
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
L(FindZeroExit5):
|
||||
and $1 << 5 - 1, %eax
|
||||
jz L(return_value)
|
||||
|
||||
POP (%ebx)
|
||||
POP (%esi)
|
||||
jmp L(match_case1)
|
||||
|
||||
CFI_PUSH (%ebx)
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
L(FindZeroExit6):
|
||||
and $1 << 6 - 1, %eax
|
||||
jz L(return_value)
|
||||
|
||||
POP (%ebx)
|
||||
POP (%esi)
|
||||
jmp L(match_case1)
|
||||
|
||||
CFI_PUSH (%ebx)
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
L(FindZeroExit7):
|
||||
and $1 << 7 - 1, %eax
|
||||
jz L(return_value)
|
||||
|
||||
POP (%ebx)
|
||||
POP (%esi)
|
||||
jmp L(match_case1)
|
||||
|
||||
CFI_PUSH (%ebx)
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
L(FindZeroExit9):
|
||||
and $1 << 9 - 1, %eax
|
||||
jz L(return_value)
|
||||
|
||||
POP (%ebx)
|
||||
POP (%esi)
|
||||
jmp L(match_case1)
|
||||
|
||||
CFI_PUSH (%ebx)
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
L(FindZeroExit10):
|
||||
and $1 << 10 - 1, %eax
|
||||
jz L(return_value)
|
||||
|
||||
POP (%ebx)
|
||||
POP (%esi)
|
||||
jmp L(match_case1)
|
||||
|
||||
CFI_PUSH (%ebx)
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
L(FindZeroExit11):
|
||||
and $1 << 11 - 1, %eax
|
||||
jz L(return_value)
|
||||
|
||||
POP (%ebx)
|
||||
POP (%esi)
|
||||
jmp L(match_case1)
|
||||
|
||||
CFI_PUSH (%ebx)
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
L(FindZeroExit13):
|
||||
and $1 << 13 - 1, %eax
|
||||
jz L(return_value)
|
||||
|
||||
POP (%ebx)
|
||||
POP (%esi)
|
||||
jmp L(match_case1)
|
||||
|
||||
CFI_PUSH (%ebx)
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
L(FindZeroExit14):
|
||||
and $1 << 14 - 1, %eax
|
||||
jz L(return_value)
|
||||
|
||||
POP (%ebx)
|
||||
POP (%esi)
|
||||
jmp L(match_case1)
|
||||
|
||||
CFI_PUSH (%ebx)
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
/* Unlike the labels above, this one ends without a jmp: after the pops it
   falls straight through into L(match_case1).  */
L(FindZeroExit15):
|
||||
and $1 << 15 - 1, %eax
|
||||
jz L(return_value)
|
||||
|
||||
POP (%ebx)
|
||||
POP (%esi)
|
||||
|
||||
.p2align 4
|
||||
/* %eax = non-zero 16-bit match mask, %edi = 16 bytes past the start of
   the chunk it describes (only %edi is still pushed here).  Return the
   address of the highest-numbered matching byte: pick the byte (%ah or
   %al), then the nibble, then the bit, testing from the top down.  The
   lowest bit of each nibble is the fall-through case handled by the
   inline lea.  */
L(match_case1):
|
||||
test %ah, %ah
|
||||
jnz L(match_case1_high)
|
||||
mov %al, %dl
|
||||
and $15 << 4, %dl
|
||||
jnz L(match_case1_8)
|
||||
test $0x08, %al
|
||||
jnz L(Exit4)
|
||||
test $0x04, %al
|
||||
jnz L(Exit3)
|
||||
test $0x02, %al
|
||||
jnz L(Exit2)
|
||||
lea -16(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
/* Highest match is in bytes 4..7.  */
L(match_case1_8):
|
||||
test $0x80, %al
|
||||
jnz L(Exit8)
|
||||
test $0x40, %al
|
||||
jnz L(Exit7)
|
||||
test $0x20, %al
|
||||
jnz L(Exit6)
|
||||
lea -12(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
/* Highest match is in bytes 8..15.  */
L(match_case1_high):
|
||||
mov %ah, %dh
|
||||
and $15 << 4, %dh
|
||||
jnz L(match_case1_high_8)
|
||||
test $0x08, %ah
|
||||
jnz L(Exit12)
|
||||
test $0x04, %ah
|
||||
jnz L(Exit11)
|
||||
test $0x02, %ah
|
||||
jnz L(Exit10)
|
||||
lea -8(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
/* Highest match is in bytes 12..15.  */
L(match_case1_high_8):
|
||||
test $0x80, %ah
|
||||
jnz L(Exit16)
|
||||
test $0x40, %ah
|
||||
jnz L(Exit15)
|
||||
test $0x20, %ah
|
||||
jnz L(Exit14)
|
||||
lea -4(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
/* ExitN: the selected match is byte N-1 of the chunk, i.e. at address
   %edi - (17 - N).  */
L(Exit2):
|
||||
lea -15(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit3):
|
||||
lea -14(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit4):
|
||||
lea -13(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit6):
|
||||
lea -11(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit7):
|
||||
lea -10(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit8):
|
||||
lea -9(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit10):
|
||||
lea -7(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit11):
|
||||
lea -6(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit12):
|
||||
lea -5(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit14):
|
||||
lea -3(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit15):
|
||||
lea -2(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(Exit16):
|
||||
lea -1(%edi), %eax
|
||||
RETURN
|
||||
|
||||
/* Return NULL. */
|
||||
.p2align 4
|
||||
L(return_null):
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
/* First-chunk counterparts of L(find_zero).  They are reached before
   %esi/%ebx have been pushed, so there is no earlier match to fall back
   on: when no match bit survives the masking, 0 is returned directly.
   L(prolog_find_zero) comes from the aligned-load path; it first advances
   %edi by the string's offset (the masks were shifted by the same amount)
   and moves the NUL mask from %edx into %ecx.  L(prolog_find_zero_1) is
   entered with the NUL mask already in %ecx.  */
L(prolog_find_zero):
|
||||
add %ecx, %edi
|
||||
mov %edx, %ecx
|
||||
L(prolog_find_zero_1):
|
||||
test %cl, %cl
|
||||
jz L(prolog_find_zero_high)
|
||||
mov %cl, %dl
|
||||
and $15, %dl
|
||||
jz L(prolog_find_zero_8)
|
||||
test $0x01, %cl
|
||||
jnz L(PrologFindZeroExit1)
|
||||
test $0x02, %cl
|
||||
jnz L(PrologFindZeroExit2)
|
||||
test $0x04, %cl
|
||||
jnz L(PrologFindZeroExit3)
|
||||
and $1 << 4 - 1, %eax
|
||||
jnz L(match_case1)
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(prolog_find_zero_8):
|
||||
test $0x10, %cl
|
||||
jnz L(PrologFindZeroExit5)
|
||||
test $0x20, %cl
|
||||
jnz L(PrologFindZeroExit6)
|
||||
test $0x40, %cl
|
||||
jnz L(PrologFindZeroExit7)
|
||||
and $1 << 8 - 1, %eax
|
||||
jnz L(match_case1)
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(prolog_find_zero_high):
|
||||
mov %ch, %dh
|
||||
and $15, %dh
|
||||
jz L(prolog_find_zero_high_8)
|
||||
test $0x01, %ch
|
||||
jnz L(PrologFindZeroExit9)
|
||||
test $0x02, %ch
|
||||
jnz L(PrologFindZeroExit10)
|
||||
test $0x04, %ch
|
||||
jnz L(PrologFindZeroExit11)
|
||||
and $1 << 12 - 1, %eax
|
||||
jnz L(match_case1)
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(prolog_find_zero_high_8):
|
||||
test $0x10, %ch
|
||||
jnz L(PrologFindZeroExit13)
|
||||
test $0x20, %ch
|
||||
jnz L(PrologFindZeroExit14)
|
||||
test $0x40, %ch
|
||||
jnz L(PrologFindZeroExit15)
|
||||
and $1 << 16 - 1, %eax
|
||||
jnz L(match_case1)
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
/* PrologFindZeroExitN: the first NUL is byte N-1.  Keep the low N match
   bits; resolve the position if any remain, otherwise return 0.  */
L(PrologFindZeroExit1):
|
||||
and $1, %eax
|
||||
jnz L(match_case1)
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(PrologFindZeroExit2):
|
||||
and $1 << 2 - 1, %eax
|
||||
jnz L(match_case1)
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(PrologFindZeroExit3):
|
||||
and $1 << 3 - 1, %eax
|
||||
jnz L(match_case1)
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(PrologFindZeroExit5):
|
||||
and $1 << 5 - 1, %eax
|
||||
jnz L(match_case1)
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(PrologFindZeroExit6):
|
||||
and $1 << 6 - 1, %eax
|
||||
jnz L(match_case1)
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(PrologFindZeroExit7):
|
||||
and $1 << 7 - 1, %eax
|
||||
jnz L(match_case1)
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(PrologFindZeroExit9):
|
||||
and $1 << 9 - 1, %eax
|
||||
jnz L(match_case1)
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(PrologFindZeroExit10):
|
||||
and $1 << 10 - 1, %eax
|
||||
jnz L(match_case1)
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(PrologFindZeroExit11):
|
||||
and $1 << 11 - 1, %eax
|
||||
jnz L(match_case1)
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(PrologFindZeroExit13):
|
||||
and $1 << 13 - 1, %eax
|
||||
jnz L(match_case1)
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(PrologFindZeroExit14):
|
||||
and $1 << 14 - 1, %eax
|
||||
jnz L(match_case1)
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(PrologFindZeroExit15):
|
||||
and $1 << 15 - 1, %eax
|
||||
jnz L(match_case1)
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
END (strrchr)
|
267
libc/arch-x86/string/sse2-wcschr-atom.S
Normal file
267
libc/arch-x86/string/sse2-wcschr-atom.S
Normal file
|
@ -0,0 +1,267 @@
|
|||
/*
|
||||
Copyright (c) 2011 Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef L
|
||||
# define L(label) .L##label
|
||||
#endif
|
||||
|
||||
#ifndef cfi_startproc
|
||||
# define cfi_startproc .cfi_startproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_endproc
|
||||
# define cfi_endproc .cfi_endproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_rel_offset
|
||||
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
|
||||
#endif
|
||||
|
||||
#ifndef cfi_restore
|
||||
# define cfi_restore(reg) .cfi_restore reg
|
||||
#endif
|
||||
|
||||
#ifndef cfi_adjust_cfa_offset
|
||||
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
|
||||
#endif
|
||||
|
||||
#ifndef ENTRY
|
||||
# define ENTRY(name) \
|
||||
.type name, @function; \
|
||||
.globl name; \
|
||||
.p2align 4; \
|
||||
name: \
|
||||
cfi_startproc
|
||||
#endif
|
||||
|
||||
#ifndef END
|
||||
# define END(name) \
|
||||
cfi_endproc; \
|
||||
.size name, .-name
|
||||
#endif
|
||||
|
||||
#define CFI_PUSH(REG) \
|
||||
cfi_adjust_cfa_offset (4); \
|
||||
cfi_rel_offset (REG, 0)
|
||||
|
||||
#define CFI_POP(REG) \
|
||||
cfi_adjust_cfa_offset (-4); \
|
||||
cfi_restore (REG)
|
||||
|
||||
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
||||
#define POP(REG) popl REG; CFI_POP (REG)
|
||||
|
||||
#define PARMS 4
|
||||
|
||||
|
||||
#define STR1 PARMS
|
||||
#define STR2 STR1+4
|
||||
|
||||
.text
|
||||
ENTRY (wcschr) /* Entry: probe the first 16 bytes of the string, then continue in L(loop). */
|
||||
|
||||
mov STR1(%esp), %ecx /* ecx = first stack argument (STR1 = 4): address to scan */
|
||||
movd STR2(%esp), %xmm1 /* low dword of xmm1 = second stack argument (STR2 = 8): value to find */
|
||||
|
||||
mov %ecx, %eax
|
||||
punpckldq %xmm1, %xmm1 /* first of two unpacks that replicate the low dword ... */
|
||||
pxor %xmm2, %xmm2 /* xmm2 = 0, used to detect zero dwords */
|
||||
punpckldq %xmm1, %xmm1 /* ... into all four dword lanes of xmm1 */
|
||||
|
||||
and $63, %eax /* eax = address modulo 64 */
|
||||
cmp $48, %eax
|
||||
ja L(cross_cache) /* offset > 48: a 16-byte load here would cross a 64-byte boundary */
|
||||
|
||||
movdqu (%ecx), %xmm0 /* unaligned load of the first 16 bytes */
|
||||
pcmpeqd %xmm0, %xmm2 /* xmm2 lanes = all-ones where the dword is zero */
|
||||
pcmpeqd %xmm1, %xmm0 /* xmm0 lanes = all-ones where the dword equals the searched value */
|
||||
pmovmskb %xmm2, %edx /* edx = byte mask of zero dwords (4 bits per dword) */
|
||||
pmovmskb %xmm0, %eax /* eax = byte mask of matching dwords */
|
||||
or %eax, %edx
|
||||
jnz L(matches) /* a zero dword or a match is present in this chunk */
|
||||
and $-16, %ecx /* round down to 16 bytes; L(loop) adds 16 before each aligned load */
|
||||
jmp L(loop)
|
||||
|
||||
.p2align 4
|
||||
L(cross_cache): /* First probe via an aligned load; bytes before the start address are shifted out of the masks. */
|
||||
PUSH (%edi) /* edi is used as scratch and restored before leaving this path */
|
||||
mov %ecx, %edi
|
||||
mov %eax, %ecx /* eax still holds (address & 63) from the entry code */
|
||||
and $-16, %edi /* edi = address rounded down to 16 bytes */
|
||||
and $15, %ecx /* ecx = byte offset of the address inside that 16-byte block */
|
||||
movdqa (%edi), %xmm0
|
||||
pcmpeqd %xmm0, %xmm2 /* zero-dword lanes */
|
||||
pcmpeqd %xmm1, %xmm0 /* matching lanes */
|
||||
pmovmskb %xmm2, %edx
|
||||
pmovmskb %xmm0, %eax
|
||||
|
||||
|
||||
sarl %cl, %edx /* discard mask bits for the bytes that precede the start address */
|
||||
sarl %cl, %eax
|
||||
test %eax, %eax
|
||||
jz L(unaligned_no_match)
|
||||
|
||||
|
||||
add %edi, %ecx /* ecx = aligned base + offset = original start address */
|
||||
POP (%edi)
|
||||
|
||||
|
||||
test %edx, %edx
|
||||
jz L(match_case1) /* match present and no zero dword in the examined bytes */
|
||||
test %al, %al
|
||||
jz L(match_higth_case2) /* no match bits in the low byte of the mask (dwords 0 and 1) */
|
||||
test $15, %al
|
||||
jnz L(match_case2_4) /* match in dword 0 */
|
||||
test $15, %dl
|
||||
jnz L(return_null) /* zero in dword 0 comes before the match in dword 1 */
|
||||
lea 4(%ecx), %eax /* result = address of dword 1 */
|
||||
ret
|
||||
|
||||
CFI_PUSH (%edi)
|
||||
|
||||
.p2align 4
|
||||
L(unaligned_no_match):
|
||||
mov %edi, %ecx
|
||||
POP (%edi)
|
||||
|
||||
test %edx, %edx
|
||||
jnz L(return_null)
|
||||
|
||||
pxor %xmm2, %xmm2
|
||||
|
||||
/* Loop start on aligned string. */
|
||||
.p2align 4
|
||||
L(loop): /* Main scan: four aligned 16-byte steps per iteration. */
|
||||
add $16, %ecx /* advance to the next 16-byte chunk */
|
||||
movdqa (%ecx), %xmm0
|
||||
pcmpeqd %xmm0, %xmm2 /* xmm2 is all-zero whenever a step is reached without branching away */
|
||||
pcmpeqd %xmm1, %xmm0
|
||||
pmovmskb %xmm2, %edx /* zero-dword mask */
|
||||
pmovmskb %xmm0, %eax /* match mask */
|
||||
or %eax, %edx
|
||||
jnz L(matches)
|
||||
add $16, %ecx /* step 2 */
|
||||
|
||||
|
||||
movdqa (%ecx), %xmm0
|
||||
pcmpeqd %xmm0, %xmm2
|
||||
pcmpeqd %xmm1, %xmm0
|
||||
pmovmskb %xmm2, %edx
|
||||
pmovmskb %xmm0, %eax
|
||||
or %eax, %edx
|
||||
jnz L(matches)
|
||||
add $16, %ecx /* step 3 */
|
||||
|
||||
|
||||
movdqa (%ecx), %xmm0
|
||||
pcmpeqd %xmm0, %xmm2
|
||||
pcmpeqd %xmm1, %xmm0
|
||||
pmovmskb %xmm2, %edx
|
||||
pmovmskb %xmm0, %eax
|
||||
or %eax, %edx
|
||||
jnz L(matches)
|
||||
add $16, %ecx /* step 4 */
|
||||
|
||||
|
||||
movdqa (%ecx), %xmm0
|
||||
pcmpeqd %xmm0, %xmm2
|
||||
pcmpeqd %xmm1, %xmm0
|
||||
pmovmskb %xmm2, %edx
|
||||
pmovmskb %xmm0, %eax
|
||||
or %eax, %edx
|
||||
jz L(loop) /* nothing found in this chunk either: next iteration; otherwise fall into L(matches) */
|
||||
|
||||
.p2align 4
|
||||
L(matches): /* A zero dword and/or a match was seen in the chunk at (%ecx); eax = match mask. */
|
||||
pmovmskb %xmm2, %edx /* edx was OR-ed with eax by the caller; reload the pure zero-dword mask */
|
||||
test %eax, %eax
|
||||
jz L(return_null) /* only a zero dword was found */
|
||||
test %edx, %edx
|
||||
jz L(match_case1) /* match without any zero dword in this chunk */
|
||||
|
||||
|
||||
.p2align 4
|
||||
L(match_case2): /* Both a match and a zero dword are in the chunk: the match only counts if it comes first. */
|
||||
test %al, %al
|
||||
jz L(match_higth_case2) /* match is in dword 2 or 3 */
|
||||
test $15, %al
|
||||
jnz L(match_case2_4) /* match in dword 0 */
|
||||
test $15, %dl
|
||||
jnz L(return_null) /* zero in dword 0 precedes the match in dword 1 */
|
||||
lea 4(%ecx), %eax /* result = address of dword 1 */
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(match_case2_4):
|
||||
mov %ecx, %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(match_higth_case2): /* Match is in dword 2 or 3 and a zero dword exists somewhere in the chunk. */
|
||||
test %dl, %dl
|
||||
jnz L(return_null) /* zero in dword 0 or 1 precedes the match */
|
||||
test $15, %ah
|
||||
jnz L(match_case2_12) /* match in dword 2; that label returns 8(%ecx) */
|
||||
test $15, %dh
|
||||
jnz L(return_null) /* zero in dword 2 precedes the match in dword 3 */
|
||||
lea 12(%ecx), %eax /* result = address of dword 3 */
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(match_case2_12):
|
||||
lea 8(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(match_case1): /* Match with no zero dword in the chunk: return the first matching dword. */
|
||||
test %al, %al
|
||||
jz L(match_higth_case1) /* no match bits for dwords 0 and 1 */
|
||||
|
||||
|
||||
test $0x01, %al
|
||||
jnz L(exit0) /* bit 0 set: match in dword 0 */
|
||||
lea 4(%ecx), %eax /* otherwise the match is in dword 1 */
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(match_higth_case1):
|
||||
test $0x01, %ah
|
||||
jnz L(exit3)
|
||||
lea 12(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit0):
|
||||
mov %ecx, %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(exit3):
|
||||
lea 8(%ecx), %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(return_null):
|
||||
xor %eax, %eax
|
||||
ret
|
||||
|
||||
END (wcschr)
|
1062
libc/arch-x86/string/sse2-wcscmp-atom.S
Normal file
1062
libc/arch-x86/string/sse2-wcscmp-atom.S
Normal file
File diff suppressed because it is too large
Load diff
306
libc/arch-x86/string/sse2-wcslen-atom.S
Normal file
306
libc/arch-x86/string/sse2-wcslen-atom.S
Normal file
|
@ -0,0 +1,306 @@
|
|||
/*
|
||||
Copyright (c) 2011 Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef USE_AS_WCSCAT
|
||||
|
||||
# ifndef L
|
||||
# define L(label) .L##label
|
||||
# endif
|
||||
|
||||
# ifndef cfi_startproc
|
||||
# define cfi_startproc .cfi_startproc
|
||||
# endif
|
||||
|
||||
# ifndef cfi_endproc
|
||||
# define cfi_endproc .cfi_endproc
|
||||
# endif
|
||||
|
||||
# ifndef ENTRY
|
||||
# define ENTRY(name) \
|
||||
.type name, @function; \
|
||||
.globl name; \
|
||||
.p2align 4; \
|
||||
name: \
|
||||
cfi_startproc
|
||||
# endif
|
||||
|
||||
# ifndef END
|
||||
# define END(name) \
|
||||
cfi_endproc; \
|
||||
.size name, .-name
|
||||
# endif
|
||||
|
||||
# define PARMS 4
|
||||
# define STR PARMS
|
||||
# define RETURN ret
|
||||
|
||||
.text
|
||||
ENTRY (wcslen)
|
||||
mov STR(%esp), %edx
|
||||
#endif
|
||||
/* Check the first eight 4-byte elements at (%edx) one by one before switching to SSE2 compares. The explicit `l' suffix fixes the operand size at 32 bits: with an immediate and a memory operand there is no register to infer it from, so a bare `cmp' is ambiguous. */
|
||||
cmpl $0, (%edx) /* element 0 */
|
||||
jz L(exit_tail0)
|
||||
cmpl $0, 4(%edx) /* element 1 */
|
||||
jz L(exit_tail1)
|
||||
cmpl $0, 8(%edx) /* element 2 */
|
||||
jz L(exit_tail2)
|
||||
cmpl $0, 12(%edx) /* element 3 */
|
||||
jz L(exit_tail3)
|
||||
cmpl $0, 16(%edx) /* element 4 */
|
||||
jz L(exit_tail4)
|
||||
cmpl $0, 20(%edx) /* element 5 */
|
||||
jz L(exit_tail5)
|
||||
cmpl $0, 24(%edx) /* element 6 */
|
||||
jz L(exit_tail6)
|
||||
cmpl $0, 28(%edx) /* element 7 */
|
||||
jz L(exit_tail7)
|
||||
|
||||
pxor %xmm0, %xmm0
|
||||
|
||||
lea 32(%edx), %eax
|
||||
lea -16(%eax), %ecx
|
||||
and $-16, %eax
|
||||
|
||||
pcmpeqd (%eax), %xmm0
|
||||
pmovmskb %xmm0, %edx
|
||||
pxor %xmm1, %xmm1
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqd (%eax), %xmm1
|
||||
pmovmskb %xmm1, %edx
|
||||
pxor %xmm2, %xmm2
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqd (%eax), %xmm2
|
||||
pmovmskb %xmm2, %edx
|
||||
pxor %xmm3, %xmm3
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqd (%eax), %xmm3
|
||||
pmovmskb %xmm3, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqd (%eax), %xmm0
|
||||
pmovmskb %xmm0, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqd (%eax), %xmm1
|
||||
pmovmskb %xmm1, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqd (%eax), %xmm2
|
||||
pmovmskb %xmm2, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqd (%eax), %xmm3
|
||||
pmovmskb %xmm3, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqd (%eax), %xmm0
|
||||
pmovmskb %xmm0, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqd (%eax), %xmm1
|
||||
pmovmskb %xmm1, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqd (%eax), %xmm2
|
||||
pmovmskb %xmm2, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqd (%eax), %xmm3
|
||||
pmovmskb %xmm3, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqd (%eax), %xmm0
|
||||
pmovmskb %xmm0, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqd (%eax), %xmm1
|
||||
pmovmskb %xmm1, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqd (%eax), %xmm2
|
||||
pmovmskb %xmm2, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
pcmpeqd (%eax), %xmm3
|
||||
pmovmskb %xmm3, %edx
|
||||
lea 16(%eax), %eax
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
and $-0x40, %eax
|
||||
|
||||
.p2align 4
|
||||
L(aligned_64_loop): /* Scan 64 aligned bytes per iteration; xmm3 is expected to be all-zero on every entry. */
|
||||
movaps (%eax), %xmm0
|
||||
movaps 16(%eax), %xmm1
|
||||
movaps 32(%eax), %xmm2
|
||||
movaps 48(%eax), %xmm6
|
||||
|
||||
|
||||
pminub %xmm1, %xmm0 /* byte-wise minimum of chunks 0 and 1 (overwrites xmm0) */
|
||||
pminub %xmm6, %xmm2 /* byte-wise minimum of chunks 2 and 3 (overwrites xmm2) */
|
||||
pminub %xmm0, %xmm2 /* byte-wise minimum of all four chunks */
|
||||
pcmpeqd %xmm3, %xmm2 /* any zero dword in the combined minimum? */
|
||||
pmovmskb %xmm2, %edx
|
||||
lea 64(%eax), %eax /* eax now points past the 64-byte block (lea keeps the flags-free ordering) */
|
||||
test %edx, %edx
|
||||
jz L(aligned_64_loop)
|
||||
|
||||
|
||||
pcmpeqd -64(%eax), %xmm3 /* recheck chunk 0 from memory, since xmm0 was overwritten above */
|
||||
pmovmskb %xmm3, %edx
|
||||
lea 48(%ecx), %ecx /* bias ecx so that L(exit)'s eax - ecx matches chunk 0 */
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
|
||||
pcmpeqd %xmm1, %xmm3 /* chunk 1 (xmm1 still holds the loaded data) */
|
||||
pmovmskb %xmm3, %edx
|
||||
lea -16(%ecx), %ecx /* bias for chunk 1 */
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
|
||||
pcmpeqd -32(%eax), %xmm3 /* chunk 2 from memory, since xmm2 was overwritten above */
|
||||
pmovmskb %xmm3, %edx
|
||||
lea -16(%ecx), %ecx /* bias for chunk 2 */
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
|
||||
pcmpeqd %xmm6, %xmm3 /* chunk 3 (xmm6 still holds the loaded data) */
|
||||
pmovmskb %xmm3, %edx
|
||||
lea -16(%ecx), %ecx /* bias for chunk 3; net change to ecx is now +48 - 3*16 = 0 */
|
||||
test %edx, %edx
|
||||
jnz L(exit)
|
||||
|
||||
|
||||
jmp L(aligned_64_loop) /* no chunk has a zero dword: the byte-wise minimum gave a false positive, keep scanning */
|
||||
|
||||
.p2align 4
|
||||
L(exit): /* edx = pmovmskb mask of the chunk holding the zero dword; eax/ecx encode that chunk's byte distance. */
|
||||
sub %ecx, %eax /* byte distance ... */
|
||||
shr $2, %eax /* ... divided by 4 = element count up to the start of the chunk */
|
||||
test %dl, %dl
|
||||
jz L(exit_high) /* low mask byte clear: zero is in dword 2 or 3 */
|
||||
|
||||
|
||||
mov %dl, %cl
|
||||
and $15, %cl
|
||||
jz L(exit_1) /* low nibble clear: zero is in dword 1 */
|
||||
RETURN /* zero is in dword 0 */
|
||||
|
||||
|
||||
.p2align 4
|
||||
L(exit_high):
|
||||
mov %dh, %ch
|
||||
and $15, %ch
|
||||
jz L(exit_3) /* low nibble of the high byte clear: zero is in dword 3 */
|
||||
add $2, %eax /* zero is in dword 2 */
|
||||
RETURN
|
||||
|
||||
|
||||
.p2align 4
|
||||
L(exit_1):
|
||||
add $1, %eax /* zero is in dword 1 */
|
||||
RETURN
|
||||
|
||||
|
||||
.p2align 4
|
||||
L(exit_3):
|
||||
add $3, %eax /* zero is in dword 3 */
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_tail0):
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_tail1):
|
||||
mov $1, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_tail2):
|
||||
mov $2, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_tail3):
|
||||
mov $3, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_tail4):
|
||||
mov $4, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_tail5):
|
||||
mov $5, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_tail6):
|
||||
mov $6, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(exit_tail7):
|
||||
mov $7, %eax
|
||||
#ifndef USE_AS_WCSCAT
|
||||
RETURN
|
||||
|
||||
END (wcslen)
|
||||
#endif
|
402
libc/arch-x86/string/sse2-wcsrchr-atom.S
Normal file
402
libc/arch-x86/string/sse2-wcsrchr-atom.S
Normal file
|
@ -0,0 +1,402 @@
|
|||
/*
|
||||
Copyright (c) 2011 Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef L
|
||||
# define L(label) .L##label
|
||||
#endif
|
||||
|
||||
#ifndef cfi_startproc
|
||||
# define cfi_startproc .cfi_startproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_endproc
|
||||
# define cfi_endproc .cfi_endproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_rel_offset
|
||||
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
|
||||
#endif
|
||||
|
||||
#ifndef cfi_restore
|
||||
# define cfi_restore(reg) .cfi_restore reg
|
||||
#endif
|
||||
|
||||
#ifndef cfi_adjust_cfa_offset
|
||||
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
|
||||
#endif
|
||||
|
||||
#ifndef ENTRY
|
||||
# define ENTRY(name) \
|
||||
.type name, @function; \
|
||||
.globl name; \
|
||||
.p2align 4; \
|
||||
name: \
|
||||
cfi_startproc
|
||||
#endif
|
||||
|
||||
#ifndef END
|
||||
# define END(name) \
|
||||
cfi_endproc; \
|
||||
.size name, .-name
|
||||
#endif
|
||||
|
||||
#define CFI_PUSH(REG) \
|
||||
cfi_adjust_cfa_offset (4); \
|
||||
cfi_rel_offset (REG, 0)
|
||||
|
||||
#define CFI_POP(REG) \
|
||||
cfi_adjust_cfa_offset (-4); \
|
||||
cfi_restore (REG)
|
||||
|
||||
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
||||
#define POP(REG) popl REG; CFI_POP (REG)
|
||||
|
||||
#define PARMS 8
|
||||
#define ENTRANCE PUSH(%edi);
|
||||
#define RETURN POP(%edi); ret; CFI_PUSH(%edi);
|
||||
|
||||
#define STR1 PARMS
|
||||
#define STR2 STR1+4
|
||||
|
||||
.text
|
||||
ENTRY (wcsrchr)
|
||||
|
||||
ENTRANCE
|
||||
mov STR1(%esp), %ecx
|
||||
movd STR2(%esp), %xmm1
|
||||
|
||||
mov %ecx, %edi
|
||||
punpckldq %xmm1, %xmm1
|
||||
pxor %xmm2, %xmm2
|
||||
punpckldq %xmm1, %xmm1
|
||||
|
||||
/* ECX has OFFSET. */
|
||||
and $63, %ecx
|
||||
cmp $48, %ecx
|
||||
ja L(crosscache)
|
||||
|
||||
/* unaligned string. */
|
||||
movdqu (%edi), %xmm0
|
||||
pcmpeqd %xmm0, %xmm2
|
||||
pcmpeqd %xmm1, %xmm0
|
||||
/* Find where NULL is. */
|
||||
pmovmskb %xmm2, %ecx
|
||||
/* Check if there is a match. */
|
||||
pmovmskb %xmm0, %eax
|
||||
add $16, %edi
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(unaligned_match1)
|
||||
|
||||
test %ecx, %ecx
|
||||
jnz L(return_null)
|
||||
|
||||
and $-16, %edi
|
||||
|
||||
PUSH (%esi)
|
||||
|
||||
xor %edx, %edx
|
||||
jmp L(loop)
|
||||
|
||||
CFI_POP (%esi)
|
||||
|
||||
.p2align 4
|
||||
L(unaligned_match1):
|
||||
test %ecx, %ecx
|
||||
jnz L(prolog_find_zero_1)
|
||||
|
||||
PUSH (%esi)
|
||||
|
||||
/* Save current match */
|
||||
mov %eax, %edx
|
||||
mov %edi, %esi
|
||||
and $-16, %edi
|
||||
jmp L(loop)
|
||||
|
||||
CFI_POP (%esi)
|
||||
|
||||
.p2align 4
|
||||
L(crosscache):
|
||||
/* Handle unaligned string: probe the enclosing aligned 16-byte block and mask off the bytes before the start. */
|
||||
and $15, %ecx /* ecx = byte offset of the start address inside its 16-byte block */
|
||||
and $-16, %edi /* edi = start address rounded down to 16 bytes */
|
||||
pxor %xmm3, %xmm3 /* separate zero register, so xmm2 stays all-zero for L(loop) */
|
||||
movdqa (%edi), %xmm0
|
||||
pcmpeqd %xmm0, %xmm3
|
||||
pcmpeqd %xmm1, %xmm0
|
||||
/* Find where NULL is. */
|
||||
pmovmskb %xmm3, %edx
|
||||
/* Check if there is a match. */
|
||||
pmovmskb %xmm0, %eax
|
||||
/* Remove the leading bytes. */
|
||||
shr %cl, %edx
|
||||
shr %cl, %eax
|
||||
add $16, %edi /* edi = next aligned block */
|
||||
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(unaligned_match)
|
||||
|
||||
|
||||
test %edx, %edx
|
||||
jnz L(return_null) /* terminator reached without any match */
|
||||
|
||||
|
||||
PUSH (%esi) /* esi is used by L(loop) and its exits as the saved match position */
|
||||
|
||||
|
||||
xor %edx, %edx /* edx = 0: no match recorded so far */
|
||||
jmp L(loop)
|
||||
|
||||
CFI_POP (%esi)
|
||||
|
||||
.p2align 4
|
||||
L(unaligned_match):
|
||||
test %edx, %edx
|
||||
jnz L(prolog_find_zero)
|
||||
|
||||
PUSH (%esi)
|
||||
|
||||
mov %eax, %edx
|
||||
lea (%edi, %ecx), %esi
|
||||
|
||||
/* Loop start on aligned string. */
|
||||
.p2align 4
|
||||
L(loop):
|
||||
movdqa (%edi), %xmm0
|
||||
pcmpeqd %xmm0, %xmm2
|
||||
add $16, %edi
|
||||
pcmpeqd %xmm1, %xmm0
|
||||
pmovmskb %xmm2, %ecx
|
||||
pmovmskb %xmm0, %eax
|
||||
or %eax, %ecx
|
||||
jnz L(matches)
|
||||
|
||||
movdqa (%edi), %xmm3
|
||||
pcmpeqd %xmm3, %xmm2
|
||||
add $16, %edi
|
||||
pcmpeqd %xmm1, %xmm3
|
||||
pmovmskb %xmm2, %ecx
|
||||
pmovmskb %xmm3, %eax
|
||||
or %eax, %ecx
|
||||
jnz L(matches)
|
||||
|
||||
movdqa (%edi), %xmm4
|
||||
pcmpeqd %xmm4, %xmm2
|
||||
add $16, %edi
|
||||
pcmpeqd %xmm1, %xmm4
|
||||
pmovmskb %xmm2, %ecx
|
||||
pmovmskb %xmm4, %eax
|
||||
or %eax, %ecx
|
||||
jnz L(matches)
|
||||
|
||||
movdqa (%edi), %xmm5
|
||||
pcmpeqd %xmm5, %xmm2
|
||||
add $16, %edi
|
||||
pcmpeqd %xmm1, %xmm5
|
||||
pmovmskb %xmm2, %ecx
|
||||
pmovmskb %xmm5, %eax
|
||||
or %eax, %ecx
|
||||
jz L(loop)
|
||||
|
||||
.p2align 4
|
||||
L(matches): /* The last chunk contained a match and/or a zero dword; eax = its match mask. */
|
||||
test %eax, %eax
|
||||
jnz L(match)
|
||||
L(return_value): /* No usable match in the current chunk: fall back to the saved one (edx = mask, esi = position). */
|
||||
test %edx, %edx
|
||||
jz L(return_null_1) /* nothing was ever saved */
|
||||
mov %edx, %eax
|
||||
mov %esi, %edi /* edi = saved position; results are computed relative to it */
|
||||
|
||||
|
||||
POP (%esi)
|
||||
|
||||
|
||||
test %ah, %ah
|
||||
jnz L(match_third_or_fourth_wchar) /* highest set mask bits are in dword 2 or 3 */
|
||||
test $15 << 4, %al
|
||||
jnz L(match_second_wchar)
|
||||
lea -16(%edi), %eax /* match in dword 0 of the saved chunk */
|
||||
RETURN
|
||||
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
L(return_null_1):
|
||||
POP (%esi)
|
||||
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
L(match): /* The current chunk has at least one match; check whether it also has a zero dword. */
|
||||
pmovmskb %xmm2, %ecx /* ecx was OR-ed with eax in L(loop); reload the pure zero-dword mask */
|
||||
test %ecx, %ecx
|
||||
jnz L(find_zero)
|
||||
/* save match info */
|
||||
mov %eax, %edx /* remember this chunk's match mask ... */
|
||||
mov %edi, %esi /* ... and the address just past the chunk */
|
||||
jmp L(loop)
|
||||
|
||||
.p2align 4
|
||||
L(find_zero): /* Chunk holds both a match (eax) and a zero dword (ecx): locate the first zero dword. */
|
||||
test %cl, %cl
|
||||
jz L(find_zero_in_third_or_fourth_wchar)
|
||||
test $15, %cl
|
||||
jz L(find_zero_in_second_wchar)
|
||||
and $1, %eax /* zero is in dword 0: keep only the match bit for dword 0 */
|
||||
jz L(return_value) /* no match at or before the zero: use the saved match, if any */
|
||||
|
||||
|
||||
POP (%esi)
|
||||
|
||||
|
||||
lea -16(%edi), %eax /* edi points past the chunk, so this is the address of dword 0 */
|
||||
RETURN
|
||||
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
L(find_zero_in_second_wchar):
|
||||
and $1 << 5 - 1, %eax
|
||||
jz L(return_value)
|
||||
|
||||
POP (%esi)
|
||||
|
||||
test $15 << 4, %al
|
||||
jnz L(match_second_wchar)
|
||||
lea -16(%edi), %eax
|
||||
RETURN
|
||||
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
L(find_zero_in_third_or_fourth_wchar):
|
||||
test $15, %ch
|
||||
jz L(find_zero_in_fourth_wchar)
|
||||
and $1 << 9 - 1, %eax
|
||||
jz L(return_value)
|
||||
|
||||
POP (%esi)
|
||||
|
||||
test %ah, %ah
|
||||
jnz L(match_third_wchar)
|
||||
test $15 << 4, %al
|
||||
jnz L(match_second_wchar)
|
||||
lea -16(%edi), %eax
|
||||
RETURN
|
||||
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
L(find_zero_in_fourth_wchar):
|
||||
|
||||
POP (%esi)
|
||||
|
||||
test %ah, %ah
|
||||
jnz L(match_third_or_fourth_wchar)
|
||||
test $15 << 4, %al
|
||||
jnz L(match_second_wchar)
|
||||
lea -16(%edi), %eax
|
||||
RETURN
|
||||
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
L(match_second_wchar):
|
||||
lea -12(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(match_third_or_fourth_wchar):
|
||||
test $15 << 4, %ah
|
||||
jnz L(match_fourth_wchar)
|
||||
lea -8(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(match_third_wchar):
|
||||
lea -8(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(match_fourth_wchar):
|
||||
lea -4(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(return_null):
|
||||
xor %eax, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(prolog_find_zero):
|
||||
add %ecx, %edi
|
||||
mov %edx, %ecx
|
||||
L(prolog_find_zero_1):
|
||||
test %cl, %cl
|
||||
jz L(prolog_find_zero_in_third_or_fourth_wchar)
|
||||
test $15, %cl
|
||||
jz L(prolog_find_zero_in_second_wchar)
|
||||
and $1, %eax
|
||||
jz L(return_null)
|
||||
|
||||
lea -16(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(prolog_find_zero_in_second_wchar):
|
||||
and $1 << 5 - 1, %eax
|
||||
jz L(return_null)
|
||||
|
||||
test $15 << 4, %al
|
||||
jnz L(match_second_wchar)
|
||||
lea -16(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(prolog_find_zero_in_third_or_fourth_wchar):
|
||||
test $15, %ch
|
||||
jz L(prolog_find_zero_in_fourth_wchar)
|
||||
and $1 << 9 - 1, %eax
|
||||
jz L(return_null)
|
||||
|
||||
test %ah, %ah
|
||||
jnz L(match_third_wchar)
|
||||
test $15 << 4, %al
|
||||
jnz L(match_second_wchar)
|
||||
lea -16(%edi), %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
L(prolog_find_zero_in_fourth_wchar):
|
||||
test %ah, %ah
|
||||
jnz L(match_third_or_fourth_wchar)
|
||||
test $15 << 4, %al
|
||||
jnz L(match_second_wchar)
|
||||
lea -16(%edi), %eax
|
||||
RETURN
|
||||
|
||||
END (wcsrchr)
|
|
@ -28,13 +28,8 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#if defined(USE_SSE2)
|
||||
|
||||
# define sse2_strlen_atom strlen
|
||||
# include "sse2-strlen-atom.S"
|
||||
|
||||
#else
|
||||
|
||||
# include "strlen.S"
|
||||
|
||||
#endif
|
||||
#define MEMCPY bcopy
|
||||
#define USE_AS_MEMMOVE
|
||||
#define USE_AS_BCOPY
|
||||
#include "ssse3-memcpy-atom.S"
|
File diff suppressed because it is too large
Load diff
|
@ -28,8 +28,11 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "cache.h"
|
||||
#undef __i686
|
||||
|
||||
#ifndef MEMCPY
|
||||
# define MEMCPY ssse3_memcpy5
|
||||
# define MEMCPY memcpy
|
||||
#endif
|
||||
|
||||
#ifndef L
|
|
@ -28,13 +28,7 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#if defined(USE_SSSE3)
|
||||
|
||||
# define ssse3_strcmp_latest strcmp
|
||||
# include "ssse3-strcmp-latest.S"
|
||||
|
||||
#else
|
||||
|
||||
# include "strcmp.S"
|
||||
|
||||
#endif
|
||||
#define MEMCPY memmove
|
||||
#define USE_AS_MEMMOVE
|
||||
#include "ssse3-memcpy-atom.S"
|
620
libc/arch-x86/string/ssse3-strcat-atom.S
Normal file
620
libc/arch-x86/string/ssse3-strcat-atom.S
Normal file
|
@ -0,0 +1,620 @@
|
|||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef L
|
||||
# define L(label) .L##label
|
||||
#endif
|
||||
|
||||
#ifndef cfi_startproc
|
||||
# define cfi_startproc .cfi_startproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_endproc
|
||||
# define cfi_endproc .cfi_endproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_rel_offset
|
||||
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
|
||||
#endif
|
||||
|
||||
#ifndef cfi_restore
|
||||
# define cfi_restore(reg) .cfi_restore reg
|
||||
#endif
|
||||
|
||||
#ifndef cfi_adjust_cfa_offset
|
||||
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
|
||||
#endif
|
||||
|
||||
#ifndef cfi_remember_state
|
||||
# define cfi_remember_state .cfi_remember_state
|
||||
#endif
|
||||
|
||||
#ifndef cfi_restore_state
|
||||
# define cfi_restore_state .cfi_restore_state
|
||||
#endif
|
||||
|
||||
#ifndef ENTRY
|
||||
# define ENTRY(name) \
|
||||
.type name, @function; \
|
||||
.globl name; \
|
||||
.p2align 4; \
|
||||
name: \
|
||||
cfi_startproc
|
||||
#endif
|
||||
|
||||
#ifndef END
|
||||
# define END(name) \
|
||||
cfi_endproc; \
|
||||
.size name, .-name
|
||||
#endif
|
||||
|
||||
#define CFI_PUSH(REG) \
|
||||
cfi_adjust_cfa_offset (4); \
|
||||
cfi_rel_offset (REG, 0)
|
||||
|
||||
#define CFI_POP(REG) \
|
||||
cfi_adjust_cfa_offset (-4); \
|
||||
cfi_restore (REG)
|
||||
|
||||
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
||||
#define POP(REG) popl REG; CFI_POP (REG)
|
||||
|
||||
#ifndef STRCAT
|
||||
# define STRCAT strcat
|
||||
#endif
|
||||
|
||||
#define PARMS 4
|
||||
#define STR1 PARMS+4
|
||||
#define STR2 STR1+4
|
||||
|
||||
#ifdef USE_AS_STRNCAT
|
||||
# define LEN STR2+8
|
||||
#endif
|
||||
|
||||
#define USE_AS_STRCAT
|
||||
|
||||
.section .text.ssse3,"ax",@progbits
|
||||
ENTRY (STRCAT)
|
||||
PUSH (%edi)
|
||||
mov STR1(%esp), %edi
|
||||
mov %edi, %edx
|
||||
|
||||
#define RETURN jmp L(StrcpyAtom)
|
||||
#include "sse2-strlen-atom.S"
|
||||
|
||||
L(StrcpyAtom):
|
||||
mov STR2(%esp), %ecx
|
||||
lea (%edi, %eax), %edx
|
||||
#ifdef USE_AS_STRNCAT
|
||||
PUSH (%ebx)
|
||||
mov LEN(%esp), %ebx
|
||||
test %ebx, %ebx
|
||||
jz L(StrncatExit0)
|
||||
cmp $8, %ebx
|
||||
jbe L(StrncpyExit8Bytes)
|
||||
#endif
|
||||
cmpb $0, (%ecx)
|
||||
jz L(Exit1)
|
||||
cmpb $0, 1(%ecx)
|
||||
jz L(Exit2)
|
||||
cmpb $0, 2(%ecx)
|
||||
jz L(Exit3)
|
||||
cmpb $0, 3(%ecx)
|
||||
jz L(Exit4)
|
||||
cmpb $0, 4(%ecx)
|
||||
jz L(Exit5)
|
||||
cmpb $0, 5(%ecx)
|
||||
jz L(Exit6)
|
||||
cmpb $0, 6(%ecx)
|
||||
jz L(Exit7)
|
||||
cmpb $0, 7(%ecx)
|
||||
jz L(Exit8)
|
||||
cmpb $0, 8(%ecx)
|
||||
jz L(Exit9)
|
||||
#ifdef USE_AS_STRNCAT
|
||||
cmp $16, %ebx
|
||||
jb L(StrncpyExit15Bytes)
|
||||
#endif
|
||||
cmpb $0, 9(%ecx)
|
||||
jz L(Exit10)
|
||||
cmpb $0, 10(%ecx)
|
||||
jz L(Exit11)
|
||||
cmpb $0, 11(%ecx)
|
||||
jz L(Exit12)
|
||||
cmpb $0, 12(%ecx)
|
||||
jz L(Exit13)
|
||||
cmpb $0, 13(%ecx)
|
||||
jz L(Exit14)
|
||||
cmpb $0, 14(%ecx)
|
||||
jz L(Exit15)
|
||||
cmpb $0, 15(%ecx)
|
||||
jz L(Exit16)
|
||||
#ifdef USE_AS_STRNCAT
|
||||
cmp $16, %ebx
|
||||
je L(StrncatExit16)
|
||||
|
||||
# define RETURN1 POP (%ebx); POP (%edi); ret; \
|
||||
CFI_PUSH (%ebx); CFI_PUSH (%edi)
|
||||
# define USE_AS_STRNCPY
|
||||
#else
|
||||
# define RETURN1 POP(%edi); ret; CFI_PUSH(%edi)
|
||||
#endif
|
||||
#include "ssse3-strcpy-atom.S"
|
||||
|
||||
.p2align 4
|
||||
L(CopyFrom1To16Bytes):
|
||||
add %esi, %edx
|
||||
add %esi, %ecx
|
||||
|
||||
POP (%esi)
|
||||
test %al, %al
|
||||
jz L(ExitHigh)
|
||||
test $0x01, %al
|
||||
jnz L(Exit1)
|
||||
test $0x02, %al
|
||||
jnz L(Exit2)
|
||||
test $0x04, %al
|
||||
jnz L(Exit3)
|
||||
test $0x08, %al
|
||||
jnz L(Exit4)
|
||||
test $0x10, %al
|
||||
jnz L(Exit5)
|
||||
test $0x20, %al
|
||||
jnz L(Exit6)
|
||||
test $0x40, %al
|
||||
jnz L(Exit7)
|
||||
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx)
|
||||
movl %edi, %eax
|
||||
RETURN1
|
||||
|
||||
.p2align 4
|
||||
L(ExitHigh):
|
||||
test $0x01, %ah
|
||||
jnz L(Exit9)
|
||||
test $0x02, %ah
|
||||
jnz L(Exit10)
|
||||
test $0x04, %ah
|
||||
jnz L(Exit11)
|
||||
test $0x08, %ah
|
||||
jnz L(Exit12)
|
||||
test $0x10, %ah
|
||||
jnz L(Exit13)
|
||||
test $0x20, %ah
|
||||
jnz L(Exit14)
|
||||
test $0x40, %ah
|
||||
jnz L(Exit15)
|
||||
movlpd (%ecx), %xmm0
|
||||
movlpd 8(%ecx), %xmm1
|
||||
movlpd %xmm0, (%edx)
|
||||
movlpd %xmm1, 8(%edx)
|
||||
movl %edi, %eax
|
||||
RETURN1
|
||||
|
||||
.p2align 4
|
||||
L(StrncatExit1):
|
||||
movb %bh, 1(%edx)
|
||||
L(Exit1):
|
||||
movb (%ecx), %al
|
||||
movb %al, (%edx)
|
||||
movl %edi, %eax
|
||||
RETURN1
|
||||
|
||||
.p2align 4
|
||||
L(StrncatExit2):
|
||||
movb %bh, 2(%edx)
|
||||
L(Exit2):
|
||||
movw (%ecx), %ax
|
||||
movw %ax, (%edx)
|
||||
movl %edi, %eax
|
||||
RETURN1
|
||||
|
||||
.p2align 4
|
||||
L(StrncatExit3):
|
||||
movb %bh, 3(%edx)
|
||||
L(Exit3):
|
||||
movw (%ecx), %ax
|
||||
movw %ax, (%edx)
|
||||
movb 2(%ecx), %al
|
||||
movb %al, 2(%edx)
|
||||
movl %edi, %eax
|
||||
RETURN1
|
||||
|
||||
.p2align 4
|
||||
L(StrncatExit4):
|
||||
movb %bh, 4(%edx)
|
||||
L(Exit4):
|
||||
movl (%ecx), %eax
|
||||
movl %eax, (%edx)
|
||||
movl %edi, %eax
|
||||
RETURN1
|
||||
|
||||
.p2align 4
|
||||
L(StrncatExit5):
|
||||
movb %bh, 5(%edx)
|
||||
L(Exit5):
|
||||
movl (%ecx), %eax
|
||||
movl %eax, (%edx)
|
||||
movb 4(%ecx), %al
|
||||
movb %al, 4(%edx)
|
||||
movl %edi, %eax
|
||||
RETURN1
|
||||
|
||||
.p2align 4
|
||||
L(StrncatExit6):
|
||||
movb %bh, 6(%edx)
|
||||
L(Exit6):
|
||||
movl (%ecx), %eax
|
||||
movl %eax, (%edx)
|
||||
movw 4(%ecx), %ax
|
||||
movw %ax, 4(%edx)
|
||||
movl %edi, %eax
|
||||
RETURN1
|
||||
|
||||
.p2align 4
|
||||
L(StrncatExit7):
|
||||
movb %bh, 7(%edx)
|
||||
L(Exit7):
|
||||
movl (%ecx), %eax
|
||||
movl %eax, (%edx)
|
||||
movl 3(%ecx), %eax
|
||||
movl %eax, 3(%edx)
|
||||
movl %edi, %eax
|
||||
RETURN1
|
||||
|
||||
.p2align 4
|
||||
L(StrncatExit8):
|
||||
movb %bh, 8(%edx)
|
||||
L(Exit8):
|
||||
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx)
|
||||
movl %edi, %eax
|
||||
RETURN1
|
||||
|
||||
.p2align 4
|
||||
L(StrncatExit9):
|
||||
movb %bh, 9(%edx)
|
||||
L(Exit9):
|
||||
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx)
|
||||
movb 8(%ecx), %al
|
||||
movb %al, 8(%edx)
|
||||
movl %edi, %eax
|
||||
RETURN1
|
||||
|
||||
.p2align 4
|
||||
L(StrncatExit10):
|
||||
movb %bh, 10(%edx)
|
||||
L(Exit10):
|
||||
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx)
|
||||
movw 8(%ecx), %ax
|
||||
movw %ax, 8(%edx)
|
||||
movl %edi, %eax
|
||||
RETURN1
|
||||
|
||||
.p2align 4
|
||||
L(StrncatExit11):
|
||||
movb %bh, 11(%edx)
|
||||
L(Exit11):
|
||||
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx)
|
||||
movl 7(%ecx), %eax
|
||||
movl %eax, 7(%edx)
|
||||
movl %edi, %eax
|
||||
RETURN1
|
||||
|
||||
.p2align 4
|
||||
L(StrncatExit12):
|
||||
movb %bh, 12(%edx)
|
||||
L(Exit12):
|
||||
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx)
|
||||
movl 8(%ecx), %eax
|
||||
movl %eax, 8(%edx)
|
||||
movl %edi, %eax
|
||||
RETURN1
|
||||
|
||||
.p2align 4
|
||||
L(StrncatExit13):
|
||||
movb %bh, 13(%edx)
|
||||
L(Exit13):
|
||||
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx)
|
||||
movlpd 5(%ecx), %xmm0
|
||||
movlpd %xmm0, 5(%edx)
|
||||
movl %edi, %eax
|
||||
RETURN1
|
||||
|
||||
.p2align 4
|
||||
L(StrncatExit14):
|
||||
movb %bh, 14(%edx)
|
||||
L(Exit14):
|
||||
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx)
|
||||
movlpd 6(%ecx), %xmm0
|
||||
movlpd %xmm0, 6(%edx)
|
||||
movl %edi, %eax
|
||||
RETURN1
|
||||
|
||||
.p2align 4
|
||||
L(StrncatExit15):
|
||||
movb %bh, 15(%edx)
|
||||
L(Exit15):
|
||||
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx)
|
||||
movlpd 7(%ecx), %xmm0
|
||||
movlpd %xmm0, 7(%edx)
|
||||
movl %edi, %eax
|
||||
RETURN1
|
||||
|
||||
.p2align 4
|
||||
L(StrncatExit16):
|
||||
movb %bh, 16(%edx)
|
||||
L(Exit16):
|
||||
movlpd (%ecx), %xmm0
|
||||
movlpd 8(%ecx), %xmm1
|
||||
movlpd %xmm0, (%edx)
|
||||
movlpd %xmm1, 8(%edx)
|
||||
movl %edi, %eax
|
||||
RETURN1
|
||||
|
||||
#ifdef USE_AS_STRNCPY
|
||||
|
||||
CFI_PUSH(%esi)
|
||||
|
||||
.p2align 4
|
||||
/* Final-chunk dispatch used when %eax is non-zero (see
   L(CopyFrom1To16BytesCase2OrCase3)).  %eax is treated as a per-byte bit mask
   (bit k <-> source byte k; presumably a pmovmskb result of a NUL compare
   computed outside this chunk) and %ebx, after the +16 adjustment, as the
   number of bytes still allowed.  For k = 1..7 in order: a set mask bit k-1
   exits through L(Exit<k>) (plain k-byte copy); otherwise %ebx == k exits
   through L(StrncatExit<k>) (copy plus extra byte store).  */
L(CopyFrom1To16BytesCase2):
|
||||
add $16, %ebx
|
||||
add %esi, %ecx /* source += offset in %esi */
|
||||
lea (%esi, %edx), %esi /* %esi = dest + offset; frees %edx as scratch */
|
||||
lea -9(%ebx), %edx
|
||||
and $1<<7, %dh /* keep bit 15 of (%ebx - 9): set when that difference is negative for the small counts handled here */
|
||||
or %al, %dh /* ZF=1 only if the low 8 mask bits are clear and the bit above is clear */
|
||||
lea (%esi), %edx /* %edx = adjusted dest; lea leaves the flags alone */
|
||||
POP (%esi) /* popl also leaves the flags from `or' intact */
|
||||
jz L(ExitHighCase2)
|
||||
|
||||
test $0x01, %al
|
||||
jnz L(Exit1)
|
||||
cmp $1, %ebx
|
||||
je L(StrncatExit1)
|
||||
test $0x02, %al
|
||||
jnz L(Exit2)
|
||||
cmp $2, %ebx
|
||||
je L(StrncatExit2)
|
||||
test $0x04, %al
|
||||
jnz L(Exit3)
|
||||
cmp $3, %ebx
|
||||
je L(StrncatExit3)
|
||||
test $0x08, %al
|
||||
jnz L(Exit4)
|
||||
cmp $4, %ebx
|
||||
je L(StrncatExit4)
|
||||
test $0x10, %al
|
||||
jnz L(Exit5)
|
||||
cmp $5, %ebx
|
||||
je L(StrncatExit5)
|
||||
test $0x20, %al
|
||||
jnz L(Exit6)
|
||||
cmp $6, %ebx
|
||||
je L(StrncatExit6)
|
||||
test $0x40, %al
|
||||
jnz L(Exit7)
|
||||
cmp $7, %ebx
|
||||
je L(StrncatExit7)
|
||||
/* Fallthrough: copy 8 bytes, then write a zero byte at dest+7 if the byte
   just copied there is already 0, otherwise at dest+8.  */
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx)
|
||||
lea 7(%edx), %eax
|
||||
cmpb $1, (%eax) /* CF=1 iff dest[7] == 0 */
|
||||
sbb $-1, %eax /* %eax = %eax + 1 - CF */
|
||||
xor %cl, %cl
|
||||
movb %cl, (%eax)
|
||||
movl %edi, %eax
|
||||
RETURN1 /* macro defined outside this chunk */
|
||||
|
||||
.p2align 4
|
||||
/* Upper-half dispatch for L(CopyFrom1To16BytesCase2): the same ladder applied
   to bits 0-6 of %ah (source bytes 8-14) and to %ebx values 9..15.  A set
   bit k-9 of %ah exits through L(Exit<k>); otherwise %ebx == k exits through
   L(StrncatExit<k>).  */
L(ExitHighCase2):
|
||||
test $0x01, %ah
|
||||
jnz L(Exit9)
|
||||
cmp $9, %ebx
|
||||
je L(StrncatExit9)
|
||||
test $0x02, %ah
|
||||
jnz L(Exit10)
|
||||
cmp $10, %ebx
|
||||
je L(StrncatExit10)
|
||||
test $0x04, %ah
|
||||
jnz L(Exit11)
|
||||
cmp $11, %ebx
|
||||
je L(StrncatExit11)
|
||||
test $0x8, %ah
|
||||
jnz L(Exit12)
|
||||
cmp $12, %ebx
|
||||
je L(StrncatExit12)
|
||||
test $0x10, %ah
|
||||
jnz L(Exit13)
|
||||
cmp $13, %ebx
|
||||
je L(StrncatExit13)
|
||||
test $0x20, %ah
|
||||
jnz L(Exit14)
|
||||
cmp $14, %ebx
|
||||
je L(StrncatExit14)
|
||||
test $0x40, %ah
|
||||
jnz L(Exit15)
|
||||
cmp $15, %ebx
|
||||
je L(StrncatExit15)
|
||||
/* Fallthrough: none of the cases above matched; copy 16 bytes and return
   without storing an additional byte.  */
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx)
|
||||
movlpd 8(%ecx), %xmm1
|
||||
movlpd %xmm1, 8(%edx)
|
||||
movl %edi, %eax
|
||||
RETURN1 /* macro defined outside this chunk */
|
||||
|
||||
CFI_PUSH(%esi)
|
||||
|
||||
/* Selector: a non-zero mask in %eax goes to L(CopyFrom1To16BytesCase2);
   a zero mask falls through into L(CopyFrom1To16BytesCase3), which follows.  */
L(CopyFrom1To16BytesCase2OrCase3):
|
||||
test %eax, %eax
|
||||
jnz L(CopyFrom1To16BytesCase2)
|
||||
|
||||
.p2align 4
|
||||
/* Final-chunk dispatch that looks only at the remaining count in %ebx (the
   mask in %eax is not consulted here).  After the +16 adjustment, %ebx == k
   for k = 1..7 exits through L(StrncatExit<k>); values above 8 (unsigned)
   go to L(ExitHighCase3).  */
L(CopyFrom1To16BytesCase3):
|
||||
add $16, %ebx
|
||||
add %esi, %edx /* dest += offset in %esi */
|
||||
add %esi, %ecx /* source += offset in %esi */
|
||||
|
||||
POP (%esi)
|
||||
|
||||
cmp $8, %ebx
|
||||
ja L(ExitHighCase3)
|
||||
cmp $1, %ebx
|
||||
je L(StrncatExit1)
|
||||
cmp $2, %ebx
|
||||
je L(StrncatExit2)
|
||||
cmp $3, %ebx
|
||||
je L(StrncatExit3)
|
||||
cmp $4, %ebx
|
||||
je L(StrncatExit4)
|
||||
cmp $5, %ebx
|
||||
je L(StrncatExit5)
|
||||
cmp $6, %ebx
|
||||
je L(StrncatExit6)
|
||||
cmp $7, %ebx
|
||||
je L(StrncatExit7)
|
||||
/* Fallthrough: %ebx is 8 or 0 here (not above 8 and not 1..7), so %bh is 0.
   Copy 8 bytes and store that zero at dest+8.  */
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx)
|
||||
movb %bh, 8(%edx)
|
||||
movl %edi, %eax
|
||||
RETURN1 /* macro defined outside this chunk */
|
||||
|
||||
.p2align 4
|
||||
/* Upper-half dispatch for L(CopyFrom1To16BytesCase3): %ebx == k for
   k = 9..15 exits through L(StrncatExit<k>).  */
L(ExitHighCase3):
|
||||
cmp $9, %ebx
|
||||
je L(StrncatExit9)
|
||||
cmp $10, %ebx
|
||||
je L(StrncatExit10)
|
||||
cmp $11, %ebx
|
||||
je L(StrncatExit11)
|
||||
cmp $12, %ebx
|
||||
je L(StrncatExit12)
|
||||
cmp $13, %ebx
|
||||
je L(StrncatExit13)
|
||||
cmp $14, %ebx
|
||||
je L(StrncatExit14)
|
||||
cmp $15, %ebx
|
||||
je L(StrncatExit15)
|
||||
/* Fallthrough: copy 16 bytes and store %bh at dest+16.
   NOTE(review): this relies on %bh being 0 here, i.e. on %ebx being below
   256 (presumably exactly 16) -- confirm against the callers.  */
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx)
|
||||
movlpd 8(%ecx), %xmm1
|
||||
movlpd %xmm1, 8(%edx)
|
||||
movb %bh, 16(%edx)
|
||||
movl %edi, %eax
|
||||
RETURN1 /* macro defined outside this chunk */
|
||||
|
||||
.p2align 4
|
||||
/* Nothing to copy: return immediately with %eax = %edi.  */
L(StrncatExit0):
|
||||
movl %edi, %eax
|
||||
RETURN1 /* macro defined outside this chunk */
|
||||
|
||||
.p2align 4
|
||||
/* Byte-by-byte tail dispatch for source bytes 9..13 and counts 9..14.
   Alternates a limit check (%ebx == k -> L(StrncatExit<k>)) with a NUL check
   on source byte k (-> L(Exit<k+1>), which copies k+1 bytes including that
   NUL).  Bytes 0-8 are not examined here; presumably the caller has already
   ruled out a NUL there -- confirm.  */
L(StrncpyExit15Bytes):
|
||||
cmp $9, %ebx
|
||||
je L(StrncatExit9)
|
||||
cmpb $0, 9(%ecx)
|
||||
jz L(Exit10)
|
||||
cmp $10, %ebx
|
||||
je L(StrncatExit10)
|
||||
cmpb $0, 10(%ecx)
|
||||
jz L(Exit11)
|
||||
cmp $11, %ebx
|
||||
je L(StrncatExit11)
|
||||
cmpb $0, 11(%ecx)
|
||||
jz L(Exit12)
|
||||
cmp $12, %ebx
|
||||
je L(StrncatExit12)
|
||||
cmpb $0, 12(%ecx)
|
||||
jz L(Exit13)
|
||||
cmp $13, %ebx
|
||||
je L(StrncatExit13)
|
||||
cmpb $0, 13(%ecx)
|
||||
jz L(Exit14)
|
||||
cmp $14, %ebx
|
||||
je L(StrncatExit14)
|
||||
/* Fallthrough: copy 15 bytes (two overlapping 8-byte moves), then store %bh
   at dest+14 if the byte just copied there is 0, otherwise at dest+15.
   NOTE(review): %bh is presumably 0 on this path -- confirm.  */
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx)
|
||||
movlpd 7(%ecx), %xmm0
|
||||
movlpd %xmm0, 7(%edx)
|
||||
lea 14(%edx), %eax
|
||||
cmpb $1, (%eax) /* CF=1 iff dest[14] == 0 */
|
||||
sbb $-1, %eax /* %eax = %eax + 1 - CF */
|
||||
movb %bh, (%eax)
|
||||
movl %edi, %eax
|
||||
RETURN1 /* macro defined outside this chunk */
|
||||
|
||||
.p2align 4
|
||||
/* Byte-by-byte tail dispatch for source bytes 0..6 and counts 1..7.
   Alternates a NUL check on source byte k (-> L(Exit<k+1>), which copies
   k+1 bytes including that NUL) with a limit check (%ebx == k+1 ->
   L(StrncatExit<k+1>)).  */
L(StrncpyExit8Bytes):
|
||||
cmpb $0, (%ecx)
|
||||
jz L(Exit1)
|
||||
cmp $1, %ebx
|
||||
je L(StrncatExit1)
|
||||
cmpb $0, 1(%ecx)
|
||||
jz L(Exit2)
|
||||
cmp $2, %ebx
|
||||
je L(StrncatExit2)
|
||||
cmpb $0, 2(%ecx)
|
||||
jz L(Exit3)
|
||||
cmp $3, %ebx
|
||||
je L(StrncatExit3)
|
||||
cmpb $0, 3(%ecx)
|
||||
jz L(Exit4)
|
||||
cmp $4, %ebx
|
||||
je L(StrncatExit4)
|
||||
cmpb $0, 4(%ecx)
|
||||
jz L(Exit5)
|
||||
cmp $5, %ebx
|
||||
je L(StrncatExit5)
|
||||
cmpb $0, 5(%ecx)
|
||||
jz L(Exit6)
|
||||
cmp $6, %ebx
|
||||
je L(StrncatExit6)
|
||||
cmpb $0, 6(%ecx)
|
||||
jz L(Exit7)
|
||||
cmp $7, %ebx
|
||||
je L(StrncatExit7)
|
||||
/* Fallthrough: copy 8 bytes, then store %bh at dest+7 if the byte just
   copied there is 0, otherwise at dest+8.
   NOTE(review): %bh is presumably 0 on this path -- confirm.  */
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx)
|
||||
lea 7(%edx), %eax
|
||||
cmpb $1, (%eax) /* CF=1 iff dest[7] == 0 */
|
||||
sbb $-1, %eax /* %eax = %eax + 1 - CF */
|
||||
movb %bh, (%eax)
|
||||
movl %edi, %eax
|
||||
RETURN1 /* macro defined outside this chunk */
|
||||
|
||||
#endif
|
||||
END (STRCAT)
|
|
@ -107,8 +107,12 @@ name: \
|
|||
sub %esi, %ebp
|
||||
#endif
|
||||
|
||||
#ifndef STRCMP
|
||||
# define STRCMP strcmp
|
||||
#endif
|
||||
|
||||
.section .text.ssse3,"ax",@progbits
|
||||
ENTRY (ssse3_strcmp_latest)
|
||||
ENTRY (STRCMP)
|
||||
#ifdef USE_AS_STRNCMP
|
||||
PUSH (%ebp)
|
||||
#endif
|
||||
|
@ -2271,4 +2275,4 @@ L(less16bytes_sncmp):
|
|||
ret
|
||||
#endif
|
||||
|
||||
END (ssse3_strcmp_latest)
|
||||
END (STRCMP)
|
3955
libc/arch-x86/string/ssse3-strcpy-atom.S
Normal file
3955
libc/arch-x86/string/ssse3-strcpy-atom.S
Normal file
File diff suppressed because it is too large
Load diff
1225
libc/arch-x86/string/ssse3-strlcat-atom.S
Normal file
1225
libc/arch-x86/string/ssse3-strlcat-atom.S
Normal file
File diff suppressed because it is too large
Load diff
1403
libc/arch-x86/string/ssse3-strlcpy-atom.S
Normal file
1403
libc/arch-x86/string/ssse3-strlcpy-atom.S
Normal file
File diff suppressed because it is too large
Load diff
34
libc/arch-x86/string/ssse3-strncat-atom.S
Normal file
34
libc/arch-x86/string/ssse3-strncat-atom.S
Normal file
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Build strncat from the shared strcat source: STRCAT names the symbol
   (the shared file ends with END (STRCAT)) and USE_AS_STRNCAT is defined
   before the include so the shared file can select its length-limited code.  */
#define STRCAT strncat
|
||||
#define USE_AS_STRNCAT
|
||||
|
||||
#include "ssse3-strcat-atom.S"
|
|
@ -28,13 +28,8 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#if defined(USE_SSSE3)
|
||||
|
||||
# define MEMCMP memcmp
|
||||
# include "ssse3-memcmp3-new.S"
|
||||
#define USE_AS_STRNCMP
|
||||
#define STRCMP strncmp
|
||||
#include "ssse3-strcmp-atom.S"
|
||||
|
||||
#else
|
||||
|
||||
# include "memcmp.S"
|
||||
|
||||
#endif
|
33
libc/arch-x86/string/ssse3-strncpy-atom.S
Normal file
33
libc/arch-x86/string/ssse3-strncpy-atom.S
Normal file
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Build strncpy from the shared strcpy source.  The included file is not
   visible in this chunk; presumably STRCPY names the entry symbol and
   USE_AS_STRNCPY enables the length-limited paths -- confirm there.  */
#define USE_AS_STRNCPY
|
||||
#define STRCPY strncpy
|
||||
#include "ssse3-strcpy-atom.S"
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
Copyright (c) 2010, Intel Corporation
|
||||
Copyright (c) 2011 Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -28,18 +28,87 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
#if defined(USE_SSSE3)
|
||||
|
||||
# include "cache_wrapper.S"
|
||||
# undef __i686
|
||||
# define MEMCPY bcopy
|
||||
# define USE_AS_MEMMOVE
|
||||
# define USE_AS_BCOPY
|
||||
# include "ssse3-memcpy5.S"
|
||||
|
||||
#else
|
||||
|
||||
# include "bcopy.S"
|
||||
|
||||
#ifndef L
|
||||
# define L(label) .L##label
|
||||
#endif
|
||||
|
||||
#ifndef cfi_startproc
|
||||
# define cfi_startproc .cfi_startproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_endproc
|
||||
# define cfi_endproc .cfi_endproc
|
||||
#endif
|
||||
|
||||
#ifndef cfi_rel_offset
|
||||
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
|
||||
#endif
|
||||
|
||||
#ifndef cfi_restore
|
||||
# define cfi_restore(reg) .cfi_restore reg
|
||||
#endif
|
||||
|
||||
#ifndef cfi_adjust_cfa_offset
|
||||
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
|
||||
#endif
|
||||
|
||||
#ifndef ENTRY
|
||||
# define ENTRY(name) \
|
||||
.type name, @function; \
|
||||
.globl name; \
|
||||
.p2align 4; \
|
||||
name: \
|
||||
cfi_startproc
|
||||
#endif
|
||||
|
||||
#ifndef END
|
||||
# define END(name) \
|
||||
cfi_endproc; \
|
||||
.size name, .-name
|
||||
#endif
|
||||
|
||||
#define CFI_PUSH(REG) \
|
||||
cfi_adjust_cfa_offset (4); \
|
||||
cfi_rel_offset (REG, 0)
|
||||
|
||||
#define CFI_POP(REG) \
|
||||
cfi_adjust_cfa_offset (-4); \
|
||||
cfi_restore (REG)
|
||||
|
||||
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
||||
#define POP(REG) popl REG; CFI_POP (REG)
|
||||
|
||||
#define PARMS 4
|
||||
#define STR1 PARMS+4
|
||||
#define STR2 STR1+4
|
||||
|
||||
#define USE_AS_WCSCAT
|
||||
|
||||
.text
|
||||
/* wcscat (dest, src): append the wide string src to dest and return dest.
   Built from two included bodies: the wcslen body locates the end of dest,
   then the wcscpy body (included with USE_AS_WCSCAT defined) copies src
   there.  %edi holds dest for the whole function and is the return value.  */
ENTRY (wcscat)
|
||||
PUSH (%edi)
|
||||
mov STR1(%esp), %edi /* dest; STR1 accounts for the %edi just pushed */
|
||||
mov %edi, %edx
|
||||
|
||||
/* While the wcslen body is included, its RETURN jumps to L(WcscpyAtom)
   instead of returning to the caller.  */
#define RETURN jmp L(WcscpyAtom)
|
||||
#include "sse2-wcslen-atom.S"
|
||||
|
||||
/* %eax is presumably the length of dest in wide characters as produced by
   the included wcslen body (not visible in this chunk).  */
L(WcscpyAtom):
|
||||
shl $2, %eax /* scale by 4: wide-character count -> byte offset */
|
||||
mov STR2(%esp), %ecx /* src */
|
||||
lea (%edi, %eax), %edx /* write position = dest + offset */
|
||||
|
||||
/* Short-source fast path: a terminator in one of the first four wide
   characters is handled by the fixed-size exits of the included wcscpy body.
   The compares carry an explicit `l' suffix: with an immediate and a memory
   operand there is no register to infer the operand size from, and each wide
   character tested here is 32 bits.  */
cmpl $0, (%ecx)
|
||||
jz L(Exit4)
|
||||
cmpl $0, 4(%ecx)
|
||||
jz L(Exit8)
|
||||
cmpl $0, 8(%ecx)
|
||||
jz L(Exit12)
|
||||
cmpl $0, 12(%ecx)
|
||||
jz L(Exit16)
|
||||
|
||||
/* From here on RETURN restores %edi and returns to the caller; the trailing
   CFI_PUSH re-establishes the unwind state for the code that follows.  */
#undef RETURN
|
||||
#define RETURN POP(%edi); ret; CFI_PUSH(%edi)
|
||||
#include "ssse3-wcscpy-atom.S"
|
||||
|
||||
END (wcscat)
|
652
libc/arch-x86/string/ssse3-wcscpy-atom.S
Normal file
652
libc/arch-x86/string/ssse3-wcscpy-atom.S
Normal file
|
@ -0,0 +1,652 @@
|
|||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef USE_AS_WCSCAT
|
||||
|
||||
# ifndef L
|
||||
# define L(label) .L##label
|
||||
# endif
|
||||
|
||||
# ifndef cfi_startproc
|
||||
# define cfi_startproc .cfi_startproc
|
||||
# endif
|
||||
|
||||
# ifndef cfi_endproc
|
||||
# define cfi_endproc .cfi_endproc
|
||||
# endif
|
||||
|
||||
# ifndef cfi_rel_offset
|
||||
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
|
||||
# endif
|
||||
|
||||
# ifndef cfi_restore
|
||||
# define cfi_restore(reg) .cfi_restore reg
|
||||
# endif
|
||||
|
||||
# ifndef cfi_adjust_cfa_offset
|
||||
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
|
||||
# endif
|
||||
|
||||
# ifndef ENTRY
|
||||
# define ENTRY(name) \
|
||||
.type name, @function; \
|
||||
.globl name; \
|
||||
.p2align 4; \
|
||||
name: \
|
||||
cfi_startproc
|
||||
# endif
|
||||
|
||||
# ifndef END
|
||||
# define END(name) \
|
||||
cfi_endproc; \
|
||||
.size name, .-name
|
||||
# endif
|
||||
|
||||
# define CFI_PUSH(REG) \
|
||||
cfi_adjust_cfa_offset (4); \
|
||||
cfi_rel_offset (REG, 0)
|
||||
|
||||
# define CFI_POP(REG) \
|
||||
cfi_adjust_cfa_offset (-4); \
|
||||
cfi_restore (REG)
|
||||
|
||||
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
||||
# define POP(REG) popl REG; CFI_POP (REG)
|
||||
|
||||
# define PARMS 4
|
||||
# define RETURN POP (%edi); ret; CFI_PUSH (%edi)
|
||||
|
||||
# define STR1 PARMS
|
||||
# define STR2 STR1+4
|
||||
# define LEN STR2+4
|
||||
|
||||
.text
|
||||
/* wcscpy (dest, src) entry: load the arguments and handle sources whose
   terminator lies within the first four wide characters through the
   L(ExitTail*) paths, which return without having saved %edi.  Longer
   sources save %edi, keep dest in it, and continue into the common copy
   code.  */
ENTRY (wcscpy)
|
||||
mov STR1(%esp), %edx /* dest */
|
||||
mov STR2(%esp), %ecx /* src */
|
||||
|
||||
/* The compares carry an explicit `l' suffix: with an immediate and a memory
   operand there is no register to infer the operand size from, and each wide
   character tested here is 32 bits.  */
cmpl $0, (%ecx)
|
||||
jz L(ExitTail4)
|
||||
cmpl $0, 4(%ecx)
|
||||
jz L(ExitTail8)
|
||||
cmpl $0, 8(%ecx)
|
||||
jz L(ExitTail12)
|
||||
cmpl $0, 12(%ecx)
|
||||
jz L(ExitTail16)
|
||||
|
||||
PUSH (%edi)
|
||||
mov %edx, %edi /* keep dest for the return value */
|
||||
#endif
|
||||
/* Common copy prologue (reached from wcscpy above, or from the file that
   includes this one with USE_AS_WCSCAT defined).  %ecx = source,
   %edx = destination.  Both entry paths visible in this chunk arrive only
   after checking that the first four wide characters are non-zero, so the
   unaligned 16-byte copy below does not run past the terminator.  */
PUSH (%esi)
|
||||
lea 16(%ecx), %esi
|
||||
|
||||
and $-16, %esi /* first 16-byte-aligned address above the source pointer */
|
||||
|
||||
pxor %xmm0, %xmm0 /* %xmm0 = 0 */
|
||||
pcmpeqd (%esi), %xmm0 /* lanes become all-ones where that aligned block holds a zero wide character */
|
||||
movdqu (%ecx), %xmm1
|
||||
movdqu %xmm1, (%edx) /* copy the first 16 bytes unaligned */
|
||||
|
||||
pmovmskb %xmm0, %eax
|
||||
sub %ecx, %esi /* %esi = byte offset of the aligned block from the source */
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(CopyFrom1To16Bytes)
|
||||
|
||||
/* Round the destination up to the next 16-byte boundary and advance the
   source by the same number of bytes.  */
mov %edx, %eax
|
||||
lea 16(%edx), %edx
|
||||
and $-16, %edx
|
||||
sub %edx, %eax
|
||||
|
||||
sub %eax, %ecx
|
||||
mov %ecx, %eax
|
||||
and $0xf, %eax /* source misalignment relative to 16 bytes */
|
||||
mov $0, %esi /* mov leaves the flags from `and' intact */
|
||||
|
||||
jz L(Align16Both)
|
||||
cmp $4, %eax
|
||||
je L(Shl4)
|
||||
cmp $8, %eax
|
||||
je L(Shl8)
|
||||
jmp L(Shl12)
|
||||
|
||||
/* Source and destination are both 16-byte aligned.  Six unrolled steps each
   load the next aligned 16-byte block, store the previous one, and test the
   new block for a zero wide character; %esi advances by 16 per step and is
   the offset handed to L(CopyFrom1To16Bytes).  %xmm0 doubles as the zero
   constant: a compare that finds no zero lane leaves it all-zero again.
   After the sixth step the source is rounded down to a 64-byte boundary, the
   destination is moved back by the same amount, and execution falls into
   L(Aligned64Loop).  */
L(Align16Both):
|
||||
movaps (%ecx), %xmm1
|
||||
movaps 16(%ecx), %xmm2
|
||||
movaps %xmm1, (%edx)
|
||||
pcmpeqd %xmm2, %xmm0
|
||||
pmovmskb %xmm0, %eax
|
||||
lea 16(%esi), %esi
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(CopyFrom1To16Bytes)
|
||||
|
||||
movaps 16(%ecx, %esi), %xmm3
|
||||
movaps %xmm2, (%edx, %esi)
|
||||
pcmpeqd %xmm3, %xmm0
|
||||
pmovmskb %xmm0, %eax
|
||||
lea 16(%esi), %esi
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(CopyFrom1To16Bytes)
|
||||
|
||||
movaps 16(%ecx, %esi), %xmm4
|
||||
movaps %xmm3, (%edx, %esi)
|
||||
pcmpeqd %xmm4, %xmm0
|
||||
pmovmskb %xmm0, %eax
|
||||
lea 16(%esi), %esi
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(CopyFrom1To16Bytes)
|
||||
|
||||
movaps 16(%ecx, %esi), %xmm1
|
||||
movaps %xmm4, (%edx, %esi)
|
||||
pcmpeqd %xmm1, %xmm0
|
||||
pmovmskb %xmm0, %eax
|
||||
lea 16(%esi), %esi
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(CopyFrom1To16Bytes)
|
||||
|
||||
movaps 16(%ecx, %esi), %xmm2
|
||||
movaps %xmm1, (%edx, %esi)
|
||||
pcmpeqd %xmm2, %xmm0
|
||||
pmovmskb %xmm0, %eax
|
||||
lea 16(%esi), %esi
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(CopyFrom1To16Bytes)
|
||||
|
||||
movaps 16(%ecx, %esi), %xmm3
|
||||
movaps %xmm2, (%edx, %esi)
|
||||
pcmpeqd %xmm3, %xmm0
|
||||
pmovmskb %xmm0, %eax
|
||||
lea 16(%esi), %esi
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(CopyFrom1To16Bytes)
|
||||
|
||||
movaps %xmm3, (%edx, %esi)
|
||||
mov %ecx, %eax
|
||||
lea 16(%ecx, %esi), %ecx
|
||||
and $-0x40, %ecx /* round the source down to a 64-byte boundary */
|
||||
sub %ecx, %eax
|
||||
sub %eax, %edx /* move the destination by the same delta */
|
||||
|
||||
mov $-0x40, %esi
|
||||
|
||||
/* Main aligned loop: 64 bytes per iteration.  Loads four 16-byte blocks,
   keeps unmodified copies in %xmm4-%xmm7, and folds them with byte-wise
   minimum into %xmm3 before a single dword compare against zero.  Because
   the minimum is taken per byte, a non-zero mask only means "possibly a zero
   wide character"; L(Aligned64Leave) re-checks each block individually.
   Both pointers are advanced before the test, hence the negative store
   offsets.  */
L(Aligned64Loop):
|
||||
movaps (%ecx), %xmm2
|
||||
movaps 32(%ecx), %xmm3
|
||||
movaps %xmm2, %xmm4
|
||||
movaps 16(%ecx), %xmm5
|
||||
movaps %xmm3, %xmm6
|
||||
movaps 48(%ecx), %xmm7
|
||||
pminub %xmm5, %xmm2
|
||||
pminub %xmm7, %xmm3
|
||||
pminub %xmm2, %xmm3
|
||||
lea 64(%edx), %edx
|
||||
pcmpeqd %xmm0, %xmm3 /* %xmm0 is the zero constant and is left unchanged */
|
||||
lea 64(%ecx), %ecx
|
||||
pmovmskb %xmm3, %eax
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(Aligned64Leave)
|
||||
movaps %xmm4, -64(%edx)
|
||||
movaps %xmm5, -48(%edx)
|
||||
movaps %xmm6, -32(%edx)
|
||||
movaps %xmm7, -16(%edx)
|
||||
jmp L(Aligned64Loop)
|
||||
|
||||
/* Re-check the four blocks of the current 64-byte group one at a time
   (copies in %xmm4-%xmm7).  %esi starts at -0x40 and grows by 16 per block,
   so on a hit it is the offset of that block relative to the already
   advanced %ecx/%edx, as L(CopyFrom1To16Bytes) expects.  Blocks that pass
   are stored.  If none of the four contains a zero wide character the
   combined test was a false positive: %esi is reset and the loop resumes.  */
L(Aligned64Leave):
|
||||
pcmpeqd %xmm4, %xmm0
|
||||
pmovmskb %xmm0, %eax
|
||||
test %eax, %eax
|
||||
jnz L(CopyFrom1To16Bytes)
|
||||
|
||||
pcmpeqd %xmm5, %xmm0
|
||||
pmovmskb %xmm0, %eax
|
||||
movaps %xmm4, -64(%edx)
|
||||
lea 16(%esi), %esi
|
||||
test %eax, %eax
|
||||
jnz L(CopyFrom1To16Bytes)
|
||||
|
||||
pcmpeqd %xmm6, %xmm0
|
||||
pmovmskb %xmm0, %eax
|
||||
movaps %xmm5, -48(%edx)
|
||||
lea 16(%esi), %esi
|
||||
test %eax, %eax
|
||||
jnz L(CopyFrom1To16Bytes)
|
||||
|
||||
movaps %xmm6, -32(%edx)
|
||||
pcmpeqd %xmm7, %xmm0
|
||||
pmovmskb %xmm0, %eax
|
||||
lea 16(%esi), %esi
|
||||
test %eax, %eax
|
||||
jnz L(CopyFrom1To16Bytes)
|
||||
|
||||
mov $-0x40, %esi
|
||||
movaps %xmm7, -16(%edx)
|
||||
jmp L(Aligned64Loop)
|
||||
|
||||
.p2align 4
|
||||
/* Destination 16-byte aligned, source 4 bytes past a 16-byte boundary
   (selected by the `cmp $4, %eax' dispatch on the source misalignment).
   All loads are aligned: %xmm1 holds the block at %ecx-4, %xmm2 the block at
   %ecx+12.  Each step tests the newer block for a zero wide character, then
   `palignr $4' splices the last 12 bytes of the older block with the first
   4 bytes of the newer one into one aligned 16-byte store.  Four unrolled
   steps run before the source is rounded down to a 64-byte boundary for
   L(Shl4LoopStart).  L(Shl4Start) is also the re-entry point used by that
   loop when its combined test reports a possible terminator.  */
L(Shl4):
|
||||
movaps -4(%ecx), %xmm1
|
||||
movaps 12(%ecx), %xmm2
|
||||
L(Shl4Start):
|
||||
pcmpeqd %xmm2, %xmm0
|
||||
pmovmskb %xmm0, %eax
|
||||
movaps %xmm2, %xmm3 /* keep an unshifted copy for the next splice */
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(Shl4LoopExit)
|
||||
|
||||
palignr $4, %xmm1, %xmm2
|
||||
movaps %xmm2, (%edx)
|
||||
movaps 28(%ecx), %xmm2
|
||||
|
||||
pcmpeqd %xmm2, %xmm0
|
||||
lea 16(%edx), %edx
|
||||
pmovmskb %xmm0, %eax
|
||||
lea 16(%ecx), %ecx
|
||||
movaps %xmm2, %xmm1
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(Shl4LoopExit)
|
||||
|
||||
palignr $4, %xmm3, %xmm2
|
||||
movaps %xmm2, (%edx)
|
||||
movaps 28(%ecx), %xmm2
|
||||
|
||||
pcmpeqd %xmm2, %xmm0
|
||||
lea 16(%edx), %edx
|
||||
pmovmskb %xmm0, %eax
|
||||
lea 16(%ecx), %ecx
|
||||
movaps %xmm2, %xmm3
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(Shl4LoopExit)
|
||||
|
||||
palignr $4, %xmm1, %xmm2
|
||||
movaps %xmm2, (%edx)
|
||||
movaps 28(%ecx), %xmm2
|
||||
|
||||
pcmpeqd %xmm2, %xmm0
|
||||
lea 16(%edx), %edx
|
||||
pmovmskb %xmm0, %eax
|
||||
lea 16(%ecx), %ecx
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(Shl4LoopExit)
|
||||
|
||||
palignr $4, %xmm3, %xmm2
|
||||
movaps %xmm2, (%edx)
|
||||
lea 28(%ecx), %ecx
|
||||
lea 16(%edx), %edx
|
||||
|
||||
/* Round the (now aligned) source down to a 64-byte boundary, move the
   destination back by the same amount, then restore the 4-byte skew.  */
mov %ecx, %eax
|
||||
and $-0x40, %ecx
|
||||
sub %ecx, %eax
|
||||
lea -12(%ecx), %ecx
|
||||
sub %eax, %edx
|
||||
|
||||
movaps -4(%ecx), %xmm1
|
||||
|
||||
/* 64-bytes-per-iteration loop for the 4-byte-skewed source.  Four aligned
   blocks are loaded and folded with byte-wise minimum for one combined
   zero test (a non-zero mask may be a false positive, so it re-enters the
   block-by-block path at L(Shl4Start) with %xmm1/%xmm2 still holding the
   values that path expects).  Otherwise the blocks are spliced with the
   carried-over block in %xmm1 and stored as four aligned 16-byte writes;
   the last loaded block (saved in %xmm7) becomes the next %xmm1.  */
L(Shl4LoopStart):
|
||||
movaps 12(%ecx), %xmm2
|
||||
movaps 28(%ecx), %xmm3
|
||||
movaps %xmm3, %xmm6
|
||||
movaps 44(%ecx), %xmm4
|
||||
movaps %xmm4, %xmm7
|
||||
movaps 60(%ecx), %xmm5
|
||||
pminub %xmm2, %xmm6
|
||||
pminub %xmm5, %xmm7
|
||||
pminub %xmm6, %xmm7
|
||||
pcmpeqd %xmm0, %xmm7
|
||||
pmovmskb %xmm7, %eax
|
||||
movaps %xmm5, %xmm7
|
||||
palignr $4, %xmm4, %xmm5
|
||||
palignr $4, %xmm3, %xmm4
|
||||
test %eax, %eax
|
||||
jnz L(Shl4Start)
|
||||
|
||||
palignr $4, %xmm2, %xmm3
|
||||
lea 64(%ecx), %ecx
|
||||
palignr $4, %xmm1, %xmm2
|
||||
movaps %xmm7, %xmm1
|
||||
movaps %xmm5, 48(%edx)
|
||||
movaps %xmm4, 32(%edx)
|
||||
movaps %xmm3, 16(%edx)
|
||||
movaps %xmm2, (%edx)
|
||||
lea 64(%edx), %edx
|
||||
jmp L(Shl4LoopStart)
|
||||
|
||||
/* Exit from the 4-byte-skewed paths.  The block holding the terminator
   starts at %ecx+12, so first copy the 12 bytes in front of it (8 through
   %xmm0, 4 through %esi, which is restored right after), advance both
   pointers by 12, then dispatch on the compare mask in %eax: each wide
   character contributes four mask bits, so bit 0 of %al is element 0 and a
   zero %al means the terminator is in the upper two elements.  */
L(Shl4LoopExit):
|
||||
movlpd (%ecx), %xmm0
|
||||
movl 8(%ecx), %esi
|
||||
movlpd %xmm0, (%edx)
|
||||
movl %esi, 8(%edx)
|
||||
POP (%esi)
|
||||
add $12, %edx
|
||||
add $12, %ecx
|
||||
test %al, %al
|
||||
jz L(ExitHigh)
|
||||
test $0x01, %al
|
||||
jnz L(Exit4)
|
||||
/* Terminator is the second element: copy 8 bytes and return.  */
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx)
|
||||
movl %edi, %eax
|
||||
RETURN
|
||||
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
/* Destination 16-byte aligned, source 8 bytes past a 16-byte boundary
   (selected by the `cmp $8, %eax' dispatch on the source misalignment).
   All loads are aligned: %xmm1 holds the block at %ecx-8, %xmm2 the block at
   %ecx+8.  Each step tests the newer block for a zero wide character, then
   `palignr $8' splices the last 8 bytes of the older block with the first
   8 bytes of the newer one into one aligned 16-byte store.  Four unrolled
   steps run before the source is rounded down to a 64-byte boundary for
   L(Shl8LoopStart).  L(Shl8Start) is also the re-entry point used by that
   loop when its combined test reports a possible terminator.  */
L(Shl8):
|
||||
movaps -8(%ecx), %xmm1
|
||||
movaps 8(%ecx), %xmm2
|
||||
L(Shl8Start):
|
||||
pcmpeqd %xmm2, %xmm0
|
||||
pmovmskb %xmm0, %eax
|
||||
movaps %xmm2, %xmm3 /* keep an unshifted copy for the next splice */
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(Shl8LoopExit)
|
||||
|
||||
palignr $8, %xmm1, %xmm2
|
||||
movaps %xmm2, (%edx)
|
||||
movaps 24(%ecx), %xmm2
|
||||
|
||||
pcmpeqd %xmm2, %xmm0
|
||||
lea 16(%edx), %edx
|
||||
pmovmskb %xmm0, %eax
|
||||
lea 16(%ecx), %ecx
|
||||
movaps %xmm2, %xmm1
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(Shl8LoopExit)
|
||||
|
||||
palignr $8, %xmm3, %xmm2
|
||||
movaps %xmm2, (%edx)
|
||||
movaps 24(%ecx), %xmm2
|
||||
|
||||
pcmpeqd %xmm2, %xmm0
|
||||
lea 16(%edx), %edx
|
||||
pmovmskb %xmm0, %eax
|
||||
lea 16(%ecx), %ecx
|
||||
movaps %xmm2, %xmm3
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(Shl8LoopExit)
|
||||
|
||||
palignr $8, %xmm1, %xmm2
|
||||
movaps %xmm2, (%edx)
|
||||
movaps 24(%ecx), %xmm2
|
||||
|
||||
pcmpeqd %xmm2, %xmm0
|
||||
lea 16(%edx), %edx
|
||||
pmovmskb %xmm0, %eax
|
||||
lea 16(%ecx), %ecx
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(Shl8LoopExit)
|
||||
|
||||
palignr $8, %xmm3, %xmm2
|
||||
movaps %xmm2, (%edx)
|
||||
lea 24(%ecx), %ecx
|
||||
lea 16(%edx), %edx
|
||||
|
||||
/* Round the (now aligned) source down to a 64-byte boundary, move the
   destination back by the same amount, then restore the 8-byte skew.  */
mov %ecx, %eax
|
||||
and $-0x40, %ecx
|
||||
sub %ecx, %eax
|
||||
lea -8(%ecx), %ecx
|
||||
sub %eax, %edx
|
||||
|
||||
movaps -8(%ecx), %xmm1
|
||||
|
||||
/* 64-bytes-per-iteration loop for the 8-byte-skewed source.  Four aligned
   blocks are loaded and folded with byte-wise minimum for one combined
   zero test (a non-zero mask may be a false positive, so it re-enters the
   block-by-block path at L(Shl8Start) with %xmm1/%xmm2 still holding the
   values that path expects).  Otherwise the blocks are spliced with the
   carried-over block in %xmm1 and stored as four aligned 16-byte writes;
   the last loaded block (saved in %xmm7) becomes the next %xmm1.  */
L(Shl8LoopStart):
|
||||
movaps 8(%ecx), %xmm2
|
||||
movaps 24(%ecx), %xmm3
|
||||
movaps %xmm3, %xmm6
|
||||
movaps 40(%ecx), %xmm4
|
||||
movaps %xmm4, %xmm7
|
||||
movaps 56(%ecx), %xmm5
|
||||
pminub %xmm2, %xmm6
|
||||
pminub %xmm5, %xmm7
|
||||
pminub %xmm6, %xmm7
|
||||
pcmpeqd %xmm0, %xmm7
|
||||
pmovmskb %xmm7, %eax
|
||||
movaps %xmm5, %xmm7
|
||||
palignr $8, %xmm4, %xmm5
|
||||
palignr $8, %xmm3, %xmm4
|
||||
test %eax, %eax
|
||||
jnz L(Shl8Start)
|
||||
|
||||
palignr $8, %xmm2, %xmm3
|
||||
lea 64(%ecx), %ecx
|
||||
palignr $8, %xmm1, %xmm2
|
||||
movaps %xmm7, %xmm1
|
||||
movaps %xmm5, 48(%edx)
|
||||
movaps %xmm4, 32(%edx)
|
||||
movaps %xmm3, 16(%edx)
|
||||
movaps %xmm2, (%edx)
|
||||
lea 64(%edx), %edx
|
||||
jmp L(Shl8LoopStart)
|
||||
|
||||
/* Exit from the 8-byte-skewed paths.  The block holding the terminator
   starts at %ecx+8, so first copy the 8 bytes in front of it, restore %esi,
   advance both pointers by 8, then dispatch on the compare mask in %eax:
   each wide character contributes four mask bits, so bit 0 of %al is
   element 0 and a zero %al means the terminator is in the upper two
   elements.  */
L(Shl8LoopExit):
|
||||
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx)
|
||||
POP (%esi)
|
||||
add $8, %edx
|
||||
add $8, %ecx
|
||||
test %al, %al
|
||||
jz L(ExitHigh)
|
||||
test $0x01, %al
|
||||
jnz L(Exit4)
|
||||
/* Terminator is the second element: copy 8 bytes and return.  */
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx)
|
||||
movl %edi, %eax
|
||||
RETURN
|
||||
|
||||
CFI_PUSH (%esi)
|
||||
|
||||
.p2align 4
|
||||
/* Destination 16-byte aligned, source 12 bytes past a 16-byte boundary
   (the remaining case of the misalignment dispatch).  All loads are
   aligned: %xmm1 holds the block at %ecx-12, %xmm2 the block at %ecx+4.
   Each step tests the newer block for a zero wide character, then
   `palignr $12' splices the last 4 bytes of the older block with the first
   12 bytes of the newer one into one aligned 16-byte store.  Four unrolled
   steps run before the source is rounded down to a 64-byte boundary for
   L(Shl12LoopStart).  L(Shl12Start) is also the re-entry point used by that
   loop when its combined test reports a possible terminator.  */
L(Shl12):
|
||||
movaps -12(%ecx), %xmm1
|
||||
movaps 4(%ecx), %xmm2
|
||||
L(Shl12Start):
|
||||
pcmpeqd %xmm2, %xmm0
|
||||
pmovmskb %xmm0, %eax
|
||||
movaps %xmm2, %xmm3 /* keep an unshifted copy for the next splice */
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(Shl12LoopExit)
|
||||
|
||||
palignr $12, %xmm1, %xmm2
|
||||
movaps %xmm2, (%edx)
|
||||
movaps 20(%ecx), %xmm2
|
||||
|
||||
pcmpeqd %xmm2, %xmm0
|
||||
lea 16(%edx), %edx
|
||||
pmovmskb %xmm0, %eax
|
||||
lea 16(%ecx), %ecx
|
||||
movaps %xmm2, %xmm1
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(Shl12LoopExit)
|
||||
|
||||
palignr $12, %xmm3, %xmm2
|
||||
movaps %xmm2, (%edx)
|
||||
movaps 20(%ecx), %xmm2
|
||||
|
||||
pcmpeqd %xmm2, %xmm0
|
||||
lea 16(%edx), %edx
|
||||
pmovmskb %xmm0, %eax
|
||||
lea 16(%ecx), %ecx
|
||||
movaps %xmm2, %xmm3
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(Shl12LoopExit)
|
||||
|
||||
palignr $12, %xmm1, %xmm2
|
||||
movaps %xmm2, (%edx)
|
||||
movaps 20(%ecx), %xmm2
|
||||
|
||||
pcmpeqd %xmm2, %xmm0
|
||||
lea 16(%edx), %edx
|
||||
pmovmskb %xmm0, %eax
|
||||
lea 16(%ecx), %ecx
|
||||
|
||||
test %eax, %eax
|
||||
jnz L(Shl12LoopExit)
|
||||
|
||||
palignr $12, %xmm3, %xmm2
|
||||
movaps %xmm2, (%edx)
|
||||
lea 20(%ecx), %ecx
|
||||
lea 16(%edx), %edx
|
||||
|
||||
/* Round the (now aligned) source down to a 64-byte boundary, move the
   destination back by the same amount, then restore the 12-byte skew.  */
mov %ecx, %eax
|
||||
and $-0x40, %ecx
|
||||
sub %ecx, %eax
|
||||
lea -4(%ecx), %ecx
|
||||
sub %eax, %edx
|
||||
|
||||
movaps -12(%ecx), %xmm1
|
||||
|
||||
/* 64-bytes-per-iteration loop for the 12-byte-skewed source.  Four aligned
   blocks are loaded and folded with byte-wise minimum for one combined
   zero test (a non-zero mask may be a false positive, so it re-enters the
   block-by-block path at L(Shl12Start) with %xmm1/%xmm2 still holding the
   values that path expects).  Otherwise the blocks are spliced with the
   carried-over block in %xmm1 and stored as four aligned 16-byte writes;
   the last loaded block (saved in %xmm7) becomes the next %xmm1.  */
L(Shl12LoopStart):
|
||||
movaps 4(%ecx), %xmm2
|
||||
movaps 20(%ecx), %xmm3
|
||||
movaps %xmm3, %xmm6
|
||||
movaps 36(%ecx), %xmm4
|
||||
movaps %xmm4, %xmm7
|
||||
movaps 52(%ecx), %xmm5
|
||||
pminub %xmm2, %xmm6
|
||||
pminub %xmm5, %xmm7
|
||||
pminub %xmm6, %xmm7
|
||||
pcmpeqd %xmm0, %xmm7
|
||||
pmovmskb %xmm7, %eax
|
||||
movaps %xmm5, %xmm7
|
||||
palignr $12, %xmm4, %xmm5
|
||||
palignr $12, %xmm3, %xmm4
|
||||
test %eax, %eax
|
||||
jnz L(Shl12Start)
|
||||
|
||||
palignr $12, %xmm2, %xmm3
|
||||
lea 64(%ecx), %ecx
|
||||
palignr $12, %xmm1, %xmm2
|
||||
movaps %xmm7, %xmm1
|
||||
movaps %xmm5, 48(%edx)
|
||||
movaps %xmm4, 32(%edx)
|
||||
movaps %xmm3, 16(%edx)
|
||||
movaps %xmm2, (%edx)
|
||||
lea 64(%edx), %edx
|
||||
jmp L(Shl12LoopStart)
|
||||
|
||||
/* Exit from the 12-byte-skewed paths.  The block holding the terminator
   starts at %ecx+4: copy the 4 bytes in front of it, then set %esi = 4 and
   fall through into L(CopyFrom1To16Bytes), which adds %esi to both pointers
   and restores the saved %esi.  */
L(Shl12LoopExit):
|
||||
movl (%ecx), %esi
|
||||
movl %esi, (%edx)
|
||||
mov $4, %esi
|
||||
|
||||
.p2align 4
|
||||
/* Common tail: %esi is the byte offset of the 16-byte block that contains
   the terminator and %eax the pmovmskb mask of its dword compare (four mask
   bits per wide character).  Advance both pointers to that block, restore
   %esi, and pick the copy size: zero %al -> terminator in element 2 or 3
   (L(ExitHigh)); bit 0 of %al set -> element 0 (L(Exit4)); otherwise
   element 1, handled by L(Exit8) right below.  */
L(CopyFrom1To16Bytes):
|
||||
add %esi, %edx
|
||||
add %esi, %ecx
|
||||
|
||||
POP (%esi)
|
||||
test %al, %al
|
||||
jz L(ExitHigh)
|
||||
test $0x01, %al
|
||||
jnz L(Exit4)
|
||||
/* Copy 8 bytes (two wide characters, the second being the terminator)
   and return with %eax = %edi.  */
L(Exit8):
|
||||
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx)
|
||||
movl %edi, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
/* Terminator is in the upper half of the block: bit 0 of %ah (mask bit 8)
   set means element 2, handled by L(Exit12); otherwise fall into
   L(Exit16).  */
L(ExitHigh):
|
||||
test $0x01, %ah
|
||||
jnz L(Exit12)
|
||||
/* Copy 16 bytes (four wide characters) with unaligned moves and return
   with %eax = %edi.  */
L(Exit16):
|
||||
movdqu (%ecx), %xmm0
|
||||
movdqu %xmm0, (%edx)
|
||||
movl %edi, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
/* Copy 4 bytes (one wide character) from (%ecx) to (%edx) and return with
   %eax = %edi.  */
L(Exit4):
|
||||
movl (%ecx), %eax
|
||||
movl %eax, (%edx)
|
||||
movl %edi, %eax
|
||||
RETURN
|
||||
|
||||
.p2align 4
|
||||
/* Copy 12 bytes (three wide characters) from (%ecx) to (%edx) and return
   with %eax = %edi.  */
L(Exit12):
|
||||
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx) /* bytes 0-7 */
|
||||
movl 8(%ecx), %eax
|
||||
movl %eax, 8(%edx) /* bytes 8-11 */
|
||||
movl %edi, %eax
|
||||
RETURN
|
||||
|
||||
CFI_POP (%edi)
|
||||
|
||||
.p2align 4
|
||||
/* Early exit taken from the wcscpy entry before %edi is pushed: copy 4 bytes
   (one wide character), return the destination held in %edx, and use a
   plain `ret' since there is nothing to pop.  */
L(ExitTail4):
|
||||
movl (%ecx), %eax
|
||||
movl %eax, (%edx)
|
||||
movl %edx, %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
/* Early exit taken from the wcscpy entry before %edi is pushed: copy 8 bytes
   (two wide characters), return the destination held in %edx, and use a
   plain `ret' since there is nothing to pop.  */
L(ExitTail8):
|
||||
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx)
|
||||
movl %edx, %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
/* Early exit taken from the wcscpy entry before %edi is pushed: copy
   12 bytes (three wide characters), return the destination held in %edx,
   and use a plain `ret' since there is nothing to pop.  */
L(ExitTail12):
|
||||
movlpd (%ecx), %xmm0
|
||||
movlpd %xmm0, (%edx) /* bytes 0-7 */
|
||||
movl 8(%ecx), %eax
|
||||
movl %eax, 8(%edx) /* bytes 8-11 */
|
||||
movl %edx, %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
/* Early exit taken from the wcscpy entry before %edi is pushed: copy
   16 bytes (four wide characters) with unaligned moves, return the
   destination held in %edx, and use a plain `ret' since there is nothing to
   pop.  */
L(ExitTail16):
|
||||
movdqu (%ecx), %xmm0
|
||||
movdqu %xmm0, (%edx)
|
||||
movl %edx, %eax
|
||||
ret
|
||||
|
||||
#ifndef USE_AS_WCSCAT
|
||||
END (wcscpy)
|
||||
#endif
|
33
libc/arch-x86/string/ssse3-wmemcmp-atom.S
Normal file
33
libc/arch-x86/string/ssse3-wmemcmp-atom.S
Normal file
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Build wmemcmp from the shared memcmp source.  The included file is not
   visible in this chunk; presumably MEMCMP names the entry symbol and
   USE_AS_WMEMCMP switches the comparison to wide-character elements --
   confirm there.  */
#define MEMCMP wmemcmp
|
||||
#define USE_AS_WMEMCMP 1
|
||||
#include "ssse3-memcmp-atom.S"
|
|
@ -10,16 +10,75 @@ _LIBC_ARCH_COMMON_SRC_FILES := \
|
|||
arch-x86/bionic/sigsetjmp.S \
|
||||
arch-x86/bionic/syscall.S \
|
||||
arch-x86/bionic/vfork.S \
|
||||
arch-x86/string/bcopy_wrapper.S \
|
||||
arch-x86/string/bzero_wrapper.S \
|
||||
arch-x86/string/ffs.S \
|
||||
arch-x86/string/memcmp_wrapper.S \
|
||||
arch-x86/string/memcpy_wrapper.S \
|
||||
arch-x86/string/memmove_wrapper.S \
|
||||
arch-x86/string/memset_wrapper.S \
|
||||
arch-x86/string/strcmp_wrapper.S \
|
||||
arch-x86/string/strlen_wrapper.S \
|
||||
arch-x86/string/strncmp_wrapper.S \
|
||||
arch-x86/string/ffs.S
|
||||
|
||||
ifeq ($(ARCH_X86_HAVE_SSSE3),true)
|
||||
_LIBC_ARCH_COMMON_SRC_FILES += \
|
||||
arch-x86/string/ssse3-memcpy-atom.S \
|
||||
arch-x86/string/ssse3-memmove-atom.S \
|
||||
arch-x86/string/ssse3-bcopy-atom.S \
|
||||
arch-x86/string/ssse3-strncat-atom.S \
|
||||
arch-x86/string/ssse3-strncpy-atom.S \
|
||||
arch-x86/string/ssse3-strlcat-atom.S \
|
||||
arch-x86/string/ssse3-strlcpy-atom.S \
|
||||
arch-x86/string/ssse3-strcmp-atom.S \
|
||||
arch-x86/string/ssse3-strncmp-atom.S \
|
||||
arch-x86/string/ssse3-strcat-atom.S \
|
||||
arch-x86/string/ssse3-strcpy-atom.S \
|
||||
arch-x86/string/ssse3-memcmp-atom.S \
|
||||
arch-x86/string/ssse3-wmemcmp-atom.S \
|
||||
arch-x86/string/ssse3-wcscat-atom.S \
|
||||
arch-x86/string/ssse3-wcscpy-atom.S
|
||||
else
|
||||
_LIBC_ARCH_COMMON_SRC_FILES += \
|
||||
arch-x86/string/memcpy.S \
|
||||
arch-x86/string/memmove.S \
|
||||
arch-x86/string/bcopy.S \
|
||||
arch-x86/string/strcmp.S \
|
||||
arch-x86/string/strncmp.S \
|
||||
arch-x86/string/strcat.S \
|
||||
arch-x86/string/memcmp.S \
|
||||
string/strcpy.c \
|
||||
string/strncat.c \
|
||||
string/strncpy.c \
|
||||
string/strlcat.c \
|
||||
string/strlcpy.c \
|
||||
upstream-freebsd/lib/libc/string/wcscpy.c \
|
||||
upstream-freebsd/lib/libc/string/wcscat.c \
|
||||
upstream-freebsd/lib/libc/string/wmemcmp.c
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH_X86_HAVE_SSE2),true)
|
||||
_LIBC_ARCH_COMMON_SRC_FILES += \
|
||||
arch-x86/string/sse2-memset-atom.S \
|
||||
arch-x86/string/sse2-bzero-atom.S \
|
||||
arch-x86/string/sse2-memchr-atom.S \
|
||||
arch-x86/string/sse2-memrchr-atom.S \
|
||||
arch-x86/string/sse2-strchr-atom.S \
|
||||
arch-x86/string/sse2-strrchr-atom.S \
|
||||
arch-x86/string/sse2-index-atom.S \
|
||||
arch-x86/string/sse2-strlen-atom.S \
|
||||
arch-x86/string/sse2-strnlen-atom.S \
|
||||
arch-x86/string/sse2-wcschr-atom.S \
|
||||
arch-x86/string/sse2-wcsrchr-atom.S \
|
||||
arch-x86/string/sse2-wcslen-atom.S \
|
||||
arch-x86/string/sse2-wcscmp-atom.S
|
||||
else
|
||||
# Portable fallbacks built when ARCH_X86_HAVE_SSE2 is not "true".
# strchr.cpp lives under bionic/ (it was listed as bionic/strchr.cpp in
# libc_bionic_src_files), not under string/.
_LIBC_ARCH_COMMON_SRC_FILES += \
    arch-x86/string/memset.S \
    arch-x86/string/strlen.S \
    arch-x86/string/bzero.S \
    bionic/memrchr.c \
    bionic/memchr.c \
    bionic/strchr.cpp \
    string/strrchr.c \
    string/index.c \
    bionic/strnlen.c \
    upstream-freebsd/lib/libc/string/wcschr.c \
    upstream-freebsd/lib/libc/string/wcsrchr.c \
    upstream-freebsd/lib/libc/string/wcslen.c \
    upstream-freebsd/lib/libc/string/wcscmp.c
|
||||
endif
|
||||
|
||||
_LIBC_ARCH_STATIC_SRC_FILES := \
|
||||
bionic/dl_iterate_phdr_static.c \
|
||||
|
|
46
libc/bionic/__strchr_chk.cpp
Normal file
46
libc/bionic/__strchr_chk.cpp
Normal file
|
@ -0,0 +1,46 @@
|
|||
/*-
|
||||
* Copyright (c) 1990 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "libc_logging.h"
|
||||
|
||||
extern "C" char* __strchr_chk(const char* p, int ch, size_t s_len) {
|
||||
for (;; ++p, s_len--) {
|
||||
if (__predict_false(s_len == 0)) {
|
||||
__fortify_chk_fail("read beyond buffer", 0);
|
||||
}
|
||||
if (*p == static_cast<char>(ch)) {
|
||||
return const_cast<char*>(p);
|
||||
}
|
||||
if (*p == '\0') {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
/* NOTREACHED */
|
||||
}
|
|
@ -28,22 +28,6 @@
|
|||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "libc_logging.h"
|
||||
|
||||
extern "C" char* __strchr_chk(const char* p, int ch, size_t s_len) {
|
||||
for (;; ++p, s_len--) {
|
||||
if (__predict_false(s_len == 0)) {
|
||||
__fortify_chk_fail("read beyond buffer", 0);
|
||||
}
|
||||
if (*p == static_cast<char>(ch)) {
|
||||
return const_cast<char*>(p);
|
||||
}
|
||||
if (*p == '\0') {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
/* NOTREACHED */
|
||||
}
|
||||
|
||||
extern "C" char* strchr(const char* p, int ch) {
|
||||
return __strchr_chk(p, ch, __BIONIC_FORTIFY_UNKNOWN_SIZE);
|
||||
|
|
48
libc/string/__strrchr_chk.c
Normal file
48
libc/string/__strrchr_chk.c
Normal file
|
@ -0,0 +1,48 @@
|
|||
/* $OpenBSD: rindex.c,v 1.6 2005/08/08 08:05:37 espie Exp $ */
|
||||
/*
|
||||
* Copyright (c) 1988 Regents of the University of California.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "libc_logging.h"
|
||||
|
||||
/*
 * Bounds-checked strrchr.
 *
 * Walks the string at p up to and including its terminating NUL, remembering
 * the most recent byte equal to ch (converted to char). s_len is the number of
 * bytes that may be examined; when it reaches zero before the terminator is
 * seen, __fortify_chk_fail() is called.
 *
 * Returns a pointer to the last matching byte, or NULL if there was none.
 * The terminator takes part in the comparison, so searching for '\0' returns
 * a pointer to the terminator.
 */
char *
__strrchr_chk(const char *p, int ch, size_t s_len)
{
	const char wanted = (char) ch;
	char *last = NULL;

	for (;;) {
		/* Out of budget before the next read: report the overrun. */
		if (s_len == 0)
			__fortify_chk_fail("strrchr read beyond buffer", 0);
		if (*p == wanted)
			last = (char *)p;
		if (*p == '\0')
			break;
		p++;
		s_len--;
	}

	return last;
}
|
|
@ -29,23 +29,6 @@
|
|||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "libc_logging.h"
|
||||
|
||||
/*
 * Bounds-checked strrchr.
 *
 * Walks the string at p up to and including its terminating NUL, remembering
 * the most recent byte equal to ch (converted to char). s_len is the number of
 * bytes that may be examined; when it reaches zero before the terminator is
 * seen, __fortify_chk_fail() is called.
 *
 * Returns a pointer to the last matching byte, or NULL if there was none.
 * The terminator takes part in the comparison, so searching for '\0' returns
 * a pointer to the terminator.
 */
char *
__strrchr_chk(const char *p, int ch, size_t s_len)
{
	const char wanted = (char) ch;
	char *last = NULL;

	for (;;) {
		/* Out of budget before the next read: report the overrun. */
		if (s_len == 0)
			__fortify_chk_fail("strrchr read beyond buffer", 0);
		if (*p == wanted)
			last = (char *)p;
		if (*p == '\0')
			break;
		p++;
		s_len--;
	}

	return last;
}
|
||||
|
||||
char *
|
||||
strrchr(const char *p, int ch)
|
||||
|
|
Loading…
Reference in a new issue