174 lines
3.7 KiB
ArmAsm
174 lines
3.7 KiB
ArmAsm
|
/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
|
||
|
|
||
|
#include "cache.h"
|
||
|
|
||
|
/*
 * Helper macros.  Each one is wrapped in #ifndef so that a definition made
 * before this point (by an including file or on the command line) takes
 * precedence over the default given here.
 */

/* Name under which the routine is exported.  */
#ifndef MEMSET
# define MEMSET		memset
#endif

/* Spell a label with the ".L" prefix: L(foo) expands to .Lfoo.  */
#ifndef L
# define L(label)	.L##label
#endif

/* Align the next location to a 2^n-byte boundary.  */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

/* Call-frame-information directives that bracket a function body.  */
#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

/* Open a function: mark `name' as a function symbol, make it global,
   emit its label and start the CFI region.  */
#ifndef ENTRY
# define ENTRY(name)		\
	.type name, @function;	\
	.globl name;		\
name:				\
	cfi_startproc
#endif

/* Close a function: end the CFI region and record the symbol size as the
   distance from `name' to the current location.  */
#ifndef END
# define END(name)		\
	cfi_endproc;		\
	.size name, .-name
#endif
|
||
|
|
||
|
/*
 * MEMSET -- fill a byte range with a single byte value.
 *
 * Syntax: GNU as, AT&T operand order, preprocessed by cpp.
 *
 * Default build (memset-style arguments):
 *   In:  %rdi = destination, %rsi = fill value (only the low byte is
 *        used), %rdx = byte count n.
 * With USE_AS_BZERO_P defined (bzero-style arguments):
 *   In:  %rdi = destination, %rsi = byte count n; the fill value is 0.
 *
 * Out:      %rax = original destination pointer.
 * Clobbers: %rcx, %rdx, %rsi, %r8, %xmm0, flags.
 * Stack:    not used; the routine makes no calls.
 *
 * Strategy: every size class below is covered by a few stores anchored at
 * the start of the buffer and a few anchored at its end; the two groups
 * are allowed to overlap, which removes the need for a byte-exact tail.
 * Only for n > 128 is a loop used, over the 64-byte-aligned interior.
 */
	.section .text.sse2,"ax",@progbits
ENTRY (MEMSET)
	movq	%rdi, %rax		/* return value = destination */
#ifdef USE_AS_BZERO_P
	mov	%rsi, %rdx		/* length arrives in %rsi; move it to %rdx */
	xor	%ecx, %ecx		/* pattern = 0; a 32-bit write clears all of %rcx */
#else
	movzbl	%sil, %esi		/* keep only the fill byte, zero-extended */
	mov	$0x0101010101010101, %rcx
	imul	%rsi, %rcx		/* %rcx = fill byte replicated into all 8 bytes */
#endif
	cmpq	$16, %rdx
	jae	L(16bytesormore)

	/* n < 16: pick the size class from the highest set bit of n.  */
	testb	$8, %dl
	jnz	L(8_15bytes)
	testb	$4, %dl
	jnz	L(4_7bytes)
	testb	$2, %dl
	jnz	L(2_3bytes)
	testb	$1, %dl
	jz	L(return)		/* n == 0: nothing to store */
	movb	%cl, (%rdi)		/* n == 1 */
L(return):
	ret

L(8_15bytes):
	/* Two 8-byte stores, first and last 8 bytes; they overlap when n < 16.  */
	movq	%rcx, (%rdi)
	movq	%rcx, -8(%rdi, %rdx)
	ret

L(4_7bytes):
	/* Two 4-byte stores, first and last 4 bytes.  */
	movl	%ecx, (%rdi)
	movl	%ecx, -4(%rdi, %rdx)
	ret

L(2_3bytes):
	/* Two 2-byte stores, first and last 2 bytes.  */
	movw	%cx, (%rdi)
	movw	%cx, -2(%rdi, %rdx)
	ret

	ALIGN (4)
L(16bytesormore):
	/* Build the 16-byte fill pattern in %xmm0.  */
#ifdef USE_AS_BZERO_P
	pxor	%xmm0, %xmm0
#else
	movd	%rcx, %xmm0
	pshufd	$0, %xmm0, %xmm0	/* broadcast the low dword to all four lanes */
#endif
	/* First and last 16 bytes: sufficient for 16 <= n <= 32.  */
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm0, -16(%rdi, %rdx)
	cmpq	$32, %rdx
	jbe	L(32bytesless)
	/* Second and second-to-last 16 bytes: sufficient for n <= 64.  */
	movdqu	%xmm0, 16(%rdi)
	movdqu	%xmm0, -32(%rdi, %rdx)
	cmpq	$64, %rdx
	jbe	L(64bytesless)
	/* Bytes [32, 64) and [n-64, n-32): sufficient for n <= 128.  */
	movdqu	%xmm0, 32(%rdi)
	movdqu	%xmm0, 48(%rdi)
	movdqu	%xmm0, -64(%rdi, %rdx)
	movdqu	%xmm0, -48(%rdi, %rdx)
	cmpq	$128, %rdx
	ja	L(128bytesmore)
L(32bytesless):
L(64bytesless):
	ret

	ALIGN (4)
L(128bytesmore):
	/*
	 * n > 128.  The first 64 and last 64 bytes were written above, so
	 * only the 64-byte-aligned interior remains:
	 *   %rcx = first 64-byte boundary strictly above dst,
	 *   %rdx = last 64-byte boundary at or below dst + n,
	 *   %r8  = n, kept for the threshold test below.
	 */
	leaq	64(%rdi), %rcx
	andq	$-64, %rcx
	movq	%rdx, %r8
	addq	%rdi, %rdx
	andq	$-64, %rdx
	/* NOTE(review): with n > 128 the end bound appears to always exceed
	   the start bound, so this exit looks purely defensive -- confirm
	   before removing it.  */
	cmpq	%rcx, %rdx
	je	L(return)

	/* Fills larger than the threshold take the non-temporal loop.  The
	   threshold is a compile-time constant when SHARED_CACHE_SIZE is
	   defined (presumably by cache.h -- confirm), otherwise it is loaded
	   from the external symbol __x86_64_shared_cache_size.  */
#ifdef SHARED_CACHE_SIZE
	cmp	$SHARED_CACHE_SIZE, %r8
#else
	cmp	__x86_64_shared_cache_size(%rip), %r8
#endif
	ja	L(128bytesmore_nt)

	ALIGN (4)
L(128bytesmore_normal):
	/* Invariant: %rcx is 64-byte aligned and below %rdx, so the
	   alignment-requiring stores are safe.  64 bytes per iteration.  */
	movdqa	%xmm0, (%rcx)
	movaps	%xmm0, 0x10(%rcx)
	movaps	%xmm0, 0x20(%rcx)
	movaps	%xmm0, 0x30(%rcx)
	addq	$64, %rcx
	cmpq	%rcx, %rdx
	jne	L(128bytesmore_normal)
	ret

	ALIGN (4)
L(128bytesmore_nt):
	/* Same walk as the loop above, using non-temporal stores.  */
	movntdq	%xmm0, (%rcx)
	movntdq	%xmm0, 0x10(%rcx)
	movntdq	%xmm0, 0x20(%rcx)
	movntdq	%xmm0, 0x30(%rcx)
	leaq	64(%rcx), %rcx
	cmpq	%rcx, %rdx
	jne	L(128bytesmore_nt)
	sfence				/* order the non-temporal stores before returning */
	ret

END (MEMSET)
|