diff --git a/libcutils/Android.mk b/libcutils/Android.mk index c0faed4f5..933a77bdf 100644 --- a/libcutils/Android.mk +++ b/libcutils/Android.mk @@ -134,6 +134,9 @@ LOCAL_SRC_FILES := $(commonSources) \ LOCAL_SRC_FILES_arm += \ arch-arm/memset32.S \ +LOCAL_SRC_FILES_arm64 += \ + arch-arm64/android_memset.S \ + LOCAL_SRC_FILES_mips += \ arch-mips/android_memset.c \ @@ -146,6 +149,7 @@ LOCAL_SRC_FILES_x86_64 += \ arch-x86_64/android_memset32_SSE2-atom.S \ LOCAL_CFLAGS_arm += -DHAVE_MEMSET16 -DHAVE_MEMSET32 +LOCAL_CFLAGS_arm64 += -DHAVE_MEMSET16 -DHAVE_MEMSET32 LOCAL_CFLAGS_mips += -DHAVE_MEMSET16 -DHAVE_MEMSET32 LOCAL_CFLAGS_x86 += -DHAVE_MEMSET16 -DHAVE_MEMSET32 LOCAL_CFLAGS_x86_64 += -DHAVE_MEMSET16 -DHAVE_MEMSET32 diff --git a/libcutils/arch-arm/memset32.S b/libcutils/arch-arm/memset32.S index 469726563..6efab9f93 100644 --- a/libcutils/arch-arm/memset32.S +++ b/libcutils/arch-arm/memset32.S @@ -51,8 +51,10 @@ android_memset16: android_memset32: .fnstart - .save {lr} + .cfi_startproc str lr, [sp, #-4]! + .cfi_def_cfa_offset 4 + .cfi_rel_offset lr, 0 /* align the destination to a cache-line */ mov r12, r1 @@ -89,5 +91,8 @@ android_memset32: strmih lr, [r0], #2 ldr lr, [sp], #4 + .cfi_def_cfa_offset 0 + .cfi_restore lr bx lr + .cfi_endproc .fnend diff --git a/libcutils/arch-arm64/android_memset.S b/libcutils/arch-arm64/android_memset.S new file mode 100644 index 000000000..9a83a6876 --- /dev/null +++ b/libcutils/arch-arm64/android_memset.S @@ -0,0 +1,211 @@ +/* Copyright (c) 2012, Linaro Limited + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Linaro nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* Assumptions: + * + * ARMv8-a, AArch64 + * Unaligned accesses + * + */ + +/* By default we assume that the DC instruction can be used to zero + data blocks more efficiently. In some circumstances this might be + unsafe, for example in an asymmetric multiprocessor environment with + different DC clear lengths (neither the upper nor lower lengths are + safe to use). 
*/ + +#define dst x0 +#define count x2 +#define tmp1 x3 +#define tmp1w w3 +#define tmp2 x4 +#define tmp2w w4 +#define zva_len_x x5 +#define zva_len w5 +#define zva_bits_x x6 + +#define A_l x1 +#define A_lw w1 +#define tmp3w w9 + +#define ENTRY(f) \ + .text; \ + .globl f; \ + .align 0; \ + .type f, %function; \ + f: \ + .cfi_startproc \ + +#define END(f) \ + .cfi_endproc; \ + .size f, .-f; \ + +ENTRY(android_memset16) + ands A_lw, A_lw, #0xffff + b.eq .Lzero_mem + orr A_lw, A_lw, A_lw, lsl #16 + b .Lexpand_to_64 +END(android_memset16) + +ENTRY(android_memset32) + cmp A_lw, #0 + b.eq .Lzero_mem +.Lexpand_to_64: + orr A_l, A_l, A_l, lsl #32 +.Ltail_maybe_long: + cmp count, #64 + b.ge .Lnot_short +.Ltail_maybe_tiny: + cmp count, #15 + b.le .Ltail15tiny +.Ltail63: + ands tmp1, count, #0x30 + b.eq .Ltail15 + add dst, dst, tmp1 + cmp tmp1w, #0x20 + b.eq 1f + b.lt 2f + stp A_l, A_l, [dst, #-48] +1: + stp A_l, A_l, [dst, #-32] +2: + stp A_l, A_l, [dst, #-16] + +.Ltail15: + and count, count, #15 + add dst, dst, count + stp A_l, A_l, [dst, #-16] /* Repeat some/all of last store. */ + ret + +.Ltail15tiny: + /* Set up to 15 bytes. Does not assume earlier memory + being set. */ + tbz count, #3, 1f + str A_l, [dst], #8 +1: + tbz count, #2, 1f + str A_lw, [dst], #4 +1: + tbz count, #1, 1f + strh A_lw, [dst], #2 +1: + ret + + /* Critical loop. Start at a new cache line boundary. Assuming + * 64 bytes per line, this ensures the entire loop is in one line. */ + .p2align 6 +.Lnot_short: + neg tmp2, dst + ands tmp2, tmp2, #15 + b.eq 2f + /* Bring DST to 128-bit (16-byte) alignment. We know that there's + * more than that to set, so we simply store 16 bytes and advance by + * the amount required to reach alignment. */ + sub count, count, tmp2 + stp A_l, A_l, [dst] + add dst, dst, tmp2 + /* There may be less than 63 bytes to go now. */ + cmp count, #63 + b.le .Ltail63 +2: + sub dst, dst, #16 /* Pre-bias. 
*/ + sub count, count, #64 +1: + stp A_l, A_l, [dst, #16] + stp A_l, A_l, [dst, #32] + stp A_l, A_l, [dst, #48] + stp A_l, A_l, [dst, #64]! + subs count, count, #64 + b.ge 1b + tst count, #0x3f + add dst, dst, #16 + b.ne .Ltail63 + ret + + /* For zeroing memory, check to see if we can use the ZVA feature to + * zero entire 'cache' lines. */ +.Lzero_mem: + mov A_l, #0 + cmp count, #63 + b.le .Ltail_maybe_tiny + neg tmp2, dst + ands tmp2, tmp2, #15 + b.eq 1f + sub count, count, tmp2 + stp A_l, A_l, [dst] + add dst, dst, tmp2 + cmp count, #63 + b.le .Ltail63 +1: + /* For zeroing small amounts of memory, it's not worth setting up + * the line-clear code. */ + cmp count, #128 + b.lt .Lnot_short + mrs tmp1, dczid_el0 + tbnz tmp1, #4, .Lnot_short + mov tmp3w, #4 + and zva_len, tmp1w, #15 /* Safety: other bits reserved. */ + lsl zva_len, tmp3w, zva_len + +.Lzero_by_line: + /* Compute how far we need to go to become suitably aligned. We're + * already at quad-word alignment. */ + cmp count, zva_len_x + b.lt .Lnot_short /* Not enough to reach alignment. */ + sub zva_bits_x, zva_len_x, #1 + neg tmp2, dst + ands tmp2, tmp2, zva_bits_x + b.eq 1f /* Already aligned. */ + /* Not aligned, check that there's enough to copy after alignment. */ + sub tmp1, count, tmp2 + cmp tmp1, #64 + ccmp tmp1, zva_len_x, #8, ge /* NZCV=0b1000 */ + b.lt .Lnot_short + /* We know that there's at least 64 bytes to zero and that it's safe + * to overrun by 64 bytes. */ + mov count, tmp1 +2: + stp A_l, A_l, [dst] + stp A_l, A_l, [dst, #16] + stp A_l, A_l, [dst, #32] + subs tmp2, tmp2, #64 + stp A_l, A_l, [dst, #48] + add dst, dst, #64 + b.ge 2b + /* We've overrun a bit, so adjust dst downwards. 
*/ + add dst, dst, tmp2 +1: + sub count, count, zva_len_x +3: + dc zva, dst + add dst, dst, zva_len_x + subs count, count, zva_len_x + b.ge 3b + ands count, count, zva_bits_x + b.ne .Ltail_maybe_long + ret +END(android_memset32) diff --git a/libcutils/tests/Android.mk b/libcutils/tests/Android.mk index d3e07f80e..8e6531074 100644 --- a/libcutils/tests/Android.mk +++ b/libcutils/tests/Android.mk @@ -13,20 +13,36 @@ # limitations under the License. LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) test_src_files := \ + MemsetTest.cpp \ PropertiesTest.cpp \ -shared_libraries := \ - libutils \ - liblog - -static_libraries := \ - libcutils - -LOCAL_SHARED_LIBRARIES := $(shared_libraries) -LOCAL_STATIC_LIBRARIES := $(static_libraries) -LOCAL_SRC_FILES := $(test_src_files) +include $(CLEAR_VARS) LOCAL_MODULE := libcutils_test +LOCAL_SRC_FILES := $(test_src_files) +LOCAL_SHARED_LIBRARIES := \ + libcutils \ + liblog \ + libutils \ + +LOCAL_MULTILIB := both +LOCAL_MODULE_STEM_32 := $(LOCAL_MODULE)32 +LOCAL_MODULE_STEM_64 := $(LOCAL_MODULE)64 +include $(BUILD_NATIVE_TEST) + +include $(CLEAR_VARS) +LOCAL_MODULE := libcutils_test_static +LOCAL_FORCE_STATIC_EXECUTABLE := true +LOCAL_SRC_FILES := $(test_src_files) +LOCAL_STATIC_LIBRARIES := \ + libc \ + libcutils \ + liblog \ + libstlport_static \ + libutils \ + +LOCAL_MULTILIB := both +LOCAL_MODULE_STEM_32 := $(LOCAL_MODULE)32 +LOCAL_MODULE_STEM_64 := $(LOCAL_MODULE)64 include $(BUILD_NATIVE_TEST) diff --git a/libcutils/tests/MemsetTest.cpp b/libcutils/tests/MemsetTest.cpp new file mode 100644 index 000000000..45efc519c --- /dev/null +++ b/libcutils/tests/MemsetTest.cpp @@ -0,0 +1,181 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <malloc.h> +#include <math.h> +#include <stdio.h> +#include <stdint.h> +#include <string.h> + +#include <cutils/memory.h> +#include <gtest/gtest.h> + +#define FENCEPOST_LENGTH 8 + +#define MAX_TEST_SIZE (64*1024) +// Choose values that have no repeating byte values. +#define MEMSET16_PATTERN 0xb139 +#define MEMSET32_PATTERN 0x48193a27 + +enum test_e { + MEMSET16 = 0, + MEMSET32, +}; + +static int g_memset16_aligns[][2] = { + { 2, 0 }, + { 4, 0 }, + { 8, 0 }, + { 16, 0 }, + { 32, 0 }, + { 64, 0 }, + { 128, 0 }, + + { 4, 2 }, + + { 8, 2 }, + { 8, 4 }, + { 8, 6 }, + + { 128, 2 }, + { 128, 4 }, + { 128, 6 }, + { 128, 8 }, + { 128, 10 }, + { 128, 12 }, + { 128, 14 }, + { 128, 16 }, +}; + +static int g_memset32_aligns[][2] = { + { 4, 0 }, + { 8, 0 }, + { 16, 0 }, + { 32, 0 }, + { 64, 0 }, + { 128, 0 }, + + { 8, 4 }, + + { 128, 4 }, + { 128, 8 }, + { 128, 12 }, + { 128, 16 }, +}; + +static size_t GetIncrement(size_t len, size_t min_incr) { + if (len >= 4096) { + return 1024; + } else if (len >= 1024) { + return 256; + } + return min_incr; +} + +// Return a pointer into the current buffer with the specified alignment. +static void *GetAlignedPtr(void *orig_ptr, int alignment, int or_mask) { + uint64_t ptr = reinterpret_cast<uint64_t>(orig_ptr); + if (alignment > 0) { + // When setting the alignment, set it to exactly the alignment chosen. + // The pointer returned will be guaranteed not to be aligned to anything + // more than that.
+ ptr += alignment - (ptr & (alignment - 1)); + ptr |= alignment | or_mask; + } + + return reinterpret_cast<void*>(ptr); +} + +static void SetFencepost(uint8_t *buffer) { + for (int i = 0; i < FENCEPOST_LENGTH; i += 2) { + buffer[i] = 0xde; + buffer[i+1] = 0xad; + } +} + +static void VerifyFencepost(uint8_t *buffer) { + for (int i = 0; i < FENCEPOST_LENGTH; i += 2) { + if (buffer[i] != 0xde || buffer[i+1] != 0xad) { + uint8_t expected_value; + if (buffer[i] == 0xde) { + i++; + expected_value = 0xad; + } else { + expected_value = 0xde; + } + ASSERT_EQ(expected_value, buffer[i]); + } + } +} + +void RunMemsetTests(test_e test_type, uint32_t value, int align[][2], size_t num_aligns) { + size_t min_incr = 4; + if (test_type == MEMSET16) { + min_incr = 2; + value |= value << 16; + } + uint32_t* expected_buf = new uint32_t[MAX_TEST_SIZE/sizeof(uint32_t)]; + for (size_t i = 0; i < MAX_TEST_SIZE/sizeof(uint32_t); i++) { + expected_buf[i] = value; + } + + // Allocate one large buffer with lots of extra space so that we can + // guarantee that all possible alignments will fit.
+ uint8_t *buf = new uint8_t[3*MAX_TEST_SIZE]; + uint8_t *buf_align; + for (size_t i = 0; i < num_aligns; i++) { + size_t incr = min_incr; + for (size_t len = incr; len <= MAX_TEST_SIZE; len += incr) { + incr = GetIncrement(len, min_incr); + + buf_align = reinterpret_cast<uint8_t*>(GetAlignedPtr( + buf+FENCEPOST_LENGTH, align[i][0], align[i][1])); + + SetFencepost(&buf_align[-FENCEPOST_LENGTH]); + SetFencepost(&buf_align[len]); + + memset(buf_align, 0xff, len); + if (test_type == MEMSET16) { + android_memset16(reinterpret_cast<uint16_t*>(buf_align), value, len); + } else { + android_memset32(reinterpret_cast<uint32_t*>(buf_align), value, len); + } + ASSERT_EQ(0, memcmp(expected_buf, buf_align, len)) + << "Failed size " << len << " align " << align[i][0] << " " << align[i][1] << "\n"; + + VerifyFencepost(&buf_align[-FENCEPOST_LENGTH]); + VerifyFencepost(&buf_align[len]); + } + } + delete[] expected_buf; + delete[] buf; +} + +TEST(libcutils, android_memset16_non_zero) { + RunMemsetTests(MEMSET16, MEMSET16_PATTERN, g_memset16_aligns, sizeof(g_memset16_aligns)/sizeof(int[2])); +} + +TEST(libcutils, android_memset16_zero) { + RunMemsetTests(MEMSET16, 0, g_memset16_aligns, sizeof(g_memset16_aligns)/sizeof(int[2])); +} + +TEST(libcutils, android_memset32_non_zero) { + RunMemsetTests(MEMSET32, MEMSET32_PATTERN, g_memset32_aligns, sizeof(g_memset32_aligns)/sizeof(int[2])); +} + +TEST(libcutils, android_memset32_zero) { + RunMemsetTests(MEMSET32, 0, g_memset32_aligns, sizeof(g_memset32_aligns)/sizeof(int[2])); +}