2cc2b2be69
Change-Id: Ieec623c06bc32ec78334f628af25b00c2bccd2e7
148 lines
4.7 KiB
C
148 lines
4.7 KiB
C
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
#include <stdlib.h>
|
|
#include <stdint.h>
|
|
#include <assert.h>
|
|
|
|
/*
 * Overlap-safe copy of "n" bytes from "src" to "dest", in the manner of
 * memmove(), except that data is only ever moved in whole 16-bit or
 * 32-bit units:
 *  - if all arguments are at least 32-bit aligned, only 32-bit loads and
 *    stores are used, so the atomicity of 32-bit values is preserved
 *  - if not, at least the atomicity of 16-bit values is preserved
 *
 * "dest", "src" and "n" must ALL be multiples of two.  This is checked
 * with assert(); if the check is compiled out and any of the three is
 * odd, the behavior is undefined.  (The restriction could be lifted by
 * detecting unaligned arguments and punting to memmove(), but that is
 * not currently useful.)
 *
 * TODO: add loop for 64-bit alignment
 * TODO: use __builtin_prefetch
 * TODO: write an ARM-optimized version
 */
void _memmove_words(void* dest, const void* src, size_t n)
{
    /* Contract check: both addresses and the byte count must be even. */
    assert((((uintptr_t) dest | (uintptr_t) src | n) & 0x01) == 0);

    char* out = (char*) dest;
    const char* in = (const char*) src;

    /*
     * An empty request, or a buffer moved onto itself, needs no work.
     * Rejecting n == 0 here also lets the alignment fix-up below assume
     * that at least one 16-bit value is present.
     */
    if (n == 0 || out == in) {
        return;
    }

    /*
     * A front-to-back copy is safe when the destination starts below the
     * source (the reader always stays ahead of the writer, even if the
     * ranges overlap) or when the destination starts at or beyond the end
     * of the source range.  This is the expected case.
     */
    if (__builtin_expect((out < in) || ((size_t)(out - in) >= n), 1)) {
        /*
         * Front-to-back.  32-bit accesses are preferred even for 16-bit
         * data, so first deal with pointers that are not 32-bit aligned.
         */
        if ((((uintptr_t) out | (uintptr_t) in) & 0x03) != 0) {
            /*
             * Two possibilities:
             *  (1) Congruent pointers: moving one 16-bit value brings
             *      both to a 32-bit boundary.
             *  (2) Non-congruent pointers: we could
             *       a. move the whole buffer as 16-bit values,
             *       b. load/store 32 bits, shifting to fix alignment, or
             *       c. just issue 32-bit accesses and assume the CPU
             *          does a reasonable job.
             *      Option (a) is what is implemented; it is suboptimal.
             */
            size_t headBytes;
            if ((((uintptr_t) out ^ (uintptr_t) in) & 0x03) == 0) {
                headBytes = sizeof(uint16_t);
            } else {
                headBytes = n;
            }
            n -= headBytes;

            for (size_t left = headBytes / sizeof(uint16_t); left != 0; left--) {
                *(uint16_t*) out = *(const uint16_t*) in;
                out += sizeof(uint16_t);
                in += sizeof(uint16_t);
            }
        }

        /* Bulk of the data: 32-bit aligned words. */
        for (size_t words = n / sizeof(uint32_t); words != 0; words--) {
            *(uint32_t*) out = *(const uint32_t*) in;
            out += sizeof(uint32_t);
            in += sizeof(uint32_t);
        }

        /* At most one 16-bit value can remain after the word loop. */
        if ((n & 0x02) != 0) {
            *(uint16_t*) out = *(const uint16_t*) in;
        }
        return;
    }

    /*
     * Back-to-front: the destination overlaps the tail of the source, so
     * start one past the end of both ranges and walk downwards.
     */
    out += n;
    in += n;

    if ((((uintptr_t) out | (uintptr_t) in) & 0x03) != 0) {
        /*
         * Try for 32-bit alignment of the end pointers: one 16-bit value
         * if they are congruent, otherwise everything as 16-bit values.
         */
        size_t tailBytes;
        if ((((uintptr_t) out ^ (uintptr_t) in) & 0x03) == 0) {
            tailBytes = sizeof(uint16_t);
        } else {
            tailBytes = n;
        }
        n -= tailBytes;

        for (size_t left = tailBytes / sizeof(uint16_t); left != 0; left--) {
            out -= sizeof(uint16_t);
            in -= sizeof(uint16_t);
            *(uint16_t*) out = *(const uint16_t*) in;
        }
    }

    /* Bulk of the data: 32-bit aligned words, highest address first. */
    for (size_t words = n / sizeof(uint32_t); words != 0; words--) {
        out -= sizeof(uint32_t);
        in -= sizeof(uint32_t);
        *(uint32_t*) out = *(const uint32_t*) in;
    }

    /* At most one 16-bit value can remain at the very front. */
    if ((n & 0x02) != 0) {
        out -= sizeof(uint16_t);
        in -= sizeof(uint16_t);
        *(uint16_t*) out = *(const uint16_t*) in;
    }
}
|