Atomic/SMP update.

Added atomic-inline.h.  Added a platform-specific memory barrier call
there.

Added android_atomic_acquire_cmpxchg() and android_atomic_release_store().
Not tested on Mac OS X or SH.
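
As a rough illustration (not part of this change), the two new calls are
meant to pair up for lock acquire/release.  The lock word and helper names
below are made up:

    #include <cutils/atomic.h>

    static volatile int32_t gLock = 0;          /* hypothetical lock word */

    static void sample_lock(void) {
        /* returns 0 once the lock word goes 0 -> 1; on SMP the barrier is
         * issued after the successful store */
        while (android_atomic_acquire_cmpxchg(0, 1, &gLock) != 0) {
            /* spin */
        }
    }

    static void sample_unlock(void) {
        /* on SMP the barrier is issued before the store of 0 */
        android_atomic_release_store(0, &gLock);
    }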

Added memory barrier calls to linux-x86 atomics.  Mac OS X has barrier
functions already.  sh isn't really SMP-ready.  linux-arm needs work
(to be done in a separate change).

Updated the makefile to make the SMP state visible to the code here.
Note that host binaries are NOT built with SMP enabled; while our hosts
are very likely SMP, it's not worth figuring out e.g. whether it's okay
to use the SSE2 mfence instruction or whether we have to use something
else.  We haven't had barriers enabled in host tools before, so there's
probably no need to start now.

Removed quasiatomic 64-bit calls (now part of Dalvik).

Change-Id: I49e5e6c8abe70f304cdedb9d7b8e6e65f8925815
Andy McFadden 2010-05-19 22:33:28 -07:00
parent 53a79a841b
commit ac322da69e
6 changed files with 169 additions and 245 deletions

View file

@ -0,0 +1,101 @@
/*
* Copyright (C) 2010 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ANDROID_CUTILS_ATOMIC_INLINE_H
#define ANDROID_CUTILS_ATOMIC_INLINE_H
/*
* Inline declarations and macros for some special-purpose atomic
* operations. These are intended for rare circumstances where a
* memory barrier needs to be issued inline rather than as a function
* call.
*
* Most code should not use these.
*
* Anything that does include this file must set ANDROID_SMP to either
* 0 or 1, indicating compilation for UP or SMP, respectively.
*/
#if !defined(ANDROID_SMP)
# error "Must define ANDROID_SMP before including atomic-inline.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif
/*
* Define the full memory barrier for an SMP system. This is
* platform-specific.
*/
#ifdef __arm__
#include <machine/cpu-features.h>
/*
* For ARMv6K we need to issue a specific MCR instead of the DMB, since
* that wasn't added until v7. For anything older, SMP isn't relevant.
* Since we don't have an ARMv6K to test with, we're not going to deal
* with that now.
*
* The DMB instruction is found in the ARM and Thumb2 instruction sets.
* This will fail on plain 16-bit Thumb.
*/
#if defined(__ARM_HAVE_DMB)
# define __android_membar_full_smp() \
do { __asm__ __volatile__ ("dmb" ::: "memory"); } while (0)
#else
# define __android_membar_full_smp() ARM_SMP_defined_but_no_DMB()
#endif
#elif defined(__i386__) || defined(__x86_64__)
/*
* For recent x86, we can use the SSE2 mfence instruction.
*/
# define __android_membar_full_smp() \
do { __asm__ __volatile__ ("mfence" ::: "memory"); } while (0)
#else
/*
* Implementation not defined for this platform. Hopefully we're building
* in uniprocessor mode.
*/
# define __android_membar_full_smp() SMP_barrier_not_defined_for_platform()
#endif
/*
* Full barrier. On uniprocessors this is just a compiler reorder barrier,
* which ensures that the statements appearing above the barrier in the C/C++
* code will be issued before the statements appearing below the barrier.
*
* For SMP this also includes a memory barrier instruction. On an ARM
* CPU this means that the current core will flush pending writes, wait
* for pending reads to complete, and discard any cached reads that could
* be stale. Other CPUs may do less, but the end result is equivalent.
*/
#if ANDROID_SMP != 0
# define android_membar_full() __android_membar_full_smp()
#else
# define android_membar_full() \
do { __asm__ __volatile__ ("" ::: "memory"); } while (0)
#endif
#ifdef __cplusplus
} // extern "C"
#endif
#endif // ANDROID_CUTILS_ATOMIC_INLINE_H
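
A minimal usage sketch for the new header (names below are made up;
ANDROID_SMP normally arrives on the compiler command line, per the
Android.mk change further down, rather than from a #define in the source):

    #define ANDROID_SMP 1               /* stand-in for -DANDROID_SMP=1 */
    #include <cutils/atomic-inline.h>

    static int32_t gPayload;
    static volatile int32_t gReady;

    void publish(int32_t value) {
        gPayload = value;
        android_membar_full();          /* payload must be visible first */
        gReady = 1;                     /* then raise the flag */
    }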

View file

@ -25,10 +25,8 @@ extern "C" {
#endif
/*
* NOTE: memory shared between threads is synchronized by all atomic operations
* below, this means that no explicit memory barrier is required: all reads or
* writes issued before android_atomic_* operations are guaranteed to complete
* before the atomic operation takes place.
* Unless otherwise noted, the operations below perform a full fence before
* the atomic operation on SMP systems ("release" semantics).
*/
void android_atomic_write(int32_t value, volatile int32_t* addr);
@ -37,7 +35,6 @@ void android_atomic_write(int32_t value, volatile int32_t* addr);
* all these atomic operations return the previous value
*/
int32_t android_atomic_inc(volatile int32_t* addr);
int32_t android_atomic_dec(volatile int32_t* addr);
@ -48,30 +45,32 @@ int32_t android_atomic_or(int32_t value, volatile int32_t* addr);
int32_t android_atomic_swap(int32_t value, volatile int32_t* addr);
/*
* NOTE: Two "quasiatomic" operations on the exact same memory address
* are guaranteed to operate atomically with respect to each other,
* but no guarantees are made about quasiatomic operations mixed with
* non-quasiatomic operations on the same address, nor about
* quasiatomic operations that are performed on partially-overlapping
* memory.
* cmpxchg returns zero if the new value was successfully written. This
* will only happen when *addr == oldvalue.
*
* (The return value is inverted from implementations on other platforms, but
* matches the ARM ldrex/strex semantics. Note also this is a compare-and-set
* operation, not a compare-and-exchange operation, since we don't return
* the original value.)
*/
int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr);
int64_t android_quasiatomic_read_64(volatile int64_t* addr);
/*
* cmpxchg return a non zero value if the exchange was NOT performed,
* in other words if oldvalue != *addr
*/
int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue,
volatile int32_t* addr);
int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
volatile int64_t* addr);
/*
* Same basic operation as android_atomic_cmpxchg, but with "acquire"
* semantics. The memory barrier, if required, is performed after the
* new value is stored. Useful for acquiring a spin lock.
*/
int android_atomic_acquire_cmpxchg(int32_t oldvalue, int32_t newvalue,
volatile int32_t* addr);
/*
* Perform an atomic store with "release" semantics. The memory barrier,
* if required, is performed before the store instruction. Useful for
* releasing a spin lock.
*/
#define android_atomic_release_store android_atomic_write
#ifdef __cplusplus
} // extern "C"
#endif
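
To make the return-value convention above concrete, a typical retry loop
looks like this (sketch only; android_atomic_inc already provides this
exact operation):

    #include <cutils/atomic.h>

    /* Increment *addr with compare-and-set: a zero return from cmpxchg
     * means the new value was stored, non-zero means retry. */
    static int32_t sample_inc(volatile int32_t* addr) {
        int32_t old;
        do {
            old = *addr;
        } while (android_atomic_cmpxchg(old, old + 1, addr) != 0);
        return old;
    }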

View file

@ -16,6 +16,13 @@
LOCAL_PATH := $(my-dir)
include $(CLEAR_VARS)
ifeq ($(TARGET_CPU_SMP),true)
targetSmpFlag := -DANDROID_SMP=1
else
targetSmpFlag := -DANDROID_SMP=0
endif
hostSmpFlag := -DANDROID_SMP=0
commonSources := \
array.c \
hashmap.c \
@ -80,6 +87,7 @@ LOCAL_MODULE := libcutils
LOCAL_SRC_FILES := $(commonSources) $(commonHostSources)
LOCAL_LDLIBS := -lpthread
LOCAL_STATIC_LIBRARIES := liblog
LOCAL_CFLAGS += $(hostSmpFlag)
include $(BUILD_HOST_STATIC_LIBRARY)
@ -92,6 +100,7 @@ LOCAL_MODULE := libcutils
LOCAL_SRC_FILES := $(commonSources) $(commonHostSources) memory.c dlmalloc_stubs.c
LOCAL_LDLIBS := -lpthread
LOCAL_SHARED_LIBRARIES := liblog
LOCAL_CFLAGS += $(targetSmpFlag)
include $(BUILD_SHARED_LIBRARY)
else #!sim
@ -114,12 +123,14 @@ endif # !arm
LOCAL_C_INCLUDES := $(KERNEL_HEADERS)
LOCAL_STATIC_LIBRARIES := liblog
LOCAL_CFLAGS += $(targetSmpFlag)
include $(BUILD_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_MODULE := libcutils
LOCAL_WHOLE_STATIC_LIBRARIES := libcutils
LOCAL_SHARED_LIBRARIES := liblog
LOCAL_CFLAGS += $(targetSmpFlag)
include $(BUILD_SHARED_LIBRARY)
endif #!sim
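
For the target flag above to actually become -DANDROID_SMP=1, the device
configuration has to opt in.  Assuming the usual BoardConfig.mk mechanism
(not part of this change), that is a one-line setting:

    # hypothetical device BoardConfig.mk fragment
    TARGET_CPU_SMP := true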

View file

@ -14,6 +14,8 @@
* limitations under the License.
*/
/* TODO: insert memory barriers on SMP */
#include <machine/cpu-features.h>
/*
@ -43,6 +45,8 @@
.global android_atomic_cmpxchg
.type android_atomic_cmpxchg, %function
.global android_atomic_acquire_cmpxchg
.type android_atomic_acquire_cmpxchg, %function
/*
* ----------------------------------------------------------------------------
@ -237,7 +241,7 @@ android_atomic_or:
/* replaced swp instruction with ldrex/strex for ARMv6 & ARMv7 */
android_atomic_swap:
#if defined (_ARM_HAVE_LDREX_STREX)
#if defined (__ARM_HAVE_LDREX_STREX)
1: ldrex r2, [r1]
strex r3, r0, [r1]
teq r3, #0
@ -256,6 +260,7 @@ android_atomic_swap:
* output: r0 = 0 (xchg done) or non-zero (xchg not done)
*/
android_atomic_acquire_cmpxchg:
android_atomic_cmpxchg:
.fnstart
.save {r4, lr}
@ -282,10 +287,3 @@ android_atomic_cmpxchg:
bx lr
.fnend
/*
* ----------------------------------------------------------------------------
* android_atomic_cmpxchg_64
* input: r0-r1=oldvalue, r2-r3=newvalue, arg4 (on stack)=address
* output: r0 = 0 (xchg done) or non-zero (xchg not done)
*/
/* TODO: NEED IMPLEMENTATION FOR THIS ARCHITECTURE */
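
For reference, the ldrex/strex sequence above implements compare-and-set.
Ignoring barriers (still missing here, per the TODO), its semantics are
roughly those of the GCC builtin below; this is an illustration, not the
actual implementation:

    #include <stdint.h>

    /* Same inverted convention as android_atomic_cmpxchg: returns 0 when
     * the swap was performed.  Unlike the assembly above, the builtin also
     * acts as a full memory barrier. */
    static int cmpxchg_sketch(int32_t oldvalue, int32_t newvalue,
                              volatile int32_t* addr) {
        return !__sync_bool_compare_and_swap(addr, oldvalue, newvalue);
    }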

View file

@ -118,42 +118,8 @@ int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue,
return result;
}
int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr) {
int64_t oldValue;
pthread_mutex_t* lock = SWAP_LOCK(addr);
pthread_mutex_lock(lock);
oldValue = *addr;
*addr = value;
pthread_mutex_unlock(lock);
return oldValue;
int android_atomic_acquire_cmpxchg(int32_t oldvalue, int32_t newvalue,
volatile int32_t* addr) {
return android_atomic_cmpxchg(oldvalue, newvalue, addr);
}
int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
volatile int64_t* addr) {
int result;
pthread_mutex_t* lock = SWAP_LOCK(addr);
pthread_mutex_lock(lock);
if (*addr == oldvalue) {
*addr = newvalue;
result = 0;
} else {
result = 1;
}
pthread_mutex_unlock(lock);
return result;
}
int64_t android_quasiatomic_read_64(volatile int64_t* addr) {
int64_t result;
pthread_mutex_t* lock = SWAP_LOCK(addr);
pthread_mutex_lock(lock);
result = *addr;
pthread_mutex_unlock(lock);
return result;
}

View file

@ -15,6 +15,7 @@
*/
#include <cutils/atomic.h>
#include <cutils/atomic-inline.h>
#ifdef HAVE_WIN32_THREADS
#include <windows.h>
#else
@ -70,40 +71,19 @@ int32_t android_atomic_swap(int32_t value, volatile int32_t* addr) {
}
int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue, volatile int32_t* addr) {
/* OS X CAS returns zero on failure; invert to return zero on success */
return OSAtomicCompareAndSwap32Barrier(oldvalue, newvalue, (int32_t*)addr) == 0;
}
#if defined(__ppc__) \
|| defined(__PPC__) \
|| defined(__powerpc__) \
|| defined(__powerpc) \
|| defined(__POWERPC__) \
|| defined(_M_PPC) \
|| defined(__PPC)
#define NEED_QUASIATOMICS 1
#else
int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
volatile int64_t* addr) {
return OSAtomicCompareAndSwap64Barrier(oldvalue, newvalue,
(int64_t*)addr) == 0;
int android_atomic_acquire_cmpxchg(int32_t oldvalue, int32_t newvalue,
volatile int32_t* addr) {
int result = (OSAtomicCompareAndSwap32(oldvalue, newvalue, (int32_t*)addr) == 0);
if (!result) {
/* success, perform barrier */
OSMemoryBarrier();
}
return result;
}
int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr) {
int64_t oldValue;
do {
oldValue = *addr;
} while (android_quasiatomic_cmpxchg_64(oldValue, value, addr));
return oldValue;
}
int64_t android_quasiatomic_read_64(volatile int64_t* addr) {
return OSAtomicAdd64Barrier(0, addr);
}
#endif
/*****************************************************************************/
#elif defined(__i386__) || defined(__x86_64__)
@ -163,6 +143,7 @@ int32_t android_atomic_swap(int32_t value, volatile int32_t* addr) {
}
int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue, volatile int32_t* addr) {
android_membar_full();
int xchg;
asm volatile
(
@ -175,75 +156,25 @@ int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue, volatile int32_t*
return xchg;
}
#define NEED_QUASIATOMICS 1
int android_atomic_acquire_cmpxchg(int32_t oldvalue, int32_t newvalue,
volatile int32_t* addr) {
int xchg;
asm volatile
(
" lock; cmpxchg %%ecx, (%%edx);"
" setne %%al;"
" andl $1, %%eax"
: "=a" (xchg)
: "a" (oldvalue), "c" (newvalue), "d" (addr)
);
android_membar_full();
return xchg;
}
/*****************************************************************************/
#elif __arm__
// Most of the implementation is in atomic-android-arm.s.
// on the device, we implement the 64-bit atomic operations through
// mutex locking. normally, this is bad because we must initialize
// a pthread_mutex_t before being able to use it, and this means
// having to do an initialization check on each function call, and
// that's where really ugly things begin...
//
// BUT, as a special twist, we take advantage of the fact that in our
// pthread library, a mutex is simply a volatile word whose value is always
// initialized to 0. In other words, simply declaring a static mutex
// object initializes it !
//
// another twist is that we use a small array of mutexes to dispatch
// the contention locks from different memory addresses
//
#include <pthread.h>
#define SWAP_LOCK_COUNT 32U
static pthread_mutex_t _swap_locks[SWAP_LOCK_COUNT];
#define SWAP_LOCK(addr) \
&_swap_locks[((unsigned)(void*)(addr) >> 3U) % SWAP_LOCK_COUNT]
int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr) {
int64_t oldValue;
pthread_mutex_t* lock = SWAP_LOCK(addr);
pthread_mutex_lock(lock);
oldValue = *addr;
*addr = value;
pthread_mutex_unlock(lock);
return oldValue;
}
int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
volatile int64_t* addr) {
int result;
pthread_mutex_t* lock = SWAP_LOCK(addr);
pthread_mutex_lock(lock);
if (*addr == oldvalue) {
*addr = newvalue;
result = 0;
} else {
result = 1;
}
pthread_mutex_unlock(lock);
return result;
}
int64_t android_quasiatomic_read_64(volatile int64_t* addr) {
int64_t result;
pthread_mutex_t* lock = SWAP_LOCK(addr);
pthread_mutex_lock(lock);
result = *addr;
pthread_mutex_unlock(lock);
return result;
}
// implementation for ARM is in atomic-android-arm.s.
/*****************************************************************************/
#elif __sh__
@ -255,85 +186,3 @@ int64_t android_quasiatomic_read_64(volatile int64_t* addr) {
#endif
#if NEED_QUASIATOMICS
/* Note that a spinlock is *not* a good idea in general
* since they can introduce subtle issues. For example,
* a real-time thread trying to acquire a spinlock already
* acquired by another thread will never yield, making the
* CPU loop endlessly!
*
* However, this code is only used on the Linux simulator
* so it's probably ok for us.
*
* The alternative is to use a pthread mutex, but
* these must be initialized before being used, and
* then you have the problem of lazily initializing
* a mutex without any other synchronization primitive.
*/
/* global spinlock for all 64-bit quasiatomic operations */
static int32_t quasiatomic_spinlock = 0;
int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
volatile int64_t* addr) {
int result;
while (android_atomic_cmpxchg(0, 1, &quasiatomic_spinlock)) {
#ifdef HAVE_WIN32_THREADS
Sleep(0);
#else
sched_yield();
#endif
}
if (*addr == oldvalue) {
*addr = newvalue;
result = 0;
} else {
result = 1;
}
android_atomic_swap(0, &quasiatomic_spinlock);
return result;
}
int64_t android_quasiatomic_read_64(volatile int64_t* addr) {
int64_t result;
while (android_atomic_cmpxchg(0, 1, &quasiatomic_spinlock)) {
#ifdef HAVE_WIN32_THREADS
Sleep(0);
#else
sched_yield();
#endif
}
result = *addr;
android_atomic_swap(0, &quasiatomic_spinlock);
return result;
}
int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr) {
int64_t result;
while (android_atomic_cmpxchg(0, 1, &quasiatomic_spinlock)) {
#ifdef HAVE_WIN32_THREADS
Sleep(0);
#else
sched_yield();
#endif
}
result = *addr;
*addr = value;
android_atomic_swap(0, &quasiatomic_spinlock);
return result;
}
#endif