/* * Copyright (C) 2008 The Android Open Source Project * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include "pthread_internal.h" #include "private/bionic_constants.h" #include "private/bionic_fortify.h" #include "private/bionic_futex.h" #include "private/bionic_systrace.h" #include "private/bionic_time_conversions.h" #include "private/bionic_tls.h" /* a mutex attribute holds the following fields * * bits: name description * 0-3 type type of mutex * 4 shared process-shared flag * 5 protocol whether it is a priority inherit mutex. */ #define MUTEXATTR_TYPE_MASK 0x000f #define MUTEXATTR_SHARED_MASK 0x0010 #define MUTEXATTR_PROTOCOL_MASK 0x0020 #define MUTEXATTR_PROTOCOL_SHIFT 5 int pthread_mutexattr_init(pthread_mutexattr_t *attr) { *attr = PTHREAD_MUTEX_DEFAULT; return 0; } int pthread_mutexattr_destroy(pthread_mutexattr_t *attr) { *attr = -1; return 0; } int pthread_mutexattr_gettype(const pthread_mutexattr_t *attr, int *type_p) { int type = (*attr & MUTEXATTR_TYPE_MASK); if (type < PTHREAD_MUTEX_NORMAL || type > PTHREAD_MUTEX_ERRORCHECK) { return EINVAL; } *type_p = type; return 0; } int pthread_mutexattr_settype(pthread_mutexattr_t *attr, int type) { if (type < PTHREAD_MUTEX_NORMAL || type > PTHREAD_MUTEX_ERRORCHECK ) { return EINVAL; } *attr = (*attr & ~MUTEXATTR_TYPE_MASK) | type; return 0; } /* process-shared mutexes are not supported at the moment */ int pthread_mutexattr_setpshared(pthread_mutexattr_t *attr, int pshared) { switch (pshared) { case PTHREAD_PROCESS_PRIVATE: *attr &= ~MUTEXATTR_SHARED_MASK; return 0; case PTHREAD_PROCESS_SHARED: /* our current implementation of pthread actually supports shared * mutexes but won't cleanup if a process dies with the mutex held. * Nevertheless, it's better than nothing. Shared mutexes are used * by surfaceflinger and audioflinger. */ *attr |= MUTEXATTR_SHARED_MASK; return 0; } return EINVAL; } int pthread_mutexattr_getpshared(const pthread_mutexattr_t* attr, int* pshared) { *pshared = (*attr & MUTEXATTR_SHARED_MASK) ? PTHREAD_PROCESS_SHARED : PTHREAD_PROCESS_PRIVATE; return 0; } int pthread_mutexattr_setprotocol(pthread_mutexattr_t* attr, int protocol) { if (protocol != PTHREAD_PRIO_NONE && protocol != PTHREAD_PRIO_INHERIT) { return EINVAL; } *attr = (*attr & ~MUTEXATTR_PROTOCOL_MASK) | (protocol << MUTEXATTR_PROTOCOL_SHIFT); return 0; } int pthread_mutexattr_getprotocol(const pthread_mutexattr_t* attr, int* protocol) { *protocol = (*attr & MUTEXATTR_PROTOCOL_MASK) >> MUTEXATTR_PROTOCOL_SHIFT; return 0; } // Priority Inheritance mutex implementation struct PIMutex { // mutex type, can be 0 (normal), 1 (recursive), 2 (errorcheck), constant during lifetime uint8_t type; // process-shared flag, constant during lifetime bool shared; // - 1 uint16_t counter; // owner_tid is read/written by both userspace code and kernel code. It includes three fields: // FUTEX_WAITERS, FUTEX_OWNER_DIED and FUTEX_TID_MASK. atomic_int owner_tid; }; static inline __always_inline int PIMutexTryLock(PIMutex& mutex) { pid_t tid = __get_thread()->tid; // Handle common case first. int old_owner = 0; if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex.owner_tid, &old_owner, tid, memory_order_acquire, memory_order_relaxed))) { return 0; } if (tid == (old_owner & FUTEX_TID_MASK)) { // We already own this mutex. if (mutex.type == PTHREAD_MUTEX_NORMAL) { return EBUSY; } if (mutex.type == PTHREAD_MUTEX_ERRORCHECK) { return EDEADLK; } if (mutex.counter == 0xffff) { return EAGAIN; } mutex.counter++; return 0; } return EBUSY; } // Inlining this function in pthread_mutex_lock() adds the cost of stack frame instructions on // ARM/ARM64, which increases at most 20 percent overhead. So make it noinline. static int __attribute__((noinline)) PIMutexTimedLock(PIMutex& mutex, bool use_realtime_clock, const timespec* abs_timeout) { int ret = PIMutexTryLock(mutex); if (__predict_true(ret == 0)) { return 0; } if (ret == EBUSY) { ScopedTrace trace("Contending for pthread mutex"); ret = -__futex_pi_lock_ex(&mutex.owner_tid, mutex.shared, use_realtime_clock, abs_timeout); } return ret; } static int PIMutexUnlock(PIMutex& mutex) { pid_t tid = __get_thread()->tid; int old_owner = tid; // Handle common case first. if (__predict_true(mutex.type == PTHREAD_MUTEX_NORMAL)) { if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex.owner_tid, &old_owner, 0, memory_order_release, memory_order_relaxed))) { return 0; } } else { old_owner = atomic_load_explicit(&mutex.owner_tid, memory_order_relaxed); } if (tid != (old_owner & FUTEX_TID_MASK)) { // The mutex can only be unlocked by the thread who owns it. return EPERM; } if (mutex.type == PTHREAD_MUTEX_RECURSIVE) { if (mutex.counter != 0u) { --mutex.counter; return 0; } } if (old_owner == tid) { // No thread is waiting. if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex.owner_tid, &old_owner, 0, memory_order_release, memory_order_relaxed))) { return 0; } } return -__futex_pi_unlock(&mutex.owner_tid, mutex.shared); } static int PIMutexDestroy(PIMutex& mutex) { // The mutex should be in unlocked state (owner_tid == 0) when destroyed. // Store 0xffffffff to make the mutex unusable. int old_owner = 0; if (atomic_compare_exchange_strong_explicit(&mutex.owner_tid, &old_owner, 0xffffffff, memory_order_relaxed, memory_order_relaxed)) { return 0; } return EBUSY; } #if !defined(__LP64__) namespace PIMutexAllocator { // pthread_mutex_t has only 4 bytes in 32-bit programs, which are not enough to hold PIMutex. // So we use malloc to allocate PIMutexes and use 16-bit of pthread_mutex_t as indexes to find // the allocated PIMutexes. This allows at most 65536 PI mutexes. // When calling operations like pthread_mutex_lock/unlock, the 16-bit index is mapped to the // corresponding PIMutex. To make the map operation fast, we use a lockless mapping method: // Once a PIMutex is allocated, all the data used to map index to the PIMutex isn't changed until // it is destroyed. // Below are the data structures: // // struct Node contains a PIMutex. // typedef Node NodeArray[256]; // typedef NodeArray* NodeArrayP; // NodeArrayP nodes[256]; // // A 16-bit index is mapped to Node as below: // (*nodes[index >> 8])[index & 0xff] // // Also use a free list to allow O(1) finding recycled PIMutexes. union Node { PIMutex mutex; int next_free_id; // If not -1, refer to the next node in the free PIMutex list. }; typedef Node NodeArray[256]; typedef NodeArray* NodeArrayP; // lock_ protects below items. static Lock lock; static NodeArrayP* nodes; static int next_to_alloc_id; static int first_free_id = -1; // If not -1, refer to the first node in the free PIMutex list. static inline __always_inline Node& IdToNode(int id) { return (*nodes[id >> 8])[id & 0xff]; } static inline __always_inline PIMutex& IdToPIMutex(int id) { return IdToNode(id).mutex; } static int AllocIdLocked() { if (first_free_id != -1) { int result = first_free_id; first_free_id = IdToNode(result).next_free_id; return result; } if (next_to_alloc_id >= 0x10000) { return -1; } int array_pos = next_to_alloc_id >> 8; int node_pos = next_to_alloc_id & 0xff; if (node_pos == 0) { if (array_pos == 0) { nodes = static_cast(calloc(256, sizeof(NodeArray*))); if (nodes == nullptr) { return -1; } } nodes[array_pos] = static_cast(malloc(sizeof(NodeArray))); if (nodes[array_pos] == nullptr) { return -1; } } return next_to_alloc_id++; } // If succeed, return an id referring to a PIMutex, otherwise return -1. // A valid id is in range [0, 0xffff]. static int AllocId() { lock.lock(); int result = AllocIdLocked(); lock.unlock(); if (result != -1) { memset(&IdToPIMutex(result), 0, sizeof(PIMutex)); } return result; } static void FreeId(int id) { lock.lock(); IdToNode(id).next_free_id = first_free_id; first_free_id = id; lock.unlock(); } } // namespace PIMutexAllocator #endif // !defined(__LP64__) /* Convenience macro, creates a mask of 'bits' bits that starts from * the 'shift'-th least significant bit in a 32-bit word. * * Examples: FIELD_MASK(0,4) -> 0xf * FIELD_MASK(16,9) -> 0x1ff0000 */ #define FIELD_MASK(shift,bits) (((1 << (bits))-1) << (shift)) /* This one is used to create a bit pattern from a given field value */ #define FIELD_TO_BITS(val,shift,bits) (((val) & ((1 << (bits))-1)) << (shift)) /* And this one does the opposite, i.e. extract a field's value from a bit pattern */ #define FIELD_FROM_BITS(val,shift,bits) (((val) >> (shift)) & ((1 << (bits))-1)) /* Convenience macros. * * These are used to form or modify the bit pattern of a given mutex value */ /* Mutex state: * * 0 for unlocked * 1 for locked, no waiters * 2 for locked, maybe waiters */ #define MUTEX_STATE_SHIFT 0 #define MUTEX_STATE_LEN 2 #define MUTEX_STATE_MASK FIELD_MASK(MUTEX_STATE_SHIFT, MUTEX_STATE_LEN) #define MUTEX_STATE_FROM_BITS(v) FIELD_FROM_BITS(v, MUTEX_STATE_SHIFT, MUTEX_STATE_LEN) #define MUTEX_STATE_TO_BITS(v) FIELD_TO_BITS(v, MUTEX_STATE_SHIFT, MUTEX_STATE_LEN) #define MUTEX_STATE_UNLOCKED 0 /* must be 0 to match PTHREAD_MUTEX_INITIALIZER */ #define MUTEX_STATE_LOCKED_UNCONTENDED 1 /* must be 1 due to atomic dec in unlock operation */ #define MUTEX_STATE_LOCKED_CONTENDED 2 /* must be 1 + LOCKED_UNCONTENDED due to atomic dec */ #define MUTEX_STATE_BITS_UNLOCKED MUTEX_STATE_TO_BITS(MUTEX_STATE_UNLOCKED) #define MUTEX_STATE_BITS_LOCKED_UNCONTENDED MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_UNCONTENDED) #define MUTEX_STATE_BITS_LOCKED_CONTENDED MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_CONTENDED) // Return true iff the mutex is unlocked. #define MUTEX_STATE_BITS_IS_UNLOCKED(v) (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_UNLOCKED) // Return true iff the mutex is locked with no waiters. #define MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(v) (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_LOCKED_UNCONTENDED) // return true iff the mutex is locked with maybe waiters. #define MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(v) (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_LOCKED_CONTENDED) /* used to flip from LOCKED_UNCONTENDED to LOCKED_CONTENDED */ #define MUTEX_STATE_BITS_FLIP_CONTENTION(v) ((v) ^ (MUTEX_STATE_BITS_LOCKED_CONTENDED ^ MUTEX_STATE_BITS_LOCKED_UNCONTENDED)) /* Mutex counter: * * We need to check for overflow before incrementing, and we also need to * detect when the counter is 0 */ #define MUTEX_COUNTER_SHIFT 2 #define MUTEX_COUNTER_LEN 11 #define MUTEX_COUNTER_MASK FIELD_MASK(MUTEX_COUNTER_SHIFT, MUTEX_COUNTER_LEN) #define MUTEX_COUNTER_BITS_WILL_OVERFLOW(v) (((v) & MUTEX_COUNTER_MASK) == MUTEX_COUNTER_MASK) #define MUTEX_COUNTER_BITS_IS_ZERO(v) (((v) & MUTEX_COUNTER_MASK) == 0) /* Used to increment the counter directly after overflow has been checked */ #define MUTEX_COUNTER_BITS_ONE FIELD_TO_BITS(1, MUTEX_COUNTER_SHIFT,MUTEX_COUNTER_LEN) /* Mutex shared bit flag * * This flag is set to indicate that the mutex is shared among processes. * This changes the futex opcode we use for futex wait/wake operations * (non-shared operations are much faster). */ #define MUTEX_SHARED_SHIFT 13 #define MUTEX_SHARED_MASK FIELD_MASK(MUTEX_SHARED_SHIFT,1) /* Mutex type: * We support normal, recursive and errorcheck mutexes. */ #define MUTEX_TYPE_SHIFT 14 #define MUTEX_TYPE_LEN 2 #define MUTEX_TYPE_MASK FIELD_MASK(MUTEX_TYPE_SHIFT,MUTEX_TYPE_LEN) #define MUTEX_TYPE_TO_BITS(t) FIELD_TO_BITS(t, MUTEX_TYPE_SHIFT, MUTEX_TYPE_LEN) #define MUTEX_TYPE_BITS_NORMAL MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_NORMAL) #define MUTEX_TYPE_BITS_RECURSIVE MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_RECURSIVE) #define MUTEX_TYPE_BITS_ERRORCHECK MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_ERRORCHECK) // Use a special mutex type to mark priority inheritance mutexes. #define PI_MUTEX_STATE MUTEX_TYPE_TO_BITS(3) // For a PI mutex, it includes below fields: // Atomic(uint16_t) state; // PIMutex pi_mutex; // uint16_t pi_mutex_id in 32-bit programs // // state holds the following fields: // // bits: name description // 15-14 type mutex type, should be 3 // 13-0 padding should be 0 // // pi_mutex holds the state of a PI mutex. // pi_mutex_id holds an integer to find the state of a PI mutex. // // For a Non-PI mutex, it includes below fields: // Atomic(uint16_t) state; // atomic_int owner_tid; // Atomic(uint16_t) in 32-bit programs // // state holds the following fields: // // bits: name description // 15-14 type mutex type, can be 0 (normal), 1 (recursive), 2 (errorcheck) // 13 shared process-shared flag // 12-2 counter - 1 // 1-0 state lock state (0, 1 or 2) // // bits 15-13 are constant during the lifetime of the mutex. // // owner_tid is used only in recursive and errorcheck Non-PI mutexes to hold the mutex owner // thread id. // // PI mutexes and Non-PI mutexes are distinguished by checking type field in state. #if defined(__LP64__) struct pthread_mutex_internal_t { _Atomic(uint16_t) state; uint16_t __pad; union { atomic_int owner_tid; PIMutex pi_mutex; }; char __reserved[28]; PIMutex& ToPIMutex() { return pi_mutex; } void FreePIMutex() { } } __attribute__((aligned(4))); #else struct pthread_mutex_internal_t { _Atomic(uint16_t) state; union { _Atomic(uint16_t) owner_tid; uint16_t pi_mutex_id; }; PIMutex& ToPIMutex() { return PIMutexAllocator::IdToPIMutex(pi_mutex_id); } void FreePIMutex() { PIMutexAllocator::FreeId(pi_mutex_id); } } __attribute__((aligned(4))); #endif static_assert(sizeof(pthread_mutex_t) == sizeof(pthread_mutex_internal_t), "pthread_mutex_t should actually be pthread_mutex_internal_t in implementation."); // For binary compatibility with old version of pthread_mutex_t, we can't use more strict alignment // than 4-byte alignment. static_assert(alignof(pthread_mutex_t) == 4, "pthread_mutex_t should fulfill the alignment of pthread_mutex_internal_t."); static inline pthread_mutex_internal_t* __get_internal_mutex(pthread_mutex_t* mutex_interface) { return reinterpret_cast(mutex_interface); } int pthread_mutex_init(pthread_mutex_t* mutex_interface, const pthread_mutexattr_t* attr) { pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface); memset(mutex, 0, sizeof(pthread_mutex_internal_t)); if (__predict_true(attr == nullptr)) { atomic_init(&mutex->state, MUTEX_TYPE_BITS_NORMAL); return 0; } uint16_t state = 0; if ((*attr & MUTEXATTR_SHARED_MASK) != 0) { state |= MUTEX_SHARED_MASK; } switch (*attr & MUTEXATTR_TYPE_MASK) { case PTHREAD_MUTEX_NORMAL: state |= MUTEX_TYPE_BITS_NORMAL; break; case PTHREAD_MUTEX_RECURSIVE: state |= MUTEX_TYPE_BITS_RECURSIVE; break; case PTHREAD_MUTEX_ERRORCHECK: state |= MUTEX_TYPE_BITS_ERRORCHECK; break; default: return EINVAL; } if (((*attr & MUTEXATTR_PROTOCOL_MASK) >> MUTEXATTR_PROTOCOL_SHIFT) == PTHREAD_PRIO_INHERIT) { #if !defined(__LP64__) if (state & MUTEX_SHARED_MASK) { return EINVAL; } int id = PIMutexAllocator::AllocId(); if (id == -1) { return ENOMEM; } mutex->pi_mutex_id = id; #endif atomic_init(&mutex->state, PI_MUTEX_STATE); PIMutex& pi_mutex = mutex->ToPIMutex(); pi_mutex.type = *attr & MUTEXATTR_TYPE_MASK; pi_mutex.shared = (*attr & MUTEXATTR_SHARED_MASK) != 0; } else { atomic_init(&mutex->state, state); atomic_init(&mutex->owner_tid, 0); } return 0; } // namespace for Non-PI mutex routines. namespace NonPI { static inline __always_inline int NormalMutexTryLock(pthread_mutex_internal_t* mutex, uint16_t shared) { const uint16_t unlocked = shared | MUTEX_STATE_BITS_UNLOCKED; const uint16_t locked_uncontended = shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED; uint16_t old_state = unlocked; if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex->state, &old_state, locked_uncontended, memory_order_acquire, memory_order_relaxed))) { return 0; } return EBUSY; } /* * Lock a normal Non-PI mutex. * * As noted above, there are three states: * 0 (unlocked, no contention) * 1 (locked, no contention) * 2 (locked, contention) * * Non-recursive mutexes don't use the thread-id or counter fields, and the * "type" value is zero, so the only bits that will be set are the ones in * the lock state field. */ static inline __always_inline int NormalMutexLock(pthread_mutex_internal_t* mutex, uint16_t shared, bool use_realtime_clock, const timespec* abs_timeout_or_null) { if (__predict_true(NormalMutexTryLock(mutex, shared) == 0)) { return 0; } int result = check_timespec(abs_timeout_or_null, true); if (result != 0) { return result; } ScopedTrace trace("Contending for pthread mutex"); const uint16_t unlocked = shared | MUTEX_STATE_BITS_UNLOCKED; const uint16_t locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED; // We want to go to sleep until the mutex is available, which requires // promoting it to locked_contended. We need to swap in the new state // and then wait until somebody wakes us up. // An atomic_exchange is used to compete with other threads for the lock. // If it returns unlocked, we have acquired the lock, otherwise another // thread still holds the lock and we should wait again. // If lock is acquired, an acquire fence is needed to make all memory accesses // made by other threads visible to the current CPU. while (atomic_exchange_explicit(&mutex->state, locked_contended, memory_order_acquire) != unlocked) { if (__futex_wait_ex(&mutex->state, shared, locked_contended, use_realtime_clock, abs_timeout_or_null) == -ETIMEDOUT) { return ETIMEDOUT; } } return 0; } /* * Release a normal Non-PI mutex. The caller is responsible for determining * that we are in fact the owner of this lock. */ static inline __always_inline void NormalMutexUnlock(pthread_mutex_internal_t* mutex, uint16_t shared) { const uint16_t unlocked = shared | MUTEX_STATE_BITS_UNLOCKED; const uint16_t locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED; // We use an atomic_exchange to release the lock. If locked_contended state // is returned, some threads is waiting for the lock and we need to wake up // one of them. // A release fence is required to make previous stores visible to next // lock owner threads. if (atomic_exchange_explicit(&mutex->state, unlocked, memory_order_release) == locked_contended) { // Wake up one waiting thread. We don't know which thread will be // woken or when it'll start executing -- futexes make no guarantees // here. There may not even be a thread waiting. // // The newly-woken thread will replace the unlocked state we just set above // with locked_contended state, which means that when it eventually releases // the mutex it will also call FUTEX_WAKE. This results in one extra wake // call whenever a lock is contended, but let us avoid forgetting anyone // without requiring us to track the number of sleepers. // // It's possible for another thread to sneak in and grab the lock between // the exchange above and the wake call below. If the new thread is "slow" // and holds the lock for a while, we'll wake up a sleeper, which will swap // in locked_uncontended state and then go back to sleep since the lock is // still held. If the new thread is "fast", running to completion before // we call wake, the thread we eventually wake will find an unlocked mutex // and will execute. Either way we have correct behavior and nobody is // orphaned on the wait queue. __futex_wake_ex(&mutex->state, shared, 1); } } /* This common inlined function is used to increment the counter of a recursive Non-PI mutex. * * If the counter overflows, it will return EAGAIN. * Otherwise, it atomically increments the counter and returns 0. * */ static inline __always_inline int RecursiveIncrement(pthread_mutex_internal_t* mutex, uint16_t old_state) { // Detect recursive lock overflow and return EAGAIN. // This is safe because only the owner thread can modify the // counter bits in the mutex value. if (MUTEX_COUNTER_BITS_WILL_OVERFLOW(old_state)) { return EAGAIN; } // Other threads are able to change the lower bits (e.g. promoting it to "contended"), // but the mutex counter will not overflow. So we use atomic_fetch_add operation here. // The mutex is already locked by current thread, so we don't need an acquire fence. atomic_fetch_add_explicit(&mutex->state, MUTEX_COUNTER_BITS_ONE, memory_order_relaxed); return 0; } // Wait on a recursive or errorcheck Non-PI mutex. static inline __always_inline int RecursiveOrErrorcheckMutexWait(pthread_mutex_internal_t* mutex, uint16_t shared, uint16_t old_state, bool use_realtime_clock, const timespec* abs_timeout) { // __futex_wait always waits on a 32-bit value. But state is 16-bit. For a normal mutex, the owner_tid // field in mutex is not used. On 64-bit devices, the __pad field in mutex is not used. // But when a recursive or errorcheck mutex is used on 32-bit devices, we need to add the // owner_tid value in the value argument for __futex_wait, otherwise we may always get EAGAIN error. #if defined(__LP64__) return __futex_wait_ex(&mutex->state, shared, old_state, use_realtime_clock, abs_timeout); #else // This implementation works only when the layout of pthread_mutex_internal_t matches below expectation. // And it is based on the assumption that Android is always in little-endian devices. static_assert(offsetof(pthread_mutex_internal_t, state) == 0, ""); static_assert(offsetof(pthread_mutex_internal_t, owner_tid) == 2, ""); uint32_t owner_tid = atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed); return __futex_wait_ex(&mutex->state, shared, (owner_tid << 16) | old_state, use_realtime_clock, abs_timeout); #endif } // Lock a Non-PI mutex. static int MutexLockWithTimeout(pthread_mutex_internal_t* mutex, bool use_realtime_clock, const timespec* abs_timeout_or_null) { uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed); uint16_t mtype = (old_state & MUTEX_TYPE_MASK); uint16_t shared = (old_state & MUTEX_SHARED_MASK); // Handle common case first. if ( __predict_true(mtype == MUTEX_TYPE_BITS_NORMAL) ) { return NormalMutexLock(mutex, shared, use_realtime_clock, abs_timeout_or_null); } // Do we already own this recursive or error-check mutex? pid_t tid = __get_thread()->tid; if (tid == atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed)) { if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) { return EDEADLK; } return RecursiveIncrement(mutex, old_state); } const uint16_t unlocked = mtype | shared | MUTEX_STATE_BITS_UNLOCKED; const uint16_t locked_uncontended = mtype | shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED; const uint16_t locked_contended = mtype | shared | MUTEX_STATE_BITS_LOCKED_CONTENDED; // First, if the mutex is unlocked, try to quickly acquire it. // In the optimistic case where this works, set the state to locked_uncontended. if (old_state == unlocked) { // If exchanged successfully, an acquire fence is required to make // all memory accesses made by other threads visible to the current CPU. if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex->state, &old_state, locked_uncontended, memory_order_acquire, memory_order_relaxed))) { atomic_store_explicit(&mutex->owner_tid, tid, memory_order_relaxed); return 0; } } ScopedTrace trace("Contending for pthread mutex"); while (true) { if (old_state == unlocked) { // NOTE: We put the state to locked_contended since we _know_ there // is contention when we are in this loop. This ensures all waiters // will be unlocked. // If exchanged successfully, an acquire fence is required to make // all memory accesses made by other threads visible to the current CPU. if (__predict_true(atomic_compare_exchange_weak_explicit(&mutex->state, &old_state, locked_contended, memory_order_acquire, memory_order_relaxed))) { atomic_store_explicit(&mutex->owner_tid, tid, memory_order_relaxed); return 0; } continue; } else if (MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(old_state)) { // We should set it to locked_contended beforing going to sleep. This can make // sure waiters will be woken up eventually. int new_state = MUTEX_STATE_BITS_FLIP_CONTENTION(old_state); if (__predict_false(!atomic_compare_exchange_weak_explicit(&mutex->state, &old_state, new_state, memory_order_relaxed, memory_order_relaxed))) { continue; } old_state = new_state; } int result = check_timespec(abs_timeout_or_null, true); if (result != 0) { return result; } // We are in locked_contended state, sleep until someone wakes us up. if (RecursiveOrErrorcheckMutexWait(mutex, shared, old_state, use_realtime_clock, abs_timeout_or_null) == -ETIMEDOUT) { return ETIMEDOUT; } old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed); } } } // namespace NonPI static inline __always_inline bool IsMutexDestroyed(uint16_t mutex_state) { return mutex_state == 0xffff; } // Inlining this function in pthread_mutex_lock() adds the cost of stack frame instructions on // ARM64. So make it noinline. static int __attribute__((noinline)) HandleUsingDestroyedMutex(pthread_mutex_t* mutex, const char* function_name) { if (android_get_application_target_sdk_version() >= __ANDROID_API_P__) { __fortify_fatal("%s called on a destroyed mutex (%p)", function_name, mutex); } return EBUSY; } int pthread_mutex_lock(pthread_mutex_t* mutex_interface) { #if !defined(__LP64__) // Some apps depend on being able to pass NULL as a mutex and get EINVAL // back. Don't need to worry about it for LP64 since the ABI is brand new, // but keep compatibility for LP32. http://b/19995172. if (mutex_interface == nullptr) { return EINVAL; } #endif pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface); uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed); uint16_t mtype = (old_state & MUTEX_TYPE_MASK); // Avoid slowing down fast path of normal mutex lock operation. if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) { uint16_t shared = (old_state & MUTEX_SHARED_MASK); if (__predict_true(NonPI::NormalMutexTryLock(mutex, shared) == 0)) { return 0; } } if (old_state == PI_MUTEX_STATE) { PIMutex& m = mutex->ToPIMutex(); // Handle common case first. if (__predict_true(PIMutexTryLock(m) == 0)) { return 0; } return PIMutexTimedLock(mutex->ToPIMutex(), false, nullptr); } if (__predict_false(IsMutexDestroyed(old_state))) { return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__); } return NonPI::MutexLockWithTimeout(mutex, false, nullptr); } int pthread_mutex_unlock(pthread_mutex_t* mutex_interface) { #if !defined(__LP64__) // Some apps depend on being able to pass NULL as a mutex and get EINVAL // back. Don't need to worry about it for LP64 since the ABI is brand new, // but keep compatibility for LP32. http://b/19995172. if (mutex_interface == nullptr) { return EINVAL; } #endif pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface); uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed); uint16_t mtype = (old_state & MUTEX_TYPE_MASK); uint16_t shared = (old_state & MUTEX_SHARED_MASK); // Handle common case first. if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) { NonPI::NormalMutexUnlock(mutex, shared); return 0; } if (old_state == PI_MUTEX_STATE) { return PIMutexUnlock(mutex->ToPIMutex()); } if (__predict_false(IsMutexDestroyed(old_state))) { return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__); } // Do we already own this recursive or error-check mutex? pid_t tid = __get_thread()->tid; if ( tid != atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed) ) { return EPERM; } // If the counter is > 0, we can simply decrement it atomically. // Since other threads can mutate the lower state bits (and only the // lower state bits), use a compare_exchange loop to do it. if (!MUTEX_COUNTER_BITS_IS_ZERO(old_state)) { // We still own the mutex, so a release fence is not needed. atomic_fetch_sub_explicit(&mutex->state, MUTEX_COUNTER_BITS_ONE, memory_order_relaxed); return 0; } // The counter is 0, so we'are going to unlock the mutex by resetting its // state to unlocked, we need to perform a atomic_exchange inorder to read // the current state, which will be locked_contended if there may have waiters // to awake. // A release fence is required to make previous stores visible to next // lock owner threads. atomic_store_explicit(&mutex->owner_tid, 0, memory_order_relaxed); const uint16_t unlocked = mtype | shared | MUTEX_STATE_BITS_UNLOCKED; old_state = atomic_exchange_explicit(&mutex->state, unlocked, memory_order_release); if (MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(old_state)) { __futex_wake_ex(&mutex->state, shared, 1); } return 0; } int pthread_mutex_trylock(pthread_mutex_t* mutex_interface) { pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface); uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed); uint16_t mtype = (old_state & MUTEX_TYPE_MASK); // Handle common case first. if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) { uint16_t shared = (old_state & MUTEX_SHARED_MASK); return NonPI::NormalMutexTryLock(mutex, shared); } if (old_state == PI_MUTEX_STATE) { return PIMutexTryLock(mutex->ToPIMutex()); } if (__predict_false(IsMutexDestroyed(old_state))) { return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__); } // Do we already own this recursive or error-check mutex? pid_t tid = __get_thread()->tid; if (tid == atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed)) { if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) { return EBUSY; } return NonPI::RecursiveIncrement(mutex, old_state); } uint16_t shared = (old_state & MUTEX_SHARED_MASK); const uint16_t unlocked = mtype | shared | MUTEX_STATE_BITS_UNLOCKED; const uint16_t locked_uncontended = mtype | shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED; // Same as pthread_mutex_lock, except that we don't want to wait, and // the only operation that can succeed is a single compare_exchange to acquire the // lock if it is released / not owned by anyone. No need for a complex loop. // If exchanged successfully, an acquire fence is required to make // all memory accesses made by other threads visible to the current CPU. old_state = unlocked; if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex->state, &old_state, locked_uncontended, memory_order_acquire, memory_order_relaxed))) { atomic_store_explicit(&mutex->owner_tid, tid, memory_order_relaxed); return 0; } return EBUSY; } #if !defined(__LP64__) extern "C" int pthread_mutex_lock_timeout_np(pthread_mutex_t* mutex_interface, unsigned ms) { timespec ts; timespec_from_ms(ts, ms); timespec abs_timeout; absolute_timespec_from_timespec(abs_timeout, ts, CLOCK_MONOTONIC); int error = NonPI::MutexLockWithTimeout(__get_internal_mutex(mutex_interface), false, &abs_timeout); if (error == ETIMEDOUT) { error = EBUSY; } return error; } #endif static int __pthread_mutex_timedlock(pthread_mutex_t* mutex_interface, bool use_realtime_clock, const timespec* abs_timeout, const char* function) { pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface); uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed); uint16_t mtype = (old_state & MUTEX_TYPE_MASK); // Handle common case first. if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) { uint16_t shared = (old_state & MUTEX_SHARED_MASK); if (__predict_true(NonPI::NormalMutexTryLock(mutex, shared) == 0)) { return 0; } } if (old_state == PI_MUTEX_STATE) { return PIMutexTimedLock(mutex->ToPIMutex(), use_realtime_clock, abs_timeout); } if (__predict_false(IsMutexDestroyed(old_state))) { return HandleUsingDestroyedMutex(mutex_interface, function); } return NonPI::MutexLockWithTimeout(mutex, use_realtime_clock, abs_timeout); } int pthread_mutex_timedlock(pthread_mutex_t* mutex_interface, const struct timespec* abs_timeout) { return __pthread_mutex_timedlock(mutex_interface, true, abs_timeout, __FUNCTION__); } int pthread_mutex_timedlock_monotonic_np(pthread_mutex_t* mutex_interface, const struct timespec* abs_timeout) { return __pthread_mutex_timedlock(mutex_interface, false, abs_timeout, __FUNCTION__); } int pthread_mutex_destroy(pthread_mutex_t* mutex_interface) { pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface); uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed); if (__predict_false(IsMutexDestroyed(old_state))) { return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__); } if (old_state == PI_MUTEX_STATE) { int result = PIMutexDestroy(mutex->ToPIMutex()); if (result == 0) { mutex->FreePIMutex(); atomic_store(&mutex->state, 0xffff); } return result; } // Store 0xffff to make the mutex unusable. Although POSIX standard says it is undefined // behavior to destroy a locked mutex, we prefer not to change mutex->state in that situation. if (MUTEX_STATE_BITS_IS_UNLOCKED(old_state) && atomic_compare_exchange_strong_explicit(&mutex->state, &old_state, 0xffff, memory_order_relaxed, memory_order_relaxed)) { return 0; } return EBUSY; }