/*
 * Copyright 2016-2023 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */
/* We need to use the OPENSSL_fork_*() deprecated APIs */
#define OPENSSL_SUPPRESS_DEPRECATED

#include <openssl/crypto.h>
#include <crypto/cryptlib.h>
#include "internal/cryptlib.h"
#include "internal/rcu.h"
#include "rcu_internal.h"

#if defined(__sun)
# include <atomic.h>            /* for the atomic_*_nv() operations used below */
#endif
#if defined(__apple_build_version__) && __apple_build_version__ < 6000000
/*
 * OS/X 10.7 and 10.8 had a weird version of clang which has __ATOMIC_ACQUIRE
 * and __ATOMIC_ACQ_REL but which expects only one parameter for
 * __atomic_is_lock_free() rather than two; its signature is
 * __atomic_is_lock_free(sizeof(_Atomic(T))).  All of this makes it impossible
 * to use __atomic_is_lock_free here.
 *
 * See: https://github.com/llvm/llvm-project/commit/a4c2602b714e6c6edb98164550a5ae829b2de760
 */
# define BROKEN_CLANG_ATOMICS
#endif
#if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG) && !defined(OPENSSL_SYS_WINDOWS)

# if defined(OPENSSL_SYS_UNIX)
#  include <sys/types.h>
#  include <unistd.h>
# endif

# include <assert.h>

# ifdef PTHREAD_RWLOCK_INITIALIZER
#  define USE_RWLOCK
# endif
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
#  define ATOMIC_LOAD_N(p, o) __atomic_load_n(p, o)
#  define ATOMIC_STORE_N(p, v, o) __atomic_store_n(p, v, o)
#  define ATOMIC_STORE(p, v, o) __atomic_store(p, v, o)
#  define ATOMIC_EXCHANGE_N(p, v, o) __atomic_exchange_n(p, v, o)
#  define ATOMIC_ADD_FETCH(p, v, o) __atomic_add_fetch(p, v, o)
#  define ATOMIC_FETCH_ADD(p, v, o) __atomic_fetch_add(p, v, o)
#  define ATOMIC_SUB_FETCH(p, v, o) __atomic_sub_fetch(p, v, o)
#  define ATOMIC_AND_FETCH(p, m, o) __atomic_and_fetch(p, m, o)
#  define ATOMIC_OR_FETCH(p, m, o) __atomic_or_fetch(p, m, o)
# else
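/*
 * Fallback path for toolchains without usable __atomic builtins: the handful
 * of atomic operations used below are emulated with a single process-wide
 * mutex.  This is correct but serializes every "atomic" access, so it is only
 * a compatibility crutch.  The memory-order argument of each macro is ignored
 * here, as the mutex already provides full ordering.
 */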
static pthread_mutex_t atomic_sim_lock = PTHREAD_MUTEX_INITIALIZER;

static inline void *fallback_atomic_load_n(void **p)
{
    void *ret;

    pthread_mutex_lock(&atomic_sim_lock);
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_LOAD_N(p, o) fallback_atomic_load_n((void **)p)

static inline void *fallback_atomic_store_n(void **p, void *v)
{
    void *ret;

    pthread_mutex_lock(&atomic_sim_lock);
    ret = *p;
    *p = v;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_STORE_N(p, v, o) fallback_atomic_store_n((void **)p, (void *)v)
static inline void fallback_atomic_store(void **p, void **v)
{
    pthread_mutex_lock(&atomic_sim_lock);
    *p = *v;
    pthread_mutex_unlock(&atomic_sim_lock);
}

#  define ATOMIC_STORE(p, v, o) fallback_atomic_store((void **)p, (void **)v)
static inline void *fallback_atomic_exchange_n(void **p, void *v)
{
    void *ret;

    pthread_mutex_lock(&atomic_sim_lock);
    ret = *p;
    *p = v;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_EXCHANGE_N(p, v, o) fallback_atomic_exchange_n((void **)p, (void *)v)
static inline uint64_t fallback_atomic_add_fetch(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p += v;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_ADD_FETCH(p, v, o) fallback_atomic_add_fetch(p, v)

static inline uint64_t fallback_atomic_fetch_add(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    ret = *p;
    *p += v;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_FETCH_ADD(p, v, o) fallback_atomic_fetch_add(p, v)
static inline uint64_t fallback_atomic_sub_fetch(uint64_t *p, uint64_t v)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p -= v;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_SUB_FETCH(p, v, o) fallback_atomic_sub_fetch(p, v)

static inline uint64_t fallback_atomic_and_fetch(uint64_t *p, uint64_t m)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p &= m;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_AND_FETCH(p, v, o) fallback_atomic_and_fetch(p, v)

static inline uint64_t fallback_atomic_or_fetch(uint64_t *p, uint64_t m)
{
    uint64_t ret;

    pthread_mutex_lock(&atomic_sim_lock);
    *p |= m;
    ret = *p;
    pthread_mutex_unlock(&atomic_sim_lock);
    return ret;
}

#  define ATOMIC_OR_FETCH(p, v, o) fallback_atomic_or_fetch(p, v)
# endif
static CRYPTO_THREAD_LOCAL rcu_thr_key;

/*
 * users is broken up into 2 parts
 * bits 0-15: number of current readers
 * bits 32-63: ID of the current qp
 */
# define READER_SHIFT 0
# define ID_SHIFT 32
# define READER_SIZE 16
# define ID_SIZE 32

# define READER_MASK (((uint64_t)1 << READER_SIZE) - 1)
# define ID_MASK (((uint64_t)1 << ID_SIZE) - 1)
# define READER_COUNT(x) (((uint64_t)(x) >> READER_SHIFT) & READER_MASK)
# define ID_VAL(x) (((uint64_t)(x) >> ID_SHIFT) & ID_MASK)
# define VAL_READER ((uint64_t)1 << READER_SHIFT)
# define VAL_ID(x) ((uint64_t)(x) << ID_SHIFT)
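/*
 * For example, with the macros above a users value of
 *     VAL_ID(3) | (2 * VAL_READER)
 * decodes as ID_VAL(users) == 3 (the qp's generation id) and
 * READER_COUNT(users) == 2 (two readers currently hold the qp).
 */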
/*
 * This is the core of an rcu lock. It tracks the readers and writers for the
 * current quiescence point for a given lock. users is the 64 bit value that
 * stores the READERS/ID as defined above
 */
struct rcu_qp {
    uint64_t users;
};

struct thread_qp {
    struct rcu_qp *qp;
    unsigned int depth;
    CRYPTO_RCU_LOCK *lock;
};

# define MAX_QPS 10
/*
 * This is the per thread tracking data
 * that is assigned to each thread participating
 * in an rcu qp
 *
 * qp points to the qp that it last acquired
 */
struct rcu_thr_data {
    struct thread_qp thread_qps[MAX_QPS];
};
/*
 * This is the internal version of a CRYPTO_RCU_LOCK;
 * it is cast from CRYPTO_RCU_LOCK
 */
struct rcu_lock_st {
    /* Callbacks to call for next ossl_synchronize_rcu */
    struct rcu_cb_item *cb_items;

    /* rcu generation counter for in-order retirement */
    uint32_t id_ctr;

    /* Array of quiescent points for synchronization */
    struct rcu_qp *qp_group;

    /* Number of elements in qp_group array */
    size_t group_count;

    /* Index of the current qp in the qp_group array */
    uint64_t reader_idx;

    /* value of the next id_ctr value to be retired */
    uint32_t next_to_retire;

    /* index of the next free rcu_qp in the qp_group */
    uint64_t current_alloc_idx;

    /* number of qp's in qp_group array currently being retired */
    uint32_t writers_alloced;

    /* lock protecting write side operations */
    pthread_mutex_t write_lock;

    /* lock protecting updates to writers_alloced/current_alloc_idx */
    pthread_mutex_t alloc_lock;

    /* signal to wake threads waiting on alloc_lock */
    pthread_cond_t alloc_signal;

    /* lock to enforce in-order retirement */
    pthread_mutex_t prior_lock;

    /* signal to wake threads waiting on prior_lock */
    pthread_cond_t prior_signal;
};
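/*
 * The qp_group array behaves as a ring of quiescence points: a writer takes
 * the slot at current_alloc_idx (update_qp), points new readers at the next
 * slot via reader_idx, waits for the taken slot's reader count to drain, and
 * then returns the slot to the pool (retire_qp).  alloc_lock/alloc_signal
 * throttle writers when every slot is in use, while prior_lock/prior_signal
 * force retirement to happen in id_ctr order.
 */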
/*
 * Called on thread exit to free the pthread key
 * associated with this thread, if any
 */
static void free_rcu_thr_data(void *ptr)
{
    struct rcu_thr_data *data =
        (struct rcu_thr_data *)CRYPTO_THREAD_get_local(&rcu_thr_key);

    OPENSSL_free(data);
    CRYPTO_THREAD_set_local(&rcu_thr_key, NULL);
}

static void ossl_rcu_init(void)
{
    CRYPTO_THREAD_init_local(&rcu_thr_key, NULL);
}
/* Read side acquisition of the current qp */
static struct rcu_qp *get_hold_current_qp(struct rcu_lock_st *lock)
{
    uint64_t qp_idx;

    /* get the current qp index */
    for (;;) {
        /*
         * Notes on use of __ATOMIC_ACQUIRE
         * We need to ensure the following:
         * 1) That subsequent operations aren't optimized by hoisting them above
         *    this operation.  Specifically, we don't want the below re-load of
         *    qp_idx to get optimized away.
         * 2) We want to ensure that any updating of reader_idx on the write side
         *    of the lock is flushed from a local cpu cache so that we see any
         *    updates prior to the load.  This is a non-issue on cache coherent
         *    systems like x86, but is relevant on other arches.
         * Note: This applies to the reload below as well.
         */
        qp_idx = (uint64_t)ATOMIC_LOAD_N(&lock->reader_idx, __ATOMIC_ACQUIRE);

        /*
         * Notes on use of __ATOMIC_RELEASE
         * This counter is only read by the write side of the lock, and so we
         * specify __ATOMIC_RELEASE here to ensure that the write side of the
         * lock sees this during its spin loop read of users, as it waits for
         * the reader count to approach zero.
         */
        ATOMIC_ADD_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
                         __ATOMIC_RELEASE);

        /* if the idx hasn't changed, we're good, else try again */
        if (qp_idx == (uint64_t)ATOMIC_LOAD_N(&lock->reader_idx, __ATOMIC_ACQUIRE))
            break;

        /*
         * Notes on use of __ATOMIC_RELEASE
         * As with the add above, we want to ensure that this decrement is
         * seen by the write side of the lock as soon as it happens to prevent
         * undue spinning while waiting for write side completion.
         */
        ATOMIC_SUB_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
                         __ATOMIC_RELEASE);
    }

    return &lock->qp_group[qp_idx];
}
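/*
 * Typical read-side usage (sketch; shared_ptr/obj are illustrative names and
 * assume a writer publishes objects with ossl_rcu_assign_uptr() and frees
 * them only after ossl_synchronize_rcu()):
 *
 *     ossl_rcu_read_lock(lock);
 *     obj = ossl_rcu_uptr_deref((void **)&shared_ptr);
 *     ... use obj; it cannot be freed while the read lock is held ...
 *     ossl_rcu_read_unlock(lock);
 *
 * Read side acquisition is recursive per thread: nested read_lock calls on
 * the same lock only bump the depth counter recorded in thread_qps.
 */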
void ossl_rcu_read_lock(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_thr_data *data;
    int i, available_qp = -1;

    /*
     * we're going to access current_qp here so ask the
     * processor to fetch it
     */
    data = CRYPTO_THREAD_get_local(&rcu_thr_key);

    if (data == NULL) {
        data = OPENSSL_zalloc(sizeof(*data));
        OPENSSL_assert(data != NULL);
        CRYPTO_THREAD_set_local(&rcu_thr_key, data);
        ossl_init_thread_start(NULL, NULL, free_rcu_thr_data);
    }

    for (i = 0; i < MAX_QPS; i++) {
        if (data->thread_qps[i].qp == NULL && available_qp == -1)
            available_qp = i;
        /* If we have a hold on this lock already, we're good */
        if (data->thread_qps[i].lock == lock) {
            data->thread_qps[i].depth++;
            return;
        }
    }

    /*
     * if we get here, then we don't have a hold on this lock yet
     */
    assert(available_qp != -1);

    data->thread_qps[available_qp].qp = get_hold_current_qp(lock);
    data->thread_qps[available_qp].depth = 1;
    data->thread_qps[available_qp].lock = lock;
}
void ossl_rcu_read_unlock(CRYPTO_RCU_LOCK *lock)
{
    int i;
    struct rcu_thr_data *data = CRYPTO_THREAD_get_local(&rcu_thr_key);
    uint64_t ret;

    assert(data != NULL);

    for (i = 0; i < MAX_QPS; i++) {
        if (data->thread_qps[i].lock == lock) {
            /*
             * As with read side acquisition, we use __ATOMIC_RELEASE here
             * to ensure that the decrement is published immediately
             * to any write side waiters
             */
            data->thread_qps[i].depth--;
            if (data->thread_qps[i].depth == 0) {
                ret = ATOMIC_SUB_FETCH(&data->thread_qps[i].qp->users,
                                       VAL_READER, __ATOMIC_RELEASE);
                OPENSSL_assert(ret != UINT64_MAX);
                data->thread_qps[i].qp = NULL;
                data->thread_qps[i].lock = NULL;
            }
            return;
        }
    }

    /*
     * if we get here, we're trying to unlock a lock that we never acquired;
     * that's fatal
     */
    assert(0);
}
/*
 * Write side allocation routine to get the current qp
 * and replace it with a new one
 */
static struct rcu_qp *update_qp(CRYPTO_RCU_LOCK *lock)
{
    uint64_t new_id;
    uint64_t current_idx;

    pthread_mutex_lock(&lock->alloc_lock);

    /*
     * we need at least one qp to be available with one
     * left over, so that readers can start working on
     * one that isn't yet being waited on
     */
    while (lock->group_count - lock->writers_alloced < 2)
        /* we have to wait for one to be free */
        pthread_cond_wait(&lock->alloc_signal, &lock->alloc_lock);

    current_idx = lock->current_alloc_idx;

    /* Allocate the qp */
    lock->writers_alloced++;

    /* increment the allocation index */
    lock->current_alloc_idx =
        (lock->current_alloc_idx + 1) % lock->group_count;

    /* get and insert a new id */
    new_id = lock->id_ctr;
    lock->id_ctr++;

    new_id = VAL_ID(new_id);
    /*
     * Even though we are under a write side lock here,
     * we need to use atomic instructions to ensure that the results
     * of this update are published to the read side prior to updating the
     * reader idx below
     */
    ATOMIC_AND_FETCH(&lock->qp_group[current_idx].users, ID_MASK,
                     __ATOMIC_RELEASE);
    ATOMIC_OR_FETCH(&lock->qp_group[current_idx].users, new_id,
                    __ATOMIC_RELEASE);

    /*
     * update the reader index to be the prior qp.
     * Note the use of __ATOMIC_RELEASE here is based on the corresponding use
     * of __ATOMIC_ACQUIRE in get_hold_current_qp, as we want any publication
     * of this value to be seen on the read side immediately after it happens
     */
    ATOMIC_STORE_N(&lock->reader_idx, lock->current_alloc_idx,
                   __ATOMIC_RELEASE);

    /* wake up any waiters */
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);
    return &lock->qp_group[current_idx];
}
static void retire_qp(CRYPTO_RCU_LOCK *lock, struct rcu_qp *qp)
{
    pthread_mutex_lock(&lock->alloc_lock);
    lock->writers_alloced--;
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);
}
static struct rcu_qp *allocate_new_qp_group(CRYPTO_RCU_LOCK *lock,
                                            int count)
{
    struct rcu_qp *new =
        OPENSSL_zalloc(sizeof(*new) * count);

    lock->group_count = count;
    return new;
}
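/*
 * Typical write-side usage (sketch with illustrative names; shared_ptr,
 * new_obj, old_obj and free_cb are not part of this API):
 *
 *     ossl_rcu_write_lock(lock);
 *     old_obj = shared_ptr;
 *     ossl_rcu_assign_uptr((void **)&shared_ptr, (void **)&new_obj);
 *     ossl_rcu_write_unlock(lock);
 *     ossl_synchronize_rcu(lock);   (or: ossl_rcu_call(lock, free_cb, old_obj))
 *     OPENSSL_free(old_obj);
 *
 * ossl_synchronize_rcu() returns once every reader that could have seen
 * old_obj has dropped its read lock, so freeing it afterwards is safe.
 */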
void ossl_rcu_write_lock(CRYPTO_RCU_LOCK *lock)
{
    pthread_mutex_lock(&lock->write_lock);
}

void ossl_rcu_write_unlock(CRYPTO_RCU_LOCK *lock)
{
    pthread_mutex_unlock(&lock->write_lock);
}
void ossl_synchronize_rcu(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_qp *qp;
    uint64_t count;
    struct rcu_cb_item *cb_items, *tmpcb;

    /*
     * __ATOMIC_ACQ_REL is used here to ensure that we get any prior published
     * writes before we read, and publish our write immediately
     */
    cb_items = ATOMIC_EXCHANGE_N(&lock->cb_items, NULL, __ATOMIC_ACQ_REL);

    qp = update_qp(lock);

    /*
     * wait for the reader count to reach zero
     * Note the use of __ATOMIC_ACQUIRE here to ensure that any
     * prior __ATOMIC_RELEASE write operation in get_hold_current_qp
     * is visible prior to our read
     */
    do {
        count = (uint64_t)ATOMIC_LOAD_N(&qp->users, __ATOMIC_ACQUIRE);
    } while (READER_COUNT(count) != 0);

    /* retire in order */
    pthread_mutex_lock(&lock->prior_lock);
    while (lock->next_to_retire != ID_VAL(count))
        pthread_cond_wait(&lock->prior_signal, &lock->prior_lock);
    lock->next_to_retire++;
    pthread_cond_broadcast(&lock->prior_signal);
    pthread_mutex_unlock(&lock->prior_lock);

    retire_qp(lock, qp);

    /* handle any callbacks that we have */
    while (cb_items != NULL) {
        tmpcb = cb_items;
        cb_items = cb_items->next;
        tmpcb->fn(tmpcb->data);
        OPENSSL_free(tmpcb);
    }
}
int ossl_rcu_call(CRYPTO_RCU_LOCK *lock, rcu_cb_fn cb, void *data)
{
    struct rcu_cb_item *new =
        OPENSSL_zalloc(sizeof(*new));

    if (new == NULL)
        return 0;

    new->data = data;
    new->fn = cb;
    /*
     * Use __ATOMIC_ACQ_REL here to indicate that any prior writes to this
     * list are visible to us prior to reading, and publish the new value
     * immediately
     */
    new->next = ATOMIC_EXCHANGE_N(&lock->cb_items, new, __ATOMIC_ACQ_REL);

    return 1;
}
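/*
 * ossl_rcu_uptr_deref()/ossl_rcu_assign_uptr() are the pointer-publication
 * primitives that pair with the lock above: the RELEASE store in assign makes
 * the pointed-to object's initialization visible before the pointer itself,
 * and the ACQUIRE load in deref guarantees that a reader which sees the new
 * pointer also sees that initialization.
 */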
void *ossl_rcu_uptr_deref(void **p)
{
    return (void *)ATOMIC_LOAD_N(p, __ATOMIC_ACQUIRE);
}

void ossl_rcu_assign_uptr(void **p, void **v)
{
    ATOMIC_STORE(p, v, __ATOMIC_RELEASE);
}

static CRYPTO_ONCE rcu_init_once = CRYPTO_ONCE_STATIC_INIT;
CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers)
{
    struct rcu_lock_st *new;

    if (!CRYPTO_THREAD_run_once(&rcu_init_once, ossl_rcu_init))
        return NULL;

    new = OPENSSL_zalloc(sizeof(*new));
    if (new == NULL)
        return NULL;

    pthread_mutex_init(&new->write_lock, NULL);
    pthread_mutex_init(&new->prior_lock, NULL);
    pthread_mutex_init(&new->alloc_lock, NULL);
    pthread_cond_init(&new->prior_signal, NULL);
    pthread_cond_init(&new->alloc_signal, NULL);
    new->qp_group = allocate_new_qp_group(new, num_writers + 1);
    if (new->qp_group == NULL) {
        OPENSSL_free(new);
        new = NULL;
    }
    return new;
}
void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock)
{
    struct rcu_lock_st *rlock = (struct rcu_lock_st *)lock;

    if (lock == NULL)
        return;

    /* make sure we're synchronized */
    ossl_synchronize_rcu(rlock);

    OPENSSL_free(rlock->qp_group);
    /* There should only be a single qp left now */
    pthread_mutex_destroy(&rlock->write_lock);
    pthread_mutex_destroy(&rlock->prior_lock);
    pthread_mutex_destroy(&rlock->alloc_lock);
    pthread_cond_destroy(&rlock->prior_signal);
    pthread_cond_destroy(&rlock->alloc_signal);
    OPENSSL_free(rlock);
}
CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void)
{
# ifdef USE_RWLOCK
    CRYPTO_RWLOCK *lock;

    if ((lock = OPENSSL_zalloc(sizeof(pthread_rwlock_t))) == NULL)
        /* Don't set error, to avoid recursion blowup. */
        return NULL;

    if (pthread_rwlock_init(lock, NULL) != 0) {
        OPENSSL_free(lock);
        return NULL;
    }
# else
    pthread_mutexattr_t attr;
    CRYPTO_RWLOCK *lock;

    if ((lock = OPENSSL_zalloc(sizeof(pthread_mutex_t))) == NULL)
        /* Don't set error, to avoid recursion blowup. */
        return NULL;

    /*
     * We don't use recursive mutexes, but try to catch errors if we do.
     */
    pthread_mutexattr_init(&attr);
#  if !defined (__TANDEM) && !defined (_SPT_MODEL_)
#   if !defined(NDEBUG) && !defined(OPENSSL_NO_MUTEX_ERRORCHECK)
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
#   endif
#  else
    /* The SPT Thread Library does not define MUTEX attributes. */
#  endif

    if (pthread_mutex_init(lock, &attr) != 0) {
        pthread_mutexattr_destroy(&attr);
        OPENSSL_free(lock);
        return NULL;
    }

    pthread_mutexattr_destroy(&attr);
# endif

    return lock;
}
__owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_rdlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_lock(lock) != 0) {
        assert(errno != EDEADLK && errno != EBUSY);
        return 0;
    }
# endif

    return 1;
}

__owur int CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_wrlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_lock(lock) != 0) {
        assert(errno != EDEADLK && errno != EBUSY);
        return 0;
    }
# endif

    return 1;
}

int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock)
{
# ifdef USE_RWLOCK
    if (pthread_rwlock_unlock(lock) != 0)
        return 0;
# else
    if (pthread_mutex_unlock(lock) != 0) {
        assert(errno != EPERM);
        return 0;
    }
# endif

    return 1;
}

void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock)
{
    if (lock == NULL)
        return;

# ifdef USE_RWLOCK
    pthread_rwlock_destroy(lock);
# else
    pthread_mutex_destroy(lock);
# endif
    OPENSSL_free(lock);
}
int CRYPTO_THREAD_run_once(CRYPTO_ONCE *once, void (*init)(void))
{
    if (pthread_once(once, init) != 0)
        return 0;

    return 1;
}

int CRYPTO_THREAD_init_local(CRYPTO_THREAD_LOCAL *key, void (*cleanup)(void *))
{
    if (pthread_key_create(key, cleanup) != 0)
        return 0;

    return 1;
}

void *CRYPTO_THREAD_get_local(CRYPTO_THREAD_LOCAL *key)
{
    return pthread_getspecific(*key);
}

int CRYPTO_THREAD_set_local(CRYPTO_THREAD_LOCAL *key, void *val)
{
    if (pthread_setspecific(*key, val) != 0)
        return 0;

    return 1;
}

int CRYPTO_THREAD_cleanup_local(CRYPTO_THREAD_LOCAL *key)
{
    if (pthread_key_delete(*key) != 0)
        return 0;

    return 1;
}

CRYPTO_THREAD_ID CRYPTO_THREAD_get_current_id(void)
{
    return pthread_self();
}

int CRYPTO_THREAD_compare_id(CRYPTO_THREAD_ID a, CRYPTO_THREAD_ID b)
{
    return pthread_equal(a, b);
}
int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_add_fetch(val, amount, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_add_int_nv((volatile unsigned int *)val, amount);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;

    *val += amount;
    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}
int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
                     CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        *ret = __atomic_or_fetch(val, op, __ATOMIC_ACQ_REL);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, op);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
        return 0;

    *val |= op;
    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}
int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = atomic_or_64_nv(val, 0);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
        return 0;

    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}
int CRYPTO_atomic_load_int(int *val, int *ret, CRYPTO_RWLOCK *lock)
{
# if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
    if (__atomic_is_lock_free(sizeof(*val), val)) {
        __atomic_load(val, ret, __ATOMIC_ACQUIRE);
        return 1;
    }
# elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
    /* This will work for all future Solaris versions. */
    if (ret != NULL) {
        *ret = (int)atomic_or_uint_nv((unsigned int *)val, 0);
        return 1;
    }
# endif
    if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
        return 0;

    *ret = *val;

    if (!CRYPTO_THREAD_unlock(lock))
        return 0;

    return 1;
}
# ifndef FIPS_MODULE
int openssl_init_fork_handlers(void)
{
    return 1;
}
# endif /* FIPS_MODULE */

int openssl_get_fork_id(void)
{
    return getpid();
}
#endif