crypto/threads_pthread.c

   1 /*
   2  * Copyright 2016-2024 The OpenSSL Project Authors. All Rights Reserved.
   3  *
   4  * Licensed under the Apache License 2.0 (the "License").  You may not use
   5  * this file except in compliance with the License.  You can obtain a copy
   6  * in the file LICENSE in the source distribution or at
   7  * https://www.openssl.org/source/license.html
   8  */
   9
  10 /* We need to use the OPENSSL_fork_*() deprecated APIs */
  11 #define OPENSSL_SUPPRESS_DEPRECATED
  12
  13 #include <openssl/crypto.h>
  14 #include <crypto/cryptlib.h>
  15 #include "internal/cryptlib.h"
  16 #include "internal/rcu.h"
  17 #include "rcu_internal.h"
  18
  19 #if defined(__sun)
  20 # include <atomic.h>
  21 #endif
  22
  23 #if defined(__apple_build_version__) && __apple_build_version__ < 6000000
  24 /*
  25  * OS/X 10.7 and 10.8 had a weird version of clang which has __ATOMIC_ACQUIRE and
  26  * __ATOMIC_ACQ_REL but which expects only one parameter for __atomic_is_lock_free()
  27  * rather than two which has signature __atomic_is_lock_free(sizeof(_Atomic(T))).
  28  * All of this makes impossible to use __atomic_is_lock_free here.
  29  *
  30  * See: https://github.com/llvm/llvm-project/commit/a4c2602b714e6c6edb98164550a5ae829b2de760
  31  */
  32 # define BROKEN_CLANG_ATOMICS
  33 #endif
  34
  35 #if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG) && !defined(OPENSSL_SYS_WINDOWS)
  36
  37 # if defined(OPENSSL_SYS_UNIX)
  38 #  include <sys/types.h>
  39 #  include <unistd.h>
  40 # endif
  41
  42 # include <assert.h>
  43
  44 # ifdef PTHREAD_RWLOCK_INITIALIZER
  45 #  define USE_RWLOCK
  46 # endif
  47
  48 /*
  49  * For all GNU/clang atomic builtins, we also need fallbacks, to cover all
  50  * other compilers.
  51
  52  * Unfortunately, we can't do that with some "generic type", because there's no
  53  * guarantee that the chosen generic type is large enough to cover all cases.
  54  * Therefore, we implement fallbacks for each applicable type, with composed
  55  * names that include the type they handle.
  56  *
  57  * (an anecdote: we previously tried to use |void *| as the generic type, with
  58  * the thought that the pointer itself is the largest type.  However, this is
  59  * not true on 32-bit pointer platforms, as a |uint64_t| is twice as large)
  60  *
  61  * All applicable ATOMIC_ macros take the intended type as first parameter, so
  62  * they can map to the correct fallback function.  In the GNU/clang case, that
  63  * parameter is simply ignored.
  64  */
  65
/*
 * Internal types used with the ATOMIC_ macros, to make it possible to compose
 * fallback function names.  The macros paste the type argument into a
 * function name (e.g. fallback_atomic_load_n_##t), so every type they are
 * used with needs a single-token alias.
 */
typedef void *pvoid;                      /* used with ATOMIC_LOAD_N / ATOMIC_STORE */
typedef struct rcu_cb_item *prcu_cb_item; /* used with ATOMIC_EXCHANGE_N */
  72
  73 # if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS) \
  74     && !defined(USE_ATOMIC_FALLBACKS)
  75 #  if defined(__APPLE__) && defined(__clang__) && defined(__aarch64__)
/*
 * For pointers, the virtualized Apple M1 cpu seems to have some problem using
 * the ldapr instruction (see https://github.com/openssl/openssl/pull/23974).
 * When using the native apple clang compiler, this instruction is emitted for
 * atomic loads, which is bad.  So, if
 * 1) We are building on a target that defines __APPLE__ AND
 * 2) We are building on a target using clang (__clang__) AND
 * 3) We are building for an M1 processor (__aarch64__)
 * then we should not use __atomic_load_n and instead implement our own
 * function to issue the ldar instruction instead, which produces the proper
 * sequencing guarantees.
 *
 * p is the address the pointer value is loaded from.  memorder exists only so
 * that the signature lines up with __atomic_load_n(); it is not used.
 */
static inline void *apple_atomic_load_n_pvoid(void **p,
                                              ossl_unused int memorder)
{
    void *ret;

    /* load the pointer stored at [p] into ret using ldar */
    __asm volatile("ldar %0, [%1]" : "=r" (ret): "r" (p):);

    return ret;
}
  97
/* For uint64_t, we should be fine, though */
#   define apple_atomic_load_n_uint64_t(p, o) __atomic_load_n(p, o)

/* Dispatch on the type token: pvoid -> ldar helper, uint64_t -> builtin */
#   define ATOMIC_LOAD_N(t, p, o) apple_atomic_load_n_##t(p, o)
#  else
/* Everywhere else the type argument is ignored and the builtin is used */
#   define ATOMIC_LOAD_N(t, p, o) __atomic_load_n(p, o)
#  endif
/*
 * The remaining operations map one-to-one onto the GNU/clang builtins.
 * Where present, the leading type argument (t) is ignored here; it only
 * matters for the mutex based fallbacks.
 */
#  define ATOMIC_STORE_N(t, p, v, o) __atomic_store_n(p, v, o)
#  define ATOMIC_STORE(t, p, v, o) __atomic_store(p, v, o)
#  define ATOMIC_EXCHANGE_N(t, p, v, o) __atomic_exchange_n(p, v, o)
#  define ATOMIC_ADD_FETCH(p, v, o) __atomic_add_fetch(p, v, o)
#  define ATOMIC_FETCH_ADD(p, v, o) __atomic_fetch_add(p, v, o)
#  define ATOMIC_SUB_FETCH(p, v, o) __atomic_sub_fetch(p, v, o)
#  define ATOMIC_AND_FETCH(p, m, o) __atomic_and_fetch(p, m, o)
#  define ATOMIC_OR_FETCH(p, m, o) __atomic_or_fetch(p, m, o)
 113 # else
 114 static pthread_mutex_t atomic_sim_lock = PTHREAD_MUTEX_INITIALIZER;
 115
 116 #  define IMPL_fallback_atomic_load_n(t)                        \
 117     static inline t fallback_atomic_load_n_##t(t *p)            \
 118     {                                                           \
 119         t ret;                                                  \
 120                                                                 \
 121         pthread_mutex_lock(&atomic_sim_lock);                   \
 122         ret = *p;                                               \
 123         pthread_mutex_unlock(&atomic_sim_lock);                 \
 124         return ret;                                             \
 125     }
 126 IMPL_fallback_atomic_load_n(uint64_t)
 127 IMPL_fallback_atomic_load_n(pvoid)
 128
 129 #  define ATOMIC_LOAD_N(t, p, o) fallback_atomic_load_n_##t(p)
 130
 131 #  define IMPL_fallback_atomic_store_n(t)                       \
 132     static inline t fallback_atomic_store_n_##t(t *p, t v)      \
 133     {                                                           \
 134         t ret;                                                  \
 135                                                                 \
 136         pthread_mutex_lock(&atomic_sim_lock);                   \
 137         ret = *p;                                               \
 138         *p = v;                                                 \
 139         pthread_mutex_unlock(&atomic_sim_lock);                 \
 140         return ret;                                             \
 141     }
 142 IMPL_fallback_atomic_store_n(uint64_t)
 143
 144 #  define ATOMIC_STORE_N(t, p, v, o) fallback_atomic_store_n_##t(p, v)
 145
/*
 * Generates fallback_atomic_store_<t>(), the counterpart of __atomic_store():
 * the new value is passed by address (v) and copied into *p while
 * atomic_sim_lock is held.
 */
#  define IMPL_fallback_atomic_store(t)                         \
    static inline void fallback_atomic_store_##t(t *p, t *v)    \
    {                                                           \
        pthread_mutex_lock(&atomic_sim_lock);                   \
        *p = *v;                                                \
        pthread_mutex_unlock(&atomic_sim_lock);                 \
    }
IMPL_fallback_atomic_store(uint64_t)
IMPL_fallback_atomic_store(pvoid)

/* The memory order argument (o) is dropped by the fallback */
#  define ATOMIC_STORE(t, p, v, o) fallback_atomic_store_##t(p, v)
 157
 158 #  define IMPL_fallback_atomic_exchange_n(t)                            \
 159     static inline t fallback_atomic_exchange_n_##t(t *p, t v)           \
 160     {                                                                   \
 161         t ret;                                                          \
 162                                                                         \
 163         pthread_mutex_lock(&atomic_sim_lock);                           \
 164         ret = *p;                                                       \
 165         *p = v;                                                         \
 166         pthread_mutex_unlock(&atomic_sim_lock);                         \
 167         return ret;                                                     \
 168     }
 169 IMPL_fallback_atomic_exchange_n(uint64_t)
 170 IMPL_fallback_atomic_exchange_n(prcu_cb_item)
 171
 172 #  define ATOMIC_EXCHANGE_N(t, p, v, o) fallback_atomic_exchange_n_##t(p, v)
 173
 174 /*
 175  * The fallbacks that follow don't need any per type implementation, as
 176  * they are designed for uint64_t only.  If there comes a time when multiple
 177  * types need to be covered, it's relatively easy to refactor them the same
 178  * way as the fallbacks above.
 179  */
 180
 181 static inline uint64_t fallback_atomic_add_fetch(uint64_t *p, uint64_t v)
 182 {
 183     uint64_t ret;
 184
 185     pthread_mutex_lock(&atomic_sim_lock);
 186     *p += v;
 187     ret = *p;
 188     pthread_mutex_unlock(&atomic_sim_lock);
 189     return ret;
 190 }
 191
 192 #  define ATOMIC_ADD_FETCH(p, v, o) fallback_atomic_add_fetch(p, v)
 193
 194 static inline uint64_t fallback_atomic_fetch_add(uint64_t *p, uint64_t v)
 195 {
 196     uint64_t ret;
 197
 198     pthread_mutex_lock(&atomic_sim_lock);
 199     ret = *p;
 200     *p += v;
 201     pthread_mutex_unlock(&atomic_sim_lock);
 202     return ret;
 203 }
 204
 205 #  define ATOMIC_FETCH_ADD(p, v, o) fallback_atomic_fetch_add(p, v)
 206
 207 static inline uint64_t fallback_atomic_sub_fetch(uint64_t *p, uint64_t v)
 208 {
 209     uint64_t ret;
 210
 211     pthread_mutex_lock(&atomic_sim_lock);
 212     *p -= v;
 213     ret = *p;
 214     pthread_mutex_unlock(&atomic_sim_lock);
 215     return ret;
 216 }
 217
 218 #  define ATOMIC_SUB_FETCH(p, v, o) fallback_atomic_sub_fetch(p, v)
 219
 220 static inline uint64_t fallback_atomic_and_fetch(uint64_t *p, uint64_t m)
 221 {
 222     uint64_t ret;
 223
 224     pthread_mutex_lock(&atomic_sim_lock);
 225     *p &= m;
 226     ret = *p;
 227     pthread_mutex_unlock(&atomic_sim_lock);
 228     return ret;
 229 }
 230
 231 #  define ATOMIC_AND_FETCH(p, v, o) fallback_atomic_and_fetch(p, v)
 232
 233 static inline uint64_t fallback_atomic_or_fetch(uint64_t *p, uint64_t m)
 234 {
 235     uint64_t ret;
 236
 237     pthread_mutex_lock(&atomic_sim_lock);
 238     *p |= m;
 239     ret = *p;
 240     pthread_mutex_unlock(&atomic_sim_lock);
 241     return ret;
 242 }
 243
 244 #  define ATOMIC_OR_FETCH(p, v, o) fallback_atomic_or_fetch(p, v)
 245 # endif
 246
/*
 * Thread-local key under which each thread stores its struct rcu_thr_data.
 * Created once by ossl_rcu_init().
 */
static CRYPTO_THREAD_LOCAL rcu_thr_key;
 248
 249 /*
 250  * users is broken up into 2 parts
 251  * bits 0-15 current readers
 252  * bit 32-63 - ID
 253  */
 254 # define READER_SHIFT 0
 255 # define ID_SHIFT 32
 256 # define READER_SIZE 16
 257 # define ID_SIZE 32
 258
 259 # define READER_MASK     (((uint64_t)1 << READER_SIZE) - 1)
 260 # define ID_MASK         (((uint64_t)1 << ID_SIZE) - 1)
 261 # define READER_COUNT(x) (((uint64_t)(x) >> READER_SHIFT) & READER_MASK)
 262 # define ID_VAL(x)       (((uint64_t)(x) >> ID_SHIFT) & ID_MASK)
 263 # define VAL_READER      ((uint64_t)1 << READER_SHIFT)
 264 # define VAL_ID(x)       ((uint64_t)x << ID_SHIFT)
 265
/*
 * This is the core of an rcu lock. It tracks the readers and writers for the
 * current quiescence point for a given lock. Users is the 64 bit value that
 * stores the READERS/ID as defined above
 */
struct rcu_qp {
    /* reader count and generation id, packed per READER_COUNT()/ID_VAL() */
    uint64_t users;
};
 275
/*
 * One read-side hold taken by a thread: which qp it incremented, for which
 * lock, and how many nested ossl_rcu_read_lock() calls are outstanding.
 */
struct thread_qp {
    /* qp whose reader count this thread incremented; NULL when slot is free */
    struct rcu_qp *qp;
    /* nesting depth of read locks held on |lock| by this thread */
    unsigned int depth;
    /* the rcu lock this slot belongs to; NULL when the slot is free */
    CRYPTO_RCU_LOCK *lock;
};
 281
/* Number of thread_qp slots per thread, i.e. distinct rcu locks held at once */
# define MAX_QPS 10
/*
 * This is the per thread tracking data
 * that is assigned to each thread participating
 * in an rcu qp
 *
 * Each slot's qp points to the qp that the thread last acquired for the
 * lock recorded in that slot
 *
 */
struct rcu_thr_data {
    struct thread_qp thread_qps[MAX_QPS];
};
 294
/*
 * This is the internal version of a CRYPTO_RCU_LOCK
 * it is cast from CRYPTO_RCU_LOCK
 */
struct rcu_lock_st {
    /*
     * Callbacks to call for next ossl_synchronize_rcu; pushed by
     * ossl_rcu_call() and detached as a whole list by ossl_synchronize_rcu()
     */
    struct rcu_cb_item *cb_items;

    /* rcu generation counter for in-order retirement */
    uint32_t id_ctr;

    /* Array of quiescent points for synchronization */
    struct rcu_qp *qp_group;

    /* Number of elements in qp_group array */
    size_t group_count;

    /* Index of the current qp in the qp_group array (read atomically) */
    uint64_t reader_idx;

    /* value of the next id_ctr value to be retired */
    uint32_t next_to_retire;

    /* index of the next free rcu_qp in the qp_group */
    uint64_t current_alloc_idx;

    /* number of qp's in qp_group array currently being retired */
    uint32_t writers_alloced;

    /* lock protecting write side operations */
    pthread_mutex_t write_lock;

    /* lock protecting updates to writers_alloced/current_alloc_idx */
    pthread_mutex_t alloc_lock;

    /* signal to wake threads waiting on alloc_lock */
    pthread_cond_t alloc_signal;

    /* lock to enforce in-order retirement */
    pthread_mutex_t prior_lock;

    /* signal to wake threads waiting on prior_lock */
    pthread_cond_t prior_signal;
};
 339
 340 /*
 341  * Called on thread exit to free the pthread key
 342  * associated with this thread, if any
 343  */
 344 static void free_rcu_thr_data(void *ptr)
 345 {
 346     struct rcu_thr_data *data =
 347                         (struct rcu_thr_data *)CRYPTO_THREAD_get_local(&rcu_thr_key);
 348
 349     OPENSSL_free(data);
 350     CRYPTO_THREAD_set_local(&rcu_thr_key, NULL);
 351 }
 352
/*
 * One-time initializer (run via rcu_init_once): creates the thread-local key
 * rcu_thr_key with no destructor.  The result of the key creation is not
 * checked here.
 */
static void ossl_rcu_init(void)
{
    CRYPTO_THREAD_init_local(&rcu_thr_key, NULL);
}
 357
/*
 * Read side acquisition of the current qp.
 *
 * Loads lock->reader_idx, increments the reader count of the qp at that
 * index, then loads reader_idx again.  If the index changed in between, the
 * increment is undone and the whole sequence is retried.
 *
 * Returns a pointer to the qp whose reader count was incremented.
 */
static struct rcu_qp *get_hold_current_qp(struct rcu_lock_st *lock)
{
    uint64_t qp_idx;

    /* get the current qp index */
    for (;;) {
        /*
         * Notes on use of __ATOMIC_ACQUIRE
         * We need to ensure the following:
         * 1) That subsequent operations aren't optimized by hoisting them above
         * this operation.  Specifically, we don't want the below re-load of
         * qp_idx to get optimized away
         * 2) We want to ensure that any updating of reader_idx on the write side
         * of the lock is flushed from a local cpu cache so that we see any
         * updates prior to the load.  This is a non-issue on cache coherent
         * systems like x86, but is relevant on other arches
         * Note: This applies to the reload below as well
         */
        qp_idx = ATOMIC_LOAD_N(uint64_t, &lock->reader_idx, __ATOMIC_ACQUIRE);

        /*
         * Notes on use of __ATOMIC_RELEASE
         * This counter is only read by the write side of the lock, and so we
         * specify __ATOMIC_RELEASE here to ensure that the write side of the
         * lock see this during the spin loop read of users, as it waits for the
         * reader count to approach zero
         */
        ATOMIC_ADD_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
                         __ATOMIC_RELEASE);

        /* if the idx hasn't changed, we're good, else try again */
        if (qp_idx == ATOMIC_LOAD_N(uint64_t, &lock->reader_idx, __ATOMIC_ACQUIRE))
            break;

        /*
         * Notes on use of __ATOMIC_RELEASE
         * As with the add above, we want to ensure that this decrement is
         * seen by the write side of the lock as soon as it happens to prevent
         * undue spinning waiting for write side completion
         */
        ATOMIC_SUB_FETCH(&lock->qp_group[qp_idx].users, VAL_READER,
                         __ATOMIC_RELEASE);
    }

    return &lock->qp_group[qp_idx];
}
 405
 406 void ossl_rcu_read_lock(CRYPTO_RCU_LOCK *lock)
 407 {
 408     struct rcu_thr_data *data;
 409     int i, available_qp = -1;
 410
 411     /*
 412      * we're going to access current_qp here so ask the
 413      * processor to fetch it
 414      */
 415     data = CRYPTO_THREAD_get_local(&rcu_thr_key);
 416
 417     if (data == NULL) {
 418         data = OPENSSL_zalloc(sizeof(*data));
 419         OPENSSL_assert(data != NULL);
 420         CRYPTO_THREAD_set_local(&rcu_thr_key, data);
 421         ossl_init_thread_start(NULL, NULL, free_rcu_thr_data);
 422     }
 423
 424     for (i = 0; i < MAX_QPS; i++) {
 425         if (data->thread_qps[i].qp == NULL && available_qp == -1)
 426             available_qp = i;
 427         /* If we have a hold on this lock already, we're good */
 428         if (data->thread_qps[i].lock == lock) {
 429             data->thread_qps[i].depth++;
 430             return;
 431         }
 432     }
 433
 434     /*
 435      * if we get here, then we don't have a hold on this lock yet
 436      */
 437     assert(available_qp != -1);
 438
 439     data->thread_qps[available_qp].qp = get_hold_current_qp(lock);
 440     data->thread_qps[available_qp].depth = 1;
 441     data->thread_qps[available_qp].lock = lock;
 442 }
 443
 444 void ossl_rcu_read_unlock(CRYPTO_RCU_LOCK *lock)
 445 {
 446     int i;
 447     struct rcu_thr_data *data = CRYPTO_THREAD_get_local(&rcu_thr_key);
 448     uint64_t ret;
 449
 450     assert(data != NULL);
 451
 452     for (i = 0; i < MAX_QPS; i++) {
 453         if (data->thread_qps[i].lock == lock) {
 454             /*
 455              * As with read side acquisition, we use __ATOMIC_RELEASE here
 456              * to ensure that the decrement is published immediately
 457              * to any write side waiters
 458              */
 459             data->thread_qps[i].depth--;
 460             if (data->thread_qps[i].depth == 0) {
 461                 ret = ATOMIC_SUB_FETCH(&data->thread_qps[i].qp->users, VAL_READER,
 462                                        __ATOMIC_RELEASE);
 463                 OPENSSL_assert(ret != UINT64_MAX);
 464                 data->thread_qps[i].qp = NULL;
 465                 data->thread_qps[i].lock = NULL;
 466             }
 467             return;
 468         }
 469     }
 470     /*
 471      * If we get here, we're trying to unlock a lock that we never acquired -
 472      * that's fatal.
 473      */
 474     assert(0);
 475 }
 476
/*
 * Write side allocation routine to get the current qp
 * and replace it with a new one
 *
 * Under alloc_lock: waits until at least two qps are not being retired,
 * takes the qp at current_alloc_idx, advances current_alloc_idx (wrapping at
 * group_count), stamps the taken qp with the next id_ctr value and publishes
 * the advanced index as reader_idx.  Returns the qp that was taken.
 */
static struct rcu_qp *update_qp(CRYPTO_RCU_LOCK *lock)
{
    uint64_t new_id;
    uint64_t current_idx;

    pthread_mutex_lock(&lock->alloc_lock);

    /*
     * we need at least one qp to be available with one
     * left over, so that readers can start working on
     * one that isn't yet being waited on
     */
    while (lock->group_count - lock->writers_alloced < 2)
        /* we have to wait for one to be free */
        pthread_cond_wait(&lock->alloc_signal, &lock->alloc_lock);

    current_idx = lock->current_alloc_idx;

    /* Allocate the qp */
    lock->writers_alloced++;

    /* increment the allocation index */
    lock->current_alloc_idx =
        (lock->current_alloc_idx + 1) % lock->group_count;

    /* get and insert a new id */
    new_id = lock->id_ctr;
    lock->id_ctr++;

    new_id = VAL_ID(new_id);
    /*
     * Even though we are under a write side lock here
     * We need to use atomic instructions to ensure that the results
     * of this update are published to the read side prior to updating the
     * reader idx below
     *
     * ID_MASK covers the low ID_SIZE bits, so the AND keeps the reader count
     * and clears the old id in the upper bits before the new id is OR-ed in
     */
    ATOMIC_AND_FETCH(&lock->qp_group[current_idx].users, ID_MASK,
                     __ATOMIC_RELEASE);
    ATOMIC_OR_FETCH(&lock->qp_group[current_idx].users, new_id,
                    __ATOMIC_RELEASE);

    /*
     * Point the reader index at the next qp (the advanced current_alloc_idx).
     * Note the use of __ATOMIC_RELEASE here is based on the corresponding use
     * of __ATOMIC_ACQUIRE in get_hold_current_qp, as we want any publication
     * of this value to be seen on the read side immediately after it happens
     */
    ATOMIC_STORE_N(uint64_t, &lock->reader_idx, lock->current_alloc_idx,
                   __ATOMIC_RELEASE);

    /* wake up any waiters */
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);
    return &lock->qp_group[current_idx];
}
 536
/*
 * Give back a qp taken by update_qp(): decrements writers_alloced under
 * alloc_lock and signals alloc_signal so a writer waiting in update_qp() can
 * proceed.  The qp argument itself is not used.
 */
static void retire_qp(CRYPTO_RCU_LOCK *lock, struct rcu_qp *qp)
{
    pthread_mutex_lock(&lock->alloc_lock);
    lock->writers_alloced--;
    pthread_cond_signal(&lock->alloc_signal);
    pthread_mutex_unlock(&lock->alloc_lock);
}
 544
 545 static struct rcu_qp *allocate_new_qp_group(CRYPTO_RCU_LOCK *lock,
 546                                             int count)
 547 {
 548     struct rcu_qp *new =
 549         OPENSSL_zalloc(sizeof(*new) * count);
 550
 551     lock->group_count = count;
 552     return new;
 553 }
 554
/* Acquire the lock's write_lock mutex; the result is not checked */
void ossl_rcu_write_lock(CRYPTO_RCU_LOCK *lock)
{
    pthread_mutex_lock(&lock->write_lock);
}
 559
/* Release the lock's write_lock mutex; the result is not checked */
void ossl_rcu_write_unlock(CRYPTO_RCU_LOCK *lock)
{
    pthread_mutex_unlock(&lock->write_lock);
}
 564
 565 void ossl_synchronize_rcu(CRYPTO_RCU_LOCK *lock)
 566 {
 567     struct rcu_qp *qp;
 568     uint64_t count;
 569     struct rcu_cb_item *cb_items, *tmpcb;
 570
 571     /*
 572      * __ATOMIC_ACQ_REL is used here to ensure that we get any prior published
 573      * writes before we read, and publish our write immediately
 574      */
 575     cb_items = ATOMIC_EXCHANGE_N(prcu_cb_item, &lock->cb_items, NULL,
 576                                  __ATOMIC_ACQ_REL);
 577
 578     qp = update_qp(lock);
 579
 580     /*
 581      * wait for the reader count to reach zero
 582      * Note the use of __ATOMIC_ACQUIRE here to ensure that any
 583      * prior __ATOMIC_RELEASE write operation in get_hold_current_qp
 584      * is visible prior to our read
 585      */
 586     do {
 587         count = ATOMIC_LOAD_N(uint64_t, &qp->users, __ATOMIC_ACQUIRE);
 588     } while (READER_COUNT(count) != 0);
 589
 590     /* retire in order */
 591     pthread_mutex_lock(&lock->prior_lock);
 592     while (lock->next_to_retire != ID_VAL(count))
 593         pthread_cond_wait(&lock->prior_signal, &lock->prior_lock);
 594     lock->next_to_retire++;
 595     pthread_cond_broadcast(&lock->prior_signal);
 596     pthread_mutex_unlock(&lock->prior_lock);
 597
 598     retire_qp(lock, qp);
 599
 600     /* handle any callbacks that we have */
 601     while (cb_items != NULL) {
 602         tmpcb = cb_items;
 603         cb_items = cb_items->next;
 604         tmpcb->fn(tmpcb->data);
 605         OPENSSL_free(tmpcb);
 606     }
 607 }
 608
 609 int ossl_rcu_call(CRYPTO_RCU_LOCK *lock, rcu_cb_fn cb, void *data)
 610 {
 611     struct rcu_cb_item *new =
 612         OPENSSL_zalloc(sizeof(*new));
 613
 614     if (new == NULL)
 615         return 0;
 616
 617     new->data = data;
 618     new->fn = cb;
 619     /*
 620      * Use __ATOMIC_ACQ_REL here to indicate that any prior writes to this
 621      * list are visible to us prior to reading, and publish the new value
 622      * immediately
 623      */
 624     new->next = ATOMIC_EXCHANGE_N(prcu_cb_item, &lock->cb_items, new,
 625                                   __ATOMIC_ACQ_REL);
 626
 627     return 1;
 628 }
 629
/* Atomically load (acquire) and return the pointer stored at *p */
void *ossl_rcu_uptr_deref(void **p)
{
    return ATOMIC_LOAD_N(pvoid, p, __ATOMIC_ACQUIRE);
}
 634
/*
 * Atomically store (release) the pointer value found at *v into *p.  Note
 * that the new value is passed by address, as with __atomic_store().
 */
void ossl_rcu_assign_uptr(void **p, void **v)
{
    ATOMIC_STORE(pvoid, p, v, __ATOMIC_RELEASE);
}
 639
 640 static CRYPTO_ONCE rcu_init_once = CRYPTO_ONCE_STATIC_INIT;
 641
 642 CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers)
 643 {
 644     struct rcu_lock_st *new;
 645
 646     if (!CRYPTO_THREAD_run_once(&rcu_init_once, ossl_rcu_init))
 647         return NULL;
 648
 649     if (num_writers < 1)
 650         num_writers = 1;
 651
 652     new = OPENSSL_zalloc(sizeof(*new));
 653     if (new == NULL)
 654         return NULL;
 655
 656     pthread_mutex_init(&new->write_lock, NULL);
 657     pthread_mutex_init(&new->prior_lock, NULL);
 658     pthread_mutex_init(&new->alloc_lock, NULL);
 659     pthread_cond_init(&new->prior_signal, NULL);
 660     pthread_cond_init(&new->alloc_signal, NULL);
 661     new->qp_group = allocate_new_qp_group(new, num_writers + 1);
 662     if (new->qp_group == NULL) {
 663         OPENSSL_free(new);
 664         new = NULL;
 665     }
 666     return new;
 667 }
 668
 669 void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock)
 670 {
 671     struct rcu_lock_st *rlock = (struct rcu_lock_st *)lock;
 672
 673     if (lock == NULL)
 674         return;
 675
 676     /* make sure we're synchronized */
 677     ossl_synchronize_rcu(rlock);
 678
 679     OPENSSL_free(rlock->qp_group);
 680     /* There should only be a single qp left now */
 681     OPENSSL_free(rlock);
 682 }
 683
 684 CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void)
 685 {
 686 # ifdef USE_RWLOCK
 687     CRYPTO_RWLOCK *lock;
 688
 689     if ((lock = OPENSSL_zalloc(sizeof(pthread_rwlock_t))) == NULL)
 690         /* Don't set error, to avoid recursion blowup. */
 691         return NULL;
 692
 693     if (pthread_rwlock_init(lock, NULL) != 0) {
 694         OPENSSL_free(lock);
 695         return NULL;
 696     }
 697 # else
 698     pthread_mutexattr_t attr;
 699     CRYPTO_RWLOCK *lock;
 700
 701     if ((lock = OPENSSL_zalloc(sizeof(pthread_mutex_t))) == NULL)
 702         /* Don't set error, to avoid recursion blowup. */
 703         return NULL;
 704
 705     /*
 706      * We don't use recursive mutexes, but try to catch errors if we do.
 707      */
 708     pthread_mutexattr_init(&attr);
 709 #  if !defined (__TANDEM) && !defined (_SPT_MODEL_)
 710 #   if !defined(NDEBUG) && !defined(OPENSSL_NO_MUTEX_ERRORCHECK)
 711     pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
 712 #   endif
 713 #  else
 714     /* The SPT Thread Library does not define MUTEX attributes. */
 715 #  endif
 716
 717     if (pthread_mutex_init(lock, &attr) != 0) {
 718         pthread_mutexattr_destroy(&attr);
 719         OPENSSL_free(lock);
 720         return NULL;
 721     }
 722
 723     pthread_mutexattr_destroy(&attr);
 724 # endif
 725
 726     return lock;
 727 }
 728
 729 __owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock)
 730 {
 731 # ifdef USE_RWLOCK
 732     if (pthread_rwlock_rdlock(lock) != 0)
 733         return 0;
 734 # else
 735     if (pthread_mutex_lock(lock) != 0) {
 736         assert(errno != EDEADLK && errno != EBUSY);
 737         return 0;
 738     }
 739 # endif
 740
 741     return 1;
 742 }
 743
 744 __owur int CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK *lock)
 745 {
 746 # ifdef USE_RWLOCK
 747     if (pthread_rwlock_wrlock(lock) != 0)
 748         return 0;
 749 # else
 750     if (pthread_mutex_lock(lock) != 0) {
 751         assert(errno != EDEADLK && errno != EBUSY);
 752         return 0;
 753     }
 754 # endif
 755
 756     return 1;
 757 }
 758
 759 int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock)
 760 {
 761 # ifdef USE_RWLOCK
 762     if (pthread_rwlock_unlock(lock) != 0)
 763         return 0;
 764 # else
 765     if (pthread_mutex_unlock(lock) != 0) {
 766         assert(errno != EPERM);
 767         return 0;
 768     }
 769 # endif
 770
 771     return 1;
 772 }
 773
 774 void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock)
 775 {
 776     if (lock == NULL)
 777         return;
 778
 779 # ifdef USE_RWLOCK
 780     pthread_rwlock_destroy(lock);
 781 # else
 782     pthread_mutex_destroy(lock);
 783 # endif
 784     OPENSSL_free(lock);
 785
 786     return;
 787 }
 788
 789 int CRYPTO_THREAD_run_once(CRYPTO_ONCE *once, void (*init)(void))
 790 {
 791     if (pthread_once(once, init) != 0)
 792         return 0;
 793
 794     return 1;
 795 }
 796
 797 int CRYPTO_THREAD_init_local(CRYPTO_THREAD_LOCAL *key, void (*cleanup)(void *))
 798 {
 799     if (pthread_key_create(key, cleanup) != 0)
 800         return 0;
 801
 802     return 1;
 803 }
 804
/* Return the calling thread's value for |key| (pthread_getspecific) */
void *CRYPTO_THREAD_get_local(CRYPTO_THREAD_LOCAL *key)
{
    return pthread_getspecific(*key);
}
 809
 810 int CRYPTO_THREAD_set_local(CRYPTO_THREAD_LOCAL *key, void *val)
 811 {
 812     if (pthread_setspecific(*key, val) != 0)
 813         return 0;
 814
 815     return 1;
 816 }
 817
 818 int CRYPTO_THREAD_cleanup_local(CRYPTO_THREAD_LOCAL *key)
 819 {
 820     if (pthread_key_delete(*key) != 0)
 821         return 0;
 822
 823     return 1;
 824 }
 825
/* Return the identifier of the calling thread (pthread_self) */
CRYPTO_THREAD_ID CRYPTO_THREAD_get_current_id(void)
{
    return pthread_self();
}
 830
/* Return the result of pthread_equal(): non-zero if a and b are equal */
int CRYPTO_THREAD_compare_id(CRYPTO_THREAD_ID a, CRYPTO_THREAD_ID b)
{
    return pthread_equal(a, b);
}
 835
 836 int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock)
 837 {
 838 # if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
 839     if (__atomic_is_lock_free(sizeof(*val), val)) {
 840         *ret = __atomic_add_fetch(val, amount, __ATOMIC_ACQ_REL);
 841         return 1;
 842     }
 843 # elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
 844     /* This will work for all future Solaris versions. */
 845     if (ret != NULL) {
 846         *ret = atomic_add_int_nv((volatile unsigned int *)val, amount);
 847         return 1;
 848     }
 849 # endif
 850     if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
 851         return 0;
 852
 853     *val += amount;
 854     *ret  = *val;
 855
 856     if (!CRYPTO_THREAD_unlock(lock))
 857         return 0;
 858
 859     return 1;
 860 }
 861
 862 int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
 863                      CRYPTO_RWLOCK *lock)
 864 {
 865 # if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
 866     if (__atomic_is_lock_free(sizeof(*val), val)) {
 867         *ret = __atomic_or_fetch(val, op, __ATOMIC_ACQ_REL);
 868         return 1;
 869     }
 870 # elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
 871     /* This will work for all future Solaris versions. */
 872     if (ret != NULL) {
 873         *ret = atomic_or_64_nv(val, op);
 874         return 1;
 875     }
 876 # endif
 877     if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
 878         return 0;
 879     *val |= op;
 880     *ret  = *val;
 881
 882     if (!CRYPTO_THREAD_unlock(lock))
 883         return 0;
 884
 885     return 1;
 886 }
 887
 888 int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock)
 889 {
 890 # if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
 891     if (__atomic_is_lock_free(sizeof(*val), val)) {
 892         __atomic_load(val, ret, __ATOMIC_ACQUIRE);
 893         return 1;
 894     }
 895 # elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
 896     /* This will work for all future Solaris versions. */
 897     if (ret != NULL) {
 898         *ret = atomic_or_64_nv(val, 0);
 899         return 1;
 900     }
 901 # endif
 902     if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
 903         return 0;
 904     *ret  = *val;
 905     if (!CRYPTO_THREAD_unlock(lock))
 906         return 0;
 907
 908     return 1;
 909 }
 910
 911 int CRYPTO_atomic_load_int(int *val, int *ret, CRYPTO_RWLOCK *lock)
 912 {
 913 # if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS)
 914     if (__atomic_is_lock_free(sizeof(*val), val)) {
 915         __atomic_load(val, ret, __ATOMIC_ACQUIRE);
 916         return 1;
 917     }
 918 # elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
 919     /* This will work for all future Solaris versions. */
 920     if (ret != NULL) {
 921         *ret = (int *)atomic_or_uint_nv((unsigned int *)val, 0);
 922         return 1;
 923     }
 924 # endif
 925     if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
 926         return 0;
 927     *ret  = *val;
 928     if (!CRYPTO_THREAD_unlock(lock))
 929         return 0;
 930
 931     return 1;
 932 }
 933
# ifndef FIPS_MODULE
/* Does nothing in this implementation and always returns 1 */
int openssl_init_fork_handlers(void)
{
    return 1;
}
# endif /* FIPS_MODULE */
 940
/* Return the current process id (getpid()) as the fork identifier */
int openssl_get_fork_id(void)
{
    return getpid();
}
 945 #endif