sys/sys/smr.h

   1 /*-
   2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
   3  *
   4  * Copyright (c) 2019, 2020 Jeffrey Roberson <jeff@FreeBSD.org>
   5  *
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions
   8  * are met:
   9  * 1. Redistributions of source code must retain the above copyright
  10  *    notice unmodified, this list of conditions, and the following
  11  *    disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26  *
  27  * $FreeBSD$
  28  *
  29  */
  30
  31 #ifndef _SYS_SMR_H_
  32 #define _SYS_SMR_H_
  33
  34 #include <sys/_smr.h>
  35
/*
 * Safe memory reclamation.  See subr_smr.c for a description of the
 * algorithm, and smr_types.h for macros to define and access SMR-protected
 * data structures.
 *
 * Readers synchronize with smr_enter()/smr_exit() and writers may either
 * free directly to an SMR UMA zone or use smr_synchronize() or smr_wait().
 */
  44
  45 /*
  46  * Modular arithmetic for comparing sequence numbers that have
  47  * potentially wrapped.  Copied from tcp_seq.h.
  48  */
  49 #define SMR_SEQ_LT(a, b)        ((smr_delta_t)((a)-(b)) < 0)
  50 #define SMR_SEQ_LEQ(a, b)       ((smr_delta_t)((a)-(b)) <= 0)
  51 #define SMR_SEQ_GT(a, b)        ((smr_delta_t)((a)-(b)) > 0)
  52 #define SMR_SEQ_GEQ(a, b)       ((smr_delta_t)((a)-(b)) >= 0)
  53 #define SMR_SEQ_DELTA(a, b)     ((smr_delta_t)((a)-(b)))
  54 #define SMR_SEQ_MIN(a, b)       (SMR_SEQ_LT((a), (b)) ? (a) : (b))
  55 #define SMR_SEQ_MAX(a, b)       (SMR_SEQ_GT((a), (b)) ? (a) : (b))
  56
  57 #define SMR_SEQ_INVALID         0
  58
/*
 * Write-side portion of the shared state.  The anonymous struct and
 * _pair occupy the same storage, so the sequence number and the tick can
 * also be accessed together through the single 64-bit _pair member.
 * NOTE(review): this presumes smr_seq_t and int are 32 bits each --
 * confirm against sys/_smr.h.
 */
union s_wr {
        struct {
                smr_seq_t       seq;    /* Current write sequence #. */
                int             ticks;  /* tick of last update (LAZY) */
        };
        uint64_t        _pair;          /* seq and ticks as one word. */
};

/*
 * Shared SMR state.  Each per-cpu struct smr points at one of these
 * through its c_shared member.
 */
struct smr_shared {
        const char      *s_name;        /* Name for debugging/reporting. */
        union s_wr      s_wr;           /* Write sequence */
        smr_seq_t       s_rd_seq;       /* Minimum observed read sequence. */
};
typedef struct smr_shared *smr_shared_t;
  73
/*
 * Per-cpu SMR state.  The enter routines in this header store the
 * observed write sequence in c_seq and the exit routines reset it to
 * SMR_SEQ_INVALID, so a non-zero c_seq marks a CPU inside a read section.
 */
struct smr {
        smr_seq_t       c_seq;          /* Current observed sequence. */
        smr_shared_t    c_shared;       /* Shared SMR state. */
        int             c_deferred;     /* Deferred advance counter. */
        int             c_limit;        /* Deferred advance limit. */
        int             c_flags;        /* SMR Configuration (SMR_* flags). */
};
  82
/*
 * Configuration flags tested against the c_flags member of struct smr.
 * Presumably these are also the values accepted by the 'flags' argument
 * of smr_create() -- confirm in subr_smr.c.
 */
#define SMR_LAZY        0x0001          /* Higher latency write, fast read. */
#define SMR_DEFERRED    0x0002          /* Aggregate updates to wr_seq. */
  85
  86 /*
  87  * Return the current write sequence number.  This is not the same as the
  88  * current goal which may be in the future.
  89  */
  90 static inline smr_seq_t
  91 smr_shared_current(smr_shared_t s)
  92 {
  93
  94         return (atomic_load_int(&s->s_wr.seq));
  95 }
  96
  97 static inline smr_seq_t
  98 smr_current(smr_t smr)
  99 {
 100
 101         return (smr_shared_current(zpcpu_get(smr)->c_shared));
 102 }
 103
 104 /*
 105  * Enter a read section.
 106  */
 107 static inline void
 108 smr_enter(smr_t smr)
 109 {
 110
 111         critical_enter();
 112         smr = zpcpu_get(smr);
 113         KASSERT((smr->c_flags & SMR_LAZY) == 0,
 114             ("smr_enter(%s) lazy smr.", smr->c_shared->s_name));
 115         KASSERT(smr->c_seq == 0,
 116             ("smr_enter(%s) does not support recursion.",
 117             smr->c_shared->s_name));
 118
 119         /*
 120          * Store the current observed write sequence number in our
 121          * per-cpu state so that it can be queried via smr_poll().
 122          * Frees that are newer than this stored value will be
 123          * deferred until we call smr_exit().
 124          *
 125          * Subsequent loads must not be re-ordered with the store.  On
 126          * x86 platforms, any locked instruction will provide this
 127          * guarantee, so as an optimization we use a single operation to
 128          * both store the cached write sequence number and provide the
 129          * requisite barrier, taking advantage of the fact that
 130          * SMR_SEQ_INVALID is zero.
 131          *
 132          * It is possible that a long delay between loading the wr_seq
 133          * and storing the c_seq could create a situation where the
 134          * rd_seq advances beyond our stored c_seq.  In this situation
 135          * only the observed wr_seq is stale, the fence still orders
 136          * the load.  See smr_poll() for details on how this condition
 137          * is detected and handled there.
 138          */
 139 #if defined(__amd64__) || defined(__i386__)
 140         atomic_add_acq_int(&smr->c_seq, smr_shared_current(smr->c_shared));
 141 #else
 142         atomic_store_int(&smr->c_seq, smr_shared_current(smr->c_shared));
 143         atomic_thread_fence_seq_cst();
 144 #endif
 145 }
 146
 147 /*
 148  * Exit a read section.
 149  */
 150 static inline void
 151 smr_exit(smr_t smr)
 152 {
 153
 154         smr = zpcpu_get(smr);
 155         CRITICAL_ASSERT(curthread);
 156         KASSERT((smr->c_flags & SMR_LAZY) == 0,
 157             ("smr_exit(%s) lazy smr.", smr->c_shared->s_name));
 158         KASSERT(smr->c_seq != SMR_SEQ_INVALID,
 159             ("smr_exit(%s) not in a smr section.", smr->c_shared->s_name));
 160
 161         /*
 162          * Clear the recorded sequence number.  This allows poll() to
 163          * detect CPUs not in read sections.
 164          *
 165          * Use release semantics to retire any stores before the sequence
 166          * number is cleared.
 167          */
 168         atomic_store_rel_int(&smr->c_seq, SMR_SEQ_INVALID);
 169         critical_exit();
 170 }
 171
 172 /*
 173  * Enter a lazy smr section.  This is used for read-mostly state that
 174  * can tolerate a high free latency.
 175  */
 176 static inline void
 177 smr_lazy_enter(smr_t smr)
 178 {
 179
 180         critical_enter();
 181         smr = zpcpu_get(smr);
 182         KASSERT((smr->c_flags & SMR_LAZY) != 0,
 183             ("smr_lazy_enter(%s) non-lazy smr.", smr->c_shared->s_name));
 184         KASSERT(smr->c_seq == 0,
 185             ("smr_lazy_enter(%s) does not support recursion.",
 186             smr->c_shared->s_name));
 187
 188         /*
 189          * This needs no serialization.  If an interrupt occurs before we
 190          * assign sr_seq to c_seq any speculative loads will be discarded.
 191          * If we assign a stale wr_seq value due to interrupt we use the
 192          * same algorithm that renders smr_enter() safe.
 193          */
 194         atomic_store_int(&smr->c_seq, smr_shared_current(smr->c_shared));
 195 }
 196
 197 /*
 198  * Exit a lazy smr section.  This is used for read-mostly state that
 199  * can tolerate a high free latency.
 200  */
 201 static inline void
 202 smr_lazy_exit(smr_t smr)
 203 {
 204
 205         smr = zpcpu_get(smr);
 206         CRITICAL_ASSERT(curthread);
 207         KASSERT((smr->c_flags & SMR_LAZY) != 0,
 208             ("smr_lazy_enter(%s) non-lazy smr.", smr->c_shared->s_name));
 209         KASSERT(smr->c_seq != SMR_SEQ_INVALID,
 210             ("smr_lazy_exit(%s) not in a smr section.", smr->c_shared->s_name));
 211
 212         /*
 213          * All loads/stores must be retired before the sequence becomes
 214          * visible.  The fence compiles away on amd64.  Another
 215          * alternative would be to omit the fence but store the exit
 216          * time and wait 1 tick longer.
 217          */
 218         atomic_thread_fence_rel();
 219         atomic_store_int(&smr->c_seq, SMR_SEQ_INVALID);
 220         critical_exit();
 221 }
 222
/*
 * Advances the write sequence number.  Returns the sequence number
 * required to ensure that all modifications are visible to readers.
 * The returned value is suitable as the 'goal' argument of smr_poll()
 * and smr_wait(); smr_synchronize() uses it that way.
 */
smr_seq_t smr_advance(smr_t smr);

/*
 * Returns true if a goal sequence has been reached.  If
 * wait is true this will busy loop until success.
 */
bool smr_poll(smr_t smr, smr_seq_t goal, bool wait);

/*
 * Create a new SMR context.
 * NOTE(review): 'name' is presumably the debugging/reporting name kept
 * in s_name, 'limit' the deferred advance limit and 'flags' a mask of
 * SMR_* configuration flags -- confirm against subr_smr.c.
 */
smr_t smr_create(const char *name, int limit, int flags);

/* Destroy the context. */
void smr_destroy(smr_t smr);
 240
 241 /*
 242  * Blocking wait for all readers to observe 'goal'.
 243  */
 244 static inline bool
 245 smr_wait(smr_t smr, smr_seq_t goal)
 246 {
 247
 248         return (smr_poll(smr, goal, true));
 249 }
 250
 251 /*
 252  * Synchronize advances the write sequence and returns when all
 253  * readers have observed it.
 254  *
 255  * If your application can cache a sequence number returned from
 256  * smr_advance() and poll or wait at a later time there will
 257  * be less chance of busy looping while waiting for readers.
 258  */
 259 static inline void
 260 smr_synchronize(smr_t smr)
 261 {
 262
 263         smr_wait(smr, smr_advance(smr));
 264 }
 265
/*
 * Initialization entry point (going by the name; defined outside this
 * header).  Only at startup.
 */
void smr_init(void);
 268
 269 #endif  /* _SYS_SMR_H_ */