]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/netinet/tcp_fastopen.c
hpts: make stacks responsible for tcp_hpts_init()
[FreeBSD/FreeBSD.git] / sys / netinet / tcp_fastopen.c
1 /*-
2  * Copyright (c) 2015-2017 Patrick Kelsey
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26
27 /*
28  * This is an implementation of TCP Fast Open (TFO) [RFC7413]. To include
29  * this code, add the following line to your kernel config:
30  *
31  * options TCP_RFC7413
32  *
33  *
34  * The generated TFO cookies are the 64-bit output of
35  * SipHash24(key=<16-byte-key>, msg=<client-ip>).  Multiple concurrent valid
36  * keys are supported so that time-based rolling cookie invalidation
37  * policies can be implemented in the system.  The default number of
38  * concurrent keys is 2.  This can be adjusted in the kernel config as
39  * follows:
40  *
41  * options TCP_RFC7413_MAX_KEYS=<num-keys>
42  *
43  *
44  * In addition to the facilities defined in RFC7413, this implementation
45  * supports a pre-shared key (PSK) mode of operation in which the TFO server
46  * requires the client to be in possession of a shared secret in order for
47  * the client to be able to successfully open TFO connections with the
48  * server.  This is useful, for example, in environments where TFO servers
49  * are exposed to both internal and external clients and only wish to allow
50  * TFO connections from internal clients.
51  *
52  * In the PSK mode of operation, the server generates and sends TFO cookies
53  * to requesting clients as usual.  However, when validating cookies
54  * received in TFO SYNs from clients, the server requires the
55  * client-supplied cookie to equal SipHash24(key=<16-byte-psk>,
56  * msg=<cookie-sent-to-client>).
57  *
58  * Multiple concurrent valid pre-shared keys are supported so that
59  * time-based rolling PSK invalidation policies can be implemented in the
60  * system.  The default number of concurrent pre-shared keys is 2.  This can
61  * be adjusted in the kernel config as follows:
62  *
63  * options TCP_RFC7413_MAX_PSKS=<num-psks>
64  *
65  *
66  * The following TFO-specific sysctls are defined:
67  *
68  * net.inet.tcp.fastopen.acceptany (RW, default 0)
69  *     When non-zero, all client-supplied TFO cookies will be considered to
70  *     be valid.
71  *
72  * net.inet.tcp.fastopen.autokey (RW, default 120)
73  *     When this and net.inet.tcp.fastopen.server_enable are non-zero, a new
74  *     key will be automatically generated after this many seconds.
75  *
76  * net.inet.tcp.fastopen.ccache_bucket_limit
77  *                     (RWTUN, default TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT)
78  *     The maximum number of entries in a client cookie cache bucket.
79  *
80  * net.inet.tcp.fastopen.ccache_buckets
81  *                          (RDTUN, default TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT)
82  *     The number of client cookie cache buckets.
83  *
84  * net.inet.tcp.fastopen.ccache_list (RO)
85  *     Print the client cookie cache.
86  *
87  * net.inet.tcp.fastopen.client_enable (RW, default 1)
88  *     When zero, no new active (i.e., client) TFO connections can be
89  *     created.  On the transition from enabled to disabled, the client
90  *     cookie cache is cleared and disabled.  The transition from enabled to
91  *     disabled does not affect any active TFO connections in progress; it
92  *     only prevents new ones from being made.
93  *
94  * net.inet.tcp.fastopen.keylen (RD)
95  *     The key length in bytes.
96  *
97  * net.inet.tcp.fastopen.maxkeys (RD)
98  *     The maximum number of keys supported.
99  *
100  * net.inet.tcp.fastopen.maxpsks (RD)
101  *     The maximum number of pre-shared keys supported.
102  *
103  * net.inet.tcp.fastopen.numkeys (RD)
104  *     The current number of keys installed.
105  *
106  * net.inet.tcp.fastopen.numpsks (RD)
107  *     The current number of pre-shared keys installed.
108  *
109  * net.inet.tcp.fastopen.path_disable_time
110  *                          (RW, default TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT)
111  *     When a failure occurs while trying to create a new active (i.e.,
112  *     client) TFO connection, new active connections on the same path, as
113  *     determined by the tuple {client_ip, server_ip, server_port}, will be
114  *     forced to be non-TFO for this many seconds.  Note that the path
115  *     disable mechanism relies on state stored in client cookie cache
116  *     entries, so it is possible for the disable time for a given path to
117  *     be reduced if the corresponding client cookie cache entry is reused
118  *     due to resource pressure before the disable period has elapsed.
119  *
120  * net.inet.tcp.fastopen.psk_enable (RW, default 0)
121  *     When non-zero, pre-shared key (PSK) mode is enabled for all TFO
122  *     servers.  On the transition from enabled to disabled, all installed
123  *     pre-shared keys are removed.
124  *
125  * net.inet.tcp.fastopen.server_enable (RW, default 0)
126  *     When zero, no new passive (i.e., server) TFO connections can be
127  *     created.  On the transition from enabled to disabled, all installed
128  *     keys and pre-shared keys are removed.  On the transition from
129  *     disabled to enabled, if net.inet.tcp.fastopen.autokey is non-zero and
130  *     there are no keys installed, a new key will be generated immediately.
131  *     The transition from enabled to disabled does not affect any passive
132  *     TFO connections in progress; it only prevents new ones from being
133  *     made.
134  *
135  * net.inet.tcp.fastopen.setkey (WR)
136  *     Install a new key by writing net.inet.tcp.fastopen.keylen bytes to
137  *     this sysctl.
138  *
139  * net.inet.tcp.fastopen.setpsk (WR)
140  *     Install a new pre-shared key by writing net.inet.tcp.fastopen.keylen
141  *     bytes to this sysctl.
142  *
143  * In order for TFO connections to be created via a listen socket, that
144  * socket must have the TCP_FASTOPEN socket option set on it.  This option
145  * can be set on the socket either before or after the listen() is invoked.
146  * Clearing this option on a listen socket after it has been set has no
147  * effect on existing TFO connections or TFO connections in progress; it
148  * only prevents new TFO connections from being made.
149  *
150  * For passively-created sockets, the TCP_FASTOPEN socket option can be
151  * queried to determine whether the connection was established using TFO.
152  * Note that connections that are established via a TFO SYN, but that fall
153  * back to using a non-TFO SYN|ACK will have the TCP_FASTOPEN socket option
154  * set.
155  *
156  * Per the RFC, this implementation limits the number of TFO connections
157  * that can be in the SYN_RECEIVED state on a per listen-socket basis.
158  * Whenever this limit is exceeded, requests for new TFO connections are
159  * serviced as non-TFO requests.  Without such a limit, given a valid TFO
160  * cookie, an attacker could keep the listen queue in an overflow condition
161  * using a TFO SYN flood.  This implementation sets the limit at half the
162  * configured listen backlog.
163  *
164  */
165
166 #include <sys/cdefs.h>
167 #include "opt_inet.h"
168
169 #include <sys/param.h>
170 #include <sys/jail.h>
171 #include <sys/kernel.h>
172 #include <sys/hash.h>
173 #include <sys/limits.h>
174 #include <sys/lock.h>
175 #include <sys/proc.h>
176 #include <sys/rmlock.h>
177 #include <sys/sbuf.h>
178 #include <sys/socket.h>
179 #include <sys/socketvar.h>
180 #include <sys/sysctl.h>
181 #include <sys/systm.h>
182
183 #include <crypto/siphash/siphash.h>
184
185 #include <net/vnet.h>
186
187 #include <netinet/in.h>
188 #include <netinet/in_pcb.h>
189 #include <netinet/tcp_var.h>
190 #include <netinet/tcp_fastopen.h>
191
/* TFO keys are fed directly to SipHash, so they share its key length. */
#define	TCP_FASTOPEN_KEY_LEN	SIPHASH_KEY_LENGTH

/* Pre-shared keys live in arrays sized by TCP_FASTOPEN_KEY_LEN. */
#if TCP_FASTOPEN_KEY_LEN != TCP_FASTOPEN_PSK_LEN
#error TCP_FASTOPEN_PSK_LEN must be equal to TCP_FASTOPEN_KEY_LEN
#endif

/*
 * A PSK-mode setsockopt() parks the PSK in tcpcb.t_tfo_cookie.client until
 * the connect occurs, so the cookie storage has to be able to hold a PSK.
 */
#if TCP_FASTOPEN_PSK_LEN > TCP_FASTOPEN_MAX_COOKIE_LEN
#error TCP_FASTOPEN_MAX_COOKIE_LEN must be >= TCP_FASTOPEN_PSK_LEN
#endif

/* Client cookie cache defaults. */
#define	TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT	16
#define	TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT		2048 /* must be power of 2 */

#define	TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT		900 /* seconds */

/*
 * Number of concurrently valid keys: 2 unless the kernel config supplies
 * TCP_RFC7413_MAX_KEYS >= 1, and never more than 10.
 */
#if !defined(TCP_RFC7413_MAX_KEYS)
#define	TCP_FASTOPEN_MAX_KEYS	2
#elif (TCP_RFC7413_MAX_KEYS < 1)
#define	TCP_FASTOPEN_MAX_KEYS	2
#elif (TCP_RFC7413_MAX_KEYS > 10)
#define	TCP_FASTOPEN_MAX_KEYS	10
#else
#define	TCP_FASTOPEN_MAX_KEYS	TCP_RFC7413_MAX_KEYS
#endif

/*
 * Number of concurrently valid pre-shared keys: 2 unless the kernel config
 * supplies TCP_RFC7413_MAX_PSKS >= 1, and never more than 10.
 */
#if !defined(TCP_RFC7413_MAX_PSKS)
#define	TCP_FASTOPEN_MAX_PSKS	2
#elif (TCP_RFC7413_MAX_PSKS < 1)
#define	TCP_FASTOPEN_MAX_PSKS	2
#elif (TCP_RFC7413_MAX_PSKS > 10)
#define	TCP_FASTOPEN_MAX_PSKS	10
#else
#define	TCP_FASTOPEN_MAX_PSKS	TCP_RFC7413_MAX_PSKS
#endif
232
233 struct tcp_fastopen_keylist {
234         unsigned int newest;
235         unsigned int newest_psk;
236         uint8_t key[TCP_FASTOPEN_MAX_KEYS][TCP_FASTOPEN_KEY_LEN];
237         uint8_t psk[TCP_FASTOPEN_MAX_PSKS][TCP_FASTOPEN_KEY_LEN];
238 };
239
240 struct tcp_fastopen_callout {
241         struct callout c;
242         struct vnet *v;
243 };
244
245 static struct tcp_fastopen_ccache_entry *tcp_fastopen_ccache_lookup(
246     struct in_conninfo *, struct tcp_fastopen_ccache_bucket **);
247 static struct tcp_fastopen_ccache_entry *tcp_fastopen_ccache_create(
248     struct tcp_fastopen_ccache_bucket *, struct in_conninfo *, uint16_t, uint8_t,
249     uint8_t *);
250 static void tcp_fastopen_ccache_bucket_trim(struct tcp_fastopen_ccache_bucket *,
251     unsigned int);
252 static void tcp_fastopen_ccache_entry_drop(struct tcp_fastopen_ccache_entry *,
253     struct tcp_fastopen_ccache_bucket *);
254
255 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, fastopen, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
256     "TCP Fast Open");
257
258 VNET_DEFINE_STATIC(int, tcp_fastopen_acceptany) = 0;
259 #define V_tcp_fastopen_acceptany        VNET(tcp_fastopen_acceptany)
260 SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, acceptany,
261     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_acceptany), 0,
262     "Accept any non-empty cookie");
263
264 VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_autokey) = 120;
265 #define V_tcp_fastopen_autokey  VNET(tcp_fastopen_autokey)
266 static int sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS);
267 SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, autokey,
268     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
269     NULL, 0, &sysctl_net_inet_tcp_fastopen_autokey, "IU",
270     "Number of seconds between auto-generation of a new key; zero disables");
271
272 static int sysctl_net_inet_tcp_fastopen_ccache_bucket_limit(SYSCTL_HANDLER_ARGS);
273 SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, ccache_bucket_limit,
274     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_NEEDGIANT,
275     NULL, 0, &sysctl_net_inet_tcp_fastopen_ccache_bucket_limit, "IU",
276     "Max entries per bucket in client cookie cache");
277
278 VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_ccache_buckets) =
279     TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT;
280 #define V_tcp_fastopen_ccache_buckets VNET(tcp_fastopen_ccache_buckets)
281 SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, ccache_buckets,
282     CTLFLAG_VNET | CTLFLAG_RDTUN, &VNET_NAME(tcp_fastopen_ccache_buckets), 0,
283     "Client cookie cache number of buckets (power of 2)");
284
285 VNET_DEFINE(unsigned int, tcp_fastopen_client_enable) = 1;
286 static int sysctl_net_inet_tcp_fastopen_client_enable(SYSCTL_HANDLER_ARGS);
287 SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, client_enable,
288     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
289     NULL, 0, &sysctl_net_inet_tcp_fastopen_client_enable, "IU",
290     "Enable/disable TCP Fast Open client functionality");
291
292 SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, keylen,
293     CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_KEY_LEN,
294     "Key length in bytes");
295
296 SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxkeys,
297     CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_KEYS,
298     "Maximum number of keys supported");
299
300 SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxpsks,
301     CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_PSKS,
302     "Maximum number of pre-shared keys supported");
303
304 VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_numkeys) = 0;
305 #define V_tcp_fastopen_numkeys  VNET(tcp_fastopen_numkeys)
306 SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numkeys,
307     CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numkeys), 0,
308     "Number of keys installed");
309
310 VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_numpsks) = 0;
311 #define V_tcp_fastopen_numpsks  VNET(tcp_fastopen_numpsks)
312 SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numpsks,
313     CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numpsks), 0,
314     "Number of pre-shared keys installed");
315
316 VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_path_disable_time) =
317     TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT;
318 #define V_tcp_fastopen_path_disable_time VNET(tcp_fastopen_path_disable_time)
319 SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, path_disable_time,
320     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_path_disable_time), 0,
321     "Seconds a TFO failure disables a {client_ip, server_ip, server_port} path");
322
323 VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_psk_enable) = 0;
324 #define V_tcp_fastopen_psk_enable       VNET(tcp_fastopen_psk_enable)
325 static int sysctl_net_inet_tcp_fastopen_psk_enable(SYSCTL_HANDLER_ARGS);
326 SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, psk_enable,
327     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
328     NULL, 0, &sysctl_net_inet_tcp_fastopen_psk_enable, "IU",
329     "Enable/disable TCP Fast Open server pre-shared key mode");
330
331 VNET_DEFINE(unsigned int, tcp_fastopen_server_enable) = 0;
332 static int sysctl_net_inet_tcp_fastopen_server_enable(SYSCTL_HANDLER_ARGS);
333 SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, server_enable,
334     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
335     NULL, 0, &sysctl_net_inet_tcp_fastopen_server_enable, "IU",
336     "Enable/disable TCP Fast Open server functionality");
337
338 static int sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS);
339 SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setkey,
340     CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR | CTLFLAG_MPSAFE,
341     NULL, 0, &sysctl_net_inet_tcp_fastopen_setkey, "",
342     "Install a new key");
343
344 static int sysctl_net_inet_tcp_fastopen_setpsk(SYSCTL_HANDLER_ARGS);
345 SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setpsk,
346     CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR | CTLFLAG_MPSAFE,
347     NULL, 0, &sysctl_net_inet_tcp_fastopen_setpsk, "",
348     "Install a new pre-shared key");
349
350 static int sysctl_net_inet_tcp_fastopen_ccache_list(SYSCTL_HANDLER_ARGS);
351 SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, ccache_list,
352     CTLFLAG_VNET | CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE,
353     NULL, 0, sysctl_net_inet_tcp_fastopen_ccache_list, "A",
354     "List of all client cookie cache entries");
355
356 VNET_DEFINE_STATIC(struct rmlock, tcp_fastopen_keylock);
357 #define V_tcp_fastopen_keylock  VNET(tcp_fastopen_keylock)
358
359 #define TCP_FASTOPEN_KEYS_RLOCK(t)      rm_rlock(&V_tcp_fastopen_keylock, (t))
360 #define TCP_FASTOPEN_KEYS_RUNLOCK(t)    rm_runlock(&V_tcp_fastopen_keylock, (t))
361 #define TCP_FASTOPEN_KEYS_WLOCK()       rm_wlock(&V_tcp_fastopen_keylock)
362 #define TCP_FASTOPEN_KEYS_WUNLOCK()     rm_wunlock(&V_tcp_fastopen_keylock)
363
364 VNET_DEFINE_STATIC(struct tcp_fastopen_keylist, tcp_fastopen_keys);
365 #define V_tcp_fastopen_keys     VNET(tcp_fastopen_keys)
366
367 VNET_DEFINE_STATIC(struct tcp_fastopen_callout, tcp_fastopen_autokey_ctx);
368 #define V_tcp_fastopen_autokey_ctx      VNET(tcp_fastopen_autokey_ctx)
369
370 VNET_DEFINE_STATIC(uma_zone_t, counter_zone);
371 #define V_counter_zone                  VNET(counter_zone)
372
373 static MALLOC_DEFINE(M_TCP_FASTOPEN_CCACHE, "tfo_ccache", "TFO client cookie cache buckets");
374
375 VNET_DEFINE_STATIC(struct tcp_fastopen_ccache, tcp_fastopen_ccache);
376 #define V_tcp_fastopen_ccache   VNET(tcp_fastopen_ccache)
377
378 #define CCB_LOCK(ccb)           mtx_lock(&(ccb)->ccb_mtx)
379 #define CCB_UNLOCK(ccb)         mtx_unlock(&(ccb)->ccb_mtx)
380 #define CCB_LOCK_ASSERT(ccb)    mtx_assert(&(ccb)->ccb_mtx, MA_OWNED)
381
382 void
383 tcp_fastopen_init(void)
384 {
385         unsigned int i;
386
387         V_counter_zone = uma_zcreate("tfo", sizeof(unsigned int),
388             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
389         rm_init(&V_tcp_fastopen_keylock, "tfo_keylock");
390         callout_init_rm(&V_tcp_fastopen_autokey_ctx.c,
391             &V_tcp_fastopen_keylock, 0);
392         V_tcp_fastopen_autokey_ctx.v = curvnet;
393         V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
394         V_tcp_fastopen_keys.newest_psk = TCP_FASTOPEN_MAX_PSKS - 1;
395
396         TUNABLE_INT_FETCH("net.inet.tcp.fastopen.ccache_bucket_limit",
397             &V_tcp_fastopen_ccache.bucket_limit);
398         if (V_tcp_fastopen_ccache.bucket_limit == 0)
399                 V_tcp_fastopen_ccache.bucket_limit =
400                     TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT;
401
402         /* May already be non-zero if kernel tunable was set */
403         if ((V_tcp_fastopen_ccache_buckets == 0) ||
404             !powerof2(V_tcp_fastopen_ccache_buckets))
405                 V_tcp_fastopen_ccache.buckets =
406                         TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT;
407         else
408                 V_tcp_fastopen_ccache.buckets = V_tcp_fastopen_ccache_buckets;
409
410         V_tcp_fastopen_ccache.mask = V_tcp_fastopen_ccache.buckets - 1;
411         V_tcp_fastopen_ccache.secret = arc4random();
412
413         V_tcp_fastopen_ccache.base = malloc(V_tcp_fastopen_ccache.buckets *
414             sizeof(struct tcp_fastopen_ccache_bucket), M_TCP_FASTOPEN_CCACHE,
415             M_WAITOK | M_ZERO);
416
417         for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
418                 TAILQ_INIT(&V_tcp_fastopen_ccache.base[i].ccb_entries);
419                 mtx_init(&V_tcp_fastopen_ccache.base[i].ccb_mtx, "tfo_ccache_bucket",
420                          NULL, MTX_DEF);
421                 if (V_tcp_fastopen_client_enable) {
422                         /* enable bucket */
423                         V_tcp_fastopen_ccache.base[i].ccb_num_entries = 0;
424                 } else {
425                         /* disable bucket */
426                         V_tcp_fastopen_ccache.base[i].ccb_num_entries = -1;
427                 }
428                 V_tcp_fastopen_ccache.base[i].ccb_ccache = &V_tcp_fastopen_ccache;
429         }
430
431         /*
432          * Note that while the total number of entries in the cookie cache
433          * is limited by the table management logic to
434          * V_tcp_fastopen_ccache.buckets *
435          * V_tcp_fastopen_ccache.bucket_limit, the total number of items in
436          * this zone can exceed that amount by the number of CPUs in the
437          * system times the maximum number of unallocated items that can be
438          * present in each UMA per-CPU cache for this zone.
439          */
440         V_tcp_fastopen_ccache.zone = uma_zcreate("tfo_ccache_entries",
441             sizeof(struct tcp_fastopen_ccache_entry), NULL, NULL, NULL, NULL,
442             UMA_ALIGN_CACHE, 0);
443 }
444
445 void
446 tcp_fastopen_destroy(void)
447 {
448         struct tcp_fastopen_ccache_bucket *ccb;
449         unsigned int i;
450
451         for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
452                 ccb = &V_tcp_fastopen_ccache.base[i];
453                 tcp_fastopen_ccache_bucket_trim(ccb, 0);
454                 mtx_destroy(&ccb->ccb_mtx);
455         }
456
457         KASSERT(uma_zone_get_cur(V_tcp_fastopen_ccache.zone) == 0,
458             ("%s: TFO ccache zone allocation count not 0", __func__));
459         uma_zdestroy(V_tcp_fastopen_ccache.zone);
460         free(V_tcp_fastopen_ccache.base, M_TCP_FASTOPEN_CCACHE);
461
462         callout_drain(&V_tcp_fastopen_autokey_ctx.c);
463         rm_destroy(&V_tcp_fastopen_keylock);
464         uma_zdestroy(V_counter_zone);
465 }
466
467 unsigned int *
468 tcp_fastopen_alloc_counter(void)
469 {
470         unsigned int *counter;
471         counter = uma_zalloc(V_counter_zone, M_NOWAIT);
472         if (counter)
473                 *counter = 1;
474         return (counter);
475 }
476
477 void
478 tcp_fastopen_decrement_counter(unsigned int *counter)
479 {
480         if (*counter == 1)
481                 uma_zfree(V_counter_zone, counter);
482         else
483                 atomic_subtract_int(counter, 1);
484 }
485
486 static void
487 tcp_fastopen_addkey_locked(uint8_t *key)
488 {
489
490         V_tcp_fastopen_keys.newest++;
491         if (V_tcp_fastopen_keys.newest == TCP_FASTOPEN_MAX_KEYS)
492                 V_tcp_fastopen_keys.newest = 0;
493         memcpy(V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest], key,
494             TCP_FASTOPEN_KEY_LEN);
495         if (V_tcp_fastopen_numkeys < TCP_FASTOPEN_MAX_KEYS)
496                 V_tcp_fastopen_numkeys++;
497 }
498
499 static void
500 tcp_fastopen_addpsk_locked(uint8_t *psk)
501 {
502
503         V_tcp_fastopen_keys.newest_psk++;
504         if (V_tcp_fastopen_keys.newest_psk == TCP_FASTOPEN_MAX_PSKS)
505                 V_tcp_fastopen_keys.newest_psk = 0;
506         memcpy(V_tcp_fastopen_keys.psk[V_tcp_fastopen_keys.newest_psk], psk,
507             TCP_FASTOPEN_KEY_LEN);
508         if (V_tcp_fastopen_numpsks < TCP_FASTOPEN_MAX_PSKS)
509                 V_tcp_fastopen_numpsks++;
510 }
511
512 static void
513 tcp_fastopen_autokey_locked(void)
514 {
515         uint8_t newkey[TCP_FASTOPEN_KEY_LEN];
516
517         arc4rand(newkey, TCP_FASTOPEN_KEY_LEN, 0);
518         tcp_fastopen_addkey_locked(newkey);
519 }
520
521 static void
522 tcp_fastopen_autokey_callout(void *arg)
523 {
524         struct tcp_fastopen_callout *ctx = arg;
525
526         CURVNET_SET(ctx->v);
527         tcp_fastopen_autokey_locked();
528         callout_reset(&ctx->c, V_tcp_fastopen_autokey * hz,
529                       tcp_fastopen_autokey_callout, ctx);
530         CURVNET_RESTORE();
531 }
532
533 static uint64_t
534 tcp_fastopen_make_cookie(uint8_t key[SIPHASH_KEY_LENGTH], struct in_conninfo *inc)
535 {
536         SIPHASH_CTX ctx;
537         uint64_t siphash;
538
539         SipHash24_Init(&ctx);
540         SipHash_SetKey(&ctx, key);
541         switch (inc->inc_flags & INC_ISIPV6) {
542 #ifdef INET
543         case 0:
544                 SipHash_Update(&ctx, &inc->inc_faddr, sizeof(inc->inc_faddr));
545                 break;
546 #endif
547 #ifdef INET6
548         case INC_ISIPV6:
549                 SipHash_Update(&ctx, &inc->inc6_faddr, sizeof(inc->inc6_faddr));
550                 break;
551 #endif
552         }
553         SipHash_Final((u_int8_t *)&siphash, &ctx);
554
555         return (siphash);
556 }
557
558 static uint64_t
559 tcp_fastopen_make_psk_cookie(uint8_t *psk, uint8_t *cookie, uint8_t cookie_len)
560 {
561         SIPHASH_CTX ctx;
562         uint64_t psk_cookie;
563
564         SipHash24_Init(&ctx);
565         SipHash_SetKey(&ctx, psk);
566         SipHash_Update(&ctx, cookie, cookie_len);
567         SipHash_Final((u_int8_t *)&psk_cookie, &ctx);
568
569         return (psk_cookie);
570 }
571
572 static int
573 tcp_fastopen_find_cookie_match_locked(uint8_t *wire_cookie, uint64_t *cur_cookie)
574 {
575         unsigned int i, psk_index;
576         uint64_t psk_cookie;
577
578         if (V_tcp_fastopen_psk_enable) {
579                 psk_index = V_tcp_fastopen_keys.newest_psk;
580                 for (i = 0; i < V_tcp_fastopen_numpsks; i++) {
581                         psk_cookie =
582                             tcp_fastopen_make_psk_cookie(
583                                  V_tcp_fastopen_keys.psk[psk_index],
584                                  (uint8_t *)cur_cookie,
585                                  TCP_FASTOPEN_COOKIE_LEN);
586
587                         if (memcmp(wire_cookie, &psk_cookie,
588                                    TCP_FASTOPEN_COOKIE_LEN) == 0)
589                                 return (1);
590
591                         if (psk_index == 0)
592                                 psk_index = TCP_FASTOPEN_MAX_PSKS - 1;
593                         else
594                                 psk_index--;
595                 }
596         } else if (memcmp(wire_cookie, cur_cookie, TCP_FASTOPEN_COOKIE_LEN) == 0)
597                 return (1);
598
599         return (0);
600 }
601
602 /*
603  * Return values:
604  *      -1      the cookie is invalid and no valid cookie is available
605  *       0      the cookie is invalid and the latest cookie has been returned
606  *       1      the cookie is valid and the latest cookie has been returned
607  */
608 int
609 tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie,
610     unsigned int len, uint64_t *latest_cookie)
611 {
612         struct rm_priotracker tracker;
613         unsigned int i, key_index;
614         int rv;
615         uint64_t cur_cookie;
616
617         if (V_tcp_fastopen_acceptany) {
618                 *latest_cookie = 0;
619                 return (1);
620         }
621
622         TCP_FASTOPEN_KEYS_RLOCK(&tracker);
623         if (len != TCP_FASTOPEN_COOKIE_LEN) {
624                 if (V_tcp_fastopen_numkeys > 0) {
625                         *latest_cookie =
626                             tcp_fastopen_make_cookie(
627                                 V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest],
628                                 inc);
629                         rv = 0;
630                 } else
631                         rv = -1;
632                 goto out;
633         }
634
635         /*
636          * Check against each available key, from newest to oldest.
637          */
638         key_index = V_tcp_fastopen_keys.newest;
639         for (i = 0; i < V_tcp_fastopen_numkeys; i++) {
640                 cur_cookie =
641                     tcp_fastopen_make_cookie(V_tcp_fastopen_keys.key[key_index],
642                         inc);
643                 if (i == 0)
644                         *latest_cookie = cur_cookie;
645                 rv = tcp_fastopen_find_cookie_match_locked(cookie, &cur_cookie);
646                 if (rv)
647                         goto out;
648                 if (key_index == 0)
649                         key_index = TCP_FASTOPEN_MAX_KEYS - 1;
650                 else
651                         key_index--;
652         }
653         rv = 0;
654
655  out:
656         TCP_FASTOPEN_KEYS_RUNLOCK(&tracker);
657         return (rv);
658 }
659
660 static int
661 sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS)
662 {
663         int error;
664         unsigned int new;
665
666         new = V_tcp_fastopen_autokey;
667         error = sysctl_handle_int(oidp, &new, 0, req);
668         if (error == 0 && req->newptr) {
669                 if (new > (INT_MAX / hz))
670                         return (EINVAL);
671
672                 TCP_FASTOPEN_KEYS_WLOCK();
673                 if (V_tcp_fastopen_server_enable) {
674                         if (V_tcp_fastopen_autokey && !new)
675                                 callout_stop(&V_tcp_fastopen_autokey_ctx.c);
676                         else if (new)
677                                 callout_reset(&V_tcp_fastopen_autokey_ctx.c,
678                                     new * hz, tcp_fastopen_autokey_callout,
679                                     &V_tcp_fastopen_autokey_ctx);
680                 }
681                 V_tcp_fastopen_autokey = new;
682                 TCP_FASTOPEN_KEYS_WUNLOCK();
683         }
684
685         return (error);
686 }
687
688 static int
689 sysctl_net_inet_tcp_fastopen_psk_enable(SYSCTL_HANDLER_ARGS)
690 {
691         int error;
692         unsigned int new;
693
694         new = V_tcp_fastopen_psk_enable;
695         error = sysctl_handle_int(oidp, &new, 0, req);
696         if (error == 0 && req->newptr) {
697                 if (V_tcp_fastopen_psk_enable && !new) {
698                         /* enabled -> disabled */
699                         TCP_FASTOPEN_KEYS_WLOCK();
700                         V_tcp_fastopen_numpsks = 0;
701                         V_tcp_fastopen_keys.newest_psk =
702                             TCP_FASTOPEN_MAX_PSKS - 1;
703                         V_tcp_fastopen_psk_enable = 0;
704                         TCP_FASTOPEN_KEYS_WUNLOCK();
705                 } else if (!V_tcp_fastopen_psk_enable && new) {
706                         /* disabled -> enabled */
707                         TCP_FASTOPEN_KEYS_WLOCK();
708                         V_tcp_fastopen_psk_enable = 1;
709                         TCP_FASTOPEN_KEYS_WUNLOCK();
710                 }
711         }
712         return (error);
713 }
714
715 static int
716 sysctl_net_inet_tcp_fastopen_server_enable(SYSCTL_HANDLER_ARGS)
717 {
718         int error;
719         unsigned int new;
720
721         new = V_tcp_fastopen_server_enable;
722         error = sysctl_handle_int(oidp, &new, 0, req);
723         if (error == 0 && req->newptr) {
724                 if (V_tcp_fastopen_server_enable && !new) {
725                         /* enabled -> disabled */
726                         TCP_FASTOPEN_KEYS_WLOCK();
727                         V_tcp_fastopen_numkeys = 0;
728                         V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
729                         if (V_tcp_fastopen_autokey)
730                                 callout_stop(&V_tcp_fastopen_autokey_ctx.c);
731                         V_tcp_fastopen_numpsks = 0;
732                         V_tcp_fastopen_keys.newest_psk =
733                             TCP_FASTOPEN_MAX_PSKS - 1;
734                         V_tcp_fastopen_server_enable = 0;
735                         TCP_FASTOPEN_KEYS_WUNLOCK();
736                 } else if (!V_tcp_fastopen_server_enable && new) {
737                         /* disabled -> enabled */
738                         TCP_FASTOPEN_KEYS_WLOCK();
739                         if (V_tcp_fastopen_autokey &&
740                             (V_tcp_fastopen_numkeys == 0)) {
741                                 tcp_fastopen_autokey_locked();
742                                 callout_reset(&V_tcp_fastopen_autokey_ctx.c,
743                                     V_tcp_fastopen_autokey * hz,
744                                     tcp_fastopen_autokey_callout,
745                                     &V_tcp_fastopen_autokey_ctx);
746                         }
747                         V_tcp_fastopen_server_enable = 1;
748                         TCP_FASTOPEN_KEYS_WUNLOCK();
749                 }
750         }
751         return (error);
752 }
753
754 static int
755 sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS)
756 {
757         int error;
758         uint8_t newkey[TCP_FASTOPEN_KEY_LEN];
759
760         if (req->oldptr != NULL || req->oldlen != 0)
761                 return (EINVAL);
762         if (req->newptr == NULL)
763                 return (EPERM);
764         if (req->newlen != sizeof(newkey))
765                 return (EINVAL);
766         error = SYSCTL_IN(req, newkey, sizeof(newkey));
767         if (error)
768                 return (error);
769
770         TCP_FASTOPEN_KEYS_WLOCK();
771         tcp_fastopen_addkey_locked(newkey);
772         TCP_FASTOPEN_KEYS_WUNLOCK();
773
774         return (0);
775 }
776
777 static int
778 sysctl_net_inet_tcp_fastopen_setpsk(SYSCTL_HANDLER_ARGS)
779 {
780         int error;
781         uint8_t newpsk[TCP_FASTOPEN_KEY_LEN];
782
783         if (req->oldptr != NULL || req->oldlen != 0)
784                 return (EINVAL);
785         if (req->newptr == NULL)
786                 return (EPERM);
787         if (req->newlen != sizeof(newpsk))
788                 return (EINVAL);
789         error = SYSCTL_IN(req, newpsk, sizeof(newpsk));
790         if (error)
791                 return (error);
792
793         TCP_FASTOPEN_KEYS_WLOCK();
794         tcp_fastopen_addpsk_locked(newpsk);
795         TCP_FASTOPEN_KEYS_WUNLOCK();
796
797         return (0);
798 }
799
800 static int
801 sysctl_net_inet_tcp_fastopen_ccache_bucket_limit(SYSCTL_HANDLER_ARGS)
802 {
803         struct tcp_fastopen_ccache_bucket *ccb;
804         int error;
805         unsigned int new;
806         unsigned int i;
807
808         new = V_tcp_fastopen_ccache.bucket_limit;
809         error = sysctl_handle_int(oidp, &new, 0, req);
810         if (error == 0 && req->newptr) {
811                 if ((new == 0) || (new > INT_MAX))
812                         error = EINVAL;
813                 else {
814                         if (new < V_tcp_fastopen_ccache.bucket_limit) {
815                                 for (i = 0; i < V_tcp_fastopen_ccache.buckets;
816                                      i++) {
817                                         ccb = &V_tcp_fastopen_ccache.base[i];
818                                         tcp_fastopen_ccache_bucket_trim(ccb, new);
819                                 }
820                         }
821                         V_tcp_fastopen_ccache.bucket_limit = new;
822                 }
823         }
824         return (error);
825 }
826
827 static int
828 sysctl_net_inet_tcp_fastopen_client_enable(SYSCTL_HANDLER_ARGS)
829 {
830         struct tcp_fastopen_ccache_bucket *ccb;
831         int error;
832         unsigned int new, i;
833
834         new = V_tcp_fastopen_client_enable;
835         error = sysctl_handle_int(oidp, &new, 0, req);
836         if (error == 0 && req->newptr) {
837                 if (V_tcp_fastopen_client_enable && !new) {
838                         /* enabled -> disabled */
839                         for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
840                                 ccb = &V_tcp_fastopen_ccache.base[i];
841                                 KASSERT(ccb->ccb_num_entries > -1,
842                                     ("%s: ccb->ccb_num_entries %d is negative",
843                                         __func__, ccb->ccb_num_entries));
844                                 tcp_fastopen_ccache_bucket_trim(ccb, 0);
845                         }
846                         V_tcp_fastopen_client_enable = 0;
847                 } else if (!V_tcp_fastopen_client_enable && new) {
848                         /* disabled -> enabled */
849                         for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
850                                 ccb = &V_tcp_fastopen_ccache.base[i];
851                                 CCB_LOCK(ccb);
852                                 KASSERT(TAILQ_EMPTY(&ccb->ccb_entries),
853                                     ("%s: ccb->ccb_entries not empty", __func__));
854                                 KASSERT(ccb->ccb_num_entries == -1,
855                                     ("%s: ccb->ccb_num_entries %d not -1", __func__,
856                                         ccb->ccb_num_entries));
857                                 ccb->ccb_num_entries = 0; /* enable bucket */
858                                 CCB_UNLOCK(ccb);
859                         }
860                         V_tcp_fastopen_client_enable = 1;
861                 }
862         }
863         return (error);
864 }
865
866 void
867 tcp_fastopen_connect(struct tcpcb *tp)
868 {
869         struct inpcb *inp = tptoinpcb(tp);
870         struct tcp_fastopen_ccache_bucket *ccb;
871         struct tcp_fastopen_ccache_entry *cce;
872         sbintime_t now;
873         uint16_t server_mss;
874         uint64_t psk_cookie;
875
876         psk_cookie = 0;
877         cce = tcp_fastopen_ccache_lookup(&inp->inp_inc, &ccb);
878         if (cce) {
879                 if (cce->disable_time == 0) {
880                         if ((cce->cookie_len > 0) &&
881                             (tp->t_tfo_client_cookie_len ==
882                              TCP_FASTOPEN_PSK_LEN)) {
883                                 psk_cookie =
884                                     tcp_fastopen_make_psk_cookie(
885                                         tp->t_tfo_cookie.client,
886                                         cce->cookie, cce->cookie_len);
887                         } else {
888                                 tp->t_tfo_client_cookie_len = cce->cookie_len;
889                                 memcpy(tp->t_tfo_cookie.client, cce->cookie,
890                                     cce->cookie_len);
891                         }
892                         server_mss = cce->server_mss;
893                         CCB_UNLOCK(ccb);
894                         if (tp->t_tfo_client_cookie_len ==
895                             TCP_FASTOPEN_PSK_LEN && psk_cookie) {
896                                 tp->t_tfo_client_cookie_len =
897                                     TCP_FASTOPEN_COOKIE_LEN;
898                                 memcpy(tp->t_tfo_cookie.client, &psk_cookie,
899                                     TCP_FASTOPEN_COOKIE_LEN);
900                         }
901                         tcp_mss(tp, server_mss ? server_mss : -1);
902                         tp->snd_wnd = tp->t_maxseg;
903                 } else {
904                         /*
905                          * The path is disabled.  Check the time and
906                          * possibly re-enable.
907                          */
908                         now = getsbinuptime();
909                         if (now - cce->disable_time >
910                             ((sbintime_t)V_tcp_fastopen_path_disable_time << 32)) {
911                                 /*
912                                  * Re-enable path.  Force a TFO cookie
913                                  * request.  Forget the old MSS as it may be
914                                  * bogus now, and we will rediscover it in
915                                  * the SYN|ACK.
916                                  */
917                                 cce->disable_time = 0;
918                                 cce->server_mss = 0;
919                                 cce->cookie_len = 0;
920                                 /*
921                                  * tp->t_tfo... cookie details are already
922                                  * zero from the tcpcb init.
923                                  */
924                         } else {
925                                 /*
926                                  * Path is disabled, so disable TFO on this
927                                  * connection.
928                                  */
929                                 tp->t_flags &= ~TF_FASTOPEN;
930                         }
931                         CCB_UNLOCK(ccb);
932                         tcp_mss(tp, -1);
933                         /*
934                          * snd_wnd is irrelevant since we are either forcing
935                          * a TFO cookie request or disabling TFO - either
936                          * way, no data with the SYN.
937                          */
938                 }
939         } else {
940                 /*
941                  * A new entry for this path will be created when a SYN|ACK
942                  * comes back, or the attempt otherwise fails.
943                  */
944                 CCB_UNLOCK(ccb);
945                 tcp_mss(tp, -1);
946                 /*
947                  * snd_wnd is irrelevant since we are forcing a TFO cookie
948                  * request.
949                  */
950         }
951 }
952
953 void
954 tcp_fastopen_disable_path(struct tcpcb *tp)
955 {
956         struct in_conninfo *inc = &tptoinpcb(tp)->inp_inc;
957         struct tcp_fastopen_ccache_bucket *ccb;
958         struct tcp_fastopen_ccache_entry *cce;
959
960         cce = tcp_fastopen_ccache_lookup(inc, &ccb);
961         if (cce) {
962                 cce->server_mss = 0;
963                 cce->cookie_len = 0;
964                 /*
965                  * Preserve the existing disable time if it is already
966                  * disabled.
967                  */
968                 if (cce->disable_time == 0)
969                         cce->disable_time = getsbinuptime();
970         } else /* use invalid cookie len to create disabled entry */
971                 tcp_fastopen_ccache_create(ccb, inc, 0,
972                     TCP_FASTOPEN_MAX_COOKIE_LEN + 1, NULL);
973
974         CCB_UNLOCK(ccb);
975         tp->t_flags &= ~TF_FASTOPEN;
976 }
977
978 void
979 tcp_fastopen_update_cache(struct tcpcb *tp, uint16_t mss,
980     uint8_t cookie_len, uint8_t *cookie)
981 {
982         struct in_conninfo *inc = &tptoinpcb(tp)->inp_inc;
983         struct tcp_fastopen_ccache_bucket *ccb;
984         struct tcp_fastopen_ccache_entry *cce;
985
986         cce = tcp_fastopen_ccache_lookup(inc, &ccb);
987         if (cce) {
988                 if ((cookie_len >= TCP_FASTOPEN_MIN_COOKIE_LEN) &&
989                     (cookie_len <= TCP_FASTOPEN_MAX_COOKIE_LEN) &&
990                     ((cookie_len & 0x1) == 0)) {
991                         cce->server_mss = mss;
992                         cce->cookie_len = cookie_len;
993                         memcpy(cce->cookie, cookie, cookie_len);
994                         cce->disable_time = 0;
995                 } else {
996                         /* invalid cookie length, disable entry */
997                         cce->server_mss = 0;
998                         cce->cookie_len = 0;
999                         /*
1000                          * Preserve the existing disable time if it is
1001                          * already disabled.
1002                          */
1003                         if (cce->disable_time == 0)
1004                                 cce->disable_time = getsbinuptime();
1005                 }
1006         } else
1007                 tcp_fastopen_ccache_create(ccb, inc, mss, cookie_len, cookie);
1008
1009         CCB_UNLOCK(ccb);
1010 }
1011
1012 static struct tcp_fastopen_ccache_entry *
1013 tcp_fastopen_ccache_lookup(struct in_conninfo *inc,
1014     struct tcp_fastopen_ccache_bucket **ccbp)
1015 {
1016         struct tcp_fastopen_ccache_bucket *ccb;
1017         struct tcp_fastopen_ccache_entry *cce;
1018         uint32_t last_word;
1019         uint32_t hash;
1020
1021         hash = jenkins_hash32((uint32_t *)&inc->inc_ie.ie_dependladdr, 4,
1022             V_tcp_fastopen_ccache.secret);
1023         hash = jenkins_hash32((uint32_t *)&inc->inc_ie.ie_dependfaddr, 4,
1024             hash);
1025         last_word = inc->inc_fport;
1026         hash = jenkins_hash32(&last_word, 1, hash);
1027         ccb = &V_tcp_fastopen_ccache.base[hash & V_tcp_fastopen_ccache.mask];
1028         *ccbp = ccb;
1029         CCB_LOCK(ccb);
1030
1031         /*
1032          * Always returns with locked bucket.
1033          */
1034         TAILQ_FOREACH(cce, &ccb->ccb_entries, cce_link)
1035                 if ((!(cce->af == AF_INET6) == !(inc->inc_flags & INC_ISIPV6)) &&
1036                     (cce->server_port == inc->inc_ie.ie_fport) &&
1037                     (((cce->af == AF_INET) &&
1038                       (cce->cce_client_ip.v4.s_addr == inc->inc_laddr.s_addr) &&
1039                       (cce->cce_server_ip.v4.s_addr == inc->inc_faddr.s_addr)) ||
1040                      ((cce->af == AF_INET6) &&
1041                       IN6_ARE_ADDR_EQUAL(&cce->cce_client_ip.v6, &inc->inc6_laddr) &&
1042                       IN6_ARE_ADDR_EQUAL(&cce->cce_server_ip.v6, &inc->inc6_faddr))))
1043                         break;
1044
1045         return (cce);
1046 }
1047
1048 static struct tcp_fastopen_ccache_entry *
1049 tcp_fastopen_ccache_create(struct tcp_fastopen_ccache_bucket *ccb,
1050     struct in_conninfo *inc, uint16_t mss, uint8_t cookie_len, uint8_t *cookie)
1051 {
1052         struct tcp_fastopen_ccache_entry *cce;
1053
1054         /*
1055          * 1. Create a new entry, or
1056          * 2. Reclaim an existing entry, or
1057          * 3. Fail
1058          */
1059
1060         CCB_LOCK_ASSERT(ccb);
1061
1062         cce = NULL;
1063         if (ccb->ccb_num_entries < V_tcp_fastopen_ccache.bucket_limit)
1064                 cce = uma_zalloc(V_tcp_fastopen_ccache.zone, M_NOWAIT);
1065
1066         if (cce == NULL) {
1067                 /*
1068                  * At bucket limit, or out of memory - reclaim last
1069                  * entry in bucket.
1070                  */
1071                 cce = TAILQ_LAST(&ccb->ccb_entries, bucket_entries);
1072                 if (cce == NULL) {
1073                         /* XXX count this event */
1074                         return (NULL);
1075                 }
1076
1077                 TAILQ_REMOVE(&ccb->ccb_entries, cce, cce_link);
1078         } else
1079                 ccb->ccb_num_entries++;
1080
1081         TAILQ_INSERT_HEAD(&ccb->ccb_entries, cce, cce_link);
1082         cce->af = (inc->inc_flags & INC_ISIPV6) ? AF_INET6 : AF_INET;
1083         if (cce->af == AF_INET) {
1084                 cce->cce_client_ip.v4 = inc->inc_laddr;
1085                 cce->cce_server_ip.v4 = inc->inc_faddr;
1086         } else {
1087                 cce->cce_client_ip.v6 = inc->inc6_laddr;
1088                 cce->cce_server_ip.v6 = inc->inc6_faddr;
1089         }
1090         cce->server_port = inc->inc_fport;
1091         if ((cookie_len >= TCP_FASTOPEN_MIN_COOKIE_LEN) &&
1092             (cookie_len <= TCP_FASTOPEN_MAX_COOKIE_LEN) &&
1093             ((cookie_len & 0x1) == 0)) {
1094                 cce->server_mss = mss;
1095                 cce->cookie_len = cookie_len;
1096                 memcpy(cce->cookie, cookie, cookie_len);
1097                 cce->disable_time = 0;
1098         } else {
1099                 /* invalid cookie length, disable cce */
1100                 cce->server_mss = 0;
1101                 cce->cookie_len = 0;
1102                 cce->disable_time = getsbinuptime();
1103         }
1104
1105         return (cce);
1106 }
1107
1108 static void
1109 tcp_fastopen_ccache_bucket_trim(struct tcp_fastopen_ccache_bucket *ccb,
1110     unsigned int limit)
1111 {
1112         struct tcp_fastopen_ccache_entry *cce, *cce_tmp;
1113         unsigned int entries;
1114
1115         CCB_LOCK(ccb);
1116         entries = 0;
1117         TAILQ_FOREACH_SAFE(cce, &ccb->ccb_entries, cce_link, cce_tmp) {
1118                 entries++;
1119                 if (entries > limit)
1120                         tcp_fastopen_ccache_entry_drop(cce, ccb);
1121         }
1122         KASSERT(ccb->ccb_num_entries <= (int)limit,
1123             ("%s: ccb->ccb_num_entries %d exceeds limit %d", __func__,
1124                 ccb->ccb_num_entries, limit));
1125         if (limit == 0) {
1126                 KASSERT(TAILQ_EMPTY(&ccb->ccb_entries),
1127                     ("%s: ccb->ccb_entries not empty", __func__));
1128                 ccb->ccb_num_entries = -1; /* disable bucket */
1129         }
1130         CCB_UNLOCK(ccb);
1131 }
1132
1133 static void
1134 tcp_fastopen_ccache_entry_drop(struct tcp_fastopen_ccache_entry *cce,
1135     struct tcp_fastopen_ccache_bucket *ccb)
1136 {
1137
1138         CCB_LOCK_ASSERT(ccb);
1139
1140         TAILQ_REMOVE(&ccb->ccb_entries, cce, cce_link);
1141         ccb->ccb_num_entries--;
1142         uma_zfree(V_tcp_fastopen_ccache.zone, cce);
1143 }
1144
1145 static int
1146 sysctl_net_inet_tcp_fastopen_ccache_list(SYSCTL_HANDLER_ARGS)
1147 {
1148         struct sbuf sb;
1149         struct tcp_fastopen_ccache_bucket *ccb;
1150         struct tcp_fastopen_ccache_entry *cce;
1151         sbintime_t now, duration, limit;
1152         const int linesize = 128;
1153         int i, error, num_entries;
1154         unsigned int j;
1155 #ifdef INET6
1156         char clt_buf[INET6_ADDRSTRLEN], srv_buf[INET6_ADDRSTRLEN];
1157 #else
1158         char clt_buf[INET_ADDRSTRLEN], srv_buf[INET_ADDRSTRLEN];
1159 #endif
1160
1161         if (jailed_without_vnet(curthread->td_ucred) != 0)
1162                 return (EPERM);
1163
1164         /* Only allow root to read the client cookie cache */
1165         if (curthread->td_ucred->cr_uid != 0)
1166                 return (EPERM);
1167
1168         num_entries = 0;
1169         for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
1170                 ccb = &V_tcp_fastopen_ccache.base[i];
1171                 CCB_LOCK(ccb);
1172                 if (ccb->ccb_num_entries > 0)
1173                         num_entries += ccb->ccb_num_entries;
1174                 CCB_UNLOCK(ccb);
1175         }
1176         sbuf_new(&sb, NULL, linesize * (num_entries + 1), SBUF_INCLUDENUL);
1177
1178         sbuf_printf(&sb,
1179                     "\nLocal IP address     Remote IP address     Port   MSS"
1180                     " Disabled Cookie\n");
1181
1182         now = getsbinuptime();
1183         limit = (sbintime_t)V_tcp_fastopen_path_disable_time << 32;
1184         for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
1185                 ccb = &V_tcp_fastopen_ccache.base[i];
1186                 CCB_LOCK(ccb);
1187                 TAILQ_FOREACH(cce, &ccb->ccb_entries, cce_link) {
1188                         if (cce->disable_time != 0) {
1189                                 duration = now - cce->disable_time;
1190                                 if (limit >= duration)
1191                                         duration = limit - duration;
1192                                 else
1193                                         duration = 0;
1194                         } else
1195                                 duration = 0;
1196                         sbuf_printf(&sb,
1197                                     "%-20s %-20s %5u %5u ",
1198                                     inet_ntop(cce->af, &cce->cce_client_ip,
1199                                         clt_buf, sizeof(clt_buf)),
1200                                     inet_ntop(cce->af, &cce->cce_server_ip,
1201                                         srv_buf, sizeof(srv_buf)),
1202                                     ntohs(cce->server_port),
1203                                     cce->server_mss);
1204                         if (duration > 0)
1205                                 sbuf_printf(&sb, "%7ds ", sbintime_getsec(duration));
1206                         else
1207                                 sbuf_printf(&sb, "%8s ", "No");
1208                         for (j = 0; j < cce->cookie_len; j++)
1209                                 sbuf_printf(&sb, "%02x", cce->cookie[j]);
1210                         sbuf_putc(&sb, '\n');
1211                 }
1212                 CCB_UNLOCK(ccb);
1213         }
1214         error = sbuf_finish(&sb);
1215         if (error == 0)
1216                 error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb));
1217         sbuf_delete(&sb);
1218         return (error);
1219 }