/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2016-2017
 *	Netflix Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/filio.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/poll.h>
#include <sys/queue.h>
#include <sys/refcount.h>
#include <sys/mutex.h>
#include <sys/selinfo.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/uio.h>

#include <machine/atomic.h>
#include <sys/counter.h>

#include <dev/tcp_log/tcp_log_dev.h>
56 #ifdef TCPLOG_DEBUG_COUNTERS
57 extern counter_u64_t tcp_log_que_read;
58 extern counter_u64_t tcp_log_que_freed;
61 static struct cdev *tcp_log_dev;
62 static struct selinfo tcp_log_sel;
64 static struct log_queueh tcp_log_dev_queue_head = STAILQ_HEAD_INITIALIZER(tcp_log_dev_queue_head);
65 static struct log_infoh tcp_log_dev_reader_head = STAILQ_HEAD_INITIALIZER(tcp_log_dev_reader_head);
67 MALLOC_DEFINE(M_TCPLOGDEV, "tcp_log_dev", "TCP log device data structures");
69 static int tcp_log_dev_listeners = 0;
71 static struct mtx tcp_log_dev_queue_lock;
73 #define TCP_LOG_DEV_QUEUE_LOCK() mtx_lock(&tcp_log_dev_queue_lock)
74 #define TCP_LOG_DEV_QUEUE_UNLOCK() mtx_unlock(&tcp_log_dev_queue_lock)
75 #define TCP_LOG_DEV_QUEUE_LOCK_ASSERT() mtx_assert(&tcp_log_dev_queue_lock, MA_OWNED)
76 #define TCP_LOG_DEV_QUEUE_UNLOCK_ASSERT() mtx_assert(&tcp_log_dev_queue_lock, MA_NOTOWNED)
77 #define TCP_LOG_DEV_QUEUE_REF(tldq) refcount_acquire(&((tldq)->tldq_refcnt))
78 #define TCP_LOG_DEV_QUEUE_UNREF(tldq) refcount_release(&((tldq)->tldq_refcnt))
80 static void tcp_log_dev_clear_refcount(struct tcp_log_dev_queue *entry);
81 static void tcp_log_dev_clear_cdevpriv(void *data);
82 static int tcp_log_dev_open(struct cdev *dev __unused, int flags,
83 int devtype __unused, struct thread *td __unused);
84 static int tcp_log_dev_write(struct cdev *dev __unused,
85 struct uio *uio __unused, int flags __unused);
86 static int tcp_log_dev_read(struct cdev *dev __unused, struct uio *uio,
88 static int tcp_log_dev_ioctl(struct cdev *dev __unused, u_long cmd,
89 caddr_t data, int fflag __unused, struct thread *td __unused);
90 static int tcp_log_dev_poll(struct cdev *dev __unused, int events,
94 enum tcp_log_dev_queue_lock_state {
99 static struct cdevsw tcp_log_cdevsw = {
100 .d_version = D_VERSION,
101 .d_read = tcp_log_dev_read,
102 .d_open = tcp_log_dev_open,
103 .d_write = tcp_log_dev_write,
104 .d_poll = tcp_log_dev_poll,
105 .d_ioctl = tcp_log_dev_ioctl,
107 .d_mmap = tcp_log_dev_mmap,
/*
 * Assert that the actual queue lock state matches the state the caller
 * claims in 'lockstate' (QUEUE_LOCKED or QUEUE_UNLOCKED). Compiles to a
 * no-op without INVARIANTS.
 */
static void
tcp_log_dev_queue_validate_lock(int lockstate)
{

#ifdef INVARIANTS
	switch (lockstate) {
	case QUEUE_LOCKED:
		TCP_LOG_DEV_QUEUE_LOCK_ASSERT();
		break;
	case QUEUE_UNLOCKED:
		TCP_LOG_DEV_QUEUE_UNLOCK_ASSERT();
		break;
	default:
		kassert_panic("%s:%d: unknown queue lock state", __func__,
		    __LINE__);
	}
#endif
}
132 * Clear the refcount. If appropriate, it will remove the entry from the
133 * queue and call the destructor.
135 * This must be called with the queue lock held.
138 tcp_log_dev_clear_refcount(struct tcp_log_dev_queue *entry)
141 KASSERT(entry != NULL, ("%s: called with NULL entry", __func__));
143 TCP_LOG_DEV_QUEUE_LOCK_ASSERT();
145 if (TCP_LOG_DEV_QUEUE_UNREF(entry)) {
146 #ifdef TCPLOG_DEBUG_COUNTERS
147 counter_u64_add(tcp_log_que_freed, 1);
149 /* Remove the entry from the queue and call the destructor. */
150 STAILQ_REMOVE(&tcp_log_dev_queue_head, entry, tcp_log_dev_queue,
152 (*entry->tldq_dtor)(entry);
157 tcp_log_dev_clear_cdevpriv(void *data)
159 struct tcp_log_dev_info *priv;
160 struct tcp_log_dev_queue *entry, *entry_tmp;
162 priv = (struct tcp_log_dev_info *)data;
167 * Lock the queue and drop our references. We hold references to all
168 * the entries starting with tldi_head (or, if tldi_head == NULL, all
169 * entries in the queue).
171 * Because we don't want anyone adding addition things to the queue
172 * while we are doing this, we lock the queue.
174 TCP_LOG_DEV_QUEUE_LOCK();
175 if (priv->tldi_head != NULL) {
176 entry = priv->tldi_head;
177 STAILQ_FOREACH_FROM_SAFE(entry, &tcp_log_dev_queue_head,
178 tldq_queue, entry_tmp) {
179 tcp_log_dev_clear_refcount(entry);
182 tcp_log_dev_listeners--;
183 KASSERT(tcp_log_dev_listeners >= 0,
184 ("%s: tcp_log_dev_listeners is unexpectedly negative", __func__));
185 STAILQ_REMOVE(&tcp_log_dev_reader_head, priv, tcp_log_dev_info,
187 TCP_LOG_DEV_QUEUE_LOCK_ASSERT();
188 TCP_LOG_DEV_QUEUE_UNLOCK();
189 free(priv, M_TCPLOGDEV);
193 tcp_log_dev_open(struct cdev *dev __unused, int flags, int devtype __unused,
194 struct thread *td __unused)
196 struct tcp_log_dev_info *priv;
197 struct tcp_log_dev_queue *entry;
201 * Ideally, we shouldn't see these because of file system
204 if (flags & (FWRITE | FEXEC | FAPPEND | O_TRUNC))
207 /* Allocate space to hold information about where we are. */
208 priv = malloc(sizeof(struct tcp_log_dev_info), M_TCPLOGDEV,
211 /* Stash the private data away. */
212 rv = devfs_set_cdevpriv((void *)priv, tcp_log_dev_clear_cdevpriv);
215 * Increase the listener count, add this reader to the list, and
216 * take references on all current queues.
218 TCP_LOG_DEV_QUEUE_LOCK();
219 tcp_log_dev_listeners++;
220 STAILQ_INSERT_HEAD(&tcp_log_dev_reader_head, priv, tldi_list);
221 priv->tldi_head = STAILQ_FIRST(&tcp_log_dev_queue_head);
222 if (priv->tldi_head != NULL)
223 priv->tldi_cur = priv->tldi_head->tldq_buf;
224 STAILQ_FOREACH(entry, &tcp_log_dev_queue_head, tldq_queue)
225 TCP_LOG_DEV_QUEUE_REF(entry);
226 TCP_LOG_DEV_QUEUE_UNLOCK();
228 /* Free the entry. */
229 free(priv, M_TCPLOGDEV);
235 tcp_log_dev_write(struct cdev *dev __unused, struct uio *uio __unused,
243 tcp_log_dev_rotate_bufs(struct tcp_log_dev_info *priv, int *lockstate)
245 struct tcp_log_dev_queue *entry;
247 KASSERT(priv->tldi_head != NULL,
248 ("%s:%d: priv->tldi_head unexpectedly NULL",
249 __func__, __LINE__));
250 KASSERT(priv->tldi_head->tldq_buf == priv->tldi_cur,
251 ("%s:%d: buffer mismatch (%p vs %p)",
252 __func__, __LINE__, priv->tldi_head->tldq_buf,
254 tcp_log_dev_queue_validate_lock(*lockstate);
256 if (*lockstate == QUEUE_UNLOCKED) {
257 TCP_LOG_DEV_QUEUE_LOCK();
258 *lockstate = QUEUE_LOCKED;
260 entry = priv->tldi_head;
261 priv->tldi_head = STAILQ_NEXT(entry, tldq_queue);
262 tcp_log_dev_clear_refcount(entry);
263 priv->tldi_cur = NULL;
267 tcp_log_dev_read(struct cdev *dev __unused, struct uio *uio, int flags)
269 struct tcp_log_common_header *buf;
270 struct tcp_log_dev_info *priv;
271 struct tcp_log_dev_queue *entry;
275 /* Get our private info. */
276 rv = devfs_get_cdevpriv((void **)&priv);
280 lockstate = QUEUE_UNLOCKED;
282 /* Do we need to get a new buffer? */
283 while (priv->tldi_cur == NULL ||
284 priv->tldi_cur->tlch_length <= priv->tldi_off) {
285 /* Did we somehow forget to rotate? */
286 KASSERT(priv->tldi_cur == NULL,
287 ("%s:%d: tldi_cur is unexpectedly non-NULL", __func__,
289 if (priv->tldi_cur != NULL)
290 tcp_log_dev_rotate_bufs(priv, &lockstate);
293 * Before we start looking at tldi_head, we need a lock on the
294 * queue to make sure tldi_head stays stable.
296 if (lockstate == QUEUE_UNLOCKED) {
297 TCP_LOG_DEV_QUEUE_LOCK();
298 lockstate = QUEUE_LOCKED;
301 /* We need the next buffer. Do we have one? */
302 if (priv->tldi_head == NULL && (flags & FNONBLOCK)) {
306 if (priv->tldi_head == NULL) {
307 /* Sleep and wait for more things we can read. */
308 rv = mtx_sleep(&tcp_log_dev_listeners,
309 &tcp_log_dev_queue_lock, PCATCH, "tcplogdev", 0);
312 if (priv->tldi_head == NULL)
317 * We have an entry to read. We want to try to create a
318 * buffer, if one doesn't already exist.
320 entry = priv->tldi_head;
321 if (entry->tldq_buf == NULL) {
322 TCP_LOG_DEV_QUEUE_LOCK_ASSERT();
323 buf = (*entry->tldq_xform)(entry);
328 entry->tldq_buf = buf;
331 priv->tldi_cur = entry->tldq_buf;
335 /* Copy what we can from this buffer to the output buffer. */
336 if (uio->uio_resid > 0) {
337 /* Drop locks so we can take page faults. */
338 if (lockstate == QUEUE_LOCKED)
339 TCP_LOG_DEV_QUEUE_UNLOCK();
340 lockstate = QUEUE_UNLOCKED;
342 KASSERT(priv->tldi_cur != NULL,
343 ("%s: priv->tldi_cur is unexpectedly NULL", __func__));
345 /* Copy as much as we can to this uio. */
346 len = priv->tldi_cur->tlch_length - priv->tldi_off;
347 if (len > uio->uio_resid)
348 len = uio->uio_resid;
349 rv = uiomove(((uint8_t *)priv->tldi_cur) + priv->tldi_off,
353 priv->tldi_off += len;
354 #ifdef TCPLOG_DEBUG_COUNTERS
355 counter_u64_add(tcp_log_que_read, len);
358 /* Are we done with this buffer? If so, find the next one. */
359 if (priv->tldi_off >= priv->tldi_cur->tlch_length) {
360 KASSERT(priv->tldi_off == priv->tldi_cur->tlch_length,
361 ("%s: offset (%ju) exceeds length (%ju)", __func__,
362 (uintmax_t)priv->tldi_off,
363 (uintmax_t)priv->tldi_cur->tlch_length));
364 tcp_log_dev_rotate_bufs(priv, &lockstate);
367 tcp_log_dev_queue_validate_lock(lockstate);
368 if (lockstate == QUEUE_LOCKED)
369 TCP_LOG_DEV_QUEUE_UNLOCK();
374 tcp_log_dev_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data,
375 int fflag __unused, struct thread *td __unused)
377 struct tcp_log_dev_info *priv;
380 /* Get our private info. */
381 rv = devfs_get_cdevpriv((void **)&priv);
386 * Set things. Here, we are most concerned about the non-blocking I/O
394 if (*(int *)data != 0)
404 tcp_log_dev_poll(struct cdev *dev __unused, int events, struct thread *td)
406 struct tcp_log_dev_info *priv;
410 * Get our private info. If this fails, claim that all events are
411 * ready. That should prod the user to do something that will
412 * make the error evident to them.
414 if (devfs_get_cdevpriv((void **)&priv))
418 if (events & (POLLIN | POLLRDNORM)) {
420 * We can (probably) read right now if we are partway through
421 * a buffer or if we are just about to start a buffer.
422 * Because we are going to read tldi_head, we should acquire
423 * a read lock on the queue.
425 TCP_LOG_DEV_QUEUE_LOCK();
426 if ((priv->tldi_head != NULL && priv->tldi_cur == NULL) ||
427 (priv->tldi_cur != NULL &&
428 priv->tldi_off < priv->tldi_cur->tlch_length))
429 revents = events & (POLLIN | POLLRDNORM);
431 selrecord(td, &tcp_log_sel);
432 TCP_LOG_DEV_QUEUE_UNLOCK();
435 * It only makes sense to poll for reading. So, again, prod the
436 * user to do something that will make the error of their ways
445 tcp_log_dev_add_log(struct tcp_log_dev_queue *entry)
447 struct tcp_log_dev_info *priv;
451 KASSERT(entry->tldq_buf != NULL || entry->tldq_xform != NULL,
452 ("%s: Called with both tldq_buf and tldq_xform set to NULL",
454 KASSERT(entry->tldq_dtor != NULL,
455 ("%s: Called with tldq_dtor set to NULL", __func__));
457 /* Get a lock on the queue. */
458 TCP_LOG_DEV_QUEUE_LOCK();
460 /* If no one is listening, tell the caller to free the resources. */
461 if (tcp_log_dev_listeners == 0) {
466 /* Add this to the end of the tailq. */
467 STAILQ_INSERT_TAIL(&tcp_log_dev_queue_head, entry, tldq_queue);
469 /* Add references for all current listeners. */
470 refcount_init(&entry->tldq_refcnt, tcp_log_dev_listeners);
473 * If any listener is currently stuck on NULL, that means they are
474 * waiting. Point their head to this new entry.
476 wakeup_needed = false;
477 STAILQ_FOREACH(priv, &tcp_log_dev_reader_head, tldi_list)
478 if (priv->tldi_head == NULL) {
479 priv->tldi_head = entry;
480 wakeup_needed = true;
484 selwakeup(&tcp_log_sel);
485 wakeup(&tcp_log_dev_listeners);
491 TCP_LOG_DEV_QUEUE_LOCK_ASSERT();
492 TCP_LOG_DEV_QUEUE_UNLOCK();
497 tcp_log_dev_modevent(module_t mod __unused, int type, void *data __unused)
500 /* TODO: Support intelligent unloading. */
504 printf("tcp_log: tcp_log device\n");
505 memset(&tcp_log_sel, 0, sizeof(tcp_log_sel));
506 memset(&tcp_log_dev_queue_lock, 0, sizeof(struct mtx));
507 mtx_init(&tcp_log_dev_queue_lock, "tcp_log dev",
508 "tcp_log device queues", MTX_DEF);
509 tcp_log_dev = make_dev_credf(MAKEDEV_ETERNAL_KLD,
510 &tcp_log_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0400,
520 DEV_MODULE(tcp_log_dev, tcp_log_dev_modevent, NULL);
521 MODULE_VERSION(tcp_log_dev, 1);