sys/netlink/netlink_message_writer.c

   1 /*-
   2  * SPDX-License-Identifier: BSD-2-Clause
   3  *
   4  * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
   5  *
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions
   8  * are met:
   9  * 1. Redistributions of source code must retain the above copyright
  10  *    notice, this list of conditions and the following disclaimer.
  11  * 2. Redistributions in binary form must reproduce the above copyright
  12  *    notice, this list of conditions and the following disclaimer in the
  13  *    documentation and/or other materials provided with the distribution.
  14  *
  15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  25  * SUCH DAMAGE.
  26  */
  27
  28 #include "opt_netlink.h"
  29
  30 #include <sys/cdefs.h>
  31 __FBSDID("$FreeBSD$");
  32 #include <sys/param.h>
  33 #include <sys/malloc.h>
  34 #include <sys/lock.h>
  35 #include <sys/rmlock.h>
  36 #include <sys/mbuf.h>
  37 #include <sys/ck.h>
  38 #include <sys/socket.h>
  39 #include <sys/socketvar.h>
  40 #include <sys/syslog.h>
  41
  42 #include <netlink/netlink.h>
  43 #include <netlink/netlink_ctl.h>
  44 #include <netlink/netlink_linux.h>
  45 #include <netlink/netlink_var.h>
  46
  47 #define DEBUG_MOD_NAME  nl_writer
  48 #define DEBUG_MAX_LEVEL LOG_DEBUG3
  49 #include <netlink/netlink_debug.h>
  50 _DECLARE_DEBUG(LOG_INFO);
  51
/*
 * The goal of this file is to provide a convenient message writing KPI on top of
 * different storage methods (mbufs, uio, temporary memory chunks).
 *
 * The main KPI guarantee is that the (last) message always resides in a contiguous
 *  memory buffer, so one is able to update the header after writing the entire message.
 *
 * This guarantee comes with a side effect of potentially reallocating the underlying
 *  buffer, so one needs to update the desired pointers after something is added
 *  to the message.
 *
 * Messaging layer contains hooks performing transparent Linux translation for the messages.
 *
 * There are 3 types of supported targets:
 *  * socket (adds mbufs to the socket buffer, used for message replies)
 *  * group (sends mbuf/chain to the specified groups, used for the notifications)
 *  * chain (returns mbuf chain, used in Linux message translation code)
 *
 * There are 3 types of storage:
 * * NS_WRITER_TYPE_MBUF (mbuf-based, most efficient, used when a single message
 *    fits in MCLBYTES)
 * * NS_WRITER_TYPE_BUF (fallback, malloc-based, used when a single message needs
 *    to be larger than the one supported by NS_WRITER_TYPE_MBUF)
 * * NS_WRITER_TYPE_LBUF (malloc-based, similar to NS_WRITER_TYPE_BUF, used for
 *    Linux sockets, calls translation hook prior to sending messages to the socket).
 *
 * Internally, the KPI switches between different types of storage when memory
 *  requirements change. It happens transparently to the caller.
 */
  81
  82
  83 typedef bool nlwriter_op_init(struct nl_writer *nw, int size, bool waitok);
  84 typedef bool nlwriter_op_write(struct nl_writer *nw, void *buf, int buflen, int cnt);
  85
  86 struct nlwriter_ops {
  87         nlwriter_op_init        *init;
  88         nlwriter_op_write       *write_socket;
  89         nlwriter_op_write       *write_group;
  90         nlwriter_op_write       *write_chain;
  91 };
  92
  93 /*
  94  * NS_WRITER_TYPE_BUF
  95  * Writes message to a temporary memory buffer,
  96  * flushing to the socket/group when buffer size limit is reached
  97  */
  98 static bool
  99 nlmsg_get_ns_buf(struct nl_writer *nw, int size, bool waitok)
 100 {
 101         int mflag = waitok ? M_WAITOK : M_NOWAIT;
 102         nw->_storage = malloc(size, M_NETLINK, mflag | M_ZERO);
 103         if (__predict_false(nw->_storage == NULL))
 104                 return (false);
 105         nw->alloc_len = size;
 106         nw->offset = 0;
 107         nw->hdr = NULL;
 108         nw->data = nw->_storage;
 109         nw->writer_type = NS_WRITER_TYPE_BUF;
 110         nw->malloc_flag = mflag;
 111         nw->num_messages = 0;
 112         nw->enomem = false;
 113         return (true);
 114 }
 115
 116 static bool
 117 nlmsg_write_socket_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
 118 {
 119         NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
 120         if (__predict_false(datalen == 0)) {
 121                 free(buf, M_NETLINK);
 122                 return (true);
 123         }
 124
 125         struct mbuf *m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
 126         if (__predict_false(m == NULL)) {
 127                 /* XXX: should we set sorcverr? */
 128                 free(buf, M_NETLINK);
 129                 return (false);
 130         }
 131         m_append(m, datalen, buf);
 132         free(buf, M_NETLINK);
 133
 134         int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
 135         return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags));
 136 }
 137
 138 static bool
 139 nlmsg_write_group_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
 140 {
 141         NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen,
 142             nw->arg.group.proto, nw->arg.group.id);
 143         if (__predict_false(datalen == 0)) {
 144                 free(buf, M_NETLINK);
 145                 return (true);
 146         }
 147
 148         struct mbuf *m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
 149         if (__predict_false(m == NULL)) {
 150                 free(buf, M_NETLINK);
 151                 return (false);
 152         }
 153         bool success = m_append(m, datalen, buf) != 0;
 154         free(buf, M_NETLINK);
 155
 156         if (!success)
 157                 return (false);
 158
 159         nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
 160         return (true);
 161 }
 162
 163 static bool
 164 nlmsg_write_chain_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
 165 {
 166         struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr);
 167         NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
 168
 169         if (__predict_false(datalen == 0)) {
 170                 free(buf, M_NETLINK);
 171                 return (true);
 172         }
 173
 174         if (*m0 == NULL) {
 175                 struct mbuf *m;
 176
 177                 m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
 178                 if (__predict_false(m == NULL)) {
 179                         free(buf, M_NETLINK);
 180                         return (false);
 181                 }
 182                 *m0 = m;
 183         }
 184         if (__predict_false(m_append(*m0, datalen, buf) == 0)) {
 185                 free(buf, M_NETLINK);
 186                 return (false);
 187         }
 188         return (true);
 189 }
 190
 191
 192 /*
 193  * NS_WRITER_TYPE_MBUF
 194  * Writes message to the allocated mbuf,
 195  * flushing to socket/group when mbuf size limit is reached.
 196  * This is the most efficient mechanism as it avoids double-copying.
 197  *
 198  * Allocates a single mbuf suitable to store up to @size bytes of data.
 199  * If size < MHLEN (around 160 bytes), allocates mbuf with pkghdr
 200  * If size <= MCLBYTES (2k), allocate a single mbuf cluster
 201  * Otherwise, return NULL.
 202  */
 203 static bool
 204 nlmsg_get_ns_mbuf(struct nl_writer *nw, int size, bool waitok)
 205 {
 206         struct mbuf *m;
 207
 208         int mflag = waitok ? M_WAITOK : M_NOWAIT;
 209         m = m_get2(size, mflag, MT_DATA, M_PKTHDR);
 210         if (__predict_false(m == NULL))
 211                 return (false);
 212         nw->alloc_len = M_TRAILINGSPACE(m);
 213         nw->offset = 0;
 214         nw->hdr = NULL;
 215         nw->_storage = (void *)m;
 216         nw->data = mtod(m, void *);
 217         nw->writer_type = NS_WRITER_TYPE_MBUF;
 218         nw->malloc_flag = mflag;
 219         nw->num_messages = 0;
 220         nw->enomem = false;
 221         memset(nw->data, 0, size);
 222         NL_LOG(LOG_DEBUG2, "alloc mbuf %p req_len %d alloc_len %d data_ptr %p",
 223             m, size, nw->alloc_len, nw->data);
 224         return (true);
 225 }
 226
 227 static bool
 228 nlmsg_write_socket_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
 229 {
 230         struct mbuf *m = (struct mbuf *)buf;
 231         NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
 232
 233         if (__predict_false(datalen == 0)) {
 234                 m_freem(m);
 235                 return (true);
 236         }
 237
 238         m->m_pkthdr.len = datalen;
 239         m->m_len = datalen;
 240         int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
 241         return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags));
 242 }
 243
 244 static bool
 245 nlmsg_write_group_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
 246 {
 247         struct mbuf *m = (struct mbuf *)buf;
 248         NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen,
 249             nw->arg.group.proto, nw->arg.group.id);
 250
 251         if (__predict_false(datalen == 0)) {
 252                 m_freem(m);
 253                 return (true);
 254         }
 255
 256         m->m_pkthdr.len = datalen;
 257         m->m_len = datalen;
 258         nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
 259         return (true);
 260 }
 261
 262 static bool
 263 nlmsg_write_chain_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
 264 {
 265         struct mbuf *m_new = (struct mbuf *)buf;
 266         struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr);
 267
 268         NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
 269
 270         if (__predict_false(datalen == 0)) {
 271                 m_freem(m_new);
 272                 return (true);
 273         }
 274
 275         m_new->m_pkthdr.len = datalen;
 276         m_new->m_len = datalen;
 277
 278         if (*m0 == NULL) {
 279                 *m0 = m_new;
 280         } else {
 281                 struct mbuf *m_last;
 282                 for (m_last = *m0; m_last->m_next != NULL; m_last = m_last->m_next)
 283                         ;
 284                 m_last->m_next = m_new;
 285                 (*m0)->m_pkthdr.len += datalen;
 286         }
 287
 288         return (true);
 289 }
 290
 291 /*
 292  * NS_WRITER_TYPE_LBUF
 293  * Writes message to the allocated memory buffer,
 294  * flushing to socket/group when mbuf size limit is reached.
 295  * Calls linux handler to rewrite messages before sending to the socket.
 296  */
 297 static bool
 298 nlmsg_get_ns_lbuf(struct nl_writer *nw, int size, bool waitok)
 299 {
 300         int mflag = waitok ? M_WAITOK : M_NOWAIT;
 301         size = roundup2(size, sizeof(void *));
 302         int add_size = sizeof(struct linear_buffer) + SCRATCH_BUFFER_SIZE;
 303         char *buf = malloc(add_size + size * 2, M_NETLINK, mflag | M_ZERO);
 304         if (__predict_false(buf == NULL))
 305                 return (false);
 306
 307         /* Fill buffer header first */
 308         struct linear_buffer *lb = (struct linear_buffer *)buf;
 309         lb->base = &buf[sizeof(struct linear_buffer) + size];
 310         lb->size = size + SCRATCH_BUFFER_SIZE;
 311
 312         nw->alloc_len = size;
 313         nw->offset = 0;
 314         nw->hdr = NULL;
 315         nw->_storage = buf;
 316         nw->data = (char *)(lb + 1);
 317         nw->malloc_flag = mflag;
 318         nw->writer_type = NS_WRITER_TYPE_LBUF;
 319         nw->num_messages = 0;
 320         nw->enomem = false;
 321         return (true);
 322 }
 323
 324 static bool
 325 nlmsg_write_socket_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
 326 {
 327         struct linear_buffer *lb = (struct linear_buffer *)buf;
 328         char *data = (char *)(lb + 1);
 329         struct nlpcb *nlp = (struct nlpcb *)(nw->arg.ptr);
 330
 331         if (__predict_false(datalen == 0)) {
 332                 free(buf, M_NETLINK);
 333                 return (true);
 334         }
 335
 336         struct mbuf *m = NULL;
 337         if (linux_netlink_p != NULL)
 338                 m = linux_netlink_p->msgs_to_linux(nlp->nl_proto, data, datalen, nlp);
 339         free(buf, M_NETLINK);
 340
 341         if (__predict_false(m == NULL)) {
 342                 /* XXX: should we set sorcverr? */
 343                 return (false);
 344         }
 345
 346         int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
 347         return (nl_send_one(m, nlp, cnt, io_flags));
 348 }
 349
 350 /* Shouldn't be called (maybe except Linux code originating message) */
 351 static bool
 352 nlmsg_write_group_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
 353 {
 354         struct linear_buffer *lb = (struct linear_buffer *)buf;
 355         char *data = (char *)(lb + 1);
 356
 357         if (__predict_false(datalen == 0)) {
 358                 free(buf, M_NETLINK);
 359                 return (true);
 360         }
 361
 362         struct mbuf *m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
 363         if (__predict_false(m == NULL)) {
 364                 free(buf, M_NETLINK);
 365                 return (false);
 366         }
 367         m_append(m, datalen, data);
 368         free(buf, M_NETLINK);
 369
 370         nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
 371         return (true);
 372 }
 373
 374 static const struct nlwriter_ops nlmsg_writers[] = {
 375         /* NS_WRITER_TYPE_MBUF */
 376         {
 377                 .init = nlmsg_get_ns_mbuf,
 378                 .write_socket = nlmsg_write_socket_mbuf,
 379                 .write_group = nlmsg_write_group_mbuf,
 380                 .write_chain = nlmsg_write_chain_mbuf,
 381         },
 382         /* NS_WRITER_TYPE_BUF */
 383         {
 384                 .init = nlmsg_get_ns_buf,
 385                 .write_socket = nlmsg_write_socket_buf,
 386                 .write_group = nlmsg_write_group_buf,
 387                 .write_chain = nlmsg_write_chain_buf,
 388         },
 389         /* NS_WRITER_TYPE_LBUF */
 390         {
 391                 .init = nlmsg_get_ns_lbuf,
 392                 .write_socket = nlmsg_write_socket_lbuf,
 393                 .write_group = nlmsg_write_group_lbuf,
 394         },
 395 };
 396
 397 static void
 398 nlmsg_set_callback(struct nl_writer *nw)
 399 {
 400         const struct nlwriter_ops *pops = &nlmsg_writers[nw->writer_type];
 401
 402         switch (nw->writer_target) {
 403         case NS_WRITER_TARGET_SOCKET:
 404                 nw->cb = pops->write_socket;
 405                 break;
 406         case NS_WRITER_TARGET_GROUP:
 407                 nw->cb = pops->write_group;
 408                 break;
 409         case NS_WRITER_TARGET_CHAIN:
 410                 nw->cb = pops->write_chain;
 411                 break;
 412         default:
 413                 panic("not implemented");
 414         }
 415 }
 416
 417 static bool
 418 nlmsg_get_buf_type(struct nl_writer *nw, int size, int type, bool waitok)
 419 {
 420         MPASS(type + 1 <= sizeof(nlmsg_writers) / sizeof(nlmsg_writers[0]));
 421         NL_LOG(LOG_DEBUG3, "Setting up nw %p size %d type %d", nw, size, type);
 422         return (nlmsg_writers[type].init(nw, size, waitok));
 423 }
 424
 425 static bool
 426 nlmsg_get_buf(struct nl_writer *nw, int size, bool waitok, bool is_linux)
 427 {
 428         int type;
 429
 430         if (!is_linux) {
 431                 if (__predict_true(size <= MCLBYTES))
 432                         type = NS_WRITER_TYPE_MBUF;
 433                 else
 434                         type = NS_WRITER_TYPE_BUF;
 435         } else
 436                 type = NS_WRITER_TYPE_LBUF;
 437         return (nlmsg_get_buf_type(nw, size, type, waitok));
 438 }
 439
 440 bool
 441 _nlmsg_get_unicast_writer(struct nl_writer *nw, int size, struct nlpcb *nlp)
 442 {
 443         if (!nlmsg_get_buf(nw, size, false, nlp->nl_linux))
 444                 return (false);
 445         nw->arg.ptr = (void *)nlp;
 446         nw->writer_target = NS_WRITER_TARGET_SOCKET;
 447         nlmsg_set_callback(nw);
 448         return (true);
 449 }
 450
 451 bool
 452 _nlmsg_get_group_writer(struct nl_writer *nw, int size, int protocol, int group_id)
 453 {
 454         if (!nlmsg_get_buf(nw, size, false, false))
 455                 return (false);
 456         nw->arg.group.proto = protocol;
 457         nw->arg.group.id = group_id;
 458         nw->writer_target = NS_WRITER_TARGET_GROUP;
 459         nlmsg_set_callback(nw);
 460         return (true);
 461 }
 462
 463 bool
 464 _nlmsg_get_chain_writer(struct nl_writer *nw, int size, struct mbuf **pm)
 465 {
 466         if (!nlmsg_get_buf(nw, size, false, false))
 467                 return (false);
 468         *pm = NULL;
 469         nw->arg.ptr = (void *)pm;
 470         nw->writer_target = NS_WRITER_TARGET_CHAIN;
 471         nlmsg_set_callback(nw);
 472         NL_LOG(LOG_DEBUG3, "setup cb %p (need %p)", nw->cb, &nlmsg_write_chain_mbuf);
 473         return (true);
 474 }
 475
 476 void
 477 _nlmsg_ignore_limit(struct nl_writer *nw)
 478 {
 479         nw->ignore_limit = true;
 480 }
 481
 482 bool
 483 _nlmsg_flush(struct nl_writer *nw)
 484 {
 485
 486         if (__predict_false(nw->hdr != NULL)) {
 487                 /* Last message has not been completed, skip it. */
 488                 int completed_len = (char *)nw->hdr - nw->data;
 489                 /* Send completed messages */
 490                 nw->offset -= nw->offset - completed_len;
 491                 nw->hdr = NULL;
 492         }
 493
 494         NL_LOG(LOG_DEBUG2, "OUT");
 495         bool result = nw->cb(nw, nw->_storage, nw->offset, nw->num_messages);
 496         nw->_storage = NULL;
 497
 498         if (!result) {
 499                 NL_LOG(LOG_DEBUG, "nw %p offset %d: flush with %p() failed", nw, nw->offset, nw->cb);
 500         }
 501
 502         return (result);
 503 }
 504
 505 /*
 506  * Flushes previous data and allocates new underlying storage
 507  *  sufficient for holding at least @required_len bytes.
 508  * Return true on success.
 509  */
 510 bool
 511 _nlmsg_refill_buffer(struct nl_writer *nw, int required_len)
 512 {
 513         struct nl_writer ns_new = {};
 514         int completed_len, new_len;
 515
 516         if (nw->enomem)
 517                 return (false);
 518
 519         NL_LOG(LOG_DEBUG3, "no space at offset %d/%d (want %d), trying to reclaim",
 520             nw->offset, nw->alloc_len, required_len);
 521
 522         /* Calculated new buffer size and allocate it s*/
 523         completed_len = (nw->hdr != NULL) ? (char *)nw->hdr - nw->data : nw->offset;
 524         if (completed_len > 0 && required_len < MCLBYTES) {
 525                 /* We already ran out of space, use the largest effective size */
 526                 new_len = max(nw->alloc_len, MCLBYTES);
 527         } else {
 528                 if (nw->alloc_len < MCLBYTES)
 529                         new_len = MCLBYTES;
 530                 else
 531                         new_len = nw->alloc_len * 2;
 532                 while (new_len < required_len)
 533                         new_len *= 2;
 534         }
 535         bool waitok = (nw->malloc_flag == M_WAITOK);
 536         bool is_linux = (nw->writer_type == NS_WRITER_TYPE_LBUF);
 537         if (!nlmsg_get_buf(&ns_new, new_len, waitok, is_linux)) {
 538                 nw->enomem = true;
 539                 NL_LOG(LOG_DEBUG, "getting new buf failed, setting ENOMEM");
 540                 return (false);
 541         }
 542         if (nw->ignore_limit)
 543                 nlmsg_ignore_limit(&ns_new);
 544
 545         /* Update callback data */
 546         ns_new.writer_target = nw->writer_target;
 547         nlmsg_set_callback(&ns_new);
 548         ns_new.arg = nw->arg;
 549
 550         /* Copy last (unfinished) header to the new storage */
 551         int last_len = nw->offset - completed_len;
 552         if (last_len > 0) {
 553                 memcpy(ns_new.data, nw->hdr, last_len);
 554                 ns_new.hdr = (struct nlmsghdr *)ns_new.data;
 555                 ns_new.offset = last_len;
 556         }
 557
 558         NL_LOG(LOG_DEBUG2, "completed: %d bytes, copied: %d bytes", completed_len, last_len);
 559
 560         /* Flush completed headers & switch to the new nw */
 561         nlmsg_flush(nw);
 562         memcpy(nw, &ns_new, sizeof(struct nl_writer));
 563         NL_LOG(LOG_DEBUG2, "switched buffer: used %d/%d bytes", nw->offset, nw->alloc_len);
 564
 565         return (true);
 566 }
 567
 568 bool
 569 _nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type,
 570     uint16_t flags, uint32_t len)
 571 {
 572         struct nlmsghdr *hdr;
 573
 574         MPASS(nw->hdr == NULL);
 575
 576         int required_len = NETLINK_ALIGN(len + sizeof(struct nlmsghdr));
 577         if (__predict_false(nw->offset + required_len > nw->alloc_len)) {
 578                 if (!nlmsg_refill_buffer(nw, required_len))
 579                         return (false);
 580         }
 581
 582         hdr = (struct nlmsghdr *)(&nw->data[nw->offset]);
 583
 584         hdr->nlmsg_len = len;
 585         hdr->nlmsg_type = type;
 586         hdr->nlmsg_flags = flags;
 587         hdr->nlmsg_seq = seq;
 588         hdr->nlmsg_pid = portid;
 589
 590         nw->hdr = hdr;
 591         nw->offset += sizeof(struct nlmsghdr);
 592
 593         return (true);
 594 }
 595
 596 bool
 597 _nlmsg_end(struct nl_writer *nw)
 598 {
 599         MPASS(nw->hdr != NULL);
 600
 601         if (nw->enomem) {
 602                 NL_LOG(LOG_DEBUG, "ENOMEM when dumping message");
 603                 nlmsg_abort(nw);
 604                 return (false);
 605         }
 606
 607         nw->hdr->nlmsg_len = (uint32_t)(nw->data + nw->offset - (char *)nw->hdr);
 608         NL_LOG(LOG_DEBUG2, "wrote msg len: %u type: %d: flags: 0x%X seq: %u pid: %u",
 609             nw->hdr->nlmsg_len, nw->hdr->nlmsg_type, nw->hdr->nlmsg_flags,
 610             nw->hdr->nlmsg_seq, nw->hdr->nlmsg_pid);
 611         nw->hdr = NULL;
 612         nw->num_messages++;
 613         return (true);
 614 }
 615
 616 void
 617 _nlmsg_abort(struct nl_writer *nw)
 618 {
 619         if (nw->hdr != NULL) {
 620                 nw->offset = (uint32_t)((char *)nw->hdr - nw->data);
 621                 nw->hdr = NULL;
 622         }
 623 }
 624
 625 void
 626 nlmsg_ack(struct nlpcb *nlp, int error, struct nlmsghdr *hdr,
 627     struct nl_pstate *npt)
 628 {
 629         struct nlmsgerr *errmsg;
 630         int payload_len;
 631         uint32_t flags = nlp->nl_flags;
 632         struct nl_writer *nw = npt->nw;
 633         bool cap_ack;
 634
 635         payload_len = sizeof(struct nlmsgerr);
 636
 637         /*
 638          * The only case when we send the full message in the
 639          * reply is when there is an error and NETLINK_CAP_ACK
 640          * is not set.
 641          */
 642         cap_ack = (error == 0) || (flags & NLF_CAP_ACK);
 643         if (!cap_ack)
 644                 payload_len += hdr->nlmsg_len - sizeof(struct nlmsghdr);
 645         payload_len = NETLINK_ALIGN(payload_len);
 646
 647         uint16_t nl_flags = cap_ack ? NLM_F_CAPPED : 0;
 648         if ((npt->err_msg || npt->err_off) && nlp->nl_flags & NLF_EXT_ACK)
 649                 nl_flags |= NLM_F_ACK_TLVS;
 650
 651         NL_LOG(LOG_DEBUG3, "acknowledging message type %d seq %d",
 652             hdr->nlmsg_type, hdr->nlmsg_seq);
 653
 654         if (!nlmsg_add(nw, nlp->nl_port, hdr->nlmsg_seq, NLMSG_ERROR, nl_flags, payload_len))
 655                 goto enomem;
 656
 657         errmsg = nlmsg_reserve_data(nw, payload_len, struct nlmsgerr);
 658         errmsg->error = error;
 659         /* In case of error copy the whole message, else just the header */
 660         memcpy(&errmsg->msg, hdr, cap_ack ? sizeof(*hdr) : hdr->nlmsg_len);
 661
 662         if (npt->err_msg != NULL && nlp->nl_flags & NLF_EXT_ACK)
 663                 nlattr_add_string(nw, NLMSGERR_ATTR_MSG, npt->err_msg);
 664         if (npt->err_off != 0 && nlp->nl_flags & NLF_EXT_ACK)
 665                 nlattr_add_u32(nw, NLMSGERR_ATTR_OFFS, npt->err_off);
 666         if (npt->cookie != NULL)
 667                 nlattr_add_raw(nw, npt->cookie);
 668
 669         if (nlmsg_end(nw))
 670                 return;
 671 enomem:
 672         NLP_LOG(LOG_DEBUG, nlp, "error allocating ack data for message %d seq %u",
 673             hdr->nlmsg_type, hdr->nlmsg_seq);
 674         nlmsg_abort(nw);
 675 }
 676
 677 bool
 678 _nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr)
 679 {
 680         if (!nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, NLMSG_DONE, 0, sizeof(int))) {
 681                 NL_LOG(LOG_DEBUG, "Error finalizing table dump");
 682                 return (false);
 683         }
 684         /* Save operation result */
 685         int *perror = nlmsg_reserve_object(nw, int);
 686         NL_LOG(LOG_DEBUG2, "record error=%d at off %d (%p)", error,
 687             nw->offset, perror);
 688         *perror = error;
 689         nlmsg_end(nw);
 690         nw->suppress_ack = true;
 691
 692         return (true);
 693 }