2 * Copyright (c) 1982, 1986, 1988, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * @(#)mbuf.h 8.5 (Berkeley) 2/19/95
/*
 * Mbufs are of a single size, MSIZE (machine/param.h), which
 * includes overhead.  An mbuf may add a single "mbuf cluster" of size
 * MCLBYTES (also in machine/param.h), which has no additional overhead
 * and is used instead of the internal data area; this is done when
 * at least MINCLSIZE of data must be stored.  Additionally, it is possible
 * to allocate a separate buffer externally and attach it to the mbuf in
 * a way similar to that of mbuf clusters.
 *
 * All four sizes below are derived from MSIZE and from the sizes of the
 * two header structures, so they have type size_t.
 */
#define	MLEN		(MSIZE - sizeof(struct m_hdr))	/* normal data len */
#define	MHLEN		(MLEN - sizeof(struct pkthdr))	/* data len w/pkthdr */
#define	MINCLSIZE	(MHLEN + 1)	/* smallest amount to put in cluster */
#define	M_MAXCOMPRESS	(MHLEN / 2)	/* max amount to copy for compression */
/*
 * Macros for type conversion:
 * mtod(m, t)	-- Convert mbuf pointer to data pointer of correct type.
 * dtom(x)	-- Convert data pointer within mbuf to mbuf pointer (XXX).
 *
 * dtom() simply rounds the address down to a multiple of MSIZE, so it
 * assumes MSIZE is a power of two and that (x) points into the mbuf
 * itself.  The mask is built in uintptr_t so that it covers the full
 * pointer width regardless of the integer type MSIZE happens to have.
 */
#define	mtod(m, t)	((t)((m)->m_data))
#define	dtom(x)		((struct mbuf *)((uintptr_t)(x) & ~((uintptr_t)MSIZE - 1)))
65 * Header present at the beginning of every mbuf.
68 struct mbuf *mh_next; /* next buffer in chain */
69 struct mbuf *mh_nextpkt; /* next chain in queue/record */
70 caddr_t mh_data; /* location of data */
71 int mh_len; /* amount of data in this mbuf */
72 short mh_type; /* type of data in this mbuf */
73 short mh_flags; /* flags; see below */
77 * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set.
80 struct ifnet *rcvif; /* rcv interface */
81 int len; /* total packet length */
82 /* variables for ip and tcp reassembly */
83 void *header; /* pointer to packet header */
84 /* variables for hardware checksum */
85 int csum_flags; /* flags regarding checksum */
86 int csum_data; /* data field used by csum routines */
87 struct mbuf *aux; /* extra data buffer; ipsec/others */
91 * Description of external storage mapped into mbuf; valid only if M_EXT is set.
94 caddr_t ext_buf; /* start of buffer */
95 void (*ext_free) /* free routine if not the usual */
97 void *ext_args; /* optional argument pointer */
98 u_int ext_size; /* size of buffer, for ext_free */
99 u_int *ref_cnt; /* pointer to ref count info */
100 int ext_type; /* type of external storage */
104 * The core of the mbuf object along with some shortcut defines for
105 * practical purposes.
111 struct pkthdr MH_pkthdr; /* M_PKTHDR set */
113 struct m_ext MH_ext; /* M_EXT set */
114 char MH_databuf[MHLEN];
117 char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */
/*
 * Shortcut names for members nested inside struct mbuf, so that callers
 * can write m->m_len instead of m->m_hdr.mh_len and so on.
 *
 * m_act is an alias for m_nextpkt.
 *
 * Note that m_ext is also the tag of struct m_ext; once this macro is
 * defined, the token m_ext is rewritten wherever it appears.
 */
#define	m_next		m_hdr.mh_next
#define	m_len		m_hdr.mh_len
#define	m_data		m_hdr.mh_data
#define	m_type		m_hdr.mh_type
#define	m_flags		m_hdr.mh_flags
#define	m_nextpkt	m_hdr.mh_nextpkt
#define	m_act		m_nextpkt
#define	m_pkthdr	M_dat.MH.MH_pkthdr
#define	m_ext		M_dat.MH.MH_dat.MH_ext
#define	m_pktdat	M_dat.MH.MH_dat.MH_databuf
#define	m_dat		M_dat.M_databuf
/*
 * mbuf flags (stored in m_flags).  Each flag is a distinct single bit.
 */
#define	M_EXT		0x0001	/* has associated external storage */
#define	M_PKTHDR	0x0002	/* start of record */
#define	M_EOR		0x0004	/* end of record */
#define	M_RDONLY	0x0008	/* associated data is marked read-only */
#define	M_PROTO1	0x0010	/* protocol-specific */
#define	M_PROTO2	0x0020	/* protocol-specific */
#define	M_PROTO3	0x0040	/* protocol-specific */
#define	M_PROTO4	0x0080	/* protocol-specific */
#define	M_PROTO5	0x0100	/* protocol-specific */

/*
 * mbuf pkthdr flags (also stored in m_flags).
 */
#define	M_BCAST		0x0200	/* send/received as link-level broadcast */
#define	M_MCAST		0x0400	/* send/received as link-level multicast */
#define	M_FRAG		0x0800	/* packet is a fragment of a larger packet */
#define	M_FIRSTFRAG	0x1000	/* packet is first fragment */
#define	M_LASTFRAG	0x2000	/* packet is last fragment */
/*
 * External buffer types: identify ext_buf type.
 * The value is kept in the ext_type member of struct m_ext.
 */
#define	EXT_CLUSTER	1	/* mbuf cluster */
#define	EXT_SFBUF	2	/* sendfile(2)'s sf_bufs */
#define	EXT_NET_DRV	100	/* custom ext_buf provided by net driver(s) */
#define	EXT_MOD_TYPE	200	/* custom module's ext_buf type */
#define	EXT_DISPOSABLE	300	/* can throw this buffer away w/page flipping */
/*
 * Flags copied when copying m_pkthdr.
 *
 * Every M_PROTOn flag is listed exactly once.  M_EXT, M_FIRSTFRAG and
 * M_LASTFRAG are not part of the mask.
 */
#define	M_COPYFLAGS	(M_PKTHDR|M_EOR|M_RDONLY|M_PROTO1|M_PROTO2|M_PROTO3| \
			    M_PROTO4|M_PROTO5|M_BCAST|M_MCAST|M_FRAG)
/*
 * Flags indicating hw checksum support and sw checksum requirements.
 * The low byte holds the "will do" requests, the second byte the
 * "already done" results.
 */
#define	CSUM_IP			0x0001		/* will csum IP */
#define	CSUM_TCP		0x0002		/* will csum TCP */
#define	CSUM_UDP		0x0004		/* will csum UDP */
#define	CSUM_IP_FRAGS		0x0008		/* will csum IP fragments */
#define	CSUM_FRAGMENT		0x0010		/* will do IP fragmentation */

#define	CSUM_IP_CHECKED		0x0100		/* did csum IP */
#define	CSUM_IP_VALID		0x0200		/*   ... the csum is valid */
#define	CSUM_DATA_VALID		0x0400		/* csum_data field is valid */
#define	CSUM_PSEUDO_HDR		0x0800		/* csum_data has pseudo hdr */

#define	CSUM_DELAY_DATA		(CSUM_TCP | CSUM_UDP)
#define	CSUM_DELAY_IP		(CSUM_IP)	/* XXX add ipv6 here too? */
/*
 * mbuf types (stored in m_type).  MT_NTYPES sizes the per-type counter
 * array mb_mbtypes[].
 *
 * The value 9 is unassigned, and MT_IFADDR and MT_TAG both use 13.
 * NOTE(review): confirm whether MT_IFADDR (and the other table/socket
 * types) are still meant to be visible alongside MT_TAG.
 */
#define	MT_NOTMBUF	0	/* USED INTERNALLY ONLY! Object is not mbuf */
#define	MT_DATA		1	/* dynamic (data) allocation */
#define	MT_HEADER	2	/* packet header */
#define	MT_SOCKET	3	/* socket structure */
#define	MT_PCB		4	/* protocol control block */
#define	MT_RTABLE	5	/* routing tables */
#define	MT_HTABLE	6	/* IMP host tables */
#define	MT_ATABLE	7	/* address resolution tables */
#define	MT_SONAME	8	/* socket name */
#define	MT_SOOPTS	10	/* socket options */
#define	MT_FTABLE	11	/* fragment reassembly header */
#define	MT_RIGHTS	12	/* access rights */
#define	MT_IFADDR	13	/* interface address */
#define	MT_TAG		13	/* volatile metadata associated to pkts */
#define	MT_CONTROL	14	/* extra-data protocol message */
#define	MT_OOBDATA	15	/* expedited data */
#define	MT_NTYPES	16	/* number of mbuf types for mbtypes[] */
214 * Mbuf and cluster allocation statistics PCPU structure.
221 long mb_mbtypes[MT_NTYPES];
226 * General mbuf allocator statistics structure.
227 * XXX: Modifications of these are not protected by any mutex locks nor by
228 * any atomic() manipulations. As a result, we may occasionally lose
229 * a count or two. Luckily, not all of these fields are modified at all
230 * and remain static, and those that are manipulated are only manipulated
231 * in failure situations, which do not occur (hopefully) very often.
234 u_long m_drops; /* times failed to allocate */
235 u_long m_wait; /* times succesfully returned from wait */
236 u_long m_drain; /* times drained protocols for space */
237 u_long m_mcfail; /* XXX: times m_copym failed */
238 u_long m_mpfail; /* XXX: times m_pullup failed */
239 u_long m_msize; /* length of an mbuf */
240 u_long m_mclbytes; /* length of an mbuf cluster */
241 u_long m_minclsize; /* min length of data to allocate a cluster */
242 u_long m_mlen; /* length of data in an mbuf */
243 u_long m_mhlen; /* length of data in a header mbuf */
/* Number of mbtypes (gives # elems in mbpstat's mb_mbtypes[] array): */
/*
 * Flags specifying how an allocation should be made.
 * M_DONTWAIT means "don't block if nothing is available" whereas
 * M_TRYWAIT means "block for mbuf_wait ticks at most if nothing is
 * available".
 *
 * M_WAIT is only kept as an alias of M_TRYWAIT for old callers.
 */
#define	M_WAIT		M_TRYWAIT	/* XXX: Deprecated. */
/*
 * mbuf external reference count management macros.
 *
 * MEXT_IS_REF(m): true if (m) is not the only mbuf referencing
 *     the external buffer ext_buf.
 *
 * MEXT_REM_REF(m): remove reference to m_ext object.
 *
 * MEXT_ADD_REF(m): add reference to m_ext object already
 *     referred to by (m).
 *
 * MEXT_REM_REF() expands to a single do { } while (0) statement so it
 * can be used safely in an unbraced if/else arm.  It evaluates (m)
 * twice, so the argument must not have side effects.
 */
#define	MEXT_IS_REF(m)	(*((m)->m_ext.ref_cnt) > 1)

#define	MEXT_REM_REF(m) do {						\
	KASSERT(*((m)->m_ext.ref_cnt) > 0, ("m_ext refcnt < 0"));	\
	atomic_subtract_int((m)->m_ext.ref_cnt, 1);			\
} while (0)

#define	MEXT_ADD_REF(m)	atomic_add_int((m)->m_ext.ref_cnt, 1)
/*
 * mbuf, cluster, and external object allocation macros
 * (for compatibility purposes).
 *
 * Each macro forwards to the function of the corresponding name.
 * MGET() and MGETHDR() assign the result to (m); the assignment is
 * parenthesized so the macros behave as one expression wherever they
 * are used.
 */
#define	M_COPY_PKTHDR(to, from)	m_copy_pkthdr(to, from)
#define	m_getclr		m_get_clrd
#define	MGET(m, how, type)	((m) = m_get((how), (type)))
#define	MGETHDR(m, how, type)	((m) = m_gethdr((how), (type)))
#define	MCLGET(m, how)		m_clget((m), (how))
#define	MEXTADD(m, buf, size, free, args, flags, type)			\
    m_extadd((m), (caddr_t)(buf), (size), (free), (args), (flags), (type))
292 * MEXTFREE(m): disassociate (and possibly free) an external object from (m).
294 * If the atomic_cmpset_int() returns 0, then we effectively do nothing
295 * in terms of "cleaning up" (freeing the ext buf and ref. counter) as
296 * this means that either there are still references, or another thread
297 * is taking care of the clean-up.
299 #define MEXTFREE(m) do { \
300 struct mbuf *_mb = (m); \
303 if (atomic_cmpset_int(_mb->m_ext.ref_cnt, 0, 1)) \
305 _mb->m_flags &= ~M_EXT; \
/*
 * Evaluate TRUE if it's safe to write to the mbuf m's data region (this
 * can be both the local data payload, or an external buffer area,
 * depending on whether M_EXT is set).
 *
 * The data is writable when M_RDONLY is clear and either there is no
 * external storage or this mbuf holds the only reference to it.
 */
#define	M_WRITABLE(m)	(!((m)->m_flags & M_RDONLY) &&			\
			    (!((m)->m_flags & M_EXT) || !MEXT_IS_REF(m)))
/*
 * Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place
 * an object of the specified size at the end of the mbuf, longword aligned.
 *
 * The offset added to m_data is (MLEN - len) rounded down to a multiple
 * of sizeof(long).
 */
#define	M_ALIGN(m, len) do {						\
	(m)->m_data += (MLEN - (len)) & ~(sizeof(long) - 1);		\
} while (0)

/*
 * As above, for mbufs allocated with m_gethdr/MGETHDR
 * or initialized by M_COPY_PKTHDR.
 */
#define	MH_ALIGN(m, len) do {						\
	(m)->m_data += (MHLEN - (len)) & ~(sizeof(long) - 1);		\
} while (0)
/*
 * Compute the amount of space available
 * before the current start of data in an mbuf.
 *
 * With external storage the distance is measured from ext_buf (and is
 * reported as 0 when the storage is not writable); otherwise it is
 * measured from m_pktdat or m_dat depending on M_PKTHDR.
 *
 * The M_WRITABLE() is a temporary, conservative safety measure: the burden
 * of checking writability of the mbuf data area rests solely with the caller.
 */
#define	M_LEADINGSPACE(m)						\
	((m)->m_flags & M_EXT ?						\
	    (M_WRITABLE(m) ? (m)->m_data - (m)->m_ext.ext_buf : 0) :	\
	    ((m)->m_flags & M_PKTHDR ? (m)->m_data - (m)->m_pktdat :	\
	    (m)->m_data - (m)->m_dat))
/*
 * Compute the amount of space available
 * after the end of data in an mbuf.
 *
 * With external storage the end of the buffer is ext_buf + ext_size (and
 * 0 is reported when the storage is not writable); otherwise the end is
 * &m_dat[MLEN], whether or not M_PKTHDR is set.
 *
 * The M_WRITABLE() is a temporary, conservative safety measure: the burden
 * of checking writability of the mbuf data area rests solely with the caller.
 */
#define	M_TRAILINGSPACE(m)						\
	((m)->m_flags & M_EXT ?						\
	    (M_WRITABLE(m) ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size	\
		- ((m)->m_data + (m)->m_len) : 0) :			\
	    &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len))
359 * Arrange to prepend space of size plen to mbuf m.
360 * If a new mbuf must be allocated, how specifies whether to wait.
361 * If the allocation fails, the original mbuf chain is freed and m is
364 #define M_PREPEND(m, plen, how) do { \
365 struct mbuf **_mmp = &(m); \
366 struct mbuf *_mm = *_mmp; \
367 int _mplen = (plen); \
368 int __mhow = (how); \
370 if (M_LEADINGSPACE(_mm) >= _mplen) { \
371 _mm->m_data -= _mplen; \
372 _mm->m_len += _mplen; \
374 _mm = m_prepend(_mm, _mplen, __mhow); \
375 if (_mm != NULL && _mm->m_flags & M_PKTHDR) \
376 _mm->m_pkthdr.len += _mplen; \
/*
 * Change mbuf to new type.
 * This is a relatively expensive operation and should be avoided.
 */
#define	MCHTYPE(m, t)	m_chtype((m), (t))

/* Length to m_copy to copy all. */
#define	M_COPYALL	1000000000

/* Compatibility with 4.3: m_copy() is m_copym() with M_DONTWAIT. */
#define	m_copy(m, o, l)	m_copym((m), (o), (l), M_DONTWAIT)
393 * pkthdr.aux type tags.
402 * Some packet tags to identify different mbuf annotations.
404 * Eventually, these annotations will end up in an appropriate chain
405 * (struct m_tag or similar, e.g. as in NetBSD) properly managed by
406 * the mbuf handling routines.
408 * As a temporary and low impact solution to replace the even uglier
409 * approach used so far in some parts of the network stack (which relies
410 * on global variables), these annotations are stored in MT_TAG
411 * mbufs (or lookalikes) prepended to the actual mbuf chain.
415 * m_next = next buffer in chain.
417 * BE VERY CAREFUL not to pass these blocks to the mbuf handling routines.
/* The tag id of a tag block is kept in the mh_flags member of its header. */
#define	m_tag_id	m_hdr.mh_flags

/* Packet tag types -- first ones are from NetBSD */

#define	PACKET_TAG_NONE				0  /* Nadda */
#define	PACKET_TAG_IPSEC_IN_DONE		1  /* IPsec applied, in */
#define	PACKET_TAG_IPSEC_OUT_DONE		2  /* IPsec applied, out */
#define	PACKET_TAG_IPSEC_IN_CRYPTO_DONE		3  /* NIC IPsec crypto done */
#define	PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED	4  /* NIC IPsec crypto req'ed */
#define	PACKET_TAG_IPSEC_IN_COULD_DO_CRYPTO	5  /* NIC notifies IPsec */
#define	PACKET_TAG_IPSEC_PENDING_TDB		6  /* Reminder to do IPsec */
#define	PACKET_TAG_BRIDGE			7  /* Bridge processing done */
#define	PACKET_TAG_GIF				8  /* GIF processing done */
#define	PACKET_TAG_GRE				9  /* GRE processing done */
#define	PACKET_TAG_IN_PACKET_CHECKSUM		10 /* NIC checksumming done */
#define	PACKET_TAG_ENCAP			11 /* Encap. processing */
#define	PACKET_TAG_IPSEC_SOCKET			12 /* IPSEC socket ref */
#define	PACKET_TAG_IPSEC_HISTORY		13 /* IPSEC history */
#define	PACKET_TAG_IPV6_INPUT			14 /* IPV6 input processing */

/* Packet tags used in the FreeBSD network stack */
#define	PACKET_TAG_DUMMYNET			15 /* dummynet info */
#define	PACKET_TAG_IPFW				16 /* ipfw classification */
#define	PACKET_TAG_DIVERT			17 /* divert info */
#define	PACKET_TAG_IPFORWARD			18 /* ipforward info */

/* One more than the highest tag value defined above. */
#define	PACKET_TAG_MAX				19
449 extern int max_datalen; /* MHLEN - max_hdr */
450 extern int max_hdr; /* largest link + protocol header */
451 extern int max_linkhdr; /* largest link-level header */
452 extern int max_protohdr; /* largest protocol header */
453 extern struct mbpstat mb_statpcpu[]; /* Per-CPU allocation stats. */
454 extern struct mbstat mbstat; /* General mbuf stats/infos. */
455 extern int nmbclusters; /* Maximum number of clusters */
456 extern int nmbcnt; /* Scale kmem_map for counter space */
457 extern int nmbufs; /* Maximum number of mbufs */
458 extern int nsfbufs; /* Number of sendfile(2) bufs */
460 void _mext_free(struct mbuf *);
461 void m_adj(struct mbuf *, int);
462 struct mbuf *m_aux_add(struct mbuf *, int, int);
463 struct mbuf *m_aux_add2(struct mbuf *, int, int, void *);
464 void m_aux_delete(struct mbuf *, struct mbuf *);
465 struct mbuf *m_aux_find(struct mbuf *, int, int);
466 struct mbuf *m_aux_find2(struct mbuf *, int, int, void *);
467 void m_cat(struct mbuf *, struct mbuf *);
468 void m_chtype(struct mbuf *, short);
469 void m_clget(struct mbuf *, int);
470 void m_extadd(struct mbuf *, caddr_t, u_int,
471 void (*free)(void *, void *), void *, short, int);
472 void m_copyback(struct mbuf *, int, int, caddr_t);
473 void m_copydata(const struct mbuf *, int, int, caddr_t);
474 struct mbuf *m_copym(struct mbuf *, int, int, int);
475 struct mbuf *m_copypacket(struct mbuf *, int);
476 void m_copy_pkthdr(struct mbuf *to, struct mbuf *from);
477 struct mbuf *m_devget(char *, int, int, struct ifnet *,
478 void (*copy)(char *, caddr_t, u_int));
479 struct mbuf *m_dup(struct mbuf *, int);
480 struct mbuf *m_free(struct mbuf *);
481 void m_freem(struct mbuf *);
482 struct mbuf *m_get(int, short);
483 struct mbuf *m_get_clrd(int, short);
484 struct mbuf *m_getcl(int, short, int);
485 struct mbuf *m_gethdr(int, short);
486 struct mbuf *m_gethdr_clrd(int, short);
487 struct mbuf *m_getm(struct mbuf *, int, int, short);
488 struct mbuf *m_prepend(struct mbuf *, int, int);
489 void m_print(const struct mbuf *m);
490 struct mbuf *m_pulldown(struct mbuf *, int, int, int *);
491 struct mbuf *m_pullup(struct mbuf *, int);
492 struct mbuf *m_split(struct mbuf *, int, int);
495 #endif /* !_SYS_MBUF_H_ */