summaryrefslogtreecommitdiffstats
path: root/sys/netipsec/ipsec_mbuf.c
blob: 7fa291b5a9c73b2bfeffb38e2741570b8d216f9a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
/*	$FreeBSD$	*/

/*
 * IPsec-specific mbuf routines.
 */

#include "opt_param.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>

#include <net/route.h>
#include <netinet/in.h>

#include <netipsec/ipsec.h>

extern	struct mbuf *m_getptr(struct mbuf *, int, int *);

/*
 * Create a writable copy of the mbuf chain.  While doing this
 * we compact the chain with a goal of producing a chain with
 * at most two mbufs.  The second mbuf in this chain is likely
 * to be a cluster.  The primary purpose of this work is to create
 * a writable packet for encryption, compression, etc.  The
 * secondary goal is to linearize the data so the data can be
 * passed to crypto hardware in the most efficient manner possible.
 *
 * The chain m0 is modified in place: mbufs may be merged into their
 * predecessor, freed, or replaced by freshly allocated copies, so
 * the caller must use the returned pointer as the new head and must
 * not keep references to individual mbufs of the old chain.
 *
 * Returns the (possibly different) head of the chain, or NULL if an
 * allocation failed; in the failure case the whole chain has already
 * been freed here.
 */
struct mbuf *
m_clone(struct mbuf *m0)
{
	struct mbuf *m, *mprev;
	struct mbuf *n, *mfirst, *mlast;
	int len, off;

	KASSERT(m0 != NULL, ("m_clone: null mbuf"));

	/*
	 * mprev trails m by one mbuf.  Every path through the loop body
	 * leaves mprev non-NULL and pointing at the mbuf that precedes
	 * the next one to examine, which is why the loop advances with
	 * mprev->m_next rather than m->m_next (m may have been freed).
	 */
	mprev = NULL;
	for (m = m0; m != NULL; m = mprev->m_next) {
		/*
		 * Regular mbufs are ignored unless there's a cluster
		 * in front of it that we can use to coalesce.  We do
		 * the latter mainly so later clusters can be coalesced
		 * also w/o having to handle them specially (i.e. convert
		 * mbuf+cluster -> cluster).  This optimization is heavily
		 * influenced by the assumption that we're running over
		 * Ethernet where MCLBYTES is large enough that the max
		 * packet size will permit lots of coalescing into a
		 * single cluster.  This in turn permits efficient
		 * crypto operations, especially when using hardware.
		 */
		if ((m->m_flags & M_EXT) == 0) {
			if (mprev && (mprev->m_flags & M_EXT) &&
			    m->m_len <= M_TRAILINGSPACE(mprev)) {
				/* XXX: this ignores mbuf types */
				/* Append m's data to mprev and drop m. */
				memcpy(mtod(mprev, caddr_t) + mprev->m_len,
				       mtod(m, caddr_t), m->m_len);
				mprev->m_len += m->m_len;
				mprev->m_next = m->m_next;	/* unlink from chain */
				m_free(m);			/* reclaim mbuf */
				newipsecstat.ips_mbcoalesced++;
			} else {
				mprev = m;
			}
			continue;
		}
		/*
		 * Writable mbufs are left alone (for now).
		 */
		if (!MEXT_IS_REF(m)) {
			mprev = m;
			continue;
		}

		/*
		 * Not writable, replace with a copy or coalesce with
		 * the previous mbuf if possible (since we have to copy
		 * it anyway, we try to reduce the number of mbufs and
		 * clusters so that future work is easier).
		 */
		KASSERT(m->m_flags & M_EXT,
			("m_clone: m_flags 0x%x", m->m_flags));
		/* NB: we only coalesce into a cluster or larger */
		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
		    m->m_len <= M_TRAILINGSPACE(mprev)) {
			/* XXX: this ignores mbuf types */
			/* Append m's data to mprev and drop m. */
			memcpy(mtod(mprev, caddr_t) + mprev->m_len,
			       mtod(m, caddr_t), m->m_len);
			mprev->m_len += m->m_len;
			mprev->m_next = m->m_next;	/* unlink from chain */
			m_free(m);			/* reclaim mbuf */
			newipsecstat.ips_clcoalesced++;
			continue;
		}

		/*
		 * Allocate new space to hold the copy...
		 */
		/* XXX why can M_PKTHDR be set past the first mbuf? */
		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
			/*
			 * NB: if a packet header is present we must
			 * allocate the mbuf separately from any cluster
			 * because M_MOVE_PKTHDR will smash the data
			 * pointer and drop the M_EXT marker.
			 */
			MGETHDR(n, M_DONTWAIT, m->m_type);
			if (n == NULL) {
				m_freem(m0);
				return (NULL);
			}
			M_MOVE_PKTHDR(n, m);
			MCLGET(n, M_DONTWAIT);
			/* MCLGET failure is detected by M_EXT not being set. */
			if ((n->m_flags & M_EXT) == 0) {
				m_free(n);
				m_freem(m0);
				return (NULL);
			}
		} else {
			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
			if (n == NULL) {
				m_freem(m0);
				return (NULL);
			}
		}
		/*
		 * ... and copy the data.  We deal with jumbo mbufs
		 * (i.e. m_len > MCLBYTES) by splitting them into
		 * clusters.  We could just malloc a buffer and make
		 * it external but too many device drivers don't know
		 * how to break up the non-contiguous memory when
		 * doing DMA.
		 *
		 * mfirst/mlast track the head and tail of the run of
		 * new mbufs that replaces m; off is how far into m's
		 * data the copy has progressed.
		 */
		len = m->m_len;
		off = 0;
		mfirst = n;
		mlast = NULL;
		for (;;) {
			int cc = min(len, MCLBYTES);
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
			n->m_len = cc;
			if (mlast != NULL)
				mlast->m_next = n;
			mlast = n;	
			newipsecstat.ips_clcopied++;

			len -= cc;
			if (len <= 0)
				break;
			off += cc;

			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
			if (n == NULL) {
				/*
				 * The partial copy is not linked into the
				 * chain yet, so it is freed separately.
				 */
				m_freem(mfirst);
				m_freem(m0);
				return (NULL);
			}
		}
		/* n is the last new mbuf; hook the rest of the chain to it. */
		n->m_next = m->m_next; 
		if (mprev == NULL)
			m0 = mfirst;		/* new head of chain */
		else
			mprev->m_next = mfirst;	/* replace old mbuf */
		m_free(m);			/* release old mbuf */
		mprev = mfirst;
	}
	return (m0);
}

/*
 * Make space for a new header of length hlen at skip bytes
 * into the packet.  When doing this we allocate new mbufs only
 * when absolutely necessary.
 *
 * m0	head of the packet; its m_pkthdr.len is increased by hlen
 *	on success.
 * skip	byte offset into the packet at which the header goes.
 * hlen	size of the header to make room for; must be < MHLEN.
 * off	out: offset of the new header within the returned mbuf.
 *
 * The mbuf where the new header is to go is returned together
 * with an offset into the mbuf.  NULL is returned if skip lies
 * beyond the end of the chain, if an mbuf cannot be allocated, or
 * if the data that would have to be moved does not fit in a plain
 * mbuf.  If NULL is returned then the mbuf chain may have been
 * modified; the caller is assumed to always free the chain.
 */
struct mbuf *
m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
{
	struct mbuf *m;
	unsigned remain;

	KASSERT(m0 != NULL, ("m_makespace: null mbuf"));
	KASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen));

	/* Locate the mbuf that contains the insertion point. */
	for (m = m0; m && skip > m->m_len; m = m->m_next)
		skip -= m->m_len;
	if (m == NULL)
		return (NULL);
	/*
	 * At this point skip is the offset into the mbuf m
	 * where the new header should be placed.  Figure out
	 * if there's space to insert the new header.  If so,
	 * and copying the remainder makes sense then do so.
	 * Otherwise insert a new mbuf in the chain, splitting
	 * the contents of m as needed.
	 */
	remain = m->m_len - skip;		/* data to move */
	if (hlen > M_TRAILINGSPACE(m)) {
		struct mbuf *n;

		/* XXX code doesn't handle clusters XXX */
		KASSERT(remain < MLEN,
			("m_makespace: remainder too big: %u", remain));
		/*
		 * The assertion above is only a diagnostic aid; when it
		 * is not compiled in, a remainder larger than a plain
		 * mbuf's data area would overflow the memcpy targets
		 * below.  Fail the request instead of corrupting memory;
		 * the caller frees the chain on a NULL return.
		 */
		if (remain > MLEN)
			return (NULL);
		/*
		 * Not enough space in m, split the contents
		 * of m, inserting new mbufs as required.
		 *
		 * NB: this ignores mbuf types.
		 */
		MGET(n, M_DONTWAIT, MT_DATA);
		if (n == NULL)
			return (NULL);
		n->m_next = m->m_next;		/* splice new mbuf */
		m->m_next = n;
		newipsecstat.ips_mbinserted++;
		if (hlen <= M_TRAILINGSPACE(m) + remain) {
			/*
			 * New header fits in the old mbuf if we copy
			 * the remainder; just do the copy to the new
			 * mbuf and we're good to go.
			 */
			memcpy(mtod(n, caddr_t),
			       mtod(m, caddr_t) + skip, remain);
			n->m_len = remain;
			m->m_len = skip + hlen;
			*off = skip;
		} else {
			/*
			 * No space in the old mbuf for the new header.
			 * Make space in the new mbuf and check the
			 * remainder'd data fits too.  If not then we
			 * must allocate an additional mbuf (yech).
			 */
			n->m_len = 0;
			if (remain + hlen > M_TRAILINGSPACE(n)) {
				struct mbuf *n2;

				MGET(n2, M_DONTWAIT, MT_DATA);
				/* NB: new mbuf is on chain, let caller free */
				if (n2 == NULL)
					return (NULL);
				n2->m_len = 0;
				memcpy(mtod(n2, caddr_t),
				       mtod(m, caddr_t) + skip, remain);
				n2->m_len = remain;
				/* splice in second mbuf */
				n2->m_next = n->m_next;
				n->m_next = n2;
				newipsecstat.ips_mbinserted++;
			} else {
				/* Header first, then the moved data. */
				memcpy(mtod(n, caddr_t) + hlen,
				       mtod(m, caddr_t) + skip, remain);
				n->m_len += remain;
			}
			m->m_len -= remain;
			n->m_len += hlen;
			m = n;			/* header is at front ... */
			*off = 0;		/* ... of new mbuf */
		}
	} else {
		/*
		 * Copy the remainder to the back of the mbuf
		 * so there's space to write the new header.
		 */
		/* XXX can this be memcpy? does it handle overlap? */
		ovbcopy(mtod(m, caddr_t) + skip,
			mtod(m, caddr_t) + skip + hlen, remain);
		m->m_len += hlen;
		*off = skip;
	}
	m0->m_pkthdr.len += hlen;		/* adjust packet length */
	return m;
}

/*
 * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header
 * length is updated, and a pointer to the first byte of the padding
 * (which is guaranteed to be all in one mbuf) is returned.
 *
 * m	head of the packet; m->m_pkthdr.len must match the chain.
 * n	number of pad bytes to append; must be > 0.
 *
 * On any failure (bad n, chain length not matching the packet
 * header length, or no mbuf available) the whole chain <m> is
 * freed and NULL is returned.
 */
caddr_t
m_pad(struct mbuf *m, int n)
{
	struct mbuf *m0, *m1;
	int len, pad;
	caddr_t retval;

	if (n <= 0) {  /* No stupid arguments. */
		DPRINTF(("m_pad: pad length invalid (%d)\n", n));
		m_freem(m);
		return NULL;
	}

	len = m->m_pkthdr.len;
	pad = n;
	m0 = m;

	/* Walk to the mbuf that holds the last byte of the packet. */
	while (m0->m_len < len) {
		KASSERT(m0->m_next != NULL,
		    ("m_pad: m0 null, len %u m_len %u", len, m0->m_len));/*XXX*/
		/*
		 * Chain is shorter than the packet header claims.  Stop
		 * here instead of dereferencing a NULL m_next; the
		 * length check below then rejects the packet.
		 */
		if (m0->m_next == NULL)
			break;
		len -= m0->m_len;
		m0 = m0->m_next;
	}

	if (m0->m_len != len) {
		DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n",
		    m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len));

		m_freem(m);
		return NULL;
	}

	/* Check for zero-length trailing mbufs, and find the last one. */
	for (m1 = m0; m1->m_next; m1 = m1->m_next) {
		if (m1->m_next->m_len != 0) {
			DPRINTF(("m_pad: length mismatch (should be %d "
			    "instead of %d)\n",
			    m->m_pkthdr.len,
			    m->m_pkthdr.len + m1->m_next->m_len));

			m_freem(m);
			return NULL;
		}

		m0 = m1->m_next;
	}

	if (pad > M_TRAILINGSPACE(m0)) {
		/* Add an mbuf to the chain. */
		MGET(m1, M_DONTWAIT, MT_DATA);
		if (m1 == NULL) {
			/*
			 * Free the whole packet from its head.  m0 is the
			 * tail here; freeing only that would leave the
			 * previous mbuf pointing at freed memory and leak
			 * the rest of the chain.
			 */
			m_freem(m);
			DPRINTF(("m_pad: unable to get extra mbuf\n"));
			return NULL;
		}

		m0->m_next = m1;
		m0 = m1;
		m0->m_len = 0;
	}

	/* Padding starts right after the current data of the last mbuf. */
	retval = m0->m_data + m0->m_len;
	m0->m_len += pad;
	m->m_pkthdr.len += pad;

	return retval;
}

/*
 * Remove hlen bytes of data at offset skip in the packet.  This is
 * used by the protocols to strip protocol headers and associated
 * data (e.g. IV, authenticator) on input.
 *
 * m	head of the packet; m->m_pkthdr.len is reduced by hlen.
 * skip	byte offset of the data to remove.
 * hlen	number of bytes to remove.
 *
 * Returns 0 on success, or EINVAL if m_getptr cannot locate
 * offset skip in the chain.
 */
int
m_striphdr(struct mbuf *m, int skip, int hlen)
{
	struct mbuf *m1;
	int roff, adjlen;

	/* Find beginning of header */
	m1 = m_getptr(m, skip, &roff);
	if (m1 == NULL)
		return (EINVAL);

	/* Remove the header and associated data from the mbuf. */
	if (roff == 0) {
		/* The header was at the beginning of the mbuf */
		newipsecstat.ips_input_front++;
		m_adj(m1, hlen);
		/*
		 * The packet length is fixed up by hand only when m1 is
		 * not the packet-header mbuf; otherwise m_adj is relied
		 * on to have done it.
		 */
		if ((m1->m_flags & M_PKTHDR) == 0)
			m->m_pkthdr.len -= hlen;
	} else if (roff + hlen >= m1->m_len) {
		struct mbuf *mo;

		/*
		 * Part or all of the header is at the end of this mbuf,
		 * so first let's remove the remainder of the header from
		 * the beginning of the remainder of the mbuf chain, if any.
		 */
		newipsecstat.ips_input_end++;
		if (roff + hlen > m1->m_len) {
			adjlen = roff + hlen - m1->m_len;

			/* Adjust the next mbuf by the remainder */
			m_adj(m1->m_next, adjlen);

			/* The second mbuf is guaranteed not to have a pkthdr... */
			m->m_pkthdr.len -= adjlen;
		}

		/* Now, let's unlink the mbuf chain for a second...*/
		mo = m1->m_next;
		m1->m_next = NULL;

		/*
		 * ...and trim the end of the first part of the chain...sick
		 *
		 * The trim length must be captured before m_adj runs:
		 * m_adj shrinks m1->m_len to roff, so computing
		 * (m1->m_len - roff) afterwards always yields 0 and the
		 * packet length would be left too large.
		 */
		adjlen = m1->m_len - roff;
		m_adj(m1, -adjlen);
		if ((m1->m_flags & M_PKTHDR) == 0)
			m->m_pkthdr.len -= adjlen;

		/* Finally, let's relink */
		m1->m_next = mo;
	} else {
		/*
		 * The header lies in the "middle" of the mbuf; copy
		 * the remainder of the mbuf down over the header.
		 */
		newipsecstat.ips_input_middle++;
		bcopy(mtod(m1, u_char *) + roff + hlen,
		      mtod(m1, u_char *) + roff,
		      m1->m_len - (roff + hlen));
		m1->m_len -= hlen;
		m->m_pkthdr.len -= hlen;
	}
	return (0);
}

/*
 * Diagnostic routine to check mbuf alignment as required by the
 * crypto device drivers (that use DMA).
 *
 * where	label printed at the start of the report line.
 * m0		head of the chain to inspect.
 * off, len	region of the packet to check.
 *
 * For the region [off, off+len) this reports the first segment whose
 * start address is not 4-byte aligned, or the first non-final segment
 * whose length is not a multiple of 4, and then dumps the data
 * address and length of every mbuf in the chain.  Nothing is printed
 * if m_getptr cannot locate off in the chain.
 */
void
m_checkalignment(const char* where, struct mbuf *m0, int off, int len)
{
	int roff;
	struct mbuf *m = m_getptr(m0, off, &roff);
	caddr_t addr;

	if (m == NULL)
		return;
	printf("%s (off %u len %u): ", where, off, len);
	addr = mtod(m, caddr_t) + roff;
	do {
		int mlen;

		if (((uintptr_t) addr) & 3) {
			printf("addr misaligned %p,", addr);
			break;
		}
		/*
		 * Only the bytes from addr to the end of this mbuf belong
		 * to the region.  In the first mbuf that excludes the
		 * leading roff bytes; later mbufs are used from offset 0.
		 */
		mlen = m->m_len - roff;
		roff = 0;
		if (mlen > len)
			mlen = len;
		len -= mlen;
		/* Every segment but the last must be a multiple of 4. */
		if (len && (mlen & 3)) {
			printf("len mismatch %u,", mlen);
			break;
		}
		m = m->m_next;
		addr = m ? mtod(m, caddr_t) : NULL;
	} while (m && len > 0);
	for (m = m0; m; m = m->m_next)
		printf(" [%p:%u]", mtod(m, caddr_t), m->m_len);
	printf("\n");
}
OpenPOWER on IntegriCloud