1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
|
/*-
* Copyright (c) 1998 Doug Rabson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _MACHINE_ATOMIC_H_
#define _MACHINE_ATOMIC_H_
#ifndef _SYS_CDEFS_H_
#error this file needs sys/cdefs.h as a prerequisite
#endif
#define mb() __asm __volatile("mfence;" : : : "memory")
#define wmb() __asm __volatile("sfence;" : : : "memory")
#define rmb() __asm __volatile("lfence;" : : : "memory")
/*
* Various simple operations on memory, each of which is atomic in the
* presence of interrupts and multiple processors.
*
* atomic_set_char(P, V) (*(u_char *)(P) |= (V))
* atomic_clear_char(P, V) (*(u_char *)(P) &= ~(V))
* atomic_add_char(P, V) (*(u_char *)(P) += (V))
* atomic_subtract_char(P, V) (*(u_char *)(P) -= (V))
*
* atomic_set_short(P, V) (*(u_short *)(P) |= (V))
* atomic_clear_short(P, V) (*(u_short *)(P) &= ~(V))
* atomic_add_short(P, V) (*(u_short *)(P) += (V))
* atomic_subtract_short(P, V) (*(u_short *)(P) -= (V))
*
* atomic_set_int(P, V) (*(u_int *)(P) |= (V))
* atomic_clear_int(P, V) (*(u_int *)(P) &= ~(V))
* atomic_add_int(P, V) (*(u_int *)(P) += (V))
* atomic_subtract_int(P, V) (*(u_int *)(P) -= (V))
* atomic_swap_int(P, V) (return (*(u_int *)(P)); *(u_int *)(P) = (V);)
* atomic_readandclear_int(P) (return (*(u_int *)(P)); *(u_int *)(P) = 0;)
*
* atomic_set_long(P, V) (*(u_long *)(P) |= (V))
* atomic_clear_long(P, V) (*(u_long *)(P) &= ~(V))
* atomic_add_long(P, V) (*(u_long *)(P) += (V))
* atomic_subtract_long(P, V) (*(u_long *)(P) -= (V))
* atomic_swap_long(P, V) (return (*(u_long *)(P)); *(u_long *)(P) = (V);)
* atomic_readandclear_long(P) (return (*(u_long *)(P)); *(u_long *)(P) = 0;)
*/
/*
* The above functions are expanded inline in the statically-linked
* kernel. Lock prefixes are generated if an SMP kernel is being
* built.
*
* Kernel modules call real functions which are built into the kernel.
* This allows kernel modules to be portable between UP and SMP systems.
*/
#if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM)
#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \
void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v); \
void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
int atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src);
int atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src);
u_int atomic_fetchadd_int(volatile u_int *p, u_int v);
u_long atomic_fetchadd_long(volatile u_long *p, u_long v);
int atomic_testandset_int(volatile u_int *p, u_int v);
int atomic_testandset_long(volatile u_long *p, u_int v);
#define ATOMIC_LOAD(TYPE, LOP) \
u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p)
#define ATOMIC_STORE(TYPE) \
void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
#else /* !KLD_MODULE && __GNUCLIKE_ASM */
/*
* For userland, always use lock prefixes so that the binaries will run
* on both SMP and !SMP systems.
*/
#if defined(SMP) || !defined(_KERNEL)
#define MPLOCKED "lock ; "
#else
#define MPLOCKED
#endif
/*
* The assembly is volatilized to avoid code chunk removal by the compiler.
* GCC aggressively reorders operations and memory clobbering is necessary
* in order to avoid that for memory barriers.
*/
#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \
static __inline void \
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{ \
__asm __volatile(MPLOCKED OP \
: "+m" (*p) \
: CONS (V) \
: "cc"); \
} \
\
static __inline void \
atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{ \
__asm __volatile(MPLOCKED OP \
: "+m" (*p) \
: CONS (V) \
: "memory", "cc"); \
} \
struct __hack
/*
* Atomic compare and set, used by the mutex functions
*
* if (*dst == expect) *dst = src (all 32 bit words)
*
* Returns 0 on failure, non-zero on success
*/
static __inline int
atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src)
{
u_char res;
__asm __volatile(
" " MPLOCKED " "
" cmpxchgl %3,%1 ; "
" sete %0 ; "
"# atomic_cmpset_int"
: "=q" (res), /* 0 */
"+m" (*dst), /* 1 */
"+a" (expect) /* 2 */
: "r" (src) /* 3 */
: "memory", "cc");
return (res);
}
static __inline int
atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src)
{
u_char res;
__asm __volatile(
" " MPLOCKED " "
" cmpxchgq %3,%1 ; "
" sete %0 ; "
"# atomic_cmpset_long"
: "=q" (res), /* 0 */
"+m" (*dst), /* 1 */
"+a" (expect) /* 2 */
: "r" (src) /* 3 */
: "memory", "cc");
return (res);
}
/*
* Atomically add the value of v to the integer pointed to by p and return
* the previous value of *p.
*/
static __inline u_int
atomic_fetchadd_int(volatile u_int *p, u_int v)
{
__asm __volatile(
" " MPLOCKED " "
" xaddl %0,%1 ; "
"# atomic_fetchadd_int"
: "+r" (v), /* 0 */
"+m" (*p) /* 1 */
: : "cc");
return (v);
}
/*
* Atomically add the value of v to the long integer pointed to by p and return
* the previous value of *p.
*/
static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long v)
{
__asm __volatile(
" " MPLOCKED " "
" xaddq %0,%1 ; "
"# atomic_fetchadd_long"
: "+r" (v), /* 0 */
"+m" (*p) /* 1 */
: : "cc");
return (v);
}
static __inline int
atomic_testandset_int(volatile u_int *p, u_int v)
{
u_char res;
__asm __volatile(
" " MPLOCKED " "
" btsl %2,%1 ; "
" setc %0 ; "
"# atomic_testandset_int"
: "=q" (res), /* 0 */
"+m" (*p) /* 1 */
: "Ir" (v & 0x1f) /* 2 */
: "cc");
return (res);
}
static __inline int
atomic_testandset_long(volatile u_long *p, u_int v)
{
u_char res;
__asm __volatile(
" " MPLOCKED " "
" btsq %2,%1 ; "
" setc %0 ; "
"# atomic_testandset_long"
: "=q" (res), /* 0 */
"+m" (*p) /* 1 */
: "Jr" ((u_long)(v & 0x3f)) /* 2 */
: "cc");
return (res);
}
/*
* We assume that a = b will do atomic loads and stores. Due to the
* IA32 memory model, a simple store guarantees release semantics.
*
* However, loads may pass stores, so for atomic_load_acq we have to
* ensure a Store/Load barrier to do the load in SMP kernels. We use
* "lock cmpxchg" as recommended by the AMD Software Optimization
* Guide, and not mfence. For UP kernels, however, the cache of the
* single processor is always consistent, so we only need to take care
* of the compiler.
*/
#define ATOMIC_STORE(TYPE) \
static __inline void \
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{ \
__compiler_membar(); \
*p = v; \
} \
struct __hack
#if defined(_KERNEL) && !defined(SMP)
#define ATOMIC_LOAD(TYPE, LOP) \
static __inline u_##TYPE \
atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
{ \
u_##TYPE tmp; \
\
tmp = *p; \
__compiler_membar(); \
return (tmp); \
} \
struct __hack
#else /* !(_KERNEL && !SMP) */
#define ATOMIC_LOAD(TYPE, LOP) \
static __inline u_##TYPE \
atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
{ \
u_##TYPE res; \
\
__asm __volatile(MPLOCKED LOP \
: "=a" (res), /* 0 */ \
"+m" (*p) /* 1 */ \
: : "memory", "cc"); \
return (res); \
} \
struct __hack
#endif /* _KERNEL && !SMP */
#endif /* KLD_MODULE || !__GNUCLIKE_ASM */
ATOMIC_ASM(set, char, "orb %b1,%0", "iq", v);
ATOMIC_ASM(clear, char, "andb %b1,%0", "iq", ~v);
ATOMIC_ASM(add, char, "addb %b1,%0", "iq", v);
ATOMIC_ASM(subtract, char, "subb %b1,%0", "iq", v);
ATOMIC_ASM(set, short, "orw %w1,%0", "ir", v);
ATOMIC_ASM(clear, short, "andw %w1,%0", "ir", ~v);
ATOMIC_ASM(add, short, "addw %w1,%0", "ir", v);
ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir", v);
ATOMIC_ASM(set, int, "orl %1,%0", "ir", v);
ATOMIC_ASM(clear, int, "andl %1,%0", "ir", ~v);
ATOMIC_ASM(add, int, "addl %1,%0", "ir", v);
ATOMIC_ASM(subtract, int, "subl %1,%0", "ir", v);
ATOMIC_ASM(set, long, "orq %1,%0", "ir", v);
ATOMIC_ASM(clear, long, "andq %1,%0", "ir", ~v);
ATOMIC_ASM(add, long, "addq %1,%0", "ir", v);
ATOMIC_ASM(subtract, long, "subq %1,%0", "ir", v);
ATOMIC_LOAD(char, "cmpxchgb %b0,%1");
ATOMIC_LOAD(short, "cmpxchgw %w0,%1");
ATOMIC_LOAD(int, "cmpxchgl %0,%1");
ATOMIC_LOAD(long, "cmpxchgq %0,%1");
ATOMIC_STORE(char);
ATOMIC_STORE(short);
ATOMIC_STORE(int);
ATOMIC_STORE(long);
#undef ATOMIC_ASM
#undef ATOMIC_LOAD
#undef ATOMIC_STORE
#ifndef WANT_FUNCTIONS
/* Read the current value and store a new value in the destination. */
#ifdef __GNUCLIKE_ASM
static __inline u_int
atomic_swap_int(volatile u_int *p, u_int v)
{
__asm __volatile(
" xchgl %1,%0 ; "
"# atomic_swap_int"
: "+r" (v), /* 0 */
"+m" (*p)); /* 1 */
return (v);
}
static __inline u_long
atomic_swap_long(volatile u_long *p, u_long v)
{
__asm __volatile(
" xchgq %1,%0 ; "
"# atomic_swap_long"
: "+r" (v), /* 0 */
"+m" (*p)); /* 1 */
return (v);
}
#else /* !__GNUCLIKE_ASM */
u_int atomic_swap_int(volatile u_int *p, u_int v);
u_long atomic_swap_long(volatile u_long *p, u_long v);
#endif /* __GNUCLIKE_ASM */
#define atomic_set_acq_char atomic_set_barr_char
#define atomic_set_rel_char atomic_set_barr_char
#define atomic_clear_acq_char atomic_clear_barr_char
#define atomic_clear_rel_char atomic_clear_barr_char
#define atomic_add_acq_char atomic_add_barr_char
#define atomic_add_rel_char atomic_add_barr_char
#define atomic_subtract_acq_char atomic_subtract_barr_char
#define atomic_subtract_rel_char atomic_subtract_barr_char
#define atomic_set_acq_short atomic_set_barr_short
#define atomic_set_rel_short atomic_set_barr_short
#define atomic_clear_acq_short atomic_clear_barr_short
#define atomic_clear_rel_short atomic_clear_barr_short
#define atomic_add_acq_short atomic_add_barr_short
#define atomic_add_rel_short atomic_add_barr_short
#define atomic_subtract_acq_short atomic_subtract_barr_short
#define atomic_subtract_rel_short atomic_subtract_barr_short
#define atomic_set_acq_int atomic_set_barr_int
#define atomic_set_rel_int atomic_set_barr_int
#define atomic_clear_acq_int atomic_clear_barr_int
#define atomic_clear_rel_int atomic_clear_barr_int
#define atomic_add_acq_int atomic_add_barr_int
#define atomic_add_rel_int atomic_add_barr_int
#define atomic_subtract_acq_int atomic_subtract_barr_int
#define atomic_subtract_rel_int atomic_subtract_barr_int
#define atomic_cmpset_acq_int atomic_cmpset_int
#define atomic_cmpset_rel_int atomic_cmpset_int
#define atomic_set_acq_long atomic_set_barr_long
#define atomic_set_rel_long atomic_set_barr_long
#define atomic_clear_acq_long atomic_clear_barr_long
#define atomic_clear_rel_long atomic_clear_barr_long
#define atomic_add_acq_long atomic_add_barr_long
#define atomic_add_rel_long atomic_add_barr_long
#define atomic_subtract_acq_long atomic_subtract_barr_long
#define atomic_subtract_rel_long atomic_subtract_barr_long
#define atomic_cmpset_acq_long atomic_cmpset_long
#define atomic_cmpset_rel_long atomic_cmpset_long
#define atomic_readandclear_int(p) atomic_swap_int(p, 0)
#define atomic_readandclear_long(p) atomic_swap_long(p, 0)
/* Operations on 8-bit bytes. */
#define atomic_set_8 atomic_set_char
#define atomic_set_acq_8 atomic_set_acq_char
#define atomic_set_rel_8 atomic_set_rel_char
#define atomic_clear_8 atomic_clear_char
#define atomic_clear_acq_8 atomic_clear_acq_char
#define atomic_clear_rel_8 atomic_clear_rel_char
#define atomic_add_8 atomic_add_char
#define atomic_add_acq_8 atomic_add_acq_char
#define atomic_add_rel_8 atomic_add_rel_char
#define atomic_subtract_8 atomic_subtract_char
#define atomic_subtract_acq_8 atomic_subtract_acq_char
#define atomic_subtract_rel_8 atomic_subtract_rel_char
#define atomic_load_acq_8 atomic_load_acq_char
#define atomic_store_rel_8 atomic_store_rel_char
/* Operations on 16-bit words. */
#define atomic_set_16 atomic_set_short
#define atomic_set_acq_16 atomic_set_acq_short
#define atomic_set_rel_16 atomic_set_rel_short
#define atomic_clear_16 atomic_clear_short
#define atomic_clear_acq_16 atomic_clear_acq_short
#define atomic_clear_rel_16 atomic_clear_rel_short
#define atomic_add_16 atomic_add_short
#define atomic_add_acq_16 atomic_add_acq_short
#define atomic_add_rel_16 atomic_add_rel_short
#define atomic_subtract_16 atomic_subtract_short
#define atomic_subtract_acq_16 atomic_subtract_acq_short
#define atomic_subtract_rel_16 atomic_subtract_rel_short
#define atomic_load_acq_16 atomic_load_acq_short
#define atomic_store_rel_16 atomic_store_rel_short
/* Operations on 32-bit double words. */
#define atomic_set_32 atomic_set_int
#define atomic_set_acq_32 atomic_set_acq_int
#define atomic_set_rel_32 atomic_set_rel_int
#define atomic_clear_32 atomic_clear_int
#define atomic_clear_acq_32 atomic_clear_acq_int
#define atomic_clear_rel_32 atomic_clear_rel_int
#define atomic_add_32 atomic_add_int
#define atomic_add_acq_32 atomic_add_acq_int
#define atomic_add_rel_32 atomic_add_rel_int
#define atomic_subtract_32 atomic_subtract_int
#define atomic_subtract_acq_32 atomic_subtract_acq_int
#define atomic_subtract_rel_32 atomic_subtract_rel_int
#define atomic_load_acq_32 atomic_load_acq_int
#define atomic_store_rel_32 atomic_store_rel_int
#define atomic_cmpset_32 atomic_cmpset_int
#define atomic_cmpset_acq_32 atomic_cmpset_acq_int
#define atomic_cmpset_rel_32 atomic_cmpset_rel_int
#define atomic_swap_32 atomic_swap_int
#define atomic_readandclear_32 atomic_readandclear_int
#define atomic_fetchadd_32 atomic_fetchadd_int
#define atomic_testandset_32 atomic_testandset_int
/* Operations on 64-bit quad words. */
#define atomic_set_64 atomic_set_long
#define atomic_set_acq_64 atomic_set_acq_long
#define atomic_set_rel_64 atomic_set_rel_long
#define atomic_clear_64 atomic_clear_long
#define atomic_clear_acq_64 atomic_clear_acq_long
#define atomic_clear_rel_64 atomic_clear_rel_long
#define atomic_add_64 atomic_add_long
#define atomic_add_acq_64 atomic_add_acq_long
#define atomic_add_rel_64 atomic_add_rel_long
#define atomic_subtract_64 atomic_subtract_long
#define atomic_subtract_acq_64 atomic_subtract_acq_long
#define atomic_subtract_rel_64 atomic_subtract_rel_long
#define atomic_load_acq_64 atomic_load_acq_long
#define atomic_store_rel_64 atomic_store_rel_long
#define atomic_cmpset_64 atomic_cmpset_long
#define atomic_cmpset_acq_64 atomic_cmpset_acq_long
#define atomic_cmpset_rel_64 atomic_cmpset_rel_long
#define atomic_swap_64 atomic_swap_long
#define atomic_readandclear_64 atomic_readandclear_long
#define atomic_testandset_64 atomic_testandset_long
/* Operations on pointers. */
#define atomic_set_ptr atomic_set_long
#define atomic_set_acq_ptr atomic_set_acq_long
#define atomic_set_rel_ptr atomic_set_rel_long
#define atomic_clear_ptr atomic_clear_long
#define atomic_clear_acq_ptr atomic_clear_acq_long
#define atomic_clear_rel_ptr atomic_clear_rel_long
#define atomic_add_ptr atomic_add_long
#define atomic_add_acq_ptr atomic_add_acq_long
#define atomic_add_rel_ptr atomic_add_rel_long
#define atomic_subtract_ptr atomic_subtract_long
#define atomic_subtract_acq_ptr atomic_subtract_acq_long
#define atomic_subtract_rel_ptr atomic_subtract_rel_long
#define atomic_load_acq_ptr atomic_load_acq_long
#define atomic_store_rel_ptr atomic_store_rel_long
#define atomic_cmpset_ptr atomic_cmpset_long
#define atomic_cmpset_acq_ptr atomic_cmpset_acq_long
#define atomic_cmpset_rel_ptr atomic_cmpset_rel_long
#define atomic_swap_ptr atomic_swap_long
#define atomic_readandclear_ptr atomic_readandclear_long
#endif /* !WANT_FUNCTIONS */
#endif /* !_MACHINE_ATOMIC_H_ */
|