/*
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.org> wrote this file.  As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 *
 * $FreeBSD$
 *
 * Functions for locking between CPUs in an SMP system.
 *
 * This is an "exclusive counting semaphore".  This means that it can be
 * free (0xffffffff) or be owned by a CPU (0xXXYYYYYY where XX is CPU-id
 * and YYYYYY is the count).
 *
 * Contrary to most implementations around, this one is entirely atomic:
 * The attempt to seize/release the semaphore and the increment/decrement
 * is done in one atomic operation.  This way we are safe from all kinds
 * of weird reentrancy situations.
 */
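
/*
 * In C terms the lock word described above looks roughly like this.
 * This is only an illustrative sketch; the real FREE_LOCK, CPU_FIELD
 * and COUNT_FIELD definitions live in the machine headers:
 *
 *	#define FREE_LOCK	0xffffffff	// nobody owns the lock
 *	#define CPU_FIELD	0xff000000	// XX: owning cpu's lockid
 *	#define COUNT_FIELD	0x00ffffff	// YYYYYY: recursion count
 *
 * A held lock combines the pre-shifted cpu id with the count, giving
 * the 0xXXYYYYYY value: value = cpu_lockid | count.
 */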

#include <machine/asmacros.h>
#include <machine/smptests.h>		/** GRAB_LOPRIO */
#include <machine/apic.h>

#define GLPROFILE_NOT

#ifdef CHEAP_TPR

/* we assume that the 'reserved bits' can be written with zeros */

#else /* CHEAP_TPR */

#error HEADS UP: this code needs work
/*
 * The APIC doc says that reserved bits must be written with whatever
 * value they currently contain, i.e. you should read, modify, write,
 * instead of just writing new values to the TPR register.  Current
 * silicon seems happy with just writing.  If the behaviour of the
 * silicon changes, all code that accesses lapic_tpr must be modified.
 * The last version to contain such code was:
 *   Id: mplock.s,v 1.17 1997/08/10 20:59:07 fsmp Exp
 */

#endif /* CHEAP_TPR */
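
/*
 * For illustration, the read-modify-write sequence the APIC doc calls
 * for versus the direct store that CHEAP_TPR relies on.  This is only
 * a sketch; TPR_PRIO_MASK is a hypothetical mask standing in for the
 * architecturally defined priority bits:
 *
 *	u_int tpr;
 *
 *	tpr = lapic_tpr;			 // read
 *	tpr = (tpr & ~TPR_PRIO_MASK) | new_prio; // modify, keep reserved
 *	lapic_tpr = tpr;			 // write
 *
 *	lapic_tpr = new_prio;			 // CHEAP_TPR: just write
 */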

#ifdef GRAB_LOPRIO
/*
 * Claim LOWest PRIOrity, i.e. attempt to grab ALL INTerrupts.
 */

/* after 1st acquire of lock we grab all hardware INTs */
#define GRAB_HWI	movl	$ALLHWI_LEVEL, lapic_tpr

/* after last release of lock give up LOW PRIO (ie, arbitrate INTerrupts) */
#define ARB_HWI		movl	$LOPRIO_LEVEL, lapic_tpr /* CHEAP_TPR */

#else /* GRAB_LOPRIO */

#define GRAB_HWI	/* nop */
#define ARB_HWI		/* nop */

#endif /* GRAB_LOPRIO */


	.text

#ifdef SMP 

/***********************************************************************
 *  void MPgetlock_edx(unsigned int *lock : %edx)
 *  ----------------------------------
 *  Destroys	%eax, %ecx.  %edx must hold lock argument.
 *
 *  Grabs hardware interrupts on first acquire.
 *
 *  NOTE: Neither serialization nor a locked instruction is required if
 *  we already hold the lock, since no other cpu can modify a lock that
 *  we own.
 */

NON_GPROF_ENTRY(MPgetlock_edx)
1:
	movl	(%edx), %eax		/* Get current contents of lock */
	movl	%eax, %ecx
	andl	$CPU_FIELD,%ecx
	cmpl	_cpu_lockid, %ecx	/* Do we already own the lock? */
	jne	2f
	incl	%eax			/* yes, just bump the count */
	movl	%eax, (%edx)		/* serialization not required */
	ret
2:
	movl	$FREE_LOCK, %eax	/* lock must be free */
	movl	_cpu_lockid, %ecx
	incl	%ecx
	lock
	cmpxchg	%ecx, (%edx)		/* if (%edx) == %eax, set (%edx) = %ecx */
#ifdef GLPROFILE
	jne	3f
	incl	_gethits2
#else
	jne	1b
#endif /* GLPROFILE */
	GRAB_HWI			/* 1st acquire, grab hw INTs */
	ret
#ifdef GLPROFILE
3:
	incl	_gethits3
	jmp	1b
#endif
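
/*
 * For reference, a C sketch of the acquire logic above.  This is an
 * illustration only; cpu_lockid is the pre-shifted logical cpu id and
 * atomic_cmpset_int() stands in for the locked cmpxchg:
 *
 *	void
 *	MPgetlock(volatile u_int *lock)
 *	{
 *		u_int v;
 *
 *		for (;;) {
 *			v = *lock;
 *			if ((v & CPU_FIELD) == cpu_lockid) {
 *				*lock = v + 1;	// recursive: plain store
 *				return;
 *			}
 *			// assume it is free; try to claim with count 1
 *			if (atomic_cmpset_int(lock, FREE_LOCK,
 *			    cpu_lockid + 1)) {
 *				GRAB_HWI;	// 1st acquire: grab hw INTs
 *				return;
 *			}
 *		}
 *	}
 */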

/***********************************************************************
 *  int MPtrylock(unsigned int *lock)
 *  ---------------------------------
 *  Destroys	%eax, %ecx and %edx.
 *  Returns	1 if the lock was successfully acquired
 */

NON_GPROF_ENTRY(MPtrylock)
	movl	4(%esp), %edx		/* Get the address of the lock */

	movl	$FREE_LOCK, %eax	/* Assume it's free */
	movl	_cpu_lockid, %ecx	/* - get pre-shifted logical cpu id */
	incl	%ecx			/* - new count is one */
	lock
	cmpxchg	%ecx, (%edx)		/* - try it atomically */
	jne	1f			/* ...do not collect $200 */
#ifdef GLPROFILE
	incl	_tryhits2
#endif /* GLPROFILE */
	GRAB_HWI			/* 1st acquire, grab hw INTs */
	movl	$1, %eax
	ret
1:
	movl	(%edx), %eax		/* Try to see if we have it already */
	andl	$COUNT_FIELD, %eax	/* - get count */
	movl	_cpu_lockid, %ecx	/* - get pre-shifted logical cpu id */
	orl	%ecx, %eax		/* - combine them */
	movl	%eax, %ecx
	incl	%ecx			/* - new count is one more */
	lock
	cmpxchg	%ecx, (%edx)		/* - try it atomically */
	jne	2f			/* - miss */
#ifdef GLPROFILE
	incl	_tryhits
#endif /* GLPROFILE */
	movl	$1, %eax
	ret
2:
#ifdef GLPROFILE
	incl	_tryhits3
#endif /* GLPROFILE */
	movl	$0, %eax
	ret
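
/*
 * For reference, a C sketch of the trylock logic above, under the same
 * illustrative conventions as the MPgetlock sketch:
 *
 *	int
 *	MPtrylock(volatile u_int *lock)
 *	{
 *		u_int v;
 *
 *		// fast path: grab a free lock with a count of one
 *		if (atomic_cmpset_int(lock, FREE_LOCK, cpu_lockid + 1)) {
 *			GRAB_HWI;		// 1st acquire: grab hw INTs
 *			return (1);
 *		}
 *		// recursive path: only succeeds if we are the owner,
 *		// since the cmpset fails when CPU_FIELD is not ours
 *		v = (*lock & COUNT_FIELD) | cpu_lockid;
 *		if (atomic_cmpset_int(lock, v, v + 1))
 *			return (1);
 *		return (0);			// somebody else owns it
 *	}
 */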


/***********************************************************************
 *  void MPrellock_edx(unsigned int *lock : %edx)
 *  ----------------------------------
 *  Destroys	%ecx; argument must be in %edx
 *
 *  SERIALIZATION NOTE!
 *
 *  After a lot of arguing, it turns out that there is no problem with
 *  not having a synchronizing instruction in the MP unlock code.  There
 *  are three things to keep in mind:  First, Intel guarantees that writes
 *  are ordered amongst themselves.  Second, the P6 is allowed to reorder
 *  reads around writes.  Third, the P6 maintains cache consistency (snoops
 *  the bus).  The second is not an issue since the one read we do is the
 *  basis for the conditional which determines whether the write will be
 *  made or not.
 *
 *  Therefore, no synchronizing instruction is required on unlock.  There are
 *  three performance cases:  First, if a single cpu is getting and releasing
 *  the lock the removal of the synchronizing instruction saves approx
 *  200 ns (testing w/ dual cpu PIII 450).  Second, if one cpu is contending
 *  for the lock while the other holds it, the removal of the synchronizing
 *  instruction results in a 700 ns LOSS in performance.  Third, if two cpus
 *  are switching off ownership of the MP lock but not contending for it (the
 *  most common case), this results in a 400 ns IMPROVEMENT in performance.
 *
 *  Since our goal is to reduce lock contention in the first place, we have
 *  decided to remove the synchronizing instruction from the unlock code.
 */

NON_GPROF_ENTRY(MPrellock_edx)
	movl	(%edx), %ecx		/* - get the value */
	decl	%ecx			/* - new count is one less */
	testl	$COUNT_FIELD, %ecx	/* - Unless it's zero... */
	jnz	2f
	ARB_HWI				/* last release, arbitrate hw INTs */
	movl	$FREE_LOCK, %ecx	/* - In which case we release it */
#if 0
	lock
	addl	$0,0(%esp)		/* see note above */
#endif
2:
	movl	%ecx, (%edx)
	ret
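
/*
 * For reference, a C sketch of the release logic above.  Illustration
 * only; note the plain, unsynchronized store per the note above:
 *
 *	void
 *	MPrellock(volatile u_int *lock)
 *	{
 *		u_int v;
 *
 *		v = *lock - 1;		// drop the count by one
 *		if ((v & COUNT_FIELD) == 0) {
 *			ARB_HWI;	// last release: arbitrate hw INTs
 *			v = FREE_LOCK;
 *		}
 *		*lock = v;		// plain store, no lock prefix
 *	}
 */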

/***********************************************************************
 *  void get_mplock()
 *  -----------------
 *  All registers preserved
 *
 *  Stack (after call to _MPgetlock_edx):
 *
 *	eflags		 4(%esp)
 *	edx		 8(%esp)
 *	ecx		12(%esp)
 *	eax		16(%esp)
 *
 * Requirements:  Interrupts should be enabled on call so we can take
 *		  IPIs and FAST INTs while we are waiting for the lock
 *		  (else the system may not be able to halt).
 *
 *		  XXX there are still places where get_mplock() is called
 *		  with interrupts disabled, so we have to temporarily reenable
 *		  interrupts.
 *
 * Side effects:  The current cpu will be given ownership of the
 *		  hardware interrupts when it first acquires the lock.
 *
 * Costs:	  Initial acquisition requires the use of a costly locked
 *		  instruction, but recursive acquisition is cheap.  Release
 *		  is very cheap.
 */

NON_GPROF_ENTRY(get_mplock)
	pushl	%eax
	pushl	%ecx
	pushl	%edx
	movl	$_mp_lock, %edx
	pushfl				/* save eflags */
	testl	$(1<<9), (%esp)		/* interrupts enabled (IF set)? */
	jz	2f			/* no, must enable them first */
	call	_MPgetlock_edx
	addl	$4,%esp			/* discard saved eflags */
1:
	popl	%edx
	popl	%ecx
	popl	%eax
	ret
2:
	sti				/* allow IPIs while we spin */
	call	_MPgetlock_edx
	popfl				/* restore the caller's eflags */
	jmp	1b

/*
 * Special version of get_mplock that is used during bootstrap when we can't
 * yet enable interrupts of any sort since the APIC isn't online.  We do an
 * end run around get_mplock to avoid enabling interrupts.
 *
 * XXX FIXME: APIC should be online from the start to simplify IPIs.
 */
NON_GPROF_ENTRY(boot_get_mplock)
	pushl	%eax
	pushl	%ecx
	pushl	%edx
#ifdef GRAB_LOPRIO	
	pushfl
	pushl	lapic_tpr
	cli
#endif
	
	movl	$_mp_lock, %edx
	call	_MPgetlock_edx

#ifdef GRAB_LOPRIO	
	popl	lapic_tpr
	popfl
#endif
	popl	%edx
	popl	%ecx
	popl	%eax
	ret

/***********************************************************************
 *  int try_mplock()
 *  ----------------
 *  Returns	1 in %eax on success, 0 on failure
 */

NON_GPROF_ENTRY(try_mplock)
	pushl	%ecx
	pushl	%edx
	pushl	$_mp_lock
	call	_MPtrylock
	addl	$4, %esp
	popl	%edx
	popl	%ecx
	ret

/***********************************************************************
 *  void rel_mplock()
 *  -----------------
 *  All registers preserved
 */

NON_GPROF_ENTRY(rel_mplock)
	pushl	%ecx
	pushl	%edx
	movl	$_mp_lock,%edx
	call	_MPrellock_edx
	popl	%edx
	popl	%ecx
	ret
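
/*
 * Typical usage from C code, assuming the usual declarations for these
 * entry points in the SMP headers (illustration only):
 *
 *	get_mplock();		// spin until owned; recursion is fine
 *	...			// code protected by the MP lock
 *	rel_mplock();		// one release per acquire
 *
 *	if (try_mplock()) {	// non-blocking attempt
 *		...
 *		rel_mplock();
 *	}
 */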

#endif

/***********************************************************************
 * 
 */
	.data
	.p2align 2			/* xx_lock aligned on int boundary */

#ifdef SMP

	.globl _mp_lock
_mp_lock:	.long	0		

#ifdef GLPROFILE
	.globl	_gethits
_gethits:
	.long	0
_gethits2:
	.long	0
_gethits3:
	.long	0

	.globl	_tryhits
_tryhits:
	.long	0
_tryhits2:
	.long	0
_tryhits3:
	.long	0

msg:
	.asciz	"lock hits: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x\n"
#endif /* GLPROFILE */
#endif /* SMP */