summaryrefslogtreecommitdiffstats
path: root/drivers/dma/dmaengine.c
blob: 404cc7b6e70530066b5a91970d791c8ae8f978c0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
/*
 * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59
 * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * The full GNU General Public License is included in this distribution in the
 * file called COPYING.
 */

/*
 * This code implements the DMA subsystem. It provides a HW-neutral interface
 * for other kernel code to use asynchronous memory copy capabilities,
 * if present, and allows different HW DMA drivers to register as providing
 * this capability.
 *
 * Due to the fact we are accelerating what is already a relatively fast
 * operation, the code goes to great lengths to avoid additional overhead,
 * such as locking.
 *
 * LOCKING:
 *
 * The subsystem keeps two global lists, dma_device_list and dma_client_list.
 * Both of these are protected by a mutex, dma_list_mutex.
 *
 * Each device has a channels list, which runs unlocked but is never modified
 * once the device is registered, it's just setup by the driver.
 *
 * Each client has a channels list, it's only modified under the client->lock
 * and in an RCU callback, so it's safe to read under rcu_read_lock().
 *
 * Each device has a kref, which is initialized to 1 when the device is
 * registered. A kref_get is done for each class_device registered.  When the
 * class_device is released, the corresponding kref_put is done in the release
 * method. Every time one of the device's channels is allocated to a client,
 * a kref_get occurs.  When the channel is freed, the corresponding kref_put
 * happens. The device's release function does a completion, so
 * unregister_device does a remove event, class_device_unregister, a kref_put
 * for the first reference, then waits on the completion for all other
 * references to finish.
 *
 * Each channel has an open-coded implementation of Rusty Russell's "bigref,"
 * with a kref and a per_cpu local_t.  A single reference is set on an
 * ADDED event, and removed with a REMOVE event.  Net DMA client takes an
 * extra reference per outstanding transaction.  The release function does a
 * kref_put on the device. -ChrisL
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/device.h>
#include <linux/dmaengine.h>
#include <linux/hardirq.h>
#include <linux/spinlock.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/mutex.h>
#include <linux/jiffies.h>

static DEFINE_MUTEX(dma_list_mutex);
static LIST_HEAD(dma_device_list);
static LIST_HEAD(dma_client_list);

/* --- sysfs implementation --- */

static ssize_t show_memcpy_count(struct class_device *cd, char *buf)
{
	struct dma_chan *chan = container_of(cd, struct dma_chan, class_dev);
	unsigned long count = 0;
	int i;

	for_each_possible_cpu(i)
		count += per_cpu_ptr(chan->local, i)->memcpy_count;

	return sprintf(buf, "%lu\n", count);
}

static ssize_t show_bytes_transferred(struct class_device *cd, char *buf)
{
	struct dma_chan *chan = container_of(cd, struct dma_chan, class_dev);
	unsigned long count = 0;
	int i;

	for_each_possible_cpu(i)
		count += per_cpu_ptr(chan->local, i)->bytes_transferred;

	return sprintf(buf, "%lu\n", count);
}

/*
 * sysfs show handler for the "in_use" attribute: prints 1 when the channel
 * embedding @cd currently has a client assigned, 0 otherwise.
 */
static ssize_t show_in_use(struct class_device *cd, char *buf)
{
	struct dma_chan *chan = container_of(cd, struct dma_chan, class_dev);
	int in_use = 0;

	if (chan->client)
		in_use = 1;

	return sprintf(buf, "%d\n", in_use);
}

/*
 * Read-only (S_IRUGO) per-channel attributes, installed through
 * dma_devclass.class_dev_attrs.  None of them has a store handler.
 */
static struct class_device_attribute dma_class_attrs[] = {
	__ATTR(memcpy_count, S_IRUGO, show_memcpy_count, NULL),
	__ATTR(bytes_transferred, S_IRUGO, show_bytes_transferred, NULL),
	__ATTR(in_use, S_IRUGO, show_in_use, NULL),
	__ATTR_NULL
};

static void dma_async_device_cleanup(struct kref *kref);

static void dma_class_dev_release(struct class_device *cd)
{
	struct dma_chan *chan = container_of(cd, struct dma_chan, class_dev);
	kref_put(&chan->device->refcount, dma_async_device_cleanup);
}

/*
 * The "dma" device class.  Each channel's class_dev is attached to it in
 * dma_async_device_register(); the class itself is registered from
 * dma_bus_init().
 */
static struct class dma_devclass = {
	.name            = "dma",
	.class_dev_attrs = dma_class_attrs,
	.release = dma_class_dev_release,
};

/* --- client and device registration --- */

/**
 * dma_client_chan_alloc - try to allocate a channel to a client
 * @client: &dma_client that should receive the channel
 *
 * Scans every channel of every registered device and claims the first one
 * that has no client and whose device_alloc_chan_resources() call returns a
 * non-negative value.  The claimed channel takes a reference on its device,
 * gets a freshly initialised refcount and is appended to @client's channel
 * list under client->lock.
 *
 * Returns the claimed channel, or NULL when none could be allocated.
 *
 * Called with dma_list_mutex held.
 */
static struct dma_chan *dma_client_chan_alloc(struct dma_client *client)
{
	struct dma_device *device;
	struct dma_chan *chan;
	unsigned long flags;
	int desc;	/* allocated descriptor count */

	/* Find a channel, any DMA engine will do */
	list_for_each_entry(device, &dma_device_list, global_node) {
		list_for_each_entry(chan, &device->channels, device_node) {
			/* already owned by some client */
			if (chan->client)
				continue;

			desc = chan->device->device_alloc_chan_resources(chan);
			if (desc < 0)
				continue;

			kref_get(&device->refcount);
			kref_init(&chan->refcount);
			chan->slow_ref = 0;
			INIT_RCU_HEAD(&chan->rcu);
			chan->client = client;

			/* publish the channel on the client's list */
			spin_lock_irqsave(&client->lock, flags);
			list_add_tail_rcu(&chan->client_node,
					  &client->channels);
			spin_unlock_irqrestore(&client->lock, flags);

			return chan;
		}
	}

	return NULL;
}

/**
 * dma_sync_wait - busy-wait for a DMA transaction to finish
 * @chan: channel the transaction was submitted on
 * @cookie: transaction identifier to poll for
 *
 * Issues any pending operations on @chan and then polls
 * dma_async_is_tx_complete() until the status is no longer DMA_IN_PROGRESS
 * or 5000 ms have elapsed.
 *
 * Returns the final status reported for @cookie, or DMA_ERROR when the
 * transaction is still in progress at the deadline.
 */
enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
{
	enum dma_status status;
	unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);

	dma_async_issue_pending(chan);
	for (;;) {
		status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);

		/*
		 * Look at the status before the clock: a transaction that is
		 * observed as finished must never be reported as a timeout,
		 * even if the deadline passed while it was being polled.
		 */
		if (status != DMA_IN_PROGRESS)
			return status;

		if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
			printk(KERN_ERR "dma_sync_wait_timeout!\n");
			return DMA_ERROR;
		}
	}
}
EXPORT_SYMBOL(dma_sync_wait);

/**
 * dma_chan_cleanup - release a DMA channel's resources
 * @kref: kernel reference structure that contains the DMA channel device
 */
void dma_chan_cleanup(struct kref *kref)
{
	struct dma_chan *chan = container_of(kref, struct dma_chan, refcount);
	chan->device->device_free_chan_resources(chan);
	chan->client = NULL;
	kref_put(&chan->device->refcount, dma_async_device_cleanup);
}
EXPORT_SYMBOL(dma_chan_cleanup);

/*
 * RCU callback scheduled by dma_client_chan_free().
 *
 * dma_client_chan_free() added 0x7FFFFFFF to the channel's kref counter.
 * Here the per-CPU local refcounts are subtracted from that same constant
 * and the remainder is removed from the kref counter, which leaves the kref
 * holding the sum of the per-CPU counts on top of its previous value.  One
 * reference is then dropped; dma_chan_cleanup() runs when the count reaches
 * zero.
 */
static void dma_chan_free_rcu(struct rcu_head *rcu)
{
	struct dma_chan *chan = container_of(rcu, struct dma_chan, rcu);
	int bias = 0x7FFFFFFF;
	int i;
	/* bias = 0x7FFFFFFF - (sum of all per-CPU local refcounts) */
	for_each_possible_cpu(i)
		bias -= local_read(&per_cpu_ptr(chan->local, i)->refcount);
	atomic_sub(bias, &chan->refcount.refcount);
	kref_put(&chan->refcount, dma_chan_cleanup);
}

/*
 * Start releasing @chan: add a 0x7FFFFFFF bias to its kref counter, set
 * slow_ref, and defer the rest of the teardown to dma_chan_free_rcu() via
 * call_rcu().  The bias is taken back out in that callback.
 */
static void dma_client_chan_free(struct dma_chan *chan)
{
	atomic_add(0x7FFFFFFF, &chan->refcount.refcount);
	chan->slow_ref = 1;
	call_rcu(&chan->rcu, dma_chan_free_rcu);
}

/**
 * dma_chans_rebalance - reallocate channels to clients
 *
 * When the number of DMA channel in the system changes,
 * channels need to be rebalanced among clients.
 *
 * Takes dma_list_mutex for the whole walk over dma_client_list.  For each
 * client, channels are allocated until chan_count reaches chans_desired or
 * no channel is available, and surplus channels are removed from the head
 * of the client's list.  The client's event_callback is invoked with
 * DMA_RESOURCE_ADDED / DMA_RESOURCE_REMOVED for every change, while the
 * mutex is still held.
 */
static void dma_chans_rebalance(void)
{
	struct dma_client *client;
	struct dma_chan *chan;
	unsigned long flags;

	mutex_lock(&dma_list_mutex);

	list_for_each_entry(client, &dma_client_list, global_node) {
		/* grow: hand out channels until satisfied or none are left */
		while (client->chans_desired > client->chan_count) {
			chan = dma_client_chan_alloc(client);
			if (!chan)
				break;
			client->chan_count++;
			client->event_callback(client,
	                                       chan,
	                                       DMA_RESOURCE_ADDED);
		}
		/* shrink: unlink the first channel, notify, then free it */
		while (client->chans_desired < client->chan_count) {
			spin_lock_irqsave(&client->lock, flags);
			chan = list_entry(client->channels.next,
			                  struct dma_chan,
			                  client_node);
			list_del_rcu(&chan->client_node);
			spin_unlock_irqrestore(&client->lock, flags);
			client->chan_count--;
			client->event_callback(client,
			                       chan,
			                       DMA_RESOURCE_REMOVED);
			dma_client_chan_free(chan);
		}
	}

	mutex_unlock(&dma_list_mutex);
}

/**
 * dma_async_client_register - allocate and register a &dma_client
 * @event_callback: callback for notification of channel addition/removal
 *
 * Allocates a zeroed client with GFP_KERNEL, initialises its channel list
 * and lock, and appends it to the global client list under dma_list_mutex.
 * The new client starts with no channels and none desired.
 *
 * Returns the new client, or NULL if the allocation fails.
 */
struct dma_client *dma_async_client_register(dma_event_callback event_callback)
{
	struct dma_client *client = kzalloc(sizeof(*client), GFP_KERNEL);

	if (client == NULL)
		return NULL;

	client->event_callback = event_callback;
	client->chan_count = 0;
	client->chans_desired = 0;
	spin_lock_init(&client->lock);
	INIT_LIST_HEAD(&client->channels);

	mutex_lock(&dma_list_mutex);
	list_add_tail(&client->global_node, &dma_client_list);
	mutex_unlock(&dma_list_mutex);

	return client;
}
EXPORT_SYMBOL(dma_async_client_register);

/**
 * dma_async_client_unregister - unregister a client and free the &dma_client
 * @client: &dma_client to free
 *
 * Force frees any allocated DMA channels, frees the &dma_client memory
 */
void dma_async_client_unregister(struct dma_client *client)
{
	struct dma_chan *chan;

	if (!client)
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(chan, &client->channels, client_node)
		dma_client_chan_free(chan);
	rcu_read_unlock();

	mutex_lock(&dma_list_mutex);
	list_del(&client->global_node);
	mutex_unlock(&dma_list_mutex);

	kfree(client);
	dma_chans_rebalance();
}
EXPORT_SYMBOL(dma_async_client_unregister);

/**
 * dma_async_client_chan_request - request DMA channels
 * @client: &dma_client making the request
 * @number: count of DMA channels wanted, 0 to free all currently allocated
 *
 * Records @number as the client's desired channel count and triggers a
 * rebalance.  Resulting allocations and frees are reported to the client
 * through its event callback.
 */
void dma_async_client_chan_request(struct dma_client *client,
				   unsigned int number)
{
	client->chans_desired = number;

	/* let the rebalance grow or shrink the client's channel set */
	dma_chans_rebalance();
}
EXPORT_SYMBOL(dma_async_client_chan_request);

/**
 * dma_async_device_register - registers DMA devices found
 * @device: &dma_device
 */
int dma_async_device_register(struct dma_device *device)
{
	static int id;
	int chancnt = 0, rc;
	struct dma_chan* chan;

	if (!device)
		return -ENODEV;

	/* validate device routines */
	BUG_ON(dma_has_cap(DMA_MEMCPY, device->cap_mask) &&
		!device->device_prep_dma_memcpy);
	BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
		!device->device_prep_dma_xor);
	BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) &&
		!device->device_prep_dma_zero_sum);
	BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
		!device->device_prep_dma_memset);
	BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) &&
		!device->device_prep_dma_interrupt);

	BUG_ON(!device->device_alloc_chan_resources);
	BUG_ON(!device->device_free_chan_resources);
	BUG_ON(!device->device_dependency_added);
	BUG_ON(!device->device_is_tx_complete);
	BUG_ON(!device->device_issue_pending);
	BUG_ON(!device->dev);

	init_completion(&device->done);
	kref_init(&device->refcount);
	device->dev_id = id++;

	/* represent channels in sysfs. Probably want devs too */
	list_for_each_entry(chan, &device->channels, device_node) {
		chan->local = alloc_percpu(typeof(*chan->local));
		if (chan->local == NULL)
			continue;

		chan->chan_id = chancnt++;
		chan->class_dev.class = &dma_devclass;
		chan->class_dev.dev = NULL;
		snprintf(chan->class_dev.class_id, BUS_ID_SIZE, "dma%dchan%d",
		         device->dev_id, chan->chan_id);

		rc = class_device_register(&chan->class_dev);
		if (rc) {
			chancnt--;
			free_percpu(chan->local);
			chan->local = NULL;
			goto err_out;
		}

		kref_get(&device->refcount);
	}

	mutex_lock(&dma_list_mutex);
	list_add_tail(&device->global_node, &dma_device_list);
	mutex_unlock(&dma_list_mutex);

	dma_chans_rebalance();

	return 0;

err_out:
	list_for_each_entry(chan, &device->channels, device_node) {
		if (chan->local == NULL)
			continue;
		kref_put(&device->refcount, dma_async_device_cleanup);
		class_device_unregister(&chan->class_dev);
		chancnt--;
		free_percpu(chan->local);
	}
	return rc;
}
EXPORT_SYMBOL(dma_async_device_register);

/**
 * dma_async_device_cleanup - function called when all references are released
 * @kref: the refcount embedded in the &dma_device
 *
 * Signals the device's "done" completion, which
 * dma_async_device_unregister() waits on.
 */
static void dma_async_device_cleanup(struct kref *kref)
{
	complete(&container_of(kref, struct dma_device, refcount)->done);
}

/**
 * dma_async_device_unregister - unregisters DMA devices
 * @device: &dma_device
 *
 * Removes @device from the global device list, takes every channel away
 * from its client (notifying the client with DMA_RESOURCE_REMOVED),
 * unregisters the channels' class devices, rebalances the remaining
 * channels, drops the initial device reference and then blocks until all
 * other references are gone.
 */
void dma_async_device_unregister(struct dma_device *device)
{
	struct dma_chan *chan;
	unsigned long flags;

	mutex_lock(&dma_list_mutex);
	list_del(&device->global_node);
	mutex_unlock(&dma_list_mutex);

	list_for_each_entry(chan, &device->channels, device_node) {
		if (chan->client) {
			/* detach the channel from its client under the client lock */
			spin_lock_irqsave(&chan->client->lock, flags);
			list_del(&chan->client_node);
			chan->client->chan_count--;
			spin_unlock_irqrestore(&chan->client->lock, flags);
			chan->client->event_callback(chan->client,
			                             chan,
			                             DMA_RESOURCE_REMOVED);
			dma_client_chan_free(chan);
		}
		class_device_unregister(&chan->class_dev);
	}
	/* clients that just lost a channel may get one from another device */
	dma_chans_rebalance();

	/* drop the reference from kref_init(), then wait for the last put */
	kref_put(&device->refcount, dma_async_device_cleanup);
	wait_for_completion(&device->done);
}
EXPORT_SYMBOL(dma_async_device_unregister);

/**
 * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
 * @chan: DMA channel to offload copy to
 * @dest: destination address (virtual)
 * @src: source address (virtual)
 * @len: length
 *
 * Both @dest and @src must be mappable to a bus address according to the
 * DMA mapping API rules for streaming mappings.
 * Both @dest and @src must stay memory resident (kernel memory or locked
 * user space pages).
 *
 * Returns the cookie produced by submitting the descriptor, or -ENOMEM when
 * the device cannot prepare a memcpy descriptor.
 */
dma_cookie_t
dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
			void *src, size_t len)
{
	struct dma_device *device = chan->device;
	struct dma_async_tx_descriptor *desc;
	dma_addr_t src_dma, dest_dma;
	dma_cookie_t cookie;
	int cpu;

	desc = device->device_prep_dma_memcpy(chan, len, 0);
	if (desc == NULL)
		return -ENOMEM;

	/* pre-acknowledged, no completion callback */
	desc->ack = 1;
	desc->callback = NULL;

	src_dma = dma_map_single(device->dev, src, len, DMA_TO_DEVICE);
	desc->tx_set_src(src_dma, desc, 0);
	dest_dma = dma_map_single(device->dev, dest, len, DMA_FROM_DEVICE);
	desc->tx_set_dest(dest_dma, desc, 0);

	cookie = desc->tx_submit(desc);

	/* account the copy in this CPU's statistics */
	cpu = get_cpu();
	per_cpu_ptr(chan->local, cpu)->memcpy_count++;
	per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
	put_cpu();

	return cookie;
}
EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);

/**
 * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
 * @chan: DMA channel to offload copy to
 * @page: destination page
 * @offset: offset in page to copy to
 * @kdata: source address (virtual)
 * @len: length
 *
 * Both @page/@offset and @kdata must be mappable to a bus address according
 * to the DMA mapping API rules for streaming mappings.
 * Both @page/@offset and @kdata must stay memory resident (kernel memory or
 * locked user space pages)
 *
 * Returns the cookie produced by submitting the descriptor, or -ENOMEM when
 * the device cannot prepare a memcpy descriptor.
 */
dma_cookie_t
dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
			unsigned int offset, void *kdata, size_t len)
{
	struct dma_device *device = chan->device;
	struct dma_async_tx_descriptor *desc;
	dma_addr_t src_dma, dest_dma;
	dma_cookie_t cookie;
	int cpu;

	desc = device->device_prep_dma_memcpy(chan, len, 0);
	if (desc == NULL)
		return -ENOMEM;

	/* pre-acknowledged, no completion callback */
	desc->ack = 1;
	desc->callback = NULL;

	src_dma = dma_map_single(device->dev, kdata, len, DMA_TO_DEVICE);
	desc->tx_set_src(src_dma, desc, 0);
	dest_dma = dma_map_page(device->dev, page, offset, len,
				DMA_FROM_DEVICE);
	desc->tx_set_dest(dest_dma, desc, 0);

	cookie = desc->tx_submit(desc);

	/* account the copy in this CPU's statistics */
	cpu = get_cpu();
	per_cpu_ptr(chan->local, cpu)->memcpy_count++;
	per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
	put_cpu();

	return cookie;
}
EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);

/**
 * dma_async_memcpy_pg_to_pg - offloaded copy from page to page
 * @chan: DMA channel to offload copy to
 * @dest_pg: destination page
 * @dest_off: offset in page to copy to
 * @src_pg: source page
 * @src_off: offset in page to copy from
 * @len: length
 *
 * Both @dest_pg/@dest_off and @src_pg/@src_off must be mappable to a bus
 * address according to the DMA mapping API rules for streaming mappings.
 * Both @dest_pg/@dest_off and @src_pg/@src_off must stay memory resident
 * (kernel memory or locked user space pages).
 *
 * Returns the cookie produced by submitting the descriptor, or -ENOMEM when
 * the device cannot prepare a memcpy descriptor.
 */
dma_cookie_t
dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
	unsigned int dest_off, struct page *src_pg, unsigned int src_off,
	size_t len)
{
	struct dma_device *device = chan->device;
	struct dma_async_tx_descriptor *desc;
	dma_addr_t src_dma, dest_dma;
	dma_cookie_t cookie;
	int cpu;

	desc = device->device_prep_dma_memcpy(chan, len, 0);
	if (desc == NULL)
		return -ENOMEM;

	/* pre-acknowledged, no completion callback */
	desc->ack = 1;
	desc->callback = NULL;

	src_dma = dma_map_page(device->dev, src_pg, src_off, len,
			       DMA_TO_DEVICE);
	desc->tx_set_src(src_dma, desc, 0);
	dest_dma = dma_map_page(device->dev, dest_pg, dest_off, len,
				DMA_FROM_DEVICE);
	desc->tx_set_dest(dest_dma, desc, 0);

	cookie = desc->tx_submit(desc);

	/* account the copy in this CPU's statistics */
	cpu = get_cpu();
	per_cpu_ptr(chan->local, cpu)->memcpy_count++;
	per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
	put_cpu();

	return cookie;
}
EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg);

/**
 * dma_async_tx_descriptor_init - set up the generic part of a descriptor
 * @tx: descriptor to initialise
 * @chan: channel the descriptor is bound to
 *
 * Records @chan in @tx and initialises the descriptor's lock and its two
 * dependency list heads.
 */
void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
				  struct dma_chan *chan)
{
	INIT_LIST_HEAD(&tx->depend_list);
	INIT_LIST_HEAD(&tx->depend_node);
	spin_lock_init(&tx->lock);
	tx->chan = chan;
}
EXPORT_SYMBOL(dma_async_tx_descriptor_init);

/*
 * Subsystem init: (re)initialises dma_list_mutex and registers the "dma"
 * class.  Returns the result of class_register().
 */
static int __init dma_bus_init(void)
{
	int rc;

	mutex_init(&dma_list_mutex);
	rc = class_register(&dma_devclass);

	return rc;
}
subsys_initcall(dma_bus_init);

OpenPOWER on IntegriCloud