summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/acpi/acpi_pad.c7
-rw-r--r--drivers/base/topology.c16
-rw-r--r--drivers/block/Kconfig17
-rw-r--r--drivers/block/Makefile1
-rw-r--r--drivers/block/rbd.c1841
-rw-r--r--drivers/block/rbd_types.h73
-rw-r--r--drivers/block/virtio_blk.c17
-rw-r--r--drivers/char/agp/Kconfig2
-rw-r--r--drivers/char/agp/amd64-agp.c39
-rw-r--r--drivers/char/agp/generic.c4
-rw-r--r--drivers/char/tpm/tpm.c22
-rw-r--r--drivers/char/virtio_console.c257
-rw-r--r--drivers/edac/Kconfig16
-rw-r--r--drivers/edac/Makefile3
-rw-r--r--drivers/edac/amd64_edac.c17
-rw-r--r--drivers/edac/amd64_edac.h5
-rw-r--r--drivers/edac/amd64_edac_dbg.c207
-rw-r--r--drivers/edac/edac_device_sysfs.c16
-rw-r--r--drivers/edac/edac_mc_sysfs.c11
-rw-r--r--drivers/edac/edac_mce_amd.c452
-rw-r--r--drivers/edac/edac_module.c79
-rw-r--r--drivers/edac/edac_module.h1
-rw-r--r--drivers/edac/edac_pci_sysfs.c10
-rw-r--r--drivers/edac/edac_stub.c51
-rw-r--r--drivers/edac/mce_amd.c680
-rw-r--r--drivers/edac/mce_amd.h (renamed from drivers/edac/edac_mce_amd.h)59
-rw-r--r--drivers/edac/mce_amd_inj.c171
-rw-r--r--drivers/firewire/ohci.c19
-rw-r--r--drivers/firewire/ohci.h8
-rw-r--r--drivers/firmware/Kconfig2
-rw-r--r--drivers/gpu/drm/radeon/radeon_cursor.c28
-rw-r--r--drivers/i2c/busses/i2c-davinci.c24
-rw-r--r--drivers/i2c/busses/i2c-imx.c12
-rw-r--r--drivers/idle/intel_idle.c9
-rw-r--r--drivers/input/evdev.c10
-rw-r--r--drivers/input/misc/hp_sdc_rtc.c4
-rw-r--r--drivers/input/serio/hil_mlc.c6
-rw-r--r--drivers/input/serio/hp_sdc.c4
-rw-r--r--drivers/macintosh/adb.c2
-rw-r--r--drivers/media/video/v4l2-compat-ioctl32.c32
-rw-r--r--drivers/mtd/nand/mxc_nand.c92
-rw-r--r--drivers/net/3c527.c2
-rw-r--r--drivers/net/hamradio/6pack.c2
-rw-r--r--drivers/net/hamradio/mkiss.c2
-rw-r--r--drivers/net/irda/sir_dev.c2
-rw-r--r--drivers/net/ppp_async.c2
-rw-r--r--drivers/net/wan/cosa.c2
-rw-r--r--drivers/oprofile/oprof.c32
-rw-r--r--drivers/oprofile/oprof.h2
-rw-r--r--drivers/oprofile/oprofile_files.c7
-rw-r--r--drivers/oprofile/oprofile_perf.c328
-rw-r--r--drivers/oprofile/oprofilefs.c54
-rw-r--r--drivers/parport/share.c2
-rw-r--r--drivers/scsi/scsi.c4
-rw-r--r--drivers/serial/ioc3_serial.c1
-rw-r--r--drivers/vhost/net.c16
-rw-r--r--drivers/vhost/vhost.c22
-rw-r--r--drivers/vhost/vhost.h10
58 files changed, 3788 insertions, 1028 deletions
diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c
index 6b115f6..6afceb3 100644
--- a/drivers/acpi/acpi_pad.c
+++ b/drivers/acpi/acpi_pad.c
@@ -30,18 +30,13 @@
#include <linux/slab.h>
#include <acpi/acpi_bus.h>
#include <acpi/acpi_drivers.h>
+#include <asm/mwait.h>
#define ACPI_PROCESSOR_AGGREGATOR_CLASS "acpi_pad"
#define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Processor Aggregator"
#define ACPI_PROCESSOR_AGGREGATOR_NOTIFY 0x80
static DEFINE_MUTEX(isolated_cpus_lock);
-#define MWAIT_SUBSTATE_MASK (0xf)
-#define MWAIT_CSTATE_MASK (0xf)
-#define MWAIT_SUBSTATE_SIZE (4)
-#define CPUID_MWAIT_LEAF (5)
-#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1)
-#define CPUID5_ECX_INTERRUPT_BREAK (0x2)
static unsigned long power_saving_mwait_eax;
static unsigned char tsc_detected_unstable;
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index 9fc630c..f6f37a0 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -45,7 +45,8 @@ static ssize_t show_##name(struct sys_device *dev, \
return sprintf(buf, "%d\n", topology_##name(cpu)); \
}
-#if defined(topology_thread_cpumask) || defined(topology_core_cpumask)
+#if defined(topology_thread_cpumask) || defined(topology_core_cpumask) || \
+ defined(topology_book_cpumask)
static ssize_t show_cpumap(int type, const struct cpumask *mask, char *buf)
{
ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
@@ -114,6 +115,14 @@ define_siblings_show_func(core_cpumask);
define_one_ro_named(core_siblings, show_core_cpumask);
define_one_ro_named(core_siblings_list, show_core_cpumask_list);
+#ifdef CONFIG_SCHED_BOOK
+define_id_show_func(book_id);
+define_one_ro(book_id);
+define_siblings_show_func(book_cpumask);
+define_one_ro_named(book_siblings, show_book_cpumask);
+define_one_ro_named(book_siblings_list, show_book_cpumask_list);
+#endif
+
static struct attribute *default_attrs[] = {
&attr_physical_package_id.attr,
&attr_core_id.attr,
@@ -121,6 +130,11 @@ static struct attribute *default_attrs[] = {
&attr_thread_siblings_list.attr,
&attr_core_siblings.attr,
&attr_core_siblings_list.attr,
+#ifdef CONFIG_SCHED_BOOK
+ &attr_book_id.attr,
+ &attr_book_siblings.attr,
+ &attr_book_siblings_list.attr,
+#endif
NULL
};
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index de27768..4b9359a 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -488,4 +488,21 @@ config BLK_DEV_HD
If unsure, say N.
+config BLK_DEV_RBD
+ tristate "Rados block device (RBD)"
+ depends on INET && EXPERIMENTAL && BLOCK
+ select CEPH_LIB
+ select LIBCRC32C
+ select CRYPTO_AES
+ select CRYPTO
+ default n
+ help
+ Say Y here if you want include the Rados block device, which stripes
+ a block device over objects stored in the Ceph distributed object
+ store.
+
+ More information at http://ceph.newdream.net/.
+
+ If unsure, say N.
+
endif # BLK_DEV
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index aff5ac9..d7f463d 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -37,5 +37,6 @@ obj-$(CONFIG_BLK_DEV_HD) += hd.o
obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
obj-$(CONFIG_BLK_DEV_DRBD) += drbd/
+obj-$(CONFIG_BLK_DEV_RBD) += rbd.o
swim_mod-objs := swim.o swim_asm.o
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
new file mode 100644
index 0000000..6ec9d53
--- /dev/null
+++ b/drivers/block/rbd.c
@@ -0,0 +1,1841 @@
+/*
+ rbd.c -- Export ceph rados objects as a Linux block device
+
+
+ based on drivers/block/osdblk.c:
+
+ Copyright 2009 Red Hat, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+
+ Instructions for use
+ --------------------
+
+ 1) Map a Linux block device to an existing rbd image.
+
+ Usage: <mon ip addr> <options> <pool name> <rbd image name> [snap name]
+
+ $ echo "192.168.0.1 name=admin rbd foo" > /sys/class/rbd/add
+
+ The snapshot name can be "-" or omitted to map the image read/write.
+
+ 2) List all active blkdev<->object mappings.
+
+ In this example, we have performed step #1 twice, creating two blkdevs,
+ mapped to two separate rados objects in the rados rbd pool
+
+ $ cat /sys/class/rbd/list
+ #id major client_name pool name snap KB
+ 0 254 client4143 rbd foo - 1024000
+
+ The columns, in order, are:
+ - blkdev unique id
+ - blkdev assigned major
+ - rados client id
+ - rados pool name
+ - rados block device name
+ - mapped snapshot ("-" if none)
+ - device size in KB
+
+
+ 3) Create a snapshot.
+
+ Usage: <blkdev id> <snapname>
+
+ $ echo "0 mysnap" > /sys/class/rbd/snap_create
+
+
+ 4) Listing a snapshot.
+
+ $ cat /sys/class/rbd/snaps_list
+ #id snap KB
+ 0 - 1024000 (*)
+ 0 foo 1024000
+
+ The columns, in order, are:
+ - blkdev unique id
+ - snapshot name, '-' means none (active read/write version)
+ - size of device at time of snapshot
+ - the (*) indicates this is the active version
+
+ 5) Rollback to snapshot.
+
+ Usage: <blkdev id> <snapname>
+
+ $ echo "0 mysnap" > /sys/class/rbd/snap_rollback
+
+
+ 6) Mapping an image using snapshot.
+
+ A snapshot mapping is read-only. This is being done by passing
+ snap=<snapname> to the options when adding a device.
+
+ $ echo "192.168.0.1 name=admin,snap=mysnap rbd foo" > /sys/class/rbd/add
+
+
+ 7) Remove an active blkdev<->rbd image mapping.
+
+ In this example, we remove the mapping with blkdev unique id 1.
+
+ $ echo 1 > /sys/class/rbd/remove
+
+
+ NOTE: The actual creation and deletion of rados objects is outside the scope
+ of this driver.
+
+ */
+
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/osd_client.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/decode.h>
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+
+#include "rbd_types.h"
+
+#define DRV_NAME "rbd"
+#define DRV_NAME_LONG "rbd (rados block device)"
+
+#define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */
+
+#define RBD_MAX_MD_NAME_LEN (96 + sizeof(RBD_SUFFIX))
+#define RBD_MAX_POOL_NAME_LEN 64
+#define RBD_MAX_SNAP_NAME_LEN 32
+#define RBD_MAX_OPT_LEN 1024
+
+#define RBD_SNAP_HEAD_NAME "-"
+
+#define DEV_NAME_LEN 32
+
+/*
+ * block device image metadata (in-memory version)
+ */
+struct rbd_image_header {
+ u64 image_size;
+ char block_name[32];
+ __u8 obj_order;
+ __u8 crypt_type;
+ __u8 comp_type;
+ struct rw_semaphore snap_rwsem;
+ struct ceph_snap_context *snapc;
+ size_t snap_names_len;
+ u64 snap_seq;
+ u32 total_snaps;
+
+ char *snap_names;
+ u64 *snap_sizes;
+};
+
+/*
+ * an instance of the client. multiple devices may share a client.
+ */
+struct rbd_client {
+ struct ceph_client *client;
+ struct kref kref;
+ struct list_head node;
+};
+
+/*
+ * a single io request
+ */
+struct rbd_request {
+ struct request *rq; /* blk layer request */
+ struct bio *bio; /* cloned bio */
+ struct page **pages; /* list of used pages */
+ u64 len;
+};
+
+/*
+ * a single device
+ */
+struct rbd_device {
+ int id; /* blkdev unique id */
+
+ int major; /* blkdev assigned major */
+ struct gendisk *disk; /* blkdev's gendisk and rq */
+ struct request_queue *q;
+
+ struct ceph_client *client;
+ struct rbd_client *rbd_client;
+
+ char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */
+
+ spinlock_t lock; /* queue lock */
+
+ struct rbd_image_header header;
+ char obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */
+ int obj_len;
+ char obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. */
+ char pool_name[RBD_MAX_POOL_NAME_LEN];
+ int poolid;
+
+ char snap_name[RBD_MAX_SNAP_NAME_LEN];
+ u32 cur_snap; /* index+1 of current snapshot within snap context
+ 0 - for the head */
+ int read_only;
+
+ struct list_head node;
+};
+
+static spinlock_t node_lock; /* protects client get/put */
+
+static struct class *class_rbd; /* /sys/class/rbd */
+static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */
+static LIST_HEAD(rbd_dev_list); /* devices */
+static LIST_HEAD(rbd_client_list); /* clients */
+
+
+static int rbd_open(struct block_device *bdev, fmode_t mode)
+{
+ struct gendisk *disk = bdev->bd_disk;
+ struct rbd_device *rbd_dev = disk->private_data;
+
+ set_device_ro(bdev, rbd_dev->read_only);
+
+ if ((mode & FMODE_WRITE) && rbd_dev->read_only)
+ return -EROFS;
+
+ return 0;
+}
+
+static const struct block_device_operations rbd_bd_ops = {
+ .owner = THIS_MODULE,
+ .open = rbd_open,
+};
+
+/*
+ * Initialize an rbd client instance.
+ * We own *opt.
+ */
+static struct rbd_client *rbd_client_create(struct ceph_options *opt)
+{
+ struct rbd_client *rbdc;
+ int ret = -ENOMEM;
+
+ dout("rbd_client_create\n");
+ rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL);
+ if (!rbdc)
+ goto out_opt;
+
+ kref_init(&rbdc->kref);
+ INIT_LIST_HEAD(&rbdc->node);
+
+ rbdc->client = ceph_create_client(opt, rbdc);
+ if (IS_ERR(rbdc->client))
+ goto out_rbdc;
+ opt = NULL; /* Now rbdc->client is responsible for opt */
+
+ ret = ceph_open_session(rbdc->client);
+ if (ret < 0)
+ goto out_err;
+
+ spin_lock(&node_lock);
+ list_add_tail(&rbdc->node, &rbd_client_list);
+ spin_unlock(&node_lock);
+
+ dout("rbd_client_create created %p\n", rbdc);
+ return rbdc;
+
+out_err:
+ ceph_destroy_client(rbdc->client);
+out_rbdc:
+ kfree(rbdc);
+out_opt:
+ if (opt)
+ ceph_destroy_options(opt);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Find a ceph client with specific addr and configuration.
+ */
+static struct rbd_client *__rbd_client_find(struct ceph_options *opt)
+{
+ struct rbd_client *client_node;
+
+ if (opt->flags & CEPH_OPT_NOSHARE)
+ return NULL;
+
+ list_for_each_entry(client_node, &rbd_client_list, node)
+ if (ceph_compare_options(opt, client_node->client) == 0)
+ return client_node;
+ return NULL;
+}
+
+/*
+ * Get a ceph client with specific addr and configuration, if one does
+ * not exist create it.
+ */
+static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr,
+ char *options)
+{
+ struct rbd_client *rbdc;
+ struct ceph_options *opt;
+ int ret;
+
+ ret = ceph_parse_options(&opt, options, mon_addr,
+ mon_addr + strlen(mon_addr), NULL, NULL);
+ if (ret < 0)
+ return ret;
+
+ spin_lock(&node_lock);
+ rbdc = __rbd_client_find(opt);
+ if (rbdc) {
+ ceph_destroy_options(opt);
+
+ /* using an existing client */
+ kref_get(&rbdc->kref);
+ rbd_dev->rbd_client = rbdc;
+ rbd_dev->client = rbdc->client;
+ spin_unlock(&node_lock);
+ return 0;
+ }
+ spin_unlock(&node_lock);
+
+ rbdc = rbd_client_create(opt);
+ if (IS_ERR(rbdc))
+ return PTR_ERR(rbdc);
+
+ rbd_dev->rbd_client = rbdc;
+ rbd_dev->client = rbdc->client;
+ return 0;
+}
+
+/*
+ * Destroy ceph client
+ */
+static void rbd_client_release(struct kref *kref)
+{
+ struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);
+
+ dout("rbd_release_client %p\n", rbdc);
+ spin_lock(&node_lock);
+ list_del(&rbdc->node);
+ spin_unlock(&node_lock);
+
+ ceph_destroy_client(rbdc->client);
+ kfree(rbdc);
+}
+
+/*
+ * Drop reference to ceph client node. If it's not referenced anymore, release
+ * it.
+ */
+static void rbd_put_client(struct rbd_device *rbd_dev)
+{
+ kref_put(&rbd_dev->rbd_client->kref, rbd_client_release);
+ rbd_dev->rbd_client = NULL;
+ rbd_dev->client = NULL;
+}
+
+
+/*
+ * Create a new header structure, translate header format from the on-disk
+ * header.
+ */
+static int rbd_header_from_disk(struct rbd_image_header *header,
+ struct rbd_image_header_ondisk *ondisk,
+ int allocated_snaps,
+ gfp_t gfp_flags)
+{
+ int i;
+ u32 snap_count = le32_to_cpu(ondisk->snap_count);
+ int ret = -ENOMEM;
+
+ init_rwsem(&header->snap_rwsem);
+
+ header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
+ header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
+ snap_count *
+ sizeof(struct rbd_image_snap_ondisk),
+ gfp_flags);
+ if (!header->snapc)
+ return -ENOMEM;
+ if (snap_count) {
+ header->snap_names = kmalloc(header->snap_names_len,
+ GFP_KERNEL);
+ if (!header->snap_names)
+ goto err_snapc;
+ header->snap_sizes = kmalloc(snap_count * sizeof(u64),
+ GFP_KERNEL);
+ if (!header->snap_sizes)
+ goto err_names;
+ } else {
+ header->snap_names = NULL;
+ header->snap_sizes = NULL;
+ }
+ memcpy(header->block_name, ondisk->block_name,
+ sizeof(ondisk->block_name));
+
+ header->image_size = le64_to_cpu(ondisk->image_size);
+ header->obj_order = ondisk->options.order;
+ header->crypt_type = ondisk->options.crypt_type;
+ header->comp_type = ondisk->options.comp_type;
+
+ atomic_set(&header->snapc->nref, 1);
+ header->snap_seq = le64_to_cpu(ondisk->snap_seq);
+ header->snapc->num_snaps = snap_count;
+ header->total_snaps = snap_count;
+
+ if (snap_count &&
+ allocated_snaps == snap_count) {
+ for (i = 0; i < snap_count; i++) {
+ header->snapc->snaps[i] =
+ le64_to_cpu(ondisk->snaps[i].id);
+ header->snap_sizes[i] =
+ le64_to_cpu(ondisk->snaps[i].image_size);
+ }
+
+ /* copy snapshot names */
+ memcpy(header->snap_names, &ondisk->snaps[i],
+ header->snap_names_len);
+ }
+
+ return 0;
+
+err_names:
+ kfree(header->snap_names);
+err_snapc:
+ kfree(header->snapc);
+ return ret;
+}
+
+static int snap_index(struct rbd_image_header *header, int snap_num)
+{
+ return header->total_snaps - snap_num;
+}
+
+static u64 cur_snap_id(struct rbd_device *rbd_dev)
+{
+ struct rbd_image_header *header = &rbd_dev->header;
+
+ if (!rbd_dev->cur_snap)
+ return 0;
+
+ return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)];
+}
+
+static int snap_by_name(struct rbd_image_header *header, const char *snap_name,
+ u64 *seq, u64 *size)
+{
+ int i;
+ char *p = header->snap_names;
+
+ for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) {
+ if (strcmp(snap_name, p) == 0)
+ break;
+ }
+ if (i == header->total_snaps)
+ return -ENOENT;
+ if (seq)
+ *seq = header->snapc->snaps[i];
+
+ if (size)
+ *size = header->snap_sizes[i];
+
+ return i;
+}
+
+static int rbd_header_set_snap(struct rbd_device *dev,
+ const char *snap_name,
+ u64 *size)
+{
+ struct rbd_image_header *header = &dev->header;
+ struct ceph_snap_context *snapc = header->snapc;
+ int ret = -ENOENT;
+
+ down_write(&header->snap_rwsem);
+
+ if (!snap_name ||
+ !*snap_name ||
+ strcmp(snap_name, "-") == 0 ||
+ strcmp(snap_name, RBD_SNAP_HEAD_NAME) == 0) {
+ if (header->total_snaps)
+ snapc->seq = header->snap_seq;
+ else
+ snapc->seq = 0;
+ dev->cur_snap = 0;
+ dev->read_only = 0;
+ if (size)
+ *size = header->image_size;
+ } else {
+ ret = snap_by_name(header, snap_name, &snapc->seq, size);
+ if (ret < 0)
+ goto done;
+
+ dev->cur_snap = header->total_snaps - ret;
+ dev->read_only = 1;
+ }
+
+ ret = 0;
+done:
+ up_write(&header->snap_rwsem);
+ return ret;
+}
+
+static void rbd_header_free(struct rbd_image_header *header)
+{
+ kfree(header->snapc);
+ kfree(header->snap_names);
+ kfree(header->snap_sizes);
+}
+
+/*
+ * get the actual striped segment name, offset and length
+ */
+static u64 rbd_get_segment(struct rbd_image_header *header,
+ const char *block_name,
+ u64 ofs, u64 len,
+ char *seg_name, u64 *segofs)
+{
+ u64 seg = ofs >> header->obj_order;
+
+ if (seg_name)
+ snprintf(seg_name, RBD_MAX_SEG_NAME_LEN,
+ "%s.%012llx", block_name, seg);
+
+ ofs = ofs & ((1 << header->obj_order) - 1);
+ len = min_t(u64, len, (1 << header->obj_order) - ofs);
+
+ if (segofs)
+ *segofs = ofs;
+
+ return len;
+}
+
+/*
+ * bio helpers
+ */
+
+static void bio_chain_put(struct bio *chain)
+{
+ struct bio *tmp;
+
+ while (chain) {
+ tmp = chain;
+ chain = chain->bi_next;
+ bio_put(tmp);
+ }
+}
+
+/*
+ * zeros a bio chain, starting at specific offset
+ */
+static void zero_bio_chain(struct bio *chain, int start_ofs)
+{
+ struct bio_vec *bv;
+ unsigned long flags;
+ void *buf;
+ int i;
+ int pos = 0;
+
+ while (chain) {
+ bio_for_each_segment(bv, chain, i) {
+ if (pos + bv->bv_len > start_ofs) {
+ int remainder = max(start_ofs - pos, 0);
+ buf = bvec_kmap_irq(bv, &flags);
+ memset(buf + remainder, 0,
+ bv->bv_len - remainder);
+ bvec_kunmap_irq(buf, &flags);
+ }
+ pos += bv->bv_len;
+ }
+
+ chain = chain->bi_next;
+ }
+}
+
+/*
+ * bio_chain_clone - clone a chain of bios up to a certain length.
+ * might return a bio_pair that will need to be released.
+ */
+static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
+ struct bio_pair **bp,
+ int len, gfp_t gfpmask)
+{
+ struct bio *tmp, *old_chain = *old, *new_chain = NULL, *tail = NULL;
+ int total = 0;
+
+ if (*bp) {
+ bio_pair_release(*bp);
+ *bp = NULL;
+ }
+
+ while (old_chain && (total < len)) {
+ tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs);
+ if (!tmp)
+ goto err_out;
+
+ if (total + old_chain->bi_size > len) {
+ struct bio_pair *bp;
+
+ /*
+ * this split can only happen with a single paged bio,
+ * split_bio will BUG_ON if this is not the case
+ */
+ dout("bio_chain_clone split! total=%d remaining=%d"
+ "bi_size=%d\n",
+ (int)total, (int)len-total,
+ (int)old_chain->bi_size);
+
+ /* split the bio. We'll release it either in the next
+ call, or it will have to be released outside */
+ bp = bio_split(old_chain, (len - total) / 512ULL);
+ if (!bp)
+ goto err_out;
+
+ __bio_clone(tmp, &bp->bio1);
+
+ *next = &bp->bio2;
+ } else {
+ __bio_clone(tmp, old_chain);
+ *next = old_chain->bi_next;
+ }
+
+ tmp->bi_bdev = NULL;
+ gfpmask &= ~__GFP_WAIT;
+ tmp->bi_next = NULL;
+
+ if (!new_chain) {
+ new_chain = tail = tmp;
+ } else {
+ tail->bi_next = tmp;
+ tail = tmp;
+ }
+ old_chain = old_chain->bi_next;
+
+ total += tmp->bi_size;
+ }
+
+ BUG_ON(total < len);
+
+ if (tail)
+ tail->bi_next = NULL;
+
+ *old = old_chain;
+
+ return new_chain;
+
+err_out:
+ dout("bio_chain_clone with err\n");
+ bio_chain_put(new_chain);
+ return NULL;
+}
+
+/*
+ * helpers for osd request op vectors.
+ */
+static int rbd_create_rw_ops(struct ceph_osd_req_op **ops,
+ int num_ops,
+ int opcode,
+ u32 payload_len)
+{
+ *ops = kzalloc(sizeof(struct ceph_osd_req_op) * (num_ops + 1),
+ GFP_NOIO);
+ if (!*ops)
+ return -ENOMEM;
+ (*ops)[0].op = opcode;
+ /*
+ * op extent offset and length will be set later on
+ * in calc_raw_layout()
+ */
+ (*ops)[0].payload_len = payload_len;
+ return 0;
+}
+
+static void rbd_destroy_ops(struct ceph_osd_req_op *ops)
+{
+ kfree(ops);
+}
+
+/*
+ * Send ceph osd request
+ */
+static int rbd_do_request(struct request *rq,
+ struct rbd_device *dev,
+ struct ceph_snap_context *snapc,
+ u64 snapid,
+ const char *obj, u64 ofs, u64 len,
+ struct bio *bio,
+ struct page **pages,
+ int num_pages,
+ int flags,
+ struct ceph_osd_req_op *ops,
+ int num_reply,
+ void (*rbd_cb)(struct ceph_osd_request *req,
+ struct ceph_msg *msg))
+{
+ struct ceph_osd_request *req;
+ struct ceph_file_layout *layout;
+ int ret;
+ u64 bno;
+ struct timespec mtime = CURRENT_TIME;
+ struct rbd_request *req_data;
+ struct ceph_osd_request_head *reqhead;
+ struct rbd_image_header *header = &dev->header;
+
+ ret = -ENOMEM;
+ req_data = kzalloc(sizeof(*req_data), GFP_NOIO);
+ if (!req_data)
+ goto done;
+
+ dout("rbd_do_request len=%lld ofs=%lld\n", len, ofs);
+
+ down_read(&header->snap_rwsem);
+
+ req = ceph_osdc_alloc_request(&dev->client->osdc, flags,
+ snapc,
+ ops,
+ false,
+ GFP_NOIO, pages, bio);
+ if (IS_ERR(req)) {
+ up_read(&header->snap_rwsem);
+ ret = PTR_ERR(req);
+ goto done_pages;
+ }
+
+ req->r_callback = rbd_cb;
+
+ req_data->rq = rq;
+ req_data->bio = bio;
+ req_data->pages = pages;
+ req_data->len = len;
+
+ req->r_priv = req_data;
+
+ reqhead = req->r_request->front.iov_base;
+ reqhead->snapid = cpu_to_le64(CEPH_NOSNAP);
+
+ strncpy(req->r_oid, obj, sizeof(req->r_oid));
+ req->r_oid_len = strlen(req->r_oid);
+
+ layout = &req->r_file_layout;
+ memset(layout, 0, sizeof(*layout));
+ layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
+ layout->fl_stripe_count = cpu_to_le32(1);
+ layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
+ layout->fl_pg_preferred = cpu_to_le32(-1);
+ layout->fl_pg_pool = cpu_to_le32(dev->poolid);
+ ceph_calc_raw_layout(&dev->client->osdc, layout, snapid,
+ ofs, &len, &bno, req, ops);
+
+ ceph_osdc_build_request(req, ofs, &len,
+ ops,
+ snapc,
+ &mtime,
+ req->r_oid, req->r_oid_len);
+ up_read(&header->snap_rwsem);
+
+ ret = ceph_osdc_start_request(&dev->client->osdc, req, false);
+ if (ret < 0)
+ goto done_err;
+
+ if (!rbd_cb) {
+ ret = ceph_osdc_wait_request(&dev->client->osdc, req);
+ ceph_osdc_put_request(req);
+ }
+ return ret;
+
+done_err:
+ bio_chain_put(req_data->bio);
+ ceph_osdc_put_request(req);
+done_pages:
+ kfree(req_data);
+done:
+ if (rq)
+ blk_end_request(rq, ret, len);
+ return ret;
+}
+
+/*
+ * Ceph osd op callback
+ */
+static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
+{
+ struct rbd_request *req_data = req->r_priv;
+ struct ceph_osd_reply_head *replyhead;
+ struct ceph_osd_op *op;
+ __s32 rc;
+ u64 bytes;
+ int read_op;
+
+ /* parse reply */
+ replyhead = msg->front.iov_base;
+ WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
+ op = (void *)(replyhead + 1);
+ rc = le32_to_cpu(replyhead->result);
+ bytes = le64_to_cpu(op->extent.length);
+ read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ);
+
+ dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc);
+
+ if (rc == -ENOENT && read_op) {
+ zero_bio_chain(req_data->bio, 0);
+ rc = 0;
+ } else if (rc == 0 && read_op && bytes < req_data->len) {
+ zero_bio_chain(req_data->bio, bytes);
+ bytes = req_data->len;
+ }
+
+ blk_end_request(req_data->rq, rc, bytes);
+
+ if (req_data->bio)
+ bio_chain_put(req_data->bio);
+
+ ceph_osdc_put_request(req);
+ kfree(req_data);
+}
+
+/*
+ * Do a synchronous ceph osd operation
+ */
+static int rbd_req_sync_op(struct rbd_device *dev,
+ struct ceph_snap_context *snapc,
+ u64 snapid,
+ int opcode,
+ int flags,
+ struct ceph_osd_req_op *orig_ops,
+ int num_reply,
+ const char *obj,
+ u64 ofs, u64 len,
+ char *buf)
+{
+ int ret;
+ struct page **pages;
+ int num_pages;
+ struct ceph_osd_req_op *ops = orig_ops;
+ u32 payload_len;
+
+ num_pages = calc_pages_for(ofs , len);
+ pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ if (!orig_ops) {
+ payload_len = (flags & CEPH_OSD_FLAG_WRITE ? len : 0);
+ ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len);
+ if (ret < 0)
+ goto done;
+
+ if ((flags & CEPH_OSD_FLAG_WRITE) && buf) {
+ ret = ceph_copy_to_page_vector(pages, buf, ofs, len);
+ if (ret < 0)
+ goto done_ops;
+ }
+ }
+
+ ret = rbd_do_request(NULL, dev, snapc, snapid,
+ obj, ofs, len, NULL,
+ pages, num_pages,
+ flags,
+ ops,
+ 2,
+ NULL);
+ if (ret < 0)
+ goto done_ops;
+
+ if ((flags & CEPH_OSD_FLAG_READ) && buf)
+ ret = ceph_copy_from_page_vector(pages, buf, ofs, ret);
+
+done_ops:
+ if (!orig_ops)
+ rbd_destroy_ops(ops);
+done:
+ ceph_release_page_vector(pages, num_pages);
+ return ret;
+}
+
+/*
+ * Do an asynchronous ceph osd operation
+ */
+static int rbd_do_op(struct request *rq,
+ struct rbd_device *rbd_dev ,
+ struct ceph_snap_context *snapc,
+ u64 snapid,
+ int opcode, int flags, int num_reply,
+ u64 ofs, u64 len,
+ struct bio *bio)
+{
+ char *seg_name;
+ u64 seg_ofs;
+ u64 seg_len;
+ int ret;
+ struct ceph_osd_req_op *ops;
+ u32 payload_len;
+
+ seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO);
+ if (!seg_name)
+ return -ENOMEM;
+
+ seg_len = rbd_get_segment(&rbd_dev->header,
+ rbd_dev->header.block_name,
+ ofs, len,
+ seg_name, &seg_ofs);
+
+ payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0);
+
+ ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len);
+ if (ret < 0)
+ goto done;
+
+ /* we've taken care of segment sizes earlier when we
+ cloned the bios. We should never have a segment
+ truncated at this point */
+ BUG_ON(seg_len < len);
+
+ ret = rbd_do_request(rq, rbd_dev, snapc, snapid,
+ seg_name, seg_ofs, seg_len,
+ bio,
+ NULL, 0,
+ flags,
+ ops,
+ num_reply,
+ rbd_req_cb);
+done:
+ kfree(seg_name);
+ return ret;
+}
+
+/*
+ * Request async osd write
+ */
+static int rbd_req_write(struct request *rq,
+ struct rbd_device *rbd_dev,
+ struct ceph_snap_context *snapc,
+ u64 ofs, u64 len,
+ struct bio *bio)
+{
+ return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP,
+ CEPH_OSD_OP_WRITE,
+ CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
+ 2,
+ ofs, len, bio);
+}
+
+/*
+ * Request async osd read
+ */
+static int rbd_req_read(struct request *rq,
+ struct rbd_device *rbd_dev,
+ u64 snapid,
+ u64 ofs, u64 len,
+ struct bio *bio)
+{
+ return rbd_do_op(rq, rbd_dev, NULL,
+ (snapid ? snapid : CEPH_NOSNAP),
+ CEPH_OSD_OP_READ,
+ CEPH_OSD_FLAG_READ,
+ 2,
+ ofs, len, bio);
+}
+
+/*
+ * Request sync osd read
+ */
+static int rbd_req_sync_read(struct rbd_device *dev,
+ struct ceph_snap_context *snapc,
+ u64 snapid,
+ const char *obj,
+ u64 ofs, u64 len,
+ char *buf)
+{
+ return rbd_req_sync_op(dev, NULL,
+ (snapid ? snapid : CEPH_NOSNAP),
+ CEPH_OSD_OP_READ,
+ CEPH_OSD_FLAG_READ,
+ NULL,
+ 1, obj, ofs, len, buf);
+}
+
+/*
+ * Request sync osd read
+ */
+static int rbd_req_sync_rollback_obj(struct rbd_device *dev,
+ u64 snapid,
+ const char *obj)
+{
+ struct ceph_osd_req_op *ops;
+ int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_ROLLBACK, 0);
+ if (ret < 0)
+ return ret;
+
+ ops[0].snap.snapid = snapid;
+
+ ret = rbd_req_sync_op(dev, NULL,
+ CEPH_NOSNAP,
+ 0,
+ CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
+ ops,
+ 1, obj, 0, 0, NULL);
+
+ rbd_destroy_ops(ops);
+
+ if (ret < 0)
+ return ret;
+
+ return ret;
+}
+
+/*
+ * Request sync osd read
+ */
+static int rbd_req_sync_exec(struct rbd_device *dev,
+ const char *obj,
+ const char *cls,
+ const char *method,
+ const char *data,
+ int len)
+{
+ struct ceph_osd_req_op *ops;
+ int cls_len = strlen(cls);
+ int method_len = strlen(method);
+ int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_CALL,
+ cls_len + method_len + len);
+ if (ret < 0)
+ return ret;
+
+ ops[0].cls.class_name = cls;
+ ops[0].cls.class_len = (__u8)cls_len;
+ ops[0].cls.method_name = method;
+ ops[0].cls.method_len = (__u8)method_len;
+ ops[0].cls.argc = 0;
+ ops[0].cls.indata = data;
+ ops[0].cls.indata_len = len;
+
+ ret = rbd_req_sync_op(dev, NULL,
+ CEPH_NOSNAP,
+ 0,
+ CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
+ ops,
+ 1, obj, 0, 0, NULL);
+
+ rbd_destroy_ops(ops);
+
+ dout("cls_exec returned %d\n", ret);
+ return ret;
+}
+
+/*
+ * block device queue callback
+ */
+static void rbd_rq_fn(struct request_queue *q)
+{
+ struct rbd_device *rbd_dev = q->queuedata;
+ struct request *rq;
+ struct bio_pair *bp = NULL;
+
+ rq = blk_fetch_request(q);
+
+ while (1) {
+ struct bio *bio;
+ struct bio *rq_bio, *next_bio = NULL;
+ bool do_write;
+ int size, op_size = 0;
+ u64 ofs;
+
+ /* peek at request from block layer */
+ if (!rq)
+ break;
+
+ dout("fetched request\n");
+
+ /* filter out block requests we don't understand */
+ if ((rq->cmd_type != REQ_TYPE_FS)) {
+ __blk_end_request_all(rq, 0);
+ goto next;
+ }
+
+ /* deduce our operation (read, write) */
+ do_write = (rq_data_dir(rq) == WRITE);
+
+ size = blk_rq_bytes(rq);
+ ofs = blk_rq_pos(rq) * 512ULL;
+ rq_bio = rq->bio;
+ if (do_write && rbd_dev->read_only) {
+ __blk_end_request_all(rq, -EROFS);
+ goto next;
+ }
+
+ spin_unlock_irq(q->queue_lock);
+
+ dout("%s 0x%x bytes at 0x%llx\n",
+ do_write ? "write" : "read",
+ size, blk_rq_pos(rq) * 512ULL);
+
+ do {
+ /* a bio clone to be passed down to OSD req */
+ dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt);
+ op_size = rbd_get_segment(&rbd_dev->header,
+ rbd_dev->header.block_name,
+ ofs, size,
+ NULL, NULL);
+ bio = bio_chain_clone(&rq_bio, &next_bio, &bp,
+ op_size, GFP_ATOMIC);
+ if (!bio) {
+ spin_lock_irq(q->queue_lock);
+ __blk_end_request_all(rq, -ENOMEM);
+ goto next;
+ }
+
+ /* init OSD command: write or read */
+ if (do_write)
+ rbd_req_write(rq, rbd_dev,
+ rbd_dev->header.snapc,
+ ofs,
+ op_size, bio);
+ else
+ rbd_req_read(rq, rbd_dev,
+ cur_snap_id(rbd_dev),
+ ofs,
+ op_size, bio);
+
+ size -= op_size;
+ ofs += op_size;
+
+ rq_bio = next_bio;
+ } while (size > 0);
+
+ if (bp)
+ bio_pair_release(bp);
+
+ spin_lock_irq(q->queue_lock);
+next:
+ rq = blk_fetch_request(q);
+ }
+}
+
+/*
+ * a queue callback. Makes sure that we don't create a bio that spans across
+ * multiple osd objects. One exception would be with a single page bios,
+ * which we handle later at bio_chain_clone
+ */
+static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
+ struct bio_vec *bvec)
+{
+ struct rbd_device *rbd_dev = q->queuedata;
+ unsigned int chunk_sectors = 1 << (rbd_dev->header.obj_order - 9);
+ sector_t sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev);
+ unsigned int bio_sectors = bmd->bi_size >> 9;
+ int max;
+
+ max = (chunk_sectors - ((sector & (chunk_sectors - 1))
+ + bio_sectors)) << 9;
+ if (max < 0)
+ max = 0; /* bio_add cannot handle a negative return */
+ if (max <= bvec->bv_len && bio_sectors == 0)
+ return bvec->bv_len;
+ return max;
+}
+
+static void rbd_free_disk(struct rbd_device *rbd_dev)
+{
+ struct gendisk *disk = rbd_dev->disk;
+
+ if (!disk)
+ return;
+
+ rbd_header_free(&rbd_dev->header);
+
+ if (disk->flags & GENHD_FL_UP)
+ del_gendisk(disk);
+ if (disk->queue)
+ blk_cleanup_queue(disk->queue);
+ put_disk(disk);
+}
+
+/*
+ * reload the ondisk the header
+ */
+static int rbd_read_header(struct rbd_device *rbd_dev,
+ struct rbd_image_header *header)
+{
+ ssize_t rc;
+ struct rbd_image_header_ondisk *dh;
+ int snap_count = 0;
+ u64 snap_names_len = 0;
+
+ while (1) {
+ int len = sizeof(*dh) +
+ snap_count * sizeof(struct rbd_image_snap_ondisk) +
+ snap_names_len;
+
+ rc = -ENOMEM;
+ dh = kmalloc(len, GFP_KERNEL);
+ if (!dh)
+ return -ENOMEM;
+
+ rc = rbd_req_sync_read(rbd_dev,
+ NULL, CEPH_NOSNAP,
+ rbd_dev->obj_md_name,
+ 0, len,
+ (char *)dh);
+ if (rc < 0)
+ goto out_dh;
+
+ rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL);
+ if (rc < 0)
+ goto out_dh;
+
+ if (snap_count != header->total_snaps) {
+ snap_count = header->total_snaps;
+ snap_names_len = header->snap_names_len;
+ rbd_header_free(header);
+ kfree(dh);
+ continue;
+ }
+ break;
+ }
+
+out_dh:
+ kfree(dh);
+ return rc;
+}
+
+/*
+ * create a snapshot
+ */
+static int rbd_header_add_snap(struct rbd_device *dev,
+ const char *snap_name,
+ gfp_t gfp_flags)
+{
+ int name_len = strlen(snap_name);
+ u64 new_snapid;
+ int ret;
+ void *data, *data_start, *data_end;
+
+ /* we should create a snapshot only if we're pointing at the head */
+ if (dev->cur_snap)
+ return -EINVAL;
+
+ ret = ceph_monc_create_snapid(&dev->client->monc, dev->poolid,
+ &new_snapid);
+ dout("created snapid=%lld\n", new_snapid);
+ if (ret < 0)
+ return ret;
+
+ data = kmalloc(name_len + 16, gfp_flags);
+ if (!data)
+ return -ENOMEM;
+
+ data_start = data;
+ data_end = data + name_len + 16;
+
+ ceph_encode_string_safe(&data, data_end, snap_name, name_len, bad);
+ ceph_encode_64_safe(&data, data_end, new_snapid, bad);
+
+ ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add",
+ data_start, data - data_start);
+
+ kfree(data_start);
+
+ if (ret < 0)
+ return ret;
+
+ dev->header.snapc->seq = new_snapid;
+
+ return 0;
+bad:
+ return -ERANGE;
+}
+
+/*
+ * only read the first part of the ondisk header, without the snaps info
+ */
+static int rbd_update_snaps(struct rbd_device *rbd_dev)
+{
+ int ret;
+ struct rbd_image_header h;
+ u64 snap_seq;
+
+ ret = rbd_read_header(rbd_dev, &h);
+ if (ret < 0)
+ return ret;
+
+ down_write(&rbd_dev->header.snap_rwsem);
+
+ snap_seq = rbd_dev->header.snapc->seq;
+
+ kfree(rbd_dev->header.snapc);
+ kfree(rbd_dev->header.snap_names);
+ kfree(rbd_dev->header.snap_sizes);
+
+ rbd_dev->header.total_snaps = h.total_snaps;
+ rbd_dev->header.snapc = h.snapc;
+ rbd_dev->header.snap_names = h.snap_names;
+ rbd_dev->header.snap_sizes = h.snap_sizes;
+ rbd_dev->header.snapc->seq = snap_seq;
+
+ up_write(&rbd_dev->header.snap_rwsem);
+
+ return 0;
+}
+
+static int rbd_init_disk(struct rbd_device *rbd_dev)
+{
+ struct gendisk *disk;
+ struct request_queue *q;
+ int rc;
+ u64 total_size = 0;
+
+ /* contact OSD, request size info about the object being mapped */
+ rc = rbd_read_header(rbd_dev, &rbd_dev->header);
+ if (rc)
+ return rc;
+
+ rc = rbd_header_set_snap(rbd_dev, rbd_dev->snap_name, &total_size);
+ if (rc)
+ return rc;
+
+ /* create gendisk info */
+ rc = -ENOMEM;
+ disk = alloc_disk(RBD_MINORS_PER_MAJOR);
+ if (!disk)
+ goto out;
+
+ sprintf(disk->disk_name, DRV_NAME "%d", rbd_dev->id);
+ disk->major = rbd_dev->major;
+ disk->first_minor = 0;
+ disk->fops = &rbd_bd_ops;
+ disk->private_data = rbd_dev;
+
+ /* init rq */
+ rc = -ENOMEM;
+ q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock);
+ if (!q)
+ goto out_disk;
+ blk_queue_merge_bvec(q, rbd_merge_bvec);
+ disk->queue = q;
+
+ q->queuedata = rbd_dev;
+
+ rbd_dev->disk = disk;
+ rbd_dev->q = q;
+
+ /* finally, announce the disk to the world */
+ set_capacity(disk, total_size / 512ULL);
+ add_disk(disk);
+
+ pr_info("%s: added with size 0x%llx\n",
+ disk->disk_name, (unsigned long long)total_size);
+ return 0;
+
+out_disk:
+ put_disk(disk);
+out:
+ return rc;
+}
+
+/********************************************************************
+ * /sys/class/rbd/
+ * add map rados objects to blkdev
+ * remove unmap rados objects
+ * list show mappings
+ *******************************************************************/
+
+static void class_rbd_release(struct class *cls)
+{
+ kfree(cls);
+}
+
+static ssize_t class_rbd_list(struct class *c,
+ struct class_attribute *attr,
+ char *data)
+{
+ int n = 0;
+ struct list_head *tmp;
+ int max = PAGE_SIZE;
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+ n += snprintf(data, max,
+ "#id\tmajor\tclient_name\tpool\tname\tsnap\tKB\n");
+
+ list_for_each(tmp, &rbd_dev_list) {
+ struct rbd_device *rbd_dev;
+
+ rbd_dev = list_entry(tmp, struct rbd_device, node);
+ n += snprintf(data+n, max-n,
+ "%d\t%d\tclient%lld\t%s\t%s\t%s\t%lld\n",
+ rbd_dev->id,
+ rbd_dev->major,
+ ceph_client_id(rbd_dev->client),
+ rbd_dev->pool_name,
+ rbd_dev->obj, rbd_dev->snap_name,
+ rbd_dev->header.image_size >> 10);
+ if (n == max)
+ break;
+ }
+
+ mutex_unlock(&ctl_mutex);
+ return n;
+}
+
+static ssize_t class_rbd_add(struct class *c,
+ struct class_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct ceph_osd_client *osdc;
+ struct rbd_device *rbd_dev;
+ ssize_t rc = -ENOMEM;
+ int irc, new_id = 0;
+ struct list_head *tmp;
+ char *mon_dev_name;
+ char *options;
+
+ if (!try_module_get(THIS_MODULE))
+ return -ENODEV;
+
+ mon_dev_name = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL);
+ if (!mon_dev_name)
+ goto err_out_mod;
+
+ options = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL);
+ if (!options)
+ goto err_mon_dev;
+
+ /* new rbd_device object */
+ rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL);
+ if (!rbd_dev)
+ goto err_out_opt;
+
+ /* static rbd_device initialization */
+ spin_lock_init(&rbd_dev->lock);
+ INIT_LIST_HEAD(&rbd_dev->node);
+
+ /* generate unique id: find highest unique id, add one */
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+ list_for_each(tmp, &rbd_dev_list) {
+ struct rbd_device *rbd_dev;
+
+ rbd_dev = list_entry(tmp, struct rbd_device, node);
+ if (rbd_dev->id >= new_id)
+ new_id = rbd_dev->id + 1;
+ }
+
+ rbd_dev->id = new_id;
+
+ /* add to global list */
+ list_add_tail(&rbd_dev->node, &rbd_dev_list);
+
+ /* parse add command */
+ if (sscanf(buf, "%" __stringify(RBD_MAX_OPT_LEN) "s "
+ "%" __stringify(RBD_MAX_OPT_LEN) "s "
+ "%" __stringify(RBD_MAX_POOL_NAME_LEN) "s "
+ "%" __stringify(RBD_MAX_OBJ_NAME_LEN) "s"
+ "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s",
+ mon_dev_name, options, rbd_dev->pool_name,
+ rbd_dev->obj, rbd_dev->snap_name) < 4) {
+ rc = -EINVAL;
+ goto err_out_slot;
+ }
+
+ if (rbd_dev->snap_name[0] == 0)
+ rbd_dev->snap_name[0] = '-';
+
+ rbd_dev->obj_len = strlen(rbd_dev->obj);
+ snprintf(rbd_dev->obj_md_name, sizeof(rbd_dev->obj_md_name), "%s%s",
+ rbd_dev->obj, RBD_SUFFIX);
+
+ /* initialize rest of new object */
+ snprintf(rbd_dev->name, DEV_NAME_LEN, DRV_NAME "%d", rbd_dev->id);
+ rc = rbd_get_client(rbd_dev, mon_dev_name, options);
+ if (rc < 0)
+ goto err_out_slot;
+
+ mutex_unlock(&ctl_mutex);
+
+ /* pick the pool */
+ osdc = &rbd_dev->client->osdc;
+ rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name);
+ if (rc < 0)
+ goto err_out_client;
+ rbd_dev->poolid = rc;
+
+ /* register our block device */
+ irc = register_blkdev(0, rbd_dev->name);
+ if (irc < 0) {
+ rc = irc;
+ goto err_out_client;
+ }
+ rbd_dev->major = irc;
+
+ /* set up and announce blkdev mapping */
+ rc = rbd_init_disk(rbd_dev);
+ if (rc)
+ goto err_out_blkdev;
+
+ return count;
+
+err_out_blkdev:
+ unregister_blkdev(rbd_dev->major, rbd_dev->name);
+err_out_client:
+ rbd_put_client(rbd_dev);
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+err_out_slot:
+ list_del_init(&rbd_dev->node);
+ mutex_unlock(&ctl_mutex);
+
+ kfree(rbd_dev);
+err_out_opt:
+ kfree(options);
+err_mon_dev:
+ kfree(mon_dev_name);
+err_out_mod:
+ dout("Error adding device %s\n", buf);
+ module_put(THIS_MODULE);
+ return rc;
+}
+
+static struct rbd_device *__rbd_get_dev(unsigned long id)
+{
+ struct list_head *tmp;
+ struct rbd_device *rbd_dev;
+
+ list_for_each(tmp, &rbd_dev_list) {
+ rbd_dev = list_entry(tmp, struct rbd_device, node);
+ if (rbd_dev->id == id)
+ return rbd_dev;
+ }
+ return NULL;
+}
+
+static ssize_t class_rbd_remove(struct class *c,
+ struct class_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct rbd_device *rbd_dev = NULL;
+ int target_id, rc;
+ unsigned long ul;
+
+ rc = strict_strtoul(buf, 10, &ul);
+ if (rc)
+ return rc;
+
+ /* convert to int; abort if we lost anything in the conversion */
+ target_id = (int) ul;
+ if (target_id != ul)
+ return -EINVAL;
+
+ /* remove object from list immediately */
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+ rbd_dev = __rbd_get_dev(target_id);
+ if (rbd_dev)
+ list_del_init(&rbd_dev->node);
+
+ mutex_unlock(&ctl_mutex);
+
+ if (!rbd_dev)
+ return -ENOENT;
+
+ rbd_put_client(rbd_dev);
+
+ /* clean up and free blkdev */
+ rbd_free_disk(rbd_dev);
+ unregister_blkdev(rbd_dev->major, rbd_dev->name);
+ kfree(rbd_dev);
+
+ /* release module ref */
+ module_put(THIS_MODULE);
+
+ return count;
+}
+
+static ssize_t class_rbd_snaps_list(struct class *c,
+ struct class_attribute *attr,
+ char *data)
+{
+ struct rbd_device *rbd_dev = NULL;
+ struct list_head *tmp;
+ struct rbd_image_header *header;
+ int i, n = 0, max = PAGE_SIZE;
+ int ret;
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+ n += snprintf(data, max, "#id\tsnap\tKB\n");
+
+ list_for_each(tmp, &rbd_dev_list) {
+ char *names, *p;
+ struct ceph_snap_context *snapc;
+
+ rbd_dev = list_entry(tmp, struct rbd_device, node);
+ header = &rbd_dev->header;
+
+ down_read(&header->snap_rwsem);
+
+ names = header->snap_names;
+ snapc = header->snapc;
+
+ n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n",
+ rbd_dev->id, RBD_SNAP_HEAD_NAME,
+ header->image_size >> 10,
+ (!rbd_dev->cur_snap ? " (*)" : ""));
+ if (n == max)
+ break;
+
+ p = names;
+ for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) {
+ n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n",
+ rbd_dev->id, p, header->snap_sizes[i] >> 10,
+ (rbd_dev->cur_snap &&
+ (snap_index(header, i) == rbd_dev->cur_snap) ?
+ " (*)" : ""));
+ if (n == max)
+ break;
+ }
+
+ up_read(&header->snap_rwsem);
+ }
+
+
+ ret = n;
+ mutex_unlock(&ctl_mutex);
+ return ret;
+}
+
+static ssize_t class_rbd_snaps_refresh(struct class *c,
+ struct class_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct rbd_device *rbd_dev = NULL;
+ int target_id, rc;
+ unsigned long ul;
+ int ret = count;
+
+ rc = strict_strtoul(buf, 10, &ul);
+ if (rc)
+ return rc;
+
+ /* convert to int; abort if we lost anything in the conversion */
+ target_id = (int) ul;
+ if (target_id != ul)
+ return -EINVAL;
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+ rbd_dev = __rbd_get_dev(target_id);
+ if (!rbd_dev) {
+ ret = -ENOENT;
+ goto done;
+ }
+
+ rc = rbd_update_snaps(rbd_dev);
+ if (rc < 0)
+ ret = rc;
+
+done:
+ mutex_unlock(&ctl_mutex);
+ return ret;
+}
+
+static ssize_t class_rbd_snap_create(struct class *c,
+ struct class_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct rbd_device *rbd_dev = NULL;
+ int target_id, ret;
+ char *name;
+
+ name = kmalloc(RBD_MAX_SNAP_NAME_LEN + 1, GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+
+ /* parse snaps add command */
+ if (sscanf(buf, "%d "
+ "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s",
+ &target_id,
+ name) != 2) {
+ ret = -EINVAL;
+ goto done;
+ }
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+ rbd_dev = __rbd_get_dev(target_id);
+ if (!rbd_dev) {
+ ret = -ENOENT;
+ goto done_unlock;
+ }
+
+ ret = rbd_header_add_snap(rbd_dev,
+ name, GFP_KERNEL);
+ if (ret < 0)
+ goto done_unlock;
+
+ ret = rbd_update_snaps(rbd_dev);
+ if (ret < 0)
+ goto done_unlock;
+
+ ret = count;
+done_unlock:
+ mutex_unlock(&ctl_mutex);
+done:
+ kfree(name);
+ return ret;
+}
+
+static ssize_t class_rbd_rollback(struct class *c,
+ struct class_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct rbd_device *rbd_dev = NULL;
+ int target_id, ret;
+ u64 snapid;
+ char snap_name[RBD_MAX_SNAP_NAME_LEN];
+ u64 cur_ofs;
+ char *seg_name;
+
+ /* parse snaps add command */
+ if (sscanf(buf, "%d "
+ "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s",
+ &target_id,
+ snap_name) != 2) {
+ return -EINVAL;
+ }
+
+ ret = -ENOMEM;
+ seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO);
+ if (!seg_name)
+ return ret;
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+ rbd_dev = __rbd_get_dev(target_id);
+ if (!rbd_dev) {
+ ret = -ENOENT;
+ goto done_unlock;
+ }
+
+ ret = snap_by_name(&rbd_dev->header, snap_name, &snapid, NULL);
+ if (ret < 0)
+ goto done_unlock;
+
+ dout("snapid=%lld\n", snapid);
+
+ cur_ofs = 0;
+ while (cur_ofs < rbd_dev->header.image_size) {
+ cur_ofs += rbd_get_segment(&rbd_dev->header,
+ rbd_dev->obj,
+ cur_ofs, (u64)-1,
+ seg_name, NULL);
+ dout("seg_name=%s\n", seg_name);
+
+ ret = rbd_req_sync_rollback_obj(rbd_dev, snapid, seg_name);
+ if (ret < 0)
+ pr_warning("could not roll back obj %s err=%d\n",
+ seg_name, ret);
+ }
+
+ ret = rbd_update_snaps(rbd_dev);
+ if (ret < 0)
+ goto done_unlock;
+
+ ret = count;
+
+done_unlock:
+ mutex_unlock(&ctl_mutex);
+ kfree(seg_name);
+
+ return ret;
+}
+
+static struct class_attribute class_rbd_attrs[] = {
+ __ATTR(add, 0200, NULL, class_rbd_add),
+ __ATTR(remove, 0200, NULL, class_rbd_remove),
+ __ATTR(list, 0444, class_rbd_list, NULL),
+ __ATTR(snaps_refresh, 0200, NULL, class_rbd_snaps_refresh),
+ __ATTR(snap_create, 0200, NULL, class_rbd_snap_create),
+ __ATTR(snaps_list, 0444, class_rbd_snaps_list, NULL),
+ __ATTR(snap_rollback, 0200, NULL, class_rbd_rollback),
+ __ATTR_NULL
+};
+
+/*
+ * create control files in sysfs
+ * /sys/class/rbd/...
+ */
+static int rbd_sysfs_init(void)
+{
+ int ret = -ENOMEM;
+
+ class_rbd = kzalloc(sizeof(*class_rbd), GFP_KERNEL);
+ if (!class_rbd)
+ goto out;
+
+ class_rbd->name = DRV_NAME;
+ class_rbd->owner = THIS_MODULE;
+ class_rbd->class_release = class_rbd_release;
+ class_rbd->class_attrs = class_rbd_attrs;
+
+ ret = class_register(class_rbd);
+ if (ret)
+ goto out_class;
+ return 0;
+
+out_class:
+ kfree(class_rbd);
+ class_rbd = NULL;
+ pr_err(DRV_NAME ": failed to create class rbd\n");
+out:
+ return ret;
+}
+
+static void rbd_sysfs_cleanup(void)
+{
+ if (class_rbd)
+ class_destroy(class_rbd);
+ class_rbd = NULL;
+}
+
+int __init rbd_init(void)
+{
+ int rc;
+
+ rc = rbd_sysfs_init();
+ if (rc)
+ return rc;
+ spin_lock_init(&node_lock);
+ pr_info("loaded " DRV_NAME_LONG "\n");
+ return 0;
+}
+
+void __exit rbd_exit(void)
+{
+ rbd_sysfs_cleanup();
+}
+
+module_init(rbd_init);
+module_exit(rbd_exit);
+
+MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
+MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
+MODULE_DESCRIPTION("rados block device");
+
+/* following authorship retained from original osdblk.c */
+MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>");
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h
new file mode 100644
index 0000000..fc6c678
--- /dev/null
+++ b/drivers/block/rbd_types.h
@@ -0,0 +1,73 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2010 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_RBD_TYPES_H
+#define CEPH_RBD_TYPES_H
+
+#include <linux/types.h>
+
+/*
+ * rbd image 'foo' consists of objects
+ * foo.rbd - image metadata
+ * foo.00000000
+ * foo.00000001
+ * ... - data
+ */
+
+#define RBD_SUFFIX ".rbd"
+#define RBD_DIRECTORY "rbd_directory"
+#define RBD_INFO "rbd_info"
+
+#define RBD_DEFAULT_OBJ_ORDER 22 /* 4MB */
+#define RBD_MIN_OBJ_ORDER 16
+#define RBD_MAX_OBJ_ORDER 30
+
+#define RBD_MAX_OBJ_NAME_LEN 96
+#define RBD_MAX_SEG_NAME_LEN 128
+
+#define RBD_COMP_NONE 0
+#define RBD_CRYPT_NONE 0
+
+#define RBD_HEADER_TEXT "<<< Rados Block Device Image >>>\n"
+#define RBD_HEADER_SIGNATURE "RBD"
+#define RBD_HEADER_VERSION "001.005"
+
+struct rbd_info {
+ __le64 max_id;
+} __attribute__ ((packed));
+
+struct rbd_image_snap_ondisk {
+ __le64 id;
+ __le64 image_size;
+} __attribute__((packed));
+
+struct rbd_image_header_ondisk {
+ char text[40];
+ char block_name[24];
+ char signature[4];
+ char version[8];
+ struct {
+ __u8 order;
+ __u8 crypt_type;
+ __u8 comp_type;
+ __u8 unused;
+ } __attribute__((packed)) options;
+ __le64 image_size;
+ __le64 snap_seq;
+ __le32 snap_count;
+ __le32 reserved;
+ __le64 snap_names_len;
+ struct rbd_image_snap_ondisk snaps[0];
+} __attribute__((packed));
+
+
+#endif
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 1101e25..8320490 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -2,7 +2,6 @@
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
-#include <linux/smp_lock.h>
#include <linux/hdreg.h>
#include <linux/virtio.h>
#include <linux/virtio_blk.h>
@@ -222,8 +221,8 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
return err;
}
-static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned cmd, unsigned long data)
+static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long data)
{
struct gendisk *disk = bdev->bd_disk;
struct virtio_blk *vblk = disk->private_data;
@@ -238,18 +237,6 @@ static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode,
(void __user *)data);
}
-static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long param)
-{
- int ret;
-
- lock_kernel();
- ret = virtblk_locked_ioctl(bdev, mode, cmd, param);
- unlock_kernel();
-
- return ret;
-}
-
/* We provide getgeo only to please some old bootloader/partitioning tools */
static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
{
diff --git a/drivers/char/agp/Kconfig b/drivers/char/agp/Kconfig
index 4b66c69..5ddf67e 100644
--- a/drivers/char/agp/Kconfig
+++ b/drivers/char/agp/Kconfig
@@ -57,7 +57,7 @@ config AGP_AMD
config AGP_AMD64
tristate "AMD Opteron/Athlon64 on-CPU GART support"
- depends on AGP && X86 && K8_NB
+ depends on AGP && X86 && AMD_NB
help
This option gives you AGP support for the GLX component of
X using the on-CPU northbridge of the AMD Athlon64/Opteron CPUs.
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index 70312da..42396df 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -15,7 +15,7 @@
#include <linux/mmzone.h>
#include <asm/page.h> /* PAGE_SIZE */
#include <asm/e820.h>
-#include <asm/k8.h>
+#include <asm/amd_nb.h>
#include <asm/gart.h>
#include "agp.h"
@@ -124,7 +124,7 @@ static int amd64_fetch_size(void)
u32 temp;
struct aper_size_info_32 *values;
- dev = k8_northbridges[0];
+ dev = k8_northbridges.nb_misc[0];
if (dev==NULL)
return 0;
@@ -181,10 +181,14 @@ static int amd_8151_configure(void)
unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real);
int i;
+ if (!k8_northbridges.gart_supported)
+ return 0;
+
/* Configure AGP regs in each x86-64 host bridge. */
- for (i = 0; i < num_k8_northbridges; i++) {
+ for (i = 0; i < k8_northbridges.num; i++) {
agp_bridge->gart_bus_addr =
- amd64_configure(k8_northbridges[i], gatt_bus);
+ amd64_configure(k8_northbridges.nb_misc[i],
+ gatt_bus);
}
k8_flush_garts();
return 0;
@@ -195,11 +199,15 @@ static void amd64_cleanup(void)
{
u32 tmp;
int i;
- for (i = 0; i < num_k8_northbridges; i++) {
- struct pci_dev *dev = k8_northbridges[i];
+
+ if (!k8_northbridges.gart_supported)
+ return;
+
+ for (i = 0; i < k8_northbridges.num; i++) {
+ struct pci_dev *dev = k8_northbridges.nb_misc[i];
/* disable gart translation */
pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &tmp);
- tmp &= ~AMD64_GARTEN;
+ tmp &= ~GARTEN;
pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, tmp);
}
}
@@ -313,22 +321,25 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp,
if (order < 0 || !agp_aperture_valid(aper, (32*1024*1024)<<order))
return -1;
- pci_write_config_dword(nb, AMD64_GARTAPERTURECTL, order << 1);
+ gart_set_size_and_enable(nb, order);
pci_write_config_dword(nb, AMD64_GARTAPERTUREBASE, aper >> 25);
return 0;
}
-static __devinit int cache_nbs (struct pci_dev *pdev, u32 cap_ptr)
+static __devinit int cache_nbs(struct pci_dev *pdev, u32 cap_ptr)
{
int i;
if (cache_k8_northbridges() < 0)
return -ENODEV;
+ if (!k8_northbridges.gart_supported)
+ return -ENODEV;
+
i = 0;
- for (i = 0; i < num_k8_northbridges; i++) {
- struct pci_dev *dev = k8_northbridges[i];
+ for (i = 0; i < k8_northbridges.num; i++) {
+ struct pci_dev *dev = k8_northbridges.nb_misc[i];
if (fix_northbridge(dev, pdev, cap_ptr) < 0) {
dev_err(&dev->dev, "no usable aperture found\n");
#ifdef __x86_64__
@@ -405,7 +416,8 @@ static int __devinit uli_agp_init(struct pci_dev *pdev)
}
/* shadow x86-64 registers into ULi registers */
- pci_read_config_dword (k8_northbridges[0], AMD64_GARTAPERTUREBASE, &httfea);
+ pci_read_config_dword (k8_northbridges.nb_misc[0], AMD64_GARTAPERTUREBASE,
+ &httfea);
/* if x86-64 aperture base is beyond 4G, exit here */
if ((httfea & 0x7fff) >> (32 - 25)) {
@@ -472,7 +484,8 @@ static int nforce3_agp_init(struct pci_dev *pdev)
pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp);
/* shadow x86-64 registers into NVIDIA registers */
- pci_read_config_dword (k8_northbridges[0], AMD64_GARTAPERTUREBASE, &apbase);
+ pci_read_config_dword (k8_northbridges.nb_misc[0], AMD64_GARTAPERTUREBASE,
+ &apbase);
/* if x86-64 aperture base is beyond 4G, exit here */
if ( (apbase & 0x7fff) >> (32 - 25) ) {
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index d2abf51..64255ce 100644
--- a/drivers/char/agp/generic.c
+++ b/drivers/char/agp/generic.c
@@ -984,7 +984,9 @@ int agp_generic_create_gatt_table(struct agp_bridge_data *bridge)
bridge->driver->cache_flush();
#ifdef CONFIG_X86
- set_memory_uc((unsigned long)table, 1 << page_order);
+ if (set_memory_uc((unsigned long)table, 1 << page_order))
+ printk(KERN_WARNING "Could not set GATT table memory to UC!");
+
bridge->gatt_table = (void *)table;
#else
bridge->gatt_table = ioremap_nocache(virt_to_phys(table),
diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
index 05ad4a1..7c41335 100644
--- a/drivers/char/tpm/tpm.c
+++ b/drivers/char/tpm/tpm.c
@@ -47,6 +47,16 @@ enum tpm_duration {
#define TPM_MAX_PROTECTED_ORDINAL 12
#define TPM_PROTECTED_ORDINAL_MASK 0xFF
+/*
+ * Bug workaround - some TPM's don't flush the most
+ * recently changed pcr on suspend, so force the flush
+ * with an extend to the selected _unused_ non-volatile pcr.
+ */
+static int tpm_suspend_pcr;
+module_param_named(suspend_pcr, tpm_suspend_pcr, uint, 0644);
+MODULE_PARM_DESC(suspend_pcr,
+ "PCR to use for dummy writes to faciltate flush on suspend.");
+
static LIST_HEAD(tpm_chip_list);
static DEFINE_SPINLOCK(driver_lock);
static DECLARE_BITMAP(dev_mask, TPM_NUM_DEVICES);
@@ -1077,18 +1087,6 @@ static struct tpm_input_header savestate_header = {
.ordinal = TPM_ORD_SAVESTATE
};
-/* Bug workaround - some TPM's don't flush the most
- * recently changed pcr on suspend, so force the flush
- * with an extend to the selected _unused_ non-volatile pcr.
- */
-static int tpm_suspend_pcr;
-static int __init tpm_suspend_setup(char *str)
-{
- get_option(&str, &tpm_suspend_pcr);
- return 1;
-}
-__setup("tpm_suspend_pcr=", tpm_suspend_setup);
-
/*
* We are about to suspend. Save the TPM state
* so that it can be restored.
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index c810481..6c1b676 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -48,6 +48,9 @@ struct ports_driver_data {
/* Used for exporting per-port information to debugfs */
struct dentry *debugfs_dir;
+ /* List of all the devices we're handling */
+ struct list_head portdevs;
+
/* Number of devices this driver is handling */
unsigned int index;
@@ -108,6 +111,9 @@ struct port_buffer {
* ports for that device (vdev->priv).
*/
struct ports_device {
+ /* Next portdev in the list, head is in the pdrvdata struct */
+ struct list_head list;
+
/*
* Workqueue handlers where we process deferred work after
* notification
@@ -178,15 +184,21 @@ struct port {
struct console cons;
/* Each port associates with a separate char device */
- struct cdev cdev;
+ struct cdev *cdev;
struct device *dev;
+ /* Reference-counting to handle port hot-unplugs and file operations */
+ struct kref kref;
+
/* A waitqueue for poll() or blocking read operations */
wait_queue_head_t waitqueue;
/* The 'name' of the port that we expose via sysfs properties */
char *name;
+ /* We can notify apps of host connect / disconnect events via SIGIO */
+ struct fasync_struct *async_queue;
+
/* The 'id' to identify the port with the Host */
u32 id;
@@ -221,6 +233,41 @@ out:
return port;
}
+static struct port *find_port_by_devt_in_portdev(struct ports_device *portdev,
+ dev_t dev)
+{
+ struct port *port;
+ unsigned long flags;
+
+ spin_lock_irqsave(&portdev->ports_lock, flags);
+ list_for_each_entry(port, &portdev->ports, list)
+ if (port->cdev->dev == dev)
+ goto out;
+ port = NULL;
+out:
+ spin_unlock_irqrestore(&portdev->ports_lock, flags);
+
+ return port;
+}
+
+static struct port *find_port_by_devt(dev_t dev)
+{
+ struct ports_device *portdev;
+ struct port *port;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pdrvdata_lock, flags);
+ list_for_each_entry(portdev, &pdrvdata.portdevs, list) {
+ port = find_port_by_devt_in_portdev(portdev, dev);
+ if (port)
+ goto out;
+ }
+ port = NULL;
+out:
+ spin_unlock_irqrestore(&pdrvdata_lock, flags);
+ return port;
+}
+
static struct port *find_port_by_id(struct ports_device *portdev, u32 id)
{
struct port *port;
@@ -410,7 +457,10 @@ static ssize_t __send_control_msg(struct ports_device *portdev, u32 port_id,
static ssize_t send_control_msg(struct port *port, unsigned int event,
unsigned int value)
{
- return __send_control_msg(port->portdev, port->id, event, value);
+ /* Did the port get unplugged before userspace closed it? */
+ if (port->portdev)
+ return __send_control_msg(port->portdev, port->id, event, value);
+ return 0;
}
/* Callers must take the port->outvq_lock */
@@ -459,9 +509,12 @@ static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count,
/*
* Wait till the host acknowledges it pushed out the data we
- * sent. This is done for ports in blocking mode or for data
- * from the hvc_console; the tty operations are performed with
- * spinlocks held so we can't sleep here.
+ * sent. This is done for data from the hvc_console; the tty
+ * operations are performed with spinlocks held so we can't
+ * sleep here. An alternative would be to copy the data to a
+ * buffer and relax the spinning requirement. The downside is
+ * we need to kmalloc a GFP_ATOMIC buffer each time the
+ * console driver writes something out.
*/
while (!virtqueue_get_buf(out_vq, &len))
cpu_relax();
@@ -522,6 +575,10 @@ static ssize_t fill_readbuf(struct port *port, char *out_buf, size_t out_count,
/* The condition that must be true for polling to end */
static bool will_read_block(struct port *port)
{
+ if (!port->guest_connected) {
+ /* Port got hot-unplugged. Let's exit. */
+ return false;
+ }
return !port_has_data(port) && port->host_connected;
}
@@ -572,6 +629,9 @@ static ssize_t port_fops_read(struct file *filp, char __user *ubuf,
if (ret < 0)
return ret;
}
+ /* Port got hot-unplugged. */
+ if (!port->guest_connected)
+ return -ENODEV;
/*
* We could've received a disconnection message while we were
* waiting for more data.
@@ -613,6 +673,9 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf,
if (ret < 0)
return ret;
}
+ /* Port got hot-unplugged. */
+ if (!port->guest_connected)
+ return -ENODEV;
count = min((size_t)(32 * 1024), count);
@@ -626,6 +689,14 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf,
goto free_buf;
}
+ /*
+ * We now ask send_buf() to not spin for generic ports -- we
+ * can re-use the same code path that non-blocking file
+ * descriptors take for blocking file descriptors since the
+ * wait is already done and we're certain the write will go
+ * through to the host.
+ */
+ nonblock = true;
ret = send_buf(port, buf, count, nonblock);
if (nonblock && ret > 0)
@@ -645,6 +716,10 @@ static unsigned int port_fops_poll(struct file *filp, poll_table *wait)
port = filp->private_data;
poll_wait(filp, &port->waitqueue, wait);
+ if (!port->guest_connected) {
+ /* Port got unplugged */
+ return POLLHUP;
+ }
ret = 0;
if (!will_read_block(port))
ret |= POLLIN | POLLRDNORM;
@@ -656,6 +731,8 @@ static unsigned int port_fops_poll(struct file *filp, poll_table *wait)
return ret;
}
+static void remove_port(struct kref *kref);
+
static int port_fops_release(struct inode *inode, struct file *filp)
{
struct port *port;
@@ -676,6 +753,16 @@ static int port_fops_release(struct inode *inode, struct file *filp)
reclaim_consumed_buffers(port);
spin_unlock_irq(&port->outvq_lock);
+ /*
+ * Locks aren't necessary here as a port can't be opened after
+ * unplug, and if a port isn't unplugged, a kref would already
+ * exist for the port. Plus, taking ports_lock here would
+ * create a dependency on other locks taken by functions
+ * inside remove_port if we're the last holder of the port,
+ * creating many problems.
+ */
+ kref_put(&port->kref, remove_port);
+
return 0;
}
@@ -683,22 +770,31 @@ static int port_fops_open(struct inode *inode, struct file *filp)
{
struct cdev *cdev = inode->i_cdev;
struct port *port;
+ int ret;
- port = container_of(cdev, struct port, cdev);
+ port = find_port_by_devt(cdev->dev);
filp->private_data = port;
+ /* Prevent against a port getting hot-unplugged at the same time */
+ spin_lock_irq(&port->portdev->ports_lock);
+ kref_get(&port->kref);
+ spin_unlock_irq(&port->portdev->ports_lock);
+
/*
* Don't allow opening of console port devices -- that's done
* via /dev/hvc
*/
- if (is_console_port(port))
- return -ENXIO;
+ if (is_console_port(port)) {
+ ret = -ENXIO;
+ goto out;
+ }
/* Allow only one process to open a particular port at a time */
spin_lock_irq(&port->inbuf_lock);
if (port->guest_connected) {
spin_unlock_irq(&port->inbuf_lock);
- return -EMFILE;
+ ret = -EMFILE;
+ goto out;
}
port->guest_connected = true;
@@ -713,10 +809,23 @@ static int port_fops_open(struct inode *inode, struct file *filp)
reclaim_consumed_buffers(port);
spin_unlock_irq(&port->outvq_lock);
+ nonseekable_open(inode, filp);
+
/* Notify host of port being opened */
send_control_msg(filp->private_data, VIRTIO_CONSOLE_PORT_OPEN, 1);
return 0;
+out:
+ kref_put(&port->kref, remove_port);
+ return ret;
+}
+
+static int port_fops_fasync(int fd, struct file *filp, int mode)
+{
+ struct port *port;
+
+ port = filp->private_data;
+ return fasync_helper(fd, filp, mode, &port->async_queue);
}
/*
@@ -732,6 +841,8 @@ static const struct file_operations port_fops = {
.write = port_fops_write,
.poll = port_fops_poll,
.release = port_fops_release,
+ .fasync = port_fops_fasync,
+ .llseek = no_llseek,
};
/*
@@ -990,6 +1101,12 @@ static unsigned int fill_queue(struct virtqueue *vq, spinlock_t *lock)
return nr_added_bufs;
}
+static void send_sigio_to_port(struct port *port)
+{
+ if (port->async_queue && port->guest_connected)
+ kill_fasync(&port->async_queue, SIGIO, POLL_OUT);
+}
+
static int add_port(struct ports_device *portdev, u32 id)
{
char debugfs_name[16];
@@ -1004,6 +1121,7 @@ static int add_port(struct ports_device *portdev, u32 id)
err = -ENOMEM;
goto fail;
}
+ kref_init(&port->kref);
port->portdev = portdev;
port->id = id;
@@ -1011,6 +1129,7 @@ static int add_port(struct ports_device *portdev, u32 id)
port->name = NULL;
port->inbuf = NULL;
port->cons.hvc = NULL;
+ port->async_queue = NULL;
port->cons.ws.ws_row = port->cons.ws.ws_col = 0;
@@ -1021,14 +1140,20 @@ static int add_port(struct ports_device *portdev, u32 id)
port->in_vq = portdev->in_vqs[port->id];
port->out_vq = portdev->out_vqs[port->id];
- cdev_init(&port->cdev, &port_fops);
+ port->cdev = cdev_alloc();
+ if (!port->cdev) {
+ dev_err(&port->portdev->vdev->dev, "Error allocating cdev\n");
+ err = -ENOMEM;
+ goto free_port;
+ }
+ port->cdev->ops = &port_fops;
devt = MKDEV(portdev->chr_major, id);
- err = cdev_add(&port->cdev, devt, 1);
+ err = cdev_add(port->cdev, devt, 1);
if (err < 0) {
dev_err(&port->portdev->vdev->dev,
"Error %d adding cdev for port %u\n", err, id);
- goto free_port;
+ goto free_cdev;
}
port->dev = device_create(pdrvdata.class, &port->portdev->vdev->dev,
devt, port, "vport%up%u",
@@ -1093,7 +1218,7 @@ free_inbufs:
free_device:
device_destroy(pdrvdata.class, port->dev->devt);
free_cdev:
- cdev_del(&port->cdev);
+ cdev_del(port->cdev);
free_port:
kfree(port);
fail:
@@ -1102,21 +1227,45 @@ fail:
return err;
}
-/* Remove all port-specific data. */
-static int remove_port(struct port *port)
+/* No users remain, remove all port-specific data. */
+static void remove_port(struct kref *kref)
+{
+ struct port *port;
+
+ port = container_of(kref, struct port, kref);
+
+ sysfs_remove_group(&port->dev->kobj, &port_attribute_group);
+ device_destroy(pdrvdata.class, port->dev->devt);
+ cdev_del(port->cdev);
+
+ kfree(port->name);
+
+ debugfs_remove(port->debugfs_file);
+
+ kfree(port);
+}
+
+/*
+ * Port got unplugged. Remove port from portdev's list and drop the
+ * kref reference. If no userspace has this port opened, it will
+ * result in immediate removal the port.
+ */
+static void unplug_port(struct port *port)
{
struct port_buffer *buf;
+ spin_lock_irq(&port->portdev->ports_lock);
+ list_del(&port->list);
+ spin_unlock_irq(&port->portdev->ports_lock);
+
if (port->guest_connected) {
port->guest_connected = false;
port->host_connected = false;
wake_up_interruptible(&port->waitqueue);
- send_control_msg(port, VIRTIO_CONSOLE_PORT_OPEN, 0);
- }
- spin_lock_irq(&port->portdev->ports_lock);
- list_del(&port->list);
- spin_unlock_irq(&port->portdev->ports_lock);
+ /* Let the app know the port is going down. */
+ send_sigio_to_port(port);
+ }
if (is_console_port(port)) {
spin_lock_irq(&pdrvdata_lock);
@@ -1135,9 +1284,6 @@ static int remove_port(struct port *port)
hvc_remove(port->cons.hvc);
#endif
}
- sysfs_remove_group(&port->dev->kobj, &port_attribute_group);
- device_destroy(pdrvdata.class, port->dev->devt);
- cdev_del(&port->cdev);
/* Remove unused data this port might have received. */
discard_port_data(port);
@@ -1148,12 +1294,19 @@ static int remove_port(struct port *port)
while ((buf = virtqueue_detach_unused_buf(port->in_vq)))
free_buf(buf);
- kfree(port->name);
-
- debugfs_remove(port->debugfs_file);
+ /*
+ * We should just assume the device itself has gone off --
+ * else a close on an open port later will try to send out a
+ * control message.
+ */
+ port->portdev = NULL;
- kfree(port);
- return 0;
+ /*
+ * Locks around here are not necessary - a port can't be
+ * opened after we removed the port struct from ports_list
+ * above.
+ */
+ kref_put(&port->kref, remove_port);
}
/* Any private messages that the Host and Guest want to share */
@@ -1192,7 +1345,7 @@ static void handle_control_message(struct ports_device *portdev,
add_port(portdev, cpkt->id);
break;
case VIRTIO_CONSOLE_PORT_REMOVE:
- remove_port(port);
+ unplug_port(port);
break;
case VIRTIO_CONSOLE_CONSOLE_PORT:
if (!cpkt->value)
@@ -1234,6 +1387,12 @@ static void handle_control_message(struct ports_device *portdev,
spin_lock_irq(&port->outvq_lock);
reclaim_consumed_buffers(port);
spin_unlock_irq(&port->outvq_lock);
+
+ /*
+ * If the guest is connected, it'll be interested in
+ * knowing the host connection state changed.
+ */
+ send_sigio_to_port(port);
break;
case VIRTIO_CONSOLE_PORT_NAME:
/*
@@ -1330,6 +1489,9 @@ static void in_intr(struct virtqueue *vq)
wake_up_interruptible(&port->waitqueue);
+ /* Send a SIGIO indicating new data in case the process asked for it */
+ send_sigio_to_port(port);
+
if (is_console_port(port) && hvc_poll(port->cons.hvc))
hvc_kick();
}
@@ -1566,6 +1728,10 @@ static int __devinit virtcons_probe(struct virtio_device *vdev)
add_port(portdev, 0);
}
+ spin_lock_irq(&pdrvdata_lock);
+ list_add_tail(&portdev->list, &pdrvdata.portdevs);
+ spin_unlock_irq(&pdrvdata_lock);
+
__send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID,
VIRTIO_CONSOLE_DEVICE_READY, 1);
return 0;
@@ -1589,23 +1755,41 @@ static void virtcons_remove(struct virtio_device *vdev)
{
struct ports_device *portdev;
struct port *port, *port2;
- struct port_buffer *buf;
- unsigned int len;
portdev = vdev->priv;
+ spin_lock_irq(&pdrvdata_lock);
+ list_del(&portdev->list);
+ spin_unlock_irq(&pdrvdata_lock);
+
+ /* Disable interrupts for vqs */
+ vdev->config->reset(vdev);
+ /* Finish up work that's lined up */
cancel_work_sync(&portdev->control_work);
list_for_each_entry_safe(port, port2, &portdev->ports, list)
- remove_port(port);
+ unplug_port(port);
unregister_chrdev(portdev->chr_major, "virtio-portsdev");
- while ((buf = virtqueue_get_buf(portdev->c_ivq, &len)))
- free_buf(buf);
+ /*
+ * When yanking out a device, we immediately lose the
+ * (device-side) queues. So there's no point in keeping the
+ * guest side around till we drop our final reference. This
+ * also means that any ports which are in an open state will
+ * have to just stop using the port, as the vqs are going
+ * away.
+ */
+ if (use_multiport(portdev)) {
+ struct port_buffer *buf;
+ unsigned int len;
- while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq)))
- free_buf(buf);
+ while ((buf = virtqueue_get_buf(portdev->c_ivq, &len)))
+ free_buf(buf);
+
+ while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq)))
+ free_buf(buf);
+ }
vdev->config->del_vqs(vdev);
kfree(portdev->in_vqs);
@@ -1652,6 +1836,7 @@ static int __init init(void)
PTR_ERR(pdrvdata.debugfs_dir));
}
INIT_LIST_HEAD(&pdrvdata.consoles);
+ INIT_LIST_HEAD(&pdrvdata.portdevs);
return register_virtio_driver(&virtio_console);
}
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 70bb350..9dbb28b 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -39,7 +39,7 @@ config EDAC_DEBUG
there're four debug levels (x=0,1,2,3 from low to high).
Usually you should select 'N'.
- config EDAC_DECODE_MCE
+config EDAC_DECODE_MCE
tristate "Decode MCEs in human-readable form (only on AMD for now)"
depends on CPU_SUP_AMD && X86_MCE
default y
@@ -51,6 +51,16 @@ config EDAC_DEBUG
which occur really early upon boot, before the module infrastructure
has been initialized.
+config EDAC_MCE_INJ
+ tristate "Simple MCE injection interface over /sysfs"
+ depends on EDAC_DECODE_MCE
+ default n
+ help
+ This is a simple interface to inject MCEs over /sysfs and test
+ the MCE decoding code in EDAC.
+
+ This is currently AMD-only.
+
config EDAC_MM_EDAC
tristate "Main Memory EDAC (Error Detection And Correction) reporting"
help
@@ -66,13 +76,13 @@ config EDAC_MCE
config EDAC_AMD64
tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h"
- depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI && EDAC_DECODE_MCE
+ depends on EDAC_MM_EDAC && AMD_NB && X86_64 && PCI && EDAC_DECODE_MCE
help
Support for error detection and correction on the AMD 64
Families of Memory Controllers (K8, F10h and F11h)
config EDAC_AMD64_ERROR_INJECTION
- bool "Sysfs Error Injection facilities"
+ bool "Sysfs HW Error injection facilities"
depends on EDAC_AMD64
help
Recent Opterons (Family 10h and later) provide for Memory Error
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index ca6b1bb..32c7bc9 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -17,6 +17,9 @@ ifdef CONFIG_PCI
edac_core-objs += edac_pci.o edac_pci_sysfs.o
endif
+obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o
+
+edac_mce_amd-objs := mce_amd.o
obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o
obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index e7d5d6b..8521401 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1,5 +1,5 @@
#include "amd64_edac.h"
-#include <asm/k8.h>
+#include <asm/amd_nb.h>
static struct edac_pci_ctl_info *amd64_ctl_pci;
@@ -2073,11 +2073,18 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
amd64_handle_ue(mci, info);
}
-void amd64_decode_bus_error(int node_id, struct err_regs *regs)
+void amd64_decode_bus_error(int node_id, struct mce *m, u32 nbcfg)
{
struct mem_ctl_info *mci = mci_lookup[node_id];
+ struct err_regs regs;
- __amd64_decode_bus_error(mci, regs);
+ regs.nbsl = (u32) m->status;
+ regs.nbsh = (u32)(m->status >> 32);
+ regs.nbeal = (u32) m->addr;
+ regs.nbeah = (u32)(m->addr >> 32);
+ regs.nbcfg = nbcfg;
+
+ __amd64_decode_bus_error(mci, &regs);
/*
* Check the UE bit of the NB status high register, if set generate some
@@ -2086,7 +2093,7 @@ void amd64_decode_bus_error(int node_id, struct err_regs *regs)
*
* FIXME: this should go somewhere else, if at all.
*/
- if (regs->nbsh & K8_NBSH_UC_ERR && !report_gart_errors)
+ if (regs.nbsh & K8_NBSH_UC_ERR && !report_gart_errors)
edac_mc_handle_ue_no_info(mci, "UE bit is set");
}
@@ -2927,7 +2934,7 @@ static int __init amd64_edac_init(void)
* to finish initialization of the MC instances.
*/
err = -ENODEV;
- for (nb = 0; nb < num_k8_northbridges; nb++) {
+ for (nb = 0; nb < k8_northbridges.num; nb++) {
if (!pvt_lookup[nb])
continue;
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 613b938..044aee4 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -72,7 +72,7 @@
#include <linux/edac.h>
#include <asm/msr.h>
#include "edac_core.h"
-#include "edac_mce_amd.h"
+#include "mce_amd.h"
#define amd64_printk(level, fmt, arg...) \
edac_printk(level, "amd64", fmt, ##arg)
@@ -482,11 +482,10 @@ extern const char *rrrr_msgs[16];
extern const char *to_msgs[2];
extern const char *pp_msgs[4];
extern const char *ii_msgs[4];
-extern const char *ext_msgs[32];
extern const char *htlink_msgs[8];
#ifdef CONFIG_EDAC_DEBUG
-#define NUM_DBG_ATTRS 9
+#define NUM_DBG_ATTRS 5
#else
#define NUM_DBG_ATTRS 0
#endif
diff --git a/drivers/edac/amd64_edac_dbg.c b/drivers/edac/amd64_edac_dbg.c
index 59cf2cf..e356228 100644
--- a/drivers/edac/amd64_edac_dbg.c
+++ b/drivers/edac/amd64_edac_dbg.c
@@ -1,167 +1,16 @@
#include "amd64_edac.h"
-/*
- * accept a hex value and store it into the virtual error register file, field:
- * nbeal and nbeah. Assume virtual error values have already been set for: NBSL,
- * NBSH and NBCFG. Then proceed to map the error values to a MC, CSROW and
- * CHANNEL
- */
-static ssize_t amd64_nbea_store(struct mem_ctl_info *mci, const char *data,
- size_t count)
-{
- struct amd64_pvt *pvt = mci->pvt_info;
- unsigned long long value;
- int ret = 0;
-
- ret = strict_strtoull(data, 16, &value);
- if (ret != -EINVAL) {
- debugf0("received NBEA= 0x%llx\n", value);
-
- /* place the value into the virtual error packet */
- pvt->ctl_error_info.nbeal = (u32) value;
- value >>= 32;
- pvt->ctl_error_info.nbeah = (u32) value;
-
- /* Process the Mapping request */
- /* TODO: Add race prevention */
- amd_decode_nb_mce(pvt->mc_node_id, &pvt->ctl_error_info, 1);
-
- return count;
- }
- return ret;
-}
-
-/* display back what the last NBEA (MCA NB Address (MC4_ADDR)) was written */
-static ssize_t amd64_nbea_show(struct mem_ctl_info *mci, char *data)
-{
- struct amd64_pvt *pvt = mci->pvt_info;
- u64 value;
-
- value = pvt->ctl_error_info.nbeah;
- value <<= 32;
- value |= pvt->ctl_error_info.nbeal;
-
- return sprintf(data, "%llx\n", value);
-}
-
-/* store the NBSL (MCA NB Status Low (MC4_STATUS)) value user desires */
-static ssize_t amd64_nbsl_store(struct mem_ctl_info *mci, const char *data,
- size_t count)
-{
- struct amd64_pvt *pvt = mci->pvt_info;
- unsigned long value;
- int ret = 0;
-
- ret = strict_strtoul(data, 16, &value);
- if (ret != -EINVAL) {
- debugf0("received NBSL= 0x%lx\n", value);
-
- pvt->ctl_error_info.nbsl = (u32) value;
-
- return count;
- }
- return ret;
-}
-
-/* display back what the last NBSL value written */
-static ssize_t amd64_nbsl_show(struct mem_ctl_info *mci, char *data)
-{
- struct amd64_pvt *pvt = mci->pvt_info;
- u32 value;
-
- value = pvt->ctl_error_info.nbsl;
-
- return sprintf(data, "%x\n", value);
-}
-
-/* store the NBSH (MCA NB Status High) value user desires */
-static ssize_t amd64_nbsh_store(struct mem_ctl_info *mci, const char *data,
- size_t count)
-{
- struct amd64_pvt *pvt = mci->pvt_info;
- unsigned long value;
- int ret = 0;
-
- ret = strict_strtoul(data, 16, &value);
- if (ret != -EINVAL) {
- debugf0("received NBSH= 0x%lx\n", value);
-
- pvt->ctl_error_info.nbsh = (u32) value;
-
- return count;
- }
- return ret;
-}
-
-/* display back what the last NBSH value written */
-static ssize_t amd64_nbsh_show(struct mem_ctl_info *mci, char *data)
-{
- struct amd64_pvt *pvt = mci->pvt_info;
- u32 value;
-
- value = pvt->ctl_error_info.nbsh;
-
- return sprintf(data, "%x\n", value);
+#define EDAC_DCT_ATTR_SHOW(reg) \
+static ssize_t amd64_##reg##_show(struct mem_ctl_info *mci, char *data) \
+{ \
+ struct amd64_pvt *pvt = mci->pvt_info; \
+ return sprintf(data, "0x%016llx\n", (u64)pvt->reg); \
}
-/* accept and store the NBCFG (MCA NB Configuration) value user desires */
-static ssize_t amd64_nbcfg_store(struct mem_ctl_info *mci,
- const char *data, size_t count)
-{
- struct amd64_pvt *pvt = mci->pvt_info;
- unsigned long value;
- int ret = 0;
-
- ret = strict_strtoul(data, 16, &value);
- if (ret != -EINVAL) {
- debugf0("received NBCFG= 0x%lx\n", value);
-
- pvt->ctl_error_info.nbcfg = (u32) value;
-
- return count;
- }
- return ret;
-}
-
-/* various show routines for the controls of a MCI */
-static ssize_t amd64_nbcfg_show(struct mem_ctl_info *mci, char *data)
-{
- struct amd64_pvt *pvt = mci->pvt_info;
-
- return sprintf(data, "%x\n", pvt->ctl_error_info.nbcfg);
-}
-
-
-static ssize_t amd64_dhar_show(struct mem_ctl_info *mci, char *data)
-{
- struct amd64_pvt *pvt = mci->pvt_info;
-
- return sprintf(data, "%x\n", pvt->dhar);
-}
-
-
-static ssize_t amd64_dbam_show(struct mem_ctl_info *mci, char *data)
-{
- struct amd64_pvt *pvt = mci->pvt_info;
-
- return sprintf(data, "%x\n", pvt->dbam0);
-}
-
-
-static ssize_t amd64_topmem_show(struct mem_ctl_info *mci, char *data)
-{
- struct amd64_pvt *pvt = mci->pvt_info;
-
- return sprintf(data, "%llx\n", pvt->top_mem);
-}
-
-
-static ssize_t amd64_topmem2_show(struct mem_ctl_info *mci, char *data)
-{
- struct amd64_pvt *pvt = mci->pvt_info;
-
- return sprintf(data, "%llx\n", pvt->top_mem2);
-}
+EDAC_DCT_ATTR_SHOW(dhar);
+EDAC_DCT_ATTR_SHOW(dbam0);
+EDAC_DCT_ATTR_SHOW(top_mem);
+EDAC_DCT_ATTR_SHOW(top_mem2);
static ssize_t amd64_hole_show(struct mem_ctl_info *mci, char *data)
{
@@ -182,38 +31,6 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
{
.attr = {
- .name = "nbea_ctl",
- .mode = (S_IRUGO | S_IWUSR)
- },
- .show = amd64_nbea_show,
- .store = amd64_nbea_store,
- },
- {
- .attr = {
- .name = "nbsl_ctl",
- .mode = (S_IRUGO | S_IWUSR)
- },
- .show = amd64_nbsl_show,
- .store = amd64_nbsl_store,
- },
- {
- .attr = {
- .name = "nbsh_ctl",
- .mode = (S_IRUGO | S_IWUSR)
- },
- .show = amd64_nbsh_show,
- .store = amd64_nbsh_store,
- },
- {
- .attr = {
- .name = "nbcfg_ctl",
- .mode = (S_IRUGO | S_IWUSR)
- },
- .show = amd64_nbcfg_show,
- .store = amd64_nbcfg_store,
- },
- {
- .attr = {
.name = "dhar",
.mode = (S_IRUGO)
},
@@ -225,7 +42,7 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
.name = "dbam",
.mode = (S_IRUGO)
},
- .show = amd64_dbam_show,
+ .show = amd64_dbam0_show,
.store = NULL,
},
{
@@ -233,7 +50,7 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
.name = "topmem",
.mode = (S_IRUGO)
},
- .show = amd64_topmem_show,
+ .show = amd64_top_mem_show,
.store = NULL,
},
{
@@ -241,7 +58,7 @@ struct mcidev_sysfs_attribute amd64_dbg_attrs[] = {
.name = "topmem2",
.mode = (S_IRUGO)
},
- .show = amd64_topmem2_show,
+ .show = amd64_top_mem2_show,
.store = NULL,
},
{
diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c
index 0709681..2941dca 100644
--- a/drivers/edac/edac_device_sysfs.c
+++ b/drivers/edac/edac_device_sysfs.c
@@ -13,6 +13,7 @@
#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/edac.h>
#include "edac_core.h"
#include "edac_module.h"
@@ -235,7 +236,7 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
debugf1("%s()\n", __func__);
/* get the /sys/devices/system/edac reference */
- edac_class = edac_get_edac_class();
+ edac_class = edac_get_sysfs_class();
if (edac_class == NULL) {
debugf1("%s() no edac_class error\n", __func__);
err = -ENODEV;
@@ -255,7 +256,7 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
if (!try_module_get(edac_dev->owner)) {
err = -ENODEV;
- goto err_out;
+ goto err_mod_get;
}
/* register */
@@ -282,6 +283,9 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)
err_kobj_reg:
module_put(edac_dev->owner);
+err_mod_get:
+ edac_put_sysfs_class();
+
err_out:
return err;
}
@@ -290,12 +294,11 @@ err_out:
* edac_device_unregister_sysfs_main_kobj:
* the '..../edac/<name>' kobject
*/
-void edac_device_unregister_sysfs_main_kobj(
- struct edac_device_ctl_info *edac_dev)
+void edac_device_unregister_sysfs_main_kobj(struct edac_device_ctl_info *dev)
{
debugf0("%s()\n", __func__);
debugf4("%s() name of kobject is: %s\n",
- __func__, kobject_name(&edac_dev->kobj));
+ __func__, kobject_name(&dev->kobj));
/*
* Unregister the edac device's kobject and
@@ -304,7 +307,8 @@ void edac_device_unregister_sysfs_main_kobj(
* a) module_put() this module
* b) 'kfree' the memory
*/
- kobject_put(&edac_dev->kobj);
+ kobject_put(&dev->kobj);
+ edac_put_sysfs_class();
}
/* edac_dev -> instance information */
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 8aad94d..a413586 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -11,6 +11,7 @@
#include <linux/ctype.h>
#include <linux/slab.h>
+#include <linux/edac.h>
#include <linux/bug.h>
#include "edac_core.h"
@@ -1011,13 +1012,13 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
*/
int edac_sysfs_setup_mc_kset(void)
{
- int err = 0;
+ int err = -EINVAL;
struct sysdev_class *edac_class;
debugf1("%s()\n", __func__);
/* get the /sys/devices/system/edac class reference */
- edac_class = edac_get_edac_class();
+ edac_class = edac_get_sysfs_class();
if (edac_class == NULL) {
debugf1("%s() no edac_class error=%d\n", __func__, err);
goto fail_out;
@@ -1028,15 +1029,16 @@ int edac_sysfs_setup_mc_kset(void)
if (!mc_kset) {
err = -ENOMEM;
debugf1("%s() Failed to register '.../edac/mc'\n", __func__);
- goto fail_out;
+ goto fail_kset;
}
debugf1("%s() Registered '.../edac/mc' kobject\n", __func__);
return 0;
+fail_kset:
+ edac_put_sysfs_class();
- /* error unwind stack */
fail_out:
return err;
}
@@ -1049,5 +1051,6 @@ fail_out:
void edac_sysfs_teardown_mc_kset(void)
{
kset_unregister(mc_kset);
+ edac_put_sysfs_class();
}
diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c
deleted file mode 100644
index 9014df6..0000000
--- a/drivers/edac/edac_mce_amd.c
+++ /dev/null
@@ -1,452 +0,0 @@
-#include <linux/module.h>
-#include "edac_mce_amd.h"
-
-static bool report_gart_errors;
-static void (*nb_bus_decoder)(int node_id, struct err_regs *regs);
-
-void amd_report_gart_errors(bool v)
-{
- report_gart_errors = v;
-}
-EXPORT_SYMBOL_GPL(amd_report_gart_errors);
-
-void amd_register_ecc_decoder(void (*f)(int, struct err_regs *))
-{
- nb_bus_decoder = f;
-}
-EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
-
-void amd_unregister_ecc_decoder(void (*f)(int, struct err_regs *))
-{
- if (nb_bus_decoder) {
- WARN_ON(nb_bus_decoder != f);
-
- nb_bus_decoder = NULL;
- }
-}
-EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
-
-/*
- * string representation for the different MCA reported error types, see F3x48
- * or MSR0000_0411.
- */
-const char *tt_msgs[] = { /* transaction type */
- "instruction",
- "data",
- "generic",
- "reserved"
-};
-EXPORT_SYMBOL_GPL(tt_msgs);
-
-const char *ll_msgs[] = { /* cache level */
- "L0",
- "L1",
- "L2",
- "L3/generic"
-};
-EXPORT_SYMBOL_GPL(ll_msgs);
-
-const char *rrrr_msgs[] = {
- "generic",
- "generic read",
- "generic write",
- "data read",
- "data write",
- "inst fetch",
- "prefetch",
- "evict",
- "snoop",
- "reserved RRRR= 9",
- "reserved RRRR= 10",
- "reserved RRRR= 11",
- "reserved RRRR= 12",
- "reserved RRRR= 13",
- "reserved RRRR= 14",
- "reserved RRRR= 15"
-};
-EXPORT_SYMBOL_GPL(rrrr_msgs);
-
-const char *pp_msgs[] = { /* participating processor */
- "local node originated (SRC)",
- "local node responded to request (RES)",
- "local node observed as 3rd party (OBS)",
- "generic"
-};
-EXPORT_SYMBOL_GPL(pp_msgs);
-
-const char *to_msgs[] = {
- "no timeout",
- "timed out"
-};
-EXPORT_SYMBOL_GPL(to_msgs);
-
-const char *ii_msgs[] = { /* memory or i/o */
- "mem access",
- "reserved",
- "i/o access",
- "generic"
-};
-EXPORT_SYMBOL_GPL(ii_msgs);
-
-/*
- * Map the 4 or 5 (family-specific) bits of Extended Error code to the
- * string table.
- */
-const char *ext_msgs[] = {
- "K8 ECC error", /* 0_0000b */
- "CRC error on link", /* 0_0001b */
- "Sync error packets on link", /* 0_0010b */
- "Master Abort during link operation", /* 0_0011b */
- "Target Abort during link operation", /* 0_0100b */
- "Invalid GART PTE entry during table walk", /* 0_0101b */
- "Unsupported atomic RMW command received", /* 0_0110b */
- "WDT error: NB transaction timeout", /* 0_0111b */
- "ECC/ChipKill ECC error", /* 0_1000b */
- "SVM DEV Error", /* 0_1001b */
- "Link Data error", /* 0_1010b */
- "Link/L3/Probe Filter Protocol error", /* 0_1011b */
- "NB Internal Arrays Parity error", /* 0_1100b */
- "DRAM Address/Control Parity error", /* 0_1101b */
- "Link Transmission error", /* 0_1110b */
- "GART/DEV Table Walk Data error" /* 0_1111b */
- "Res 0x100 error", /* 1_0000b */
- "Res 0x101 error", /* 1_0001b */
- "Res 0x102 error", /* 1_0010b */
- "Res 0x103 error", /* 1_0011b */
- "Res 0x104 error", /* 1_0100b */
- "Res 0x105 error", /* 1_0101b */
- "Res 0x106 error", /* 1_0110b */
- "Res 0x107 error", /* 1_0111b */
- "Res 0x108 error", /* 1_1000b */
- "Res 0x109 error", /* 1_1001b */
- "Res 0x10A error", /* 1_1010b */
- "Res 0x10B error", /* 1_1011b */
- "ECC error in L3 Cache Data", /* 1_1100b */
- "L3 Cache Tag error", /* 1_1101b */
- "L3 Cache LRU Parity error", /* 1_1110b */
- "Probe Filter error" /* 1_1111b */
-};
-EXPORT_SYMBOL_GPL(ext_msgs);
-
-static void amd_decode_dc_mce(u64 mc0_status)
-{
- u32 ec = mc0_status & 0xffff;
- u32 xec = (mc0_status >> 16) & 0xf;
-
- pr_emerg("Data Cache Error");
-
- if (xec == 1 && TLB_ERROR(ec))
- pr_cont(": %s TLB multimatch.\n", LL_MSG(ec));
- else if (xec == 0) {
- if (mc0_status & (1ULL << 40))
- pr_cont(" during Data Scrub.\n");
- else if (TLB_ERROR(ec))
- pr_cont(": %s TLB parity error.\n", LL_MSG(ec));
- else if (MEM_ERROR(ec)) {
- u8 ll = ec & 0x3;
- u8 tt = (ec >> 2) & 0x3;
- u8 rrrr = (ec >> 4) & 0xf;
-
- /* see F10h BKDG (31116), Table 92. */
- if (ll == 0x1) {
- if (tt != 0x1)
- goto wrong_dc_mce;
-
- pr_cont(": Data/Tag %s error.\n", RRRR_MSG(ec));
-
- } else if (ll == 0x2 && rrrr == 0x3)
- pr_cont(" during L1 linefill from L2.\n");
- else
- goto wrong_dc_mce;
- } else if (BUS_ERROR(ec) && boot_cpu_data.x86 == 0xf)
- pr_cont(" during system linefill.\n");
- else
- goto wrong_dc_mce;
- } else
- goto wrong_dc_mce;
-
- return;
-
-wrong_dc_mce:
- pr_warning("Corrupted DC MCE info?\n");
-}
-
-static void amd_decode_ic_mce(u64 mc1_status)
-{
- u32 ec = mc1_status & 0xffff;
- u32 xec = (mc1_status >> 16) & 0xf;
-
- pr_emerg("Instruction Cache Error");
-
- if (xec == 1 && TLB_ERROR(ec))
- pr_cont(": %s TLB multimatch.\n", LL_MSG(ec));
- else if (xec == 0) {
- if (TLB_ERROR(ec))
- pr_cont(": %s TLB Parity error.\n", LL_MSG(ec));
- else if (BUS_ERROR(ec)) {
- if (boot_cpu_data.x86 == 0xf &&
- (mc1_status & (1ULL << 58)))
- pr_cont(" during system linefill.\n");
- else
- pr_cont(" during attempted NB data read.\n");
- } else if (MEM_ERROR(ec)) {
- u8 ll = ec & 0x3;
- u8 rrrr = (ec >> 4) & 0xf;
-
- if (ll == 0x2)
- pr_cont(" during a linefill from L2.\n");
- else if (ll == 0x1) {
-
- switch (rrrr) {
- case 0x5:
- pr_cont(": Parity error during "
- "data load.\n");
- break;
-
- case 0x7:
- pr_cont(": Copyback Parity/Victim"
- " error.\n");
- break;
-
- case 0x8:
- pr_cont(": Tag Snoop error.\n");
- break;
-
- default:
- goto wrong_ic_mce;
- break;
- }
- }
- } else
- goto wrong_ic_mce;
- } else
- goto wrong_ic_mce;
-
- return;
-
-wrong_ic_mce:
- pr_warning("Corrupted IC MCE info?\n");
-}
-
-static void amd_decode_bu_mce(u64 mc2_status)
-{
- u32 ec = mc2_status & 0xffff;
- u32 xec = (mc2_status >> 16) & 0xf;
-
- pr_emerg("Bus Unit Error");
-
- if (xec == 0x1)
- pr_cont(" in the write data buffers.\n");
- else if (xec == 0x3)
- pr_cont(" in the victim data buffers.\n");
- else if (xec == 0x2 && MEM_ERROR(ec))
- pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec));
- else if (xec == 0x0) {
- if (TLB_ERROR(ec))
- pr_cont(": %s error in a Page Descriptor Cache or "
- "Guest TLB.\n", TT_MSG(ec));
- else if (BUS_ERROR(ec))
- pr_cont(": %s/ECC error in data read from NB: %s.\n",
- RRRR_MSG(ec), PP_MSG(ec));
- else if (MEM_ERROR(ec)) {
- u8 rrrr = (ec >> 4) & 0xf;
-
- if (rrrr >= 0x7)
- pr_cont(": %s error during data copyback.\n",
- RRRR_MSG(ec));
- else if (rrrr <= 0x1)
- pr_cont(": %s parity/ECC error during data "
- "access from L2.\n", RRRR_MSG(ec));
- else
- goto wrong_bu_mce;
- } else
- goto wrong_bu_mce;
- } else
- goto wrong_bu_mce;
-
- return;
-
-wrong_bu_mce:
- pr_warning("Corrupted BU MCE info?\n");
-}
-
-static void amd_decode_ls_mce(u64 mc3_status)
-{
- u32 ec = mc3_status & 0xffff;
- u32 xec = (mc3_status >> 16) & 0xf;
-
- pr_emerg("Load Store Error");
-
- if (xec == 0x0) {
- u8 rrrr = (ec >> 4) & 0xf;
-
- if (!BUS_ERROR(ec) || (rrrr != 0x3 && rrrr != 0x4))
- goto wrong_ls_mce;
-
- pr_cont(" during %s.\n", RRRR_MSG(ec));
- }
- return;
-
-wrong_ls_mce:
- pr_warning("Corrupted LS MCE info?\n");
-}
-
-void amd_decode_nb_mce(int node_id, struct err_regs *regs, int handle_errors)
-{
- u32 ec = ERROR_CODE(regs->nbsl);
-
- if (!handle_errors)
- return;
-
- /*
- * GART TLB error reporting is disabled by default. Bail out early.
- */
- if (TLB_ERROR(ec) && !report_gart_errors)
- return;
-
- pr_emerg("Northbridge Error, node %d", node_id);
-
- /*
- * F10h, revD can disable ErrCpu[3:0] so check that first and also the
- * value encoding has changed so interpret those differently
- */
- if ((boot_cpu_data.x86 == 0x10) &&
- (boot_cpu_data.x86_model > 7)) {
- if (regs->nbsh & K8_NBSH_ERR_CPU_VAL)
- pr_cont(", core: %u\n", (u8)(regs->nbsh & 0xf));
- } else {
- u8 assoc_cpus = regs->nbsh & 0xf;
-
- if (assoc_cpus > 0)
- pr_cont(", core: %d", fls(assoc_cpus) - 1);
-
- pr_cont("\n");
- }
-
- pr_emerg("%s.\n", EXT_ERR_MSG(regs->nbsl));
-
- if (BUS_ERROR(ec) && nb_bus_decoder)
- nb_bus_decoder(node_id, regs);
-}
-EXPORT_SYMBOL_GPL(amd_decode_nb_mce);
-
-static void amd_decode_fr_mce(u64 mc5_status)
-{
- /* we have only one error signature so match all fields at once. */
- if ((mc5_status & 0xffff) == 0x0f0f)
- pr_emerg(" FR Error: CPU Watchdog timer expire.\n");
- else
- pr_warning("Corrupted FR MCE info?\n");
-}
-
-static inline void amd_decode_err_code(unsigned int ec)
-{
- if (TLB_ERROR(ec)) {
- pr_emerg("Transaction: %s, Cache Level %s\n",
- TT_MSG(ec), LL_MSG(ec));
- } else if (MEM_ERROR(ec)) {
- pr_emerg("Transaction: %s, Type: %s, Cache Level: %s",
- RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec));
- } else if (BUS_ERROR(ec)) {
- pr_emerg("Transaction type: %s(%s), %s, Cache Level: %s, "
- "Participating Processor: %s\n",
- RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec),
- PP_MSG(ec));
- } else
- pr_warning("Huh? Unknown MCE error 0x%x\n", ec);
-}
-
-static int amd_decode_mce(struct notifier_block *nb, unsigned long val,
- void *data)
-{
- struct mce *m = (struct mce *)data;
- struct err_regs regs;
- int node, ecc;
-
- pr_emerg("MC%d_STATUS: ", m->bank);
-
- pr_cont("%sorrected error, other errors lost: %s, "
- "CPU context corrupt: %s",
- ((m->status & MCI_STATUS_UC) ? "Unc" : "C"),
- ((m->status & MCI_STATUS_OVER) ? "yes" : "no"),
- ((m->status & MCI_STATUS_PCC) ? "yes" : "no"));
-
- /* do the two bits[14:13] together */
- ecc = (m->status >> 45) & 0x3;
- if (ecc)
- pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U"));
-
- pr_cont("\n");
-
- switch (m->bank) {
- case 0:
- amd_decode_dc_mce(m->status);
- break;
-
- case 1:
- amd_decode_ic_mce(m->status);
- break;
-
- case 2:
- amd_decode_bu_mce(m->status);
- break;
-
- case 3:
- amd_decode_ls_mce(m->status);
- break;
-
- case 4:
- regs.nbsl = (u32) m->status;
- regs.nbsh = (u32)(m->status >> 32);
- regs.nbeal = (u32) m->addr;
- regs.nbeah = (u32)(m->addr >> 32);
- node = amd_get_nb_id(m->extcpu);
-
- amd_decode_nb_mce(node, &regs, 1);
- break;
-
- case 5:
- amd_decode_fr_mce(m->status);
- break;
-
- default:
- break;
- }
-
- amd_decode_err_code(m->status & 0xffff);
-
- return NOTIFY_STOP;
-}
-
-static struct notifier_block amd_mce_dec_nb = {
- .notifier_call = amd_decode_mce,
-};
-
-static int __init mce_amd_init(void)
-{
- /*
- * We can decode MCEs for K8, F10h and F11h CPUs:
- */
- if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
- return 0;
-
- if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
- return 0;
-
- atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb);
-
- return 0;
-}
-early_initcall(mce_amd_init);
-
-#ifdef MODULE
-static void __exit mce_amd_exit(void)
-{
- atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
-}
-
-MODULE_DESCRIPTION("AMD MCE decoder");
-MODULE_ALIAS("edac-mce-amd");
-MODULE_LICENSE("GPL");
-module_exit(mce_amd_exit);
-#endif
diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c
index 7e1374a..be4b075 100644
--- a/drivers/edac/edac_module.c
+++ b/drivers/edac/edac_module.c
@@ -27,15 +27,6 @@ EXPORT_SYMBOL_GPL(edac_debug_level);
struct workqueue_struct *edac_workqueue;
/*
- * sysfs object: /sys/devices/system/edac
- * need to export to other files in this modules
- */
-static struct sysdev_class edac_class = {
- .name = "edac",
-};
-static int edac_class_valid;
-
-/*
* edac_op_state_to_string()
*/
char *edac_op_state_to_string(int opstate)
@@ -55,60 +46,6 @@ char *edac_op_state_to_string(int opstate)
}
/*
- * edac_get_edac_class()
- *
- * return pointer to the edac class of 'edac'
- */
-struct sysdev_class *edac_get_edac_class(void)
-{
- struct sysdev_class *classptr = NULL;
-
- if (edac_class_valid)
- classptr = &edac_class;
-
- return classptr;
-}
-
-/*
- * edac_register_sysfs_edac_name()
- *
- * register the 'edac' into /sys/devices/system
- *
- * return:
- * 0 success
- * !0 error
- */
-static int edac_register_sysfs_edac_name(void)
-{
- int err;
-
- /* create the /sys/devices/system/edac directory */
- err = sysdev_class_register(&edac_class);
-
- if (err) {
- debugf1("%s() error=%d\n", __func__, err);
- return err;
- }
-
- edac_class_valid = 1;
- return 0;
-}
-
-/*
- * sysdev_class_unregister()
- *
- * unregister the 'edac' from /sys/devices/system
- */
-static void edac_unregister_sysfs_edac_name(void)
-{
- /* only if currently registered, then unregister it */
- if (edac_class_valid)
- sysdev_class_unregister(&edac_class);
-
- edac_class_valid = 0;
-}
-
-/*
* edac_workqueue_setup
* initialize the edac work queue for polling operations
*/
@@ -154,21 +91,11 @@ static int __init edac_init(void)
edac_pci_clear_parity_errors();
/*
- * perform the registration of the /sys/devices/system/edac class object
- */
- if (edac_register_sysfs_edac_name()) {
- edac_printk(KERN_ERR, EDAC_MC,
- "Error initializing 'edac' kobject\n");
- err = -ENODEV;
- goto error;
- }
-
- /*
* now set up the mc_kset under the edac class object
*/
err = edac_sysfs_setup_mc_kset();
if (err)
- goto sysfs_setup_fail;
+ goto error;
/* Setup/Initialize the workq for this core */
err = edac_workqueue_setup();
@@ -183,9 +110,6 @@ static int __init edac_init(void)
workq_fail:
edac_sysfs_teardown_mc_kset();
-sysfs_setup_fail:
- edac_unregister_sysfs_edac_name();
-
error:
return err;
}
@@ -201,7 +125,6 @@ static void __exit edac_exit(void)
/* tear down the various subsystems */
edac_workqueue_teardown();
edac_sysfs_teardown_mc_kset();
- edac_unregister_sysfs_edac_name();
}
/*
diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h
index 233d479..17aabb7 100644
--- a/drivers/edac/edac_module.h
+++ b/drivers/edac/edac_module.h
@@ -42,7 +42,6 @@ extern void edac_device_unregister_sysfs_main_kobj(
struct edac_device_ctl_info *edac_dev);
extern int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev);
extern void edac_device_remove_sysfs(struct edac_device_ctl_info *edac_dev);
-extern struct sysdev_class *edac_get_edac_class(void);
/* edac core workqueue: single CPU mode */
extern struct workqueue_struct *edac_workqueue;
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index c39697d..023b01c 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -7,7 +7,7 @@
*
*/
#include <linux/module.h>
-#include <linux/sysdev.h>
+#include <linux/edac.h>
#include <linux/slab.h>
#include <linux/ctype.h>
@@ -354,7 +354,7 @@ static int edac_pci_main_kobj_setup(void)
/* First time, so create the main kobject and its
* controls and atributes
*/
- edac_class = edac_get_edac_class();
+ edac_class = edac_get_sysfs_class();
if (edac_class == NULL) {
debugf1("%s() no edac_class\n", __func__);
err = -ENODEV;
@@ -368,7 +368,7 @@ static int edac_pci_main_kobj_setup(void)
if (!try_module_get(THIS_MODULE)) {
debugf1("%s() try_module_get() failed\n", __func__);
err = -ENODEV;
- goto decrement_count_fail;
+ goto mod_get_fail;
}
edac_pci_top_main_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
@@ -403,6 +403,9 @@ kobject_init_and_add_fail:
kzalloc_fail:
module_put(THIS_MODULE);
+mod_get_fail:
+ edac_put_sysfs_class();
+
decrement_count_fail:
/* if are on this error exit, nothing to tear down */
atomic_dec(&edac_pci_sysfs_refcount);
@@ -429,6 +432,7 @@ static void edac_pci_main_kobj_teardown(void)
__func__);
kobject_put(edac_pci_top_main_kobj);
}
+ edac_put_sysfs_class();
}
/*
diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c
index 20b428a..aab9707 100644
--- a/drivers/edac/edac_stub.c
+++ b/drivers/edac/edac_stub.c
@@ -3,10 +3,13 @@
*
* Author: Dave Jiang <djiang@mvista.com>
*
- * 2007 (c) MontaVista Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
+ * 2007 (c) MontaVista Software, Inc.
+ * 2010 (c) Advanced Micro Devices Inc.
+ * Borislav Petkov <borislav.petkov@amd.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
*
*/
#include <linux/module.h>
@@ -23,6 +26,8 @@ EXPORT_SYMBOL_GPL(edac_handlers);
int edac_err_assert = 0;
EXPORT_SYMBOL_GPL(edac_err_assert);
+static atomic_t edac_class_valid = ATOMIC_INIT(0);
+
/*
* called to determine if there is an EDAC driver interested in
* knowing an event (such as NMI) occurred
@@ -44,3 +49,41 @@ void edac_atomic_assert_error(void)
edac_err_assert++;
}
EXPORT_SYMBOL_GPL(edac_atomic_assert_error);
+
+/*
+ * sysfs object: /sys/devices/system/edac
+ * need to export to other files
+ */
+struct sysdev_class edac_class = {
+ .name = "edac",
+};
+EXPORT_SYMBOL_GPL(edac_class);
+
+/* return pointer to the 'edac' node in sysfs */
+struct sysdev_class *edac_get_sysfs_class(void)
+{
+ int err = 0;
+
+ if (atomic_read(&edac_class_valid))
+ goto out;
+
+ /* create the /sys/devices/system/edac directory */
+ err = sysdev_class_register(&edac_class);
+ if (err) {
+ printk(KERN_ERR "Error registering toplevel EDAC sysfs dir\n");
+ return NULL;
+ }
+
+out:
+ atomic_inc(&edac_class_valid);
+ return &edac_class;
+}
+EXPORT_SYMBOL_GPL(edac_get_sysfs_class);
+
+void edac_put_sysfs_class(void)
+{
+ /* last user unregisters it */
+ if (atomic_dec_and_test(&edac_class_valid))
+ sysdev_class_unregister(&edac_class);
+}
+EXPORT_SYMBOL_GPL(edac_put_sysfs_class);
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
new file mode 100644
index 0000000..c018109
--- /dev/null
+++ b/drivers/edac/mce_amd.c
@@ -0,0 +1,680 @@
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include "mce_amd.h"
+
+static struct amd_decoder_ops *fam_ops;
+
+static u8 nb_err_cpumask = 0xf;
+
+static bool report_gart_errors;
+static void (*nb_bus_decoder)(int node_id, struct mce *m, u32 nbcfg);
+
+void amd_report_gart_errors(bool v)
+{
+ report_gart_errors = v;
+}
+EXPORT_SYMBOL_GPL(amd_report_gart_errors);
+
+void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32))
+{
+ nb_bus_decoder = f;
+}
+EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
+
+void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32))
+{
+ if (nb_bus_decoder) {
+ WARN_ON(nb_bus_decoder != f);
+
+ nb_bus_decoder = NULL;
+ }
+}
+EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
+
+/*
+ * string representation for the different MCA reported error types, see F3x48
+ * or MSR0000_0411.
+ */
+
+/* transaction type */
+const char *tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" };
+EXPORT_SYMBOL_GPL(tt_msgs);
+
+/* cache level */
+const char *ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" };
+EXPORT_SYMBOL_GPL(ll_msgs);
+
+/* memory transaction type */
+const char *rrrr_msgs[] = {
+ "GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP"
+};
+EXPORT_SYMBOL_GPL(rrrr_msgs);
+
+/* participating processor */
+const char *pp_msgs[] = { "SRC", "RES", "OBS", "GEN" };
+EXPORT_SYMBOL_GPL(pp_msgs);
+
+/* request timeout */
+const char *to_msgs[] = { "no timeout", "timed out" };
+EXPORT_SYMBOL_GPL(to_msgs);
+
+/* memory or i/o */
+const char *ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
+EXPORT_SYMBOL_GPL(ii_msgs);
+
+static const char *f10h_nb_mce_desc[] = {
+ "HT link data error",
+ "Protocol error (link, L3, probe filter, etc.)",
+ "Parity error in NB-internal arrays",
+ "Link Retry due to IO link transmission error",
+ "L3 ECC data cache error",
+ "ECC error in L3 cache tag",
+ "L3 LRU parity bits error",
+ "ECC Error in the Probe Filter directory"
+};
+
+static bool f12h_dc_mce(u16 ec)
+{
+ bool ret = false;
+
+ if (MEM_ERROR(ec)) {
+ u8 ll = ec & 0x3;
+ ret = true;
+
+ if (ll == LL_L2)
+ pr_cont("during L1 linefill from L2.\n");
+ else if (ll == LL_L1)
+ pr_cont("Data/Tag %s error.\n", RRRR_MSG(ec));
+ else
+ ret = false;
+ }
+ return ret;
+}
+
+static bool f10h_dc_mce(u16 ec)
+{
+ u8 r4 = (ec >> 4) & 0xf;
+ u8 ll = ec & 0x3;
+
+ if (r4 == R4_GEN && ll == LL_L1) {
+ pr_cont("during data scrub.\n");
+ return true;
+ }
+ return f12h_dc_mce(ec);
+}
+
+static bool k8_dc_mce(u16 ec)
+{
+ if (BUS_ERROR(ec)) {
+ pr_cont("during system linefill.\n");
+ return true;
+ }
+
+ return f10h_dc_mce(ec);
+}
+
+static bool f14h_dc_mce(u16 ec)
+{
+ u8 r4 = (ec >> 4) & 0xf;
+ u8 ll = ec & 0x3;
+ u8 tt = (ec >> 2) & 0x3;
+ u8 ii = tt;
+ bool ret = true;
+
+ if (MEM_ERROR(ec)) {
+
+ if (tt != TT_DATA || ll != LL_L1)
+ return false;
+
+ switch (r4) {
+ case R4_DRD:
+ case R4_DWR:
+ pr_cont("Data/Tag parity error due to %s.\n",
+ (r4 == R4_DRD ? "load/hw prf" : "store"));
+ break;
+ case R4_EVICT:
+ pr_cont("Copyback parity error on a tag miss.\n");
+ break;
+ case R4_SNOOP:
+ pr_cont("Tag parity error during snoop.\n");
+ break;
+ default:
+ ret = false;
+ }
+ } else if (BUS_ERROR(ec)) {
+
+ if ((ii != II_MEM && ii != II_IO) || ll != LL_LG)
+ return false;
+
+ pr_cont("System read data error on a ");
+
+ switch (r4) {
+ case R4_RD:
+ pr_cont("TLB reload.\n");
+ break;
+ case R4_DWR:
+ pr_cont("store.\n");
+ break;
+ case R4_DRD:
+ pr_cont("load.\n");
+ break;
+ default:
+ ret = false;
+ }
+ } else {
+ ret = false;
+ }
+
+ return ret;
+}
+
+static void amd_decode_dc_mce(struct mce *m)
+{
+ u16 ec = m->status & 0xffff;
+ u8 xec = (m->status >> 16) & 0xf;
+
+ pr_emerg(HW_ERR "Data Cache Error: ");
+
+ /* TLB error signatures are the same across families */
+ if (TLB_ERROR(ec)) {
+ u8 tt = (ec >> 2) & 0x3;
+
+ if (tt == TT_DATA) {
+ pr_cont("%s TLB %s.\n", LL_MSG(ec),
+ (xec ? "multimatch" : "parity error"));
+ return;
+ }
+ else
+ goto wrong_dc_mce;
+ }
+
+ if (!fam_ops->dc_mce(ec))
+ goto wrong_dc_mce;
+
+ return;
+
+wrong_dc_mce:
+ pr_emerg(HW_ERR "Corrupted DC MCE info?\n");
+}
+
+static bool k8_ic_mce(u16 ec)
+{
+ u8 ll = ec & 0x3;
+ u8 r4 = (ec >> 4) & 0xf;
+ bool ret = true;
+
+ if (!MEM_ERROR(ec))
+ return false;
+
+ if (ll == 0x2)
+ pr_cont("during a linefill from L2.\n");
+ else if (ll == 0x1) {
+ switch (r4) {
+ case R4_IRD:
+ pr_cont("Parity error during data load.\n");
+ break;
+
+ case R4_EVICT:
+ pr_cont("Copyback Parity/Victim error.\n");
+ break;
+
+ case R4_SNOOP:
+ pr_cont("Tag Snoop error.\n");
+ break;
+
+ default:
+ ret = false;
+ break;
+ }
+ } else
+ ret = false;
+
+ return ret;
+}
+
+static bool f14h_ic_mce(u16 ec)
+{
+ u8 ll = ec & 0x3;
+ u8 tt = (ec >> 2) & 0x3;
+ u8 r4 = (ec >> 4) & 0xf;
+ bool ret = true;
+
+ if (MEM_ERROR(ec)) {
+ if (tt != 0 || ll != 1)
+ ret = false;
+
+ if (r4 == R4_IRD)
+ pr_cont("Data/tag array parity error for a tag hit.\n");
+ else if (r4 == R4_SNOOP)
+ pr_cont("Tag error during snoop/victimization.\n");
+ else
+ ret = false;
+ }
+ return ret;
+}
+
+static void amd_decode_ic_mce(struct mce *m)
+{
+ u16 ec = m->status & 0xffff;
+ u8 xec = (m->status >> 16) & 0xf;
+
+ pr_emerg(HW_ERR "Instruction Cache Error: ");
+
+ if (TLB_ERROR(ec))
+ pr_cont("%s TLB %s.\n", LL_MSG(ec),
+ (xec ? "multimatch" : "parity error"));
+ else if (BUS_ERROR(ec)) {
+ bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
+
+ pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
+ } else if (fam_ops->ic_mce(ec))
+ ;
+ else
+ pr_emerg(HW_ERR "Corrupted IC MCE info?\n");
+}
+
+static void amd_decode_bu_mce(struct mce *m)
+{
+ u32 ec = m->status & 0xffff;
+ u32 xec = (m->status >> 16) & 0xf;
+
+ pr_emerg(HW_ERR "Bus Unit Error");
+
+ if (xec == 0x1)
+ pr_cont(" in the write data buffers.\n");
+ else if (xec == 0x3)
+ pr_cont(" in the victim data buffers.\n");
+ else if (xec == 0x2 && MEM_ERROR(ec))
+ pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec));
+ else if (xec == 0x0) {
+ if (TLB_ERROR(ec))
+ pr_cont(": %s error in a Page Descriptor Cache or "
+ "Guest TLB.\n", TT_MSG(ec));
+ else if (BUS_ERROR(ec))
+ pr_cont(": %s/ECC error in data read from NB: %s.\n",
+ RRRR_MSG(ec), PP_MSG(ec));
+ else if (MEM_ERROR(ec)) {
+ u8 rrrr = (ec >> 4) & 0xf;
+
+ if (rrrr >= 0x7)
+ pr_cont(": %s error during data copyback.\n",
+ RRRR_MSG(ec));
+ else if (rrrr <= 0x1)
+ pr_cont(": %s parity/ECC error during data "
+ "access from L2.\n", RRRR_MSG(ec));
+ else
+ goto wrong_bu_mce;
+ } else
+ goto wrong_bu_mce;
+ } else
+ goto wrong_bu_mce;
+
+ return;
+
+wrong_bu_mce:
+ pr_emerg(HW_ERR "Corrupted BU MCE info?\n");
+}
+
+static void amd_decode_ls_mce(struct mce *m)
+{
+ u16 ec = m->status & 0xffff;
+ u8 xec = (m->status >> 16) & 0xf;
+
+ if (boot_cpu_data.x86 == 0x14) {
+ pr_emerg("You shouldn't be seeing an LS MCE on this cpu family,"
+ " please report on LKML.\n");
+ return;
+ }
+
+ pr_emerg(HW_ERR "Load Store Error");
+
+ if (xec == 0x0) {
+ u8 r4 = (ec >> 4) & 0xf;
+
+ if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
+ goto wrong_ls_mce;
+
+ pr_cont(" during %s.\n", RRRR_MSG(ec));
+ } else
+ goto wrong_ls_mce;
+
+ return;
+
+wrong_ls_mce:
+ pr_emerg(HW_ERR "Corrupted LS MCE info?\n");
+}
+
+static bool k8_nb_mce(u16 ec, u8 xec)
+{
+ bool ret = true;
+
+ switch (xec) {
+ case 0x1:
+ pr_cont("CRC error detected on HT link.\n");
+ break;
+
+ case 0x5:
+ pr_cont("Invalid GART PTE entry during GART table walk.\n");
+ break;
+
+ case 0x6:
+ pr_cont("Unsupported atomic RMW received from an IO link.\n");
+ break;
+
+ case 0x0:
+ case 0x8:
+ if (boot_cpu_data.x86 == 0x11)
+ return false;
+
+ pr_cont("DRAM ECC error detected on the NB.\n");
+ break;
+
+ case 0xd:
+ pr_cont("Parity error on the DRAM addr/ctl signals.\n");
+ break;
+
+ default:
+ ret = false;
+ break;
+ }
+
+ return ret;
+}
+
+static bool f10h_nb_mce(u16 ec, u8 xec)
+{
+ bool ret = true;
+ u8 offset = 0;
+
+ if (k8_nb_mce(ec, xec))
+ return true;
+
+ switch(xec) {
+ case 0xa ... 0xc:
+ offset = 10;
+ break;
+
+ case 0xe:
+ offset = 11;
+ break;
+
+ case 0xf:
+ if (TLB_ERROR(ec))
+ pr_cont("GART Table Walk data error.\n");
+ else if (BUS_ERROR(ec))
+ pr_cont("DMA Exclusion Vector Table Walk error.\n");
+ else
+ ret = false;
+
+ goto out;
+ break;
+
+ case 0x1c ... 0x1f:
+ offset = 24;
+ break;
+
+ default:
+ ret = false;
+
+ goto out;
+ break;
+ }
+
+ pr_cont("%s.\n", f10h_nb_mce_desc[xec - offset]);
+
+out:
+ return ret;
+}
+
+static bool nb_noop_mce(u16 ec, u8 xec)
+{
+ return false;
+}
+
+void amd_decode_nb_mce(int node_id, struct mce *m, u32 nbcfg)
+{
+ u8 xec = (m->status >> 16) & 0x1f;
+ u16 ec = m->status & 0xffff;
+ u32 nbsh = (u32)(m->status >> 32);
+
+ pr_emerg(HW_ERR "Northbridge Error, node %d: ", node_id);
+
+ /*
+ * F10h, revD can disable ErrCpu[3:0] so check that first and also the
+ * value encoding has changed so interpret those differently
+ */
+ if ((boot_cpu_data.x86 == 0x10) &&
+ (boot_cpu_data.x86_model > 7)) {
+ if (nbsh & K8_NBSH_ERR_CPU_VAL)
+ pr_cont(", core: %u", (u8)(nbsh & nb_err_cpumask));
+ } else {
+ u8 assoc_cpus = nbsh & nb_err_cpumask;
+
+ if (assoc_cpus > 0)
+ pr_cont(", core: %d", fls(assoc_cpus) - 1);
+ }
+
+ switch (xec) {
+ case 0x2:
+ pr_cont("Sync error (sync packets on HT link detected).\n");
+ return;
+
+ case 0x3:
+ pr_cont("HT Master abort.\n");
+ return;
+
+ case 0x4:
+ pr_cont("HT Target abort.\n");
+ return;
+
+ case 0x7:
+ pr_cont("NB Watchdog timeout.\n");
+ return;
+
+ case 0x9:
+ pr_cont("SVM DMA Exclusion Vector error.\n");
+ return;
+
+ default:
+ break;
+ }
+
+ if (!fam_ops->nb_mce(ec, xec))
+ goto wrong_nb_mce;
+
+ if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10)
+ if ((xec == 0x8 || xec == 0x0) && nb_bus_decoder)
+ nb_bus_decoder(node_id, m, nbcfg);
+
+ return;
+
+wrong_nb_mce:
+ pr_emerg(HW_ERR "Corrupted NB MCE info?\n");
+}
+EXPORT_SYMBOL_GPL(amd_decode_nb_mce);
+
+static void amd_decode_fr_mce(struct mce *m)
+{
+ if (boot_cpu_data.x86 == 0xf ||
+ boot_cpu_data.x86 == 0x11)
+ goto wrong_fr_mce;
+
+ /* we have only one error signature so match all fields at once. */
+ if ((m->status & 0xffff) == 0x0f0f) {
+ pr_emerg(HW_ERR "FR Error: CPU Watchdog timer expire.\n");
+ return;
+ }
+
+wrong_fr_mce:
+ pr_emerg(HW_ERR "Corrupted FR MCE info?\n");
+}
+
+static inline void amd_decode_err_code(u16 ec)
+{
+ if (TLB_ERROR(ec)) {
+ pr_emerg(HW_ERR "Transaction: %s, Cache Level: %s\n",
+ TT_MSG(ec), LL_MSG(ec));
+ } else if (MEM_ERROR(ec)) {
+ pr_emerg(HW_ERR "Transaction: %s, Type: %s, Cache Level: %s\n",
+ RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec));
+ } else if (BUS_ERROR(ec)) {
+ pr_emerg(HW_ERR "Transaction: %s (%s), %s, Cache Level: %s, "
+ "Participating Processor: %s\n",
+ RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec),
+ PP_MSG(ec));
+ } else
+ pr_emerg(HW_ERR "Huh? Unknown MCE error 0x%x\n", ec);
+}
+
+/*
+ * Filter out unwanted MCE signatures here.
+ */
+static bool amd_filter_mce(struct mce *m)
+{
+ u8 xec = (m->status >> 16) & 0x1f;
+
+ /*
+ * NB GART TLB error reporting is disabled by default.
+ */
+ if (m->bank == 4 && xec == 0x5 && !report_gart_errors)
+ return true;
+
+ return false;
+}
+
+int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
+{
+ struct mce *m = (struct mce *)data;
+ int node, ecc;
+
+ if (amd_filter_mce(m))
+ return NOTIFY_STOP;
+
+ pr_emerg(HW_ERR "MC%d_STATUS: ", m->bank);
+
+ pr_cont("%sorrected error, other errors lost: %s, "
+ "CPU context corrupt: %s",
+ ((m->status & MCI_STATUS_UC) ? "Unc" : "C"),
+ ((m->status & MCI_STATUS_OVER) ? "yes" : "no"),
+ ((m->status & MCI_STATUS_PCC) ? "yes" : "no"));
+
+ /* do the two bits[14:13] together */
+ ecc = (m->status >> 45) & 0x3;
+ if (ecc)
+ pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U"));
+
+ pr_cont("\n");
+
+ switch (m->bank) {
+ case 0:
+ amd_decode_dc_mce(m);
+ break;
+
+ case 1:
+ amd_decode_ic_mce(m);
+ break;
+
+ case 2:
+ amd_decode_bu_mce(m);
+ break;
+
+ case 3:
+ amd_decode_ls_mce(m);
+ break;
+
+ case 4:
+ node = amd_get_nb_id(m->extcpu);
+ amd_decode_nb_mce(node, m, 0);
+ break;
+
+ case 5:
+ amd_decode_fr_mce(m);
+ break;
+
+ default:
+ break;
+ }
+
+ amd_decode_err_code(m->status & 0xffff);
+
+ return NOTIFY_STOP;
+}
+EXPORT_SYMBOL_GPL(amd_decode_mce);
+
+static struct notifier_block amd_mce_dec_nb = {
+ .notifier_call = amd_decode_mce,
+};
+
+static int __init mce_amd_init(void)
+{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ return 0;
+
+ if ((boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x12) &&
+ (boot_cpu_data.x86 != 0x14 || boot_cpu_data.x86_model > 0xf))
+ return 0;
+
+ fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
+ if (!fam_ops)
+ return -ENOMEM;
+
+ switch (boot_cpu_data.x86) {
+ case 0xf:
+ fam_ops->dc_mce = k8_dc_mce;
+ fam_ops->ic_mce = k8_ic_mce;
+ fam_ops->nb_mce = k8_nb_mce;
+ break;
+
+ case 0x10:
+ fam_ops->dc_mce = f10h_dc_mce;
+ fam_ops->ic_mce = k8_ic_mce;
+ fam_ops->nb_mce = f10h_nb_mce;
+ break;
+
+ case 0x11:
+ fam_ops->dc_mce = k8_dc_mce;
+ fam_ops->ic_mce = k8_ic_mce;
+ fam_ops->nb_mce = f10h_nb_mce;
+ break;
+
+ case 0x12:
+ fam_ops->dc_mce = f12h_dc_mce;
+ fam_ops->ic_mce = k8_ic_mce;
+ fam_ops->nb_mce = nb_noop_mce;
+ break;
+
+ case 0x14:
+ nb_err_cpumask = 0x3;
+ fam_ops->dc_mce = f14h_dc_mce;
+ fam_ops->ic_mce = f14h_ic_mce;
+ fam_ops->nb_mce = nb_noop_mce;
+ break;
+
+ default:
+ printk(KERN_WARNING "Huh? What family is that: %d?!\n",
+ boot_cpu_data.x86);
+ kfree(fam_ops);
+ return -EINVAL;
+ }
+
+ pr_info("MCE: In-kernel MCE decoding enabled.\n");
+
+ atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb);
+
+ return 0;
+}
+early_initcall(mce_amd_init);
+
+#ifdef MODULE
+static void __exit mce_amd_exit(void)
+{
+ atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
+ kfree(fam_ops);
+}
+
+MODULE_DESCRIPTION("AMD MCE decoder");
+MODULE_ALIAS("edac-mce-amd");
+MODULE_LICENSE("GPL");
+module_exit(mce_amd_exit);
+#endif
diff --git a/drivers/edac/edac_mce_amd.h b/drivers/edac/mce_amd.h
index df23ee0..35f6e0e 100644
--- a/drivers/edac/edac_mce_amd.h
+++ b/drivers/edac/mce_amd.h
@@ -1,11 +1,14 @@
#ifndef _EDAC_MCE_AMD_H
#define _EDAC_MCE_AMD_H
+#include <linux/notifier.h>
+
#include <asm/mce.h>
+#define BIT_64(n) (U64_C(1) << (n))
+
#define ERROR_CODE(x) ((x) & 0xffff)
#define EXT_ERROR_CODE(x) (((x) >> 16) & 0x1f)
-#define EXT_ERR_MSG(x) ext_msgs[EXT_ERROR_CODE(x)]
#define LOW_SYNDROME(x) (((x) >> 15) & 0xff)
#define HIGH_SYNDROME(x) (((x) >> 24) & 0xff)
@@ -20,13 +23,14 @@
#define II_MSG(x) ii_msgs[II(x)]
#define LL(x) (((x) >> 0) & 0x3)
#define LL_MSG(x) ll_msgs[LL(x)]
-#define RRRR(x) (((x) >> 4) & 0xf)
-#define RRRR_MSG(x) rrrr_msgs[RRRR(x)]
#define TO(x) (((x) >> 8) & 0x1)
#define TO_MSG(x) to_msgs[TO(x)]
#define PP(x) (((x) >> 9) & 0x3)
#define PP_MSG(x) pp_msgs[PP(x)]
+#define RRRR(x) (((x) >> 4) & 0xf)
+#define RRRR_MSG(x) ((RRRR(x) < 9) ? rrrr_msgs[RRRR(x)] : "Wrong R4!")
+
#define K8_NBSH 0x4C
#define K8_NBSH_VALID_BIT BIT(31)
@@ -41,13 +45,45 @@
#define K8_NBSH_UECC BIT(13)
#define K8_NBSH_ERR_SCRUBER BIT(8)
+enum tt_ids {
+ TT_INSTR = 0,
+ TT_DATA,
+ TT_GEN,
+ TT_RESV,
+};
+
+enum ll_ids {
+ LL_RESV = 0,
+ LL_L1,
+ LL_L2,
+ LL_LG,
+};
+
+enum ii_ids {
+ II_MEM = 0,
+ II_RESV,
+ II_IO,
+ II_GEN,
+};
+
+enum rrrr_ids {
+ R4_GEN = 0,
+ R4_RD,
+ R4_WR,
+ R4_DRD,
+ R4_DWR,
+ R4_IRD,
+ R4_PREF,
+ R4_EVICT,
+ R4_SNOOP,
+};
+
extern const char *tt_msgs[];
extern const char *ll_msgs[];
extern const char *rrrr_msgs[];
extern const char *pp_msgs[];
extern const char *to_msgs[];
extern const char *ii_msgs[];
-extern const char *ext_msgs[];
/*
* relevant NB regs
@@ -60,10 +96,19 @@ struct err_regs {
u32 nbeal;
};
+/*
+ * per-family decoder ops
+ */
+struct amd_decoder_ops {
+ bool (*dc_mce)(u16);
+ bool (*ic_mce)(u16);
+ bool (*nb_mce)(u16, u8);
+};
void amd_report_gart_errors(bool);
-void amd_register_ecc_decoder(void (*f)(int, struct err_regs *));
-void amd_unregister_ecc_decoder(void (*f)(int, struct err_regs *));
-void amd_decode_nb_mce(int, struct err_regs *, int);
+void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32));
+void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32));
+void amd_decode_nb_mce(int, struct mce *, u32);
+int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data);
#endif /* _EDAC_MCE_AMD_H */
diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c
new file mode 100644
index 0000000..8d0688f
--- /dev/null
+++ b/drivers/edac/mce_amd_inj.c
@@ -0,0 +1,171 @@
+/*
+ * A simple MCE injection facility for testing the MCE decoding code. This
+ * driver should be built as module so that it can be loaded on production
+ * kernels for testing purposes.
+ *
+ * This file may be distributed under the terms of the GNU General Public
+ * License version 2.
+ *
+ * Copyright (c) 2010: Borislav Petkov <borislav.petkov@amd.com>
+ * Advanced Micro Devices Inc.
+ */
+
+#include <linux/kobject.h>
+#include <linux/sysdev.h>
+#include <linux/edac.h>
+#include <asm/mce.h>
+
+#include "mce_amd.h"
+
+struct edac_mce_attr {
+ struct attribute attr;
+ ssize_t (*show) (struct kobject *kobj, struct edac_mce_attr *attr, char *buf);
+ ssize_t (*store)(struct kobject *kobj, struct edac_mce_attr *attr,
+ const char *buf, size_t count);
+};
+
+#define EDAC_MCE_ATTR(_name, _mode, _show, _store) \
+static struct edac_mce_attr mce_attr_##_name = __ATTR(_name, _mode, _show, _store)
+
+static struct kobject *mce_kobj;
+
+/*
+ * Collect all the MCi_XXX settings
+ */
+static struct mce i_mce;
+
+#define MCE_INJECT_STORE(reg) \
+static ssize_t edac_inject_##reg##_store(struct kobject *kobj, \
+ struct edac_mce_attr *attr, \
+ const char *data, size_t count)\
+{ \
+ int ret = 0; \
+ unsigned long value; \
+ \
+ ret = strict_strtoul(data, 16, &value); \
+ if (ret < 0) \
+ printk(KERN_ERR "Error writing MCE " #reg " field.\n"); \
+ \
+ i_mce.reg = value; \
+ \
+ return count; \
+}
+
+MCE_INJECT_STORE(status);
+MCE_INJECT_STORE(misc);
+MCE_INJECT_STORE(addr);
+
+#define MCE_INJECT_SHOW(reg) \
+static ssize_t edac_inject_##reg##_show(struct kobject *kobj, \
+ struct edac_mce_attr *attr, \
+ char *buf) \
+{ \
+ return sprintf(buf, "0x%016llx\n", i_mce.reg); \
+}
+
+MCE_INJECT_SHOW(status);
+MCE_INJECT_SHOW(misc);
+MCE_INJECT_SHOW(addr);
+
+EDAC_MCE_ATTR(status, 0644, edac_inject_status_show, edac_inject_status_store);
+EDAC_MCE_ATTR(misc, 0644, edac_inject_misc_show, edac_inject_misc_store);
+EDAC_MCE_ATTR(addr, 0644, edac_inject_addr_show, edac_inject_addr_store);
+
+/*
+ * This denotes into which bank we're injecting and triggers
+ * the injection, at the same time.
+ */
+static ssize_t edac_inject_bank_store(struct kobject *kobj,
+ struct edac_mce_attr *attr,
+ const char *data, size_t count)
+{
+ int ret = 0;
+ unsigned long value;
+
+ ret = strict_strtoul(data, 10, &value);
+ if (ret < 0) {
+ printk(KERN_ERR "Invalid bank value!\n");
+ return -EINVAL;
+ }
+
+ if (value > 5) {
+ printk(KERN_ERR "Non-existant MCE bank: %lu\n", value);
+ return -EINVAL;
+ }
+
+ i_mce.bank = value;
+
+ amd_decode_mce(NULL, 0, &i_mce);
+
+ return count;
+}
+
+static ssize_t edac_inject_bank_show(struct kobject *kobj,
+ struct edac_mce_attr *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", i_mce.bank);
+}
+
+EDAC_MCE_ATTR(bank, 0644, edac_inject_bank_show, edac_inject_bank_store);
+
+static struct edac_mce_attr *sysfs_attrs[] = { &mce_attr_status, &mce_attr_misc,
+ &mce_attr_addr, &mce_attr_bank
+};
+
+static int __init edac_init_mce_inject(void)
+{
+ struct sysdev_class *edac_class = NULL;
+ int i, err = 0;
+
+ edac_class = edac_get_sysfs_class();
+ if (!edac_class)
+ return -EINVAL;
+
+ mce_kobj = kobject_create_and_add("mce", &edac_class->kset.kobj);
+ if (!mce_kobj) {
+ printk(KERN_ERR "Error creating a mce kset.\n");
+ err = -ENOMEM;
+ goto err_mce_kobj;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++) {
+ err = sysfs_create_file(mce_kobj, &sysfs_attrs[i]->attr);
+ if (err) {
+ printk(KERN_ERR "Error creating %s in sysfs.\n",
+ sysfs_attrs[i]->attr.name);
+ goto err_sysfs_create;
+ }
+ }
+ return 0;
+
+err_sysfs_create:
+ while (i-- >= 0)
+ sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr);
+
+ kobject_del(mce_kobj);
+
+err_mce_kobj:
+ edac_put_sysfs_class();
+
+ return err;
+}
+
+static void __exit edac_exit_mce_inject(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++)
+ sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr);
+
+ kobject_del(mce_kobj);
+
+ edac_put_sysfs_class();
+}
+
+module_init(edac_init_mce_inject);
+module_exit(edac_exit_mce_inject);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Borislav Petkov <borislav.petkov@amd.com>");
+MODULE_AUTHOR("AMD Inc.");
+MODULE_DESCRIPTION("MCE injection facility for testing MCE decoding");
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 1b05896..9dcb17d 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -2840,7 +2840,7 @@ static int __devinit pci_probe(struct pci_dev *dev,
const struct pci_device_id *ent)
{
struct fw_ohci *ohci;
- u32 bus_options, max_receive, link_speed, version, link_enh;
+ u32 bus_options, max_receive, link_speed, version;
u64 guid;
int i, err, n_ir, n_it;
size_t size;
@@ -2894,23 +2894,6 @@ static int __devinit pci_probe(struct pci_dev *dev,
if (param_quirks)
ohci->quirks = param_quirks;
- /* TI OHCI-Lynx and compatible: set recommended configuration bits. */
- if (dev->vendor == PCI_VENDOR_ID_TI) {
- pci_read_config_dword(dev, PCI_CFG_TI_LinkEnh, &link_enh);
-
- /* adjust latency of ATx FIFO: use 1.7 KB threshold */
- link_enh &= ~TI_LinkEnh_atx_thresh_mask;
- link_enh |= TI_LinkEnh_atx_thresh_1_7K;
-
- /* use priority arbitration for asynchronous responses */
- link_enh |= TI_LinkEnh_enab_unfair;
-
- /* required for aPhyEnhanceEnable to work */
- link_enh |= TI_LinkEnh_enab_accel;
-
- pci_write_config_dword(dev, PCI_CFG_TI_LinkEnh, link_enh);
- }
-
ar_context_init(&ohci->ar_request_ctx, ohci,
OHCI1394_AsReqRcvContextControlSet);
diff --git a/drivers/firewire/ohci.h b/drivers/firewire/ohci.h
index 0e6c5a4..ef5e733 100644
--- a/drivers/firewire/ohci.h
+++ b/drivers/firewire/ohci.h
@@ -155,12 +155,4 @@
#define OHCI1394_phy_tcode 0xe
-/* TI extensions */
-
-#define PCI_CFG_TI_LinkEnh 0xf4
-#define TI_LinkEnh_enab_accel 0x00000002
-#define TI_LinkEnh_enab_unfair 0x00000080
-#define TI_LinkEnh_atx_thresh_mask 0x00003000
-#define TI_LinkEnh_atx_thresh_1_7K 0x00001000
-
#endif /* _FIREWIRE_OHCI_H */
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index 280c9b5..88a3ae6 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -125,7 +125,7 @@ config ISCSI_IBFT_FIND
config ISCSI_IBFT
tristate "iSCSI Boot Firmware Table Attributes module"
select ISCSI_BOOT_SYSFS
- depends on ISCSI_IBFT_FIND && SCSI
+ depends on ISCSI_IBFT_FIND && SCSI && SCSI_LOWLEVEL
default n
help
This option enables support for detection and exposing of iSCSI
diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c
index 5731fc9..3eef567 100644
--- a/drivers/gpu/drm/radeon/radeon_cursor.c
+++ b/drivers/gpu/drm/radeon/radeon_cursor.c
@@ -203,6 +203,7 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc,
struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
struct radeon_device *rdev = crtc->dev->dev_private;
int xorigin = 0, yorigin = 0;
+ int w = radeon_crtc->cursor_width;
if (x < 0)
xorigin = -x + 1;
@@ -213,22 +214,7 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc,
if (yorigin >= CURSOR_HEIGHT)
yorigin = CURSOR_HEIGHT - 1;
- radeon_lock_cursor(crtc, true);
- if (ASIC_IS_DCE4(rdev)) {
- /* cursors are offset into the total surface */
- x += crtc->x;
- y += crtc->y;
- DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y);
-
- /* XXX: check if evergreen has the same issues as avivo chips */
- WREG32(EVERGREEN_CUR_POSITION + radeon_crtc->crtc_offset,
- ((xorigin ? 0 : x) << 16) |
- (yorigin ? 0 : y));
- WREG32(EVERGREEN_CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin);
- WREG32(EVERGREEN_CUR_SIZE + radeon_crtc->crtc_offset,
- ((radeon_crtc->cursor_width - 1) << 16) | (radeon_crtc->cursor_height - 1));
- } else if (ASIC_IS_AVIVO(rdev)) {
- int w = radeon_crtc->cursor_width;
+ if (ASIC_IS_AVIVO(rdev)) {
int i = 0;
struct drm_crtc *crtc_p;
@@ -260,7 +246,17 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc,
if (w <= 0)
w = 1;
}
+ }
+ radeon_lock_cursor(crtc, true);
+ if (ASIC_IS_DCE4(rdev)) {
+ WREG32(EVERGREEN_CUR_POSITION + radeon_crtc->crtc_offset,
+ ((xorigin ? 0 : x) << 16) |
+ (yorigin ? 0 : y));
+ WREG32(EVERGREEN_CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin);
+ WREG32(EVERGREEN_CUR_SIZE + radeon_crtc->crtc_offset,
+ ((w - 1) << 16) | (radeon_crtc->cursor_height - 1));
+ } else if (ASIC_IS_AVIVO(rdev)) {
WREG32(AVIVO_D1CUR_POSITION + radeon_crtc->crtc_offset,
((xorigin ? 0 : x) << 16) |
(yorigin ? 0 : y));
diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c
index b8feac5..5795c83 100644
--- a/drivers/i2c/busses/i2c-davinci.c
+++ b/drivers/i2c/busses/i2c-davinci.c
@@ -331,21 +331,16 @@ i2c_davinci_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, int stop)
INIT_COMPLETION(dev->cmd_complete);
dev->cmd_err = 0;
- /* Take I2C out of reset, configure it as master and set the
- * start bit */
- flag = DAVINCI_I2C_MDR_IRS | DAVINCI_I2C_MDR_MST | DAVINCI_I2C_MDR_STT;
+ /* Take I2C out of reset and configure it as master */
+ flag = DAVINCI_I2C_MDR_IRS | DAVINCI_I2C_MDR_MST;
/* if the slave address is ten bit address, enable XA bit */
if (msg->flags & I2C_M_TEN)
flag |= DAVINCI_I2C_MDR_XA;
if (!(msg->flags & I2C_M_RD))
flag |= DAVINCI_I2C_MDR_TRX;
- if (stop)
- flag |= DAVINCI_I2C_MDR_STP;
- if (msg->len == 0) {
+ if (msg->len == 0)
flag |= DAVINCI_I2C_MDR_RM;
- flag &= ~DAVINCI_I2C_MDR_STP;
- }
/* Enable receive or transmit interrupts */
w = davinci_i2c_read_reg(dev, DAVINCI_I2C_IMR_REG);
@@ -358,17 +353,28 @@ i2c_davinci_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, int stop)
dev->terminate = 0;
/*
+ * Write mode register first as needed for correct behaviour
+ * on OMAP-L138, but don't set STT yet to avoid a race with XRDY
+ * occuring before we have loaded DXR
+ */
+ davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, flag);
+
+ /*
* First byte should be set here, not after interrupt,
* because transmit-data-ready interrupt can come before
* NACK-interrupt during sending of previous message and
* ICDXR may have wrong data
+ * It also saves us one interrupt, slightly faster
*/
if ((!(msg->flags & I2C_M_RD)) && dev->buf_len) {
davinci_i2c_write_reg(dev, DAVINCI_I2C_DXR_REG, *dev->buf++);
dev->buf_len--;
}
- /* write the data into mode register; start transmitting */
+ /* Set STT to begin transmit now DXR is loaded */
+ flag |= DAVINCI_I2C_MDR_STT;
+ if (stop && msg->len != 0)
+ flag |= DAVINCI_I2C_MDR_STP;
davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, flag);
r = wait_for_completion_interruptible_timeout(&dev->cmd_complete,
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index d1ff940..4c2a62b 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -159,15 +159,9 @@ static int i2c_imx_bus_busy(struct imx_i2c_struct *i2c_imx, int for_busy)
static int i2c_imx_trx_complete(struct imx_i2c_struct *i2c_imx)
{
- int result;
-
- result = wait_event_interruptible_timeout(i2c_imx->queue,
- i2c_imx->i2csr & I2SR_IIF, HZ / 10);
+ wait_event_timeout(i2c_imx->queue, i2c_imx->i2csr & I2SR_IIF, HZ / 10);
- if (unlikely(result < 0)) {
- dev_dbg(&i2c_imx->adapter.dev, "<%s> result < 0\n", __func__);
- return result;
- } else if (unlikely(!(i2c_imx->i2csr & I2SR_IIF))) {
+ if (unlikely(!(i2c_imx->i2csr & I2SR_IIF))) {
dev_dbg(&i2c_imx->adapter.dev, "<%s> Timeout\n", __func__);
return -ETIMEDOUT;
}
@@ -295,7 +289,7 @@ static irqreturn_t i2c_imx_isr(int irq, void *dev_id)
i2c_imx->i2csr = temp;
temp &= ~I2SR_IIF;
writeb(temp, i2c_imx->base + IMX_I2C_I2SR);
- wake_up_interruptible(&i2c_imx->queue);
+ wake_up(&i2c_imx->queue);
return IRQ_HANDLED;
}
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index c37ef64..cb3ccf3 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -59,18 +59,11 @@
#include <linux/hrtimer.h> /* ktime_get_real() */
#include <trace/events/power.h>
#include <linux/sched.h>
+#include <asm/mwait.h>
#define INTEL_IDLE_VERSION "0.4"
#define PREFIX "intel_idle: "
-#define MWAIT_SUBSTATE_MASK (0xf)
-#define MWAIT_CSTATE_MASK (0xf)
-#define MWAIT_SUBSTATE_SIZE (4)
-#define MWAIT_MAX_NUM_CSTATES 8
-#define CPUID_MWAIT_LEAF (5)
-#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1)
-#define CPUID5_ECX_INTERRUPT_BREAK (0x2)
-
static struct cpuidle_driver intel_idle_driver = {
.name = "intel_idle",
.owner = THIS_MODULE,
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index c908c5f..af9ee31 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -28,7 +28,7 @@ struct evdev {
int minor;
struct input_handle handle;
wait_queue_head_t wait;
- struct evdev_client *grab;
+ struct evdev_client __rcu *grab;
struct list_head client_list;
spinlock_t client_lock; /* protects client_list */
struct mutex mutex;
@@ -669,6 +669,9 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd,
if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCGABS(0))) {
+ if (!dev->absinfo)
+ return -EINVAL;
+
t = _IOC_NR(cmd) & ABS_MAX;
abs = dev->absinfo[t];
@@ -680,10 +683,13 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd,
}
}
- if (_IOC_DIR(cmd) == _IOC_READ) {
+ if (_IOC_DIR(cmd) == _IOC_WRITE) {
if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCSABS(0))) {
+ if (!dev->absinfo)
+ return -EINVAL;
+
t = _IOC_NR(cmd) & ABS_MAX;
if (copy_from_user(&abs, p, min_t(size_t,
diff --git a/drivers/input/misc/hp_sdc_rtc.c b/drivers/input/misc/hp_sdc_rtc.c
index c190664..7e2c12a 100644
--- a/drivers/input/misc/hp_sdc_rtc.c
+++ b/drivers/input/misc/hp_sdc_rtc.c
@@ -104,7 +104,7 @@ static int hp_sdc_rtc_do_read_bbrtc (struct rtc_time *rtctm)
t.endidx = 91;
t.seq = tseq;
t.act.semaphore = &tsem;
- init_MUTEX_LOCKED(&tsem);
+ sema_init(&tsem, 0);
if (hp_sdc_enqueue_transaction(&t)) return -1;
@@ -698,7 +698,7 @@ static int __init hp_sdc_rtc_init(void)
return -ENODEV;
#endif
- init_MUTEX(&i8042tregs);
+ sema_init(&i8042tregs, 1);
if ((ret = hp_sdc_request_timer_irq(&hp_sdc_rtc_isr)))
return ret;
diff --git a/drivers/input/serio/hil_mlc.c b/drivers/input/serio/hil_mlc.c
index c92f4ed..e5624d8 100644
--- a/drivers/input/serio/hil_mlc.c
+++ b/drivers/input/serio/hil_mlc.c
@@ -915,15 +915,15 @@ int hil_mlc_register(hil_mlc *mlc)
mlc->ostarted = 0;
rwlock_init(&mlc->lock);
- init_MUTEX(&mlc->osem);
+ sema_init(&mlc->osem, 1);
- init_MUTEX(&mlc->isem);
+ sema_init(&mlc->isem, 1);
mlc->icount = -1;
mlc->imatch = 0;
mlc->opercnt = 0;
- init_MUTEX_LOCKED(&(mlc->csem));
+ sema_init(&(mlc->csem), 0);
hil_mlc_clear_di_scratch(mlc);
hil_mlc_clear_di_map(mlc, 0);
diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c
index bcc2d30..8c0b51c 100644
--- a/drivers/input/serio/hp_sdc.c
+++ b/drivers/input/serio/hp_sdc.c
@@ -905,7 +905,7 @@ static int __init hp_sdc_init(void)
ts_sync[1] = 0x0f;
ts_sync[2] = ts_sync[3] = ts_sync[4] = ts_sync[5] = 0;
t_sync.act.semaphore = &s_sync;
- init_MUTEX_LOCKED(&s_sync);
+ sema_init(&s_sync, 0);
hp_sdc_enqueue_transaction(&t_sync);
down(&s_sync); /* Wait for t_sync to complete */
@@ -1039,7 +1039,7 @@ static int __init hp_sdc_register(void)
return hp_sdc.dev_err;
}
- init_MUTEX_LOCKED(&tq_init_sem);
+ sema_init(&tq_init_sem, 0);
tq_init.actidx = 0;
tq_init.idx = 1;
diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c
index 1c4ee6e..bf64e49 100644
--- a/drivers/macintosh/adb.c
+++ b/drivers/macintosh/adb.c
@@ -83,7 +83,7 @@ static struct adb_driver *adb_controller;
BLOCKING_NOTIFIER_HEAD(adb_client_list);
static int adb_got_sleep;
static int adb_inited;
-static DECLARE_MUTEX(adb_probe_mutex);
+static DEFINE_SEMAPHORE(adb_probe_mutex);
static int sleepy_trackpad;
static int autopoll_devs;
int __adb_probe_sync;
diff --git a/drivers/media/video/v4l2-compat-ioctl32.c b/drivers/media/video/v4l2-compat-ioctl32.c
index 073f013..86294ed3 100644
--- a/drivers/media/video/v4l2-compat-ioctl32.c
+++ b/drivers/media/video/v4l2-compat-ioctl32.c
@@ -193,17 +193,24 @@ static int put_video_window32(struct video_window *kp, struct video_window32 __u
struct video_code32 {
char loadwhat[16]; /* name or tag of file being passed */
compat_int_t datasize;
- unsigned char *data;
+ compat_uptr_t data;
};
-static int get_microcode32(struct video_code *kp, struct video_code32 __user *up)
+static struct video_code __user *get_microcode32(struct video_code32 *kp)
{
- if (!access_ok(VERIFY_READ, up, sizeof(struct video_code32)) ||
- copy_from_user(kp->loadwhat, up->loadwhat, sizeof(up->loadwhat)) ||
- get_user(kp->datasize, &up->datasize) ||
- copy_from_user(kp->data, up->data, up->datasize))
- return -EFAULT;
- return 0;
+ struct video_code __user *up;
+
+ up = compat_alloc_user_space(sizeof(*up));
+
+ /*
+ * NOTE! We don't actually care if these fail. If the
+ * user address is invalid, the native ioctl will do
+ * the error handling for us
+ */
+ (void) copy_to_user(up->loadwhat, kp->loadwhat, sizeof(up->loadwhat));
+ (void) put_user(kp->datasize, &up->datasize);
+ (void) put_user(compat_ptr(kp->data), &up->data);
+ return up;
}
#define VIDIOCGTUNER32 _IOWR('v', 4, struct video_tuner32)
@@ -739,7 +746,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar
struct video_tuner vt;
struct video_buffer vb;
struct video_window vw;
- struct video_code vc;
+ struct video_code32 vc;
struct video_audio va;
#endif
struct v4l2_format v2f;
@@ -818,8 +825,11 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar
break;
case VIDIOCSMICROCODE:
- err = get_microcode32(&karg.vc, up);
- compatible_arg = 0;
+ /* Copy the 32-bit "video_code32" to kernel space */
+ if (copy_from_user(&karg.vc, up, sizeof(karg.vc)))
+ return -EFAULT;
+ /* Convert the 32-bit version to a 64-bit version in user space */
+ up = get_microcode32(&karg.vc);
break;
case VIDIOCSFREQ:
diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
index b2828e8..214b03a 100644
--- a/drivers/mtd/nand/mxc_nand.c
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -30,6 +30,8 @@
#include <linux/clk.h>
#include <linux/err.h>
#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/completion.h>
#include <asm/mach/flash.h>
#include <mach/mxc_nand.h>
@@ -151,7 +153,7 @@ struct mxc_nand_host {
int irq;
int eccsize;
- wait_queue_head_t irq_waitq;
+ struct completion op_completion;
uint8_t *data_buf;
unsigned int buf_start;
@@ -164,6 +166,7 @@ struct mxc_nand_host {
void (*send_read_id)(struct mxc_nand_host *);
uint16_t (*get_dev_status)(struct mxc_nand_host *);
int (*check_int)(struct mxc_nand_host *);
+ void (*irq_control)(struct mxc_nand_host *, int);
};
/* OOB placement block for use with hardware ecc generation */
@@ -216,9 +219,12 @@ static irqreturn_t mxc_nfc_irq(int irq, void *dev_id)
{
struct mxc_nand_host *host = dev_id;
- disable_irq_nosync(irq);
+ if (!host->check_int(host))
+ return IRQ_NONE;
- wake_up(&host->irq_waitq);
+ host->irq_control(host, 0);
+
+ complete(&host->op_completion);
return IRQ_HANDLED;
}
@@ -245,11 +251,54 @@ static int check_int_v1_v2(struct mxc_nand_host *host)
if (!(tmp & NFC_V1_V2_CONFIG2_INT))
return 0;
- writew(tmp & ~NFC_V1_V2_CONFIG2_INT, NFC_V1_V2_CONFIG2);
+ if (!cpu_is_mx21())
+ writew(tmp & ~NFC_V1_V2_CONFIG2_INT, NFC_V1_V2_CONFIG2);
return 1;
}
+/*
+ * It has been observed that the i.MX21 cannot read the CONFIG2:INT bit
+ * if interrupts are masked (CONFIG1:INT_MSK is set). To handle this, the
+ * driver can enable/disable the irq line rather than simply masking the
+ * interrupts.
+ */
+static void irq_control_mx21(struct mxc_nand_host *host, int activate)
+{
+ if (activate)
+ enable_irq(host->irq);
+ else
+ disable_irq_nosync(host->irq);
+}
+
+static void irq_control_v1_v2(struct mxc_nand_host *host, int activate)
+{
+ uint16_t tmp;
+
+ tmp = readw(NFC_V1_V2_CONFIG1);
+
+ if (activate)
+ tmp &= ~NFC_V1_V2_CONFIG1_INT_MSK;
+ else
+ tmp |= NFC_V1_V2_CONFIG1_INT_MSK;
+
+ writew(tmp, NFC_V1_V2_CONFIG1);
+}
+
+static void irq_control_v3(struct mxc_nand_host *host, int activate)
+{
+ uint32_t tmp;
+
+ tmp = readl(NFC_V3_CONFIG2);
+
+ if (activate)
+ tmp &= ~NFC_V3_CONFIG2_INT_MSK;
+ else
+ tmp |= NFC_V3_CONFIG2_INT_MSK;
+
+ writel(tmp, NFC_V3_CONFIG2);
+}
+
/* This function polls the NANDFC to wait for the basic operation to
* complete by checking the INT bit of config2 register.
*/
@@ -259,10 +308,9 @@ static void wait_op_done(struct mxc_nand_host *host, int useirq)
if (useirq) {
if (!host->check_int(host)) {
-
- enable_irq(host->irq);
-
- wait_event(host->irq_waitq, host->check_int(host));
+ INIT_COMPLETION(host->op_completion);
+ host->irq_control(host, 1);
+ wait_for_completion(&host->op_completion);
}
} else {
while (max_retries-- > 0) {
@@ -799,6 +847,7 @@ static void preset_v3(struct mtd_info *mtd)
NFC_V3_CONFIG2_2CMD_PHASES |
NFC_V3_CONFIG2_SPAS(mtd->oobsize >> 1) |
NFC_V3_CONFIG2_ST_CMD(0x70) |
+ NFC_V3_CONFIG2_INT_MSK |
NFC_V3_CONFIG2_NUM_ADDR_PHASE0;
if (chip->ecc.mode == NAND_ECC_HW)
@@ -1024,6 +1073,10 @@ static int __init mxcnd_probe(struct platform_device *pdev)
host->send_read_id = send_read_id_v1_v2;
host->get_dev_status = get_dev_status_v1_v2;
host->check_int = check_int_v1_v2;
+ if (cpu_is_mx21())
+ host->irq_control = irq_control_mx21;
+ else
+ host->irq_control = irq_control_v1_v2;
}
if (nfc_is_v21()) {
@@ -1062,6 +1115,7 @@ static int __init mxcnd_probe(struct platform_device *pdev)
host->send_read_id = send_read_id_v3;
host->check_int = check_int_v3;
host->get_dev_status = get_dev_status_v3;
+ host->irq_control = irq_control_v3;
oob_smallpage = &nandv2_hw_eccoob_smallpage;
oob_largepage = &nandv2_hw_eccoob_largepage;
} else
@@ -1093,14 +1147,34 @@ static int __init mxcnd_probe(struct platform_device *pdev)
this->options |= NAND_USE_FLASH_BBT;
}
- init_waitqueue_head(&host->irq_waitq);
+ init_completion(&host->op_completion);
host->irq = platform_get_irq(pdev, 0);
+ /*
+ * mask the interrupt. For i.MX21 explicitely call
+ * irq_control_v1_v2 to use the mask bit. We can't call
+ * disable_irq_nosync() for an interrupt we do not own yet.
+ */
+ if (cpu_is_mx21())
+ irq_control_v1_v2(host, 0);
+ else
+ host->irq_control(host, 0);
+
err = request_irq(host->irq, mxc_nfc_irq, IRQF_DISABLED, DRIVER_NAME, host);
if (err)
goto eirq;
+ host->irq_control(host, 0);
+
+ /*
+ * Now that the interrupt is disabled make sure the interrupt
+ * mask bit is cleared on i.MX21. Otherwise we can't read
+ * the interrupt status bit on this machine.
+ */
+ if (cpu_is_mx21())
+ irq_control_v1_v2(host, 1);
+
/* first scan to find the device and get the page size */
if (nand_scan_ident(mtd, 1, NULL)) {
err = -ENXIO;
diff --git a/drivers/net/3c527.c b/drivers/net/3c527.c
index 70705d1..eca55c5 100644
--- a/drivers/net/3c527.c
+++ b/drivers/net/3c527.c
@@ -522,7 +522,7 @@ static int __init mc32_probe1(struct net_device *dev, int slot)
lp->tx_len = lp->exec_box->data[9]; /* Transmit list count */
lp->rx_len = lp->exec_box->data[11]; /* Receive list count */
- init_MUTEX_LOCKED(&lp->cmd_mutex);
+ sema_init(&lp->cmd_mutex, 0);
init_completion(&lp->execution_cmd);
init_completion(&lp->xceiver_cmd);
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 4b52c76..3e5d0b6 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -608,7 +608,7 @@ static int sixpack_open(struct tty_struct *tty)
spin_lock_init(&sp->lock);
atomic_set(&sp->refcnt, 1);
- init_MUTEX_LOCKED(&sp->dead_sem);
+ sema_init(&sp->dead_sem, 0);
/* !!! length of the buffers. MTU is IP MTU, not PACLEN! */
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index 66e88bd..4c62839 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -747,7 +747,7 @@ static int mkiss_open(struct tty_struct *tty)
spin_lock_init(&ax->buflock);
atomic_set(&ax->refcnt, 1);
- init_MUTEX_LOCKED(&ax->dead_sem);
+ sema_init(&ax->dead_sem, 0);
ax->tty = tty;
tty->disc_data = ax;
diff --git a/drivers/net/irda/sir_dev.c b/drivers/net/irda/sir_dev.c
index 1b051da..51d7444 100644
--- a/drivers/net/irda/sir_dev.c
+++ b/drivers/net/irda/sir_dev.c
@@ -909,7 +909,7 @@ struct sir_dev * sirdev_get_instance(const struct sir_driver *drv, const char *n
dev->tx_skb = NULL;
spin_lock_init(&dev->tx_lock);
- init_MUTEX(&dev->fsm.sem);
+ sema_init(&dev->fsm.sem, 1);
dev->drv = drv;
dev->netdev = ndev;
diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c
index af50a53..78d70a6 100644
--- a/drivers/net/ppp_async.c
+++ b/drivers/net/ppp_async.c
@@ -184,7 +184,7 @@ ppp_asynctty_open(struct tty_struct *tty)
tasklet_init(&ap->tsk, ppp_async_process, (unsigned long) ap);
atomic_set(&ap->refcnt, 1);
- init_MUTEX_LOCKED(&ap->dead_sem);
+ sema_init(&ap->dead_sem, 0);
ap->chan.private = ap;
ap->chan.ops = &async_ops;
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 04c6cd4..10bafd5 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -575,7 +575,7 @@ static int cosa_probe(int base, int irq, int dma)
/* Initialize the chardev data structures */
mutex_init(&chan->rlock);
- init_MUTEX(&chan->wsem);
+ sema_init(&chan->wsem, 1);
/* Register the network interface */
if (!(chan->netdev = alloc_hdlcdev(chan))) {
diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c
index b336cd9..f9bda64 100644
--- a/drivers/oprofile/oprof.c
+++ b/drivers/oprofile/oprof.c
@@ -225,26 +225,17 @@ post_sync:
mutex_unlock(&start_mutex);
}
-int oprofile_set_backtrace(unsigned long val)
+int oprofile_set_ulong(unsigned long *addr, unsigned long val)
{
- int err = 0;
+ int err = -EBUSY;
mutex_lock(&start_mutex);
-
- if (oprofile_started) {
- err = -EBUSY;
- goto out;
- }
-
- if (!oprofile_ops.backtrace) {
- err = -EINVAL;
- goto out;
+ if (!oprofile_started) {
+ *addr = val;
+ err = 0;
}
-
- oprofile_backtrace_depth = val;
-
-out:
mutex_unlock(&start_mutex);
+
return err;
}
@@ -257,16 +248,9 @@ static int __init oprofile_init(void)
printk(KERN_INFO "oprofile: using timer interrupt.\n");
err = oprofile_timer_init(&oprofile_ops);
if (err)
- goto out_arch;
+ return err;
}
- err = oprofilefs_register();
- if (err)
- goto out_arch;
- return 0;
-
-out_arch:
- oprofile_arch_exit();
- return err;
+ return oprofilefs_register();
}
diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h
index 47e12cb..177b73d 100644
--- a/drivers/oprofile/oprof.h
+++ b/drivers/oprofile/oprof.h
@@ -37,7 +37,7 @@ void oprofile_create_files(struct super_block *sb, struct dentry *root);
int oprofile_timer_init(struct oprofile_operations *ops);
void oprofile_timer_exit(void);
-int oprofile_set_backtrace(unsigned long depth);
+int oprofile_set_ulong(unsigned long *addr, unsigned long val);
int oprofile_set_timeout(unsigned long time);
#endif /* OPROF_H */
diff --git a/drivers/oprofile/oprofile_files.c b/drivers/oprofile/oprofile_files.c
index bbd7516..ccf099e 100644
--- a/drivers/oprofile/oprofile_files.c
+++ b/drivers/oprofile/oprofile_files.c
@@ -79,14 +79,17 @@ static ssize_t depth_write(struct file *file, char const __user *buf, size_t cou
if (*offset)
return -EINVAL;
+ if (!oprofile_ops.backtrace)
+ return -EINVAL;
+
retval = oprofilefs_ulong_from_user(&val, buf, count);
if (retval)
return retval;
- retval = oprofile_set_backtrace(val);
-
+ retval = oprofile_set_ulong(&oprofile_backtrace_depth, val);
if (retval)
return retval;
+
return count;
}
diff --git a/drivers/oprofile/oprofile_perf.c b/drivers/oprofile/oprofile_perf.c
new file mode 100644
index 0000000..9046f7b
--- /dev/null
+++ b/drivers/oprofile/oprofile_perf.c
@@ -0,0 +1,328 @@
+/*
+ * Copyright 2010 ARM Ltd.
+ *
+ * Perf-events backend for OProfile.
+ */
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/oprofile.h>
+#include <linux/slab.h>
+
+/*
+ * Per performance monitor configuration as set via oprofilefs.
+ */
+struct op_counter_config {
+ unsigned long count;
+ unsigned long enabled;
+ unsigned long event;
+ unsigned long unit_mask;
+ unsigned long kernel;
+ unsigned long user;
+ struct perf_event_attr attr;
+};
+
+static int oprofile_perf_enabled;
+static DEFINE_MUTEX(oprofile_perf_mutex);
+
+static struct op_counter_config *counter_config;
+static struct perf_event **perf_events[nr_cpumask_bits];
+static int num_counters;
+
+/*
+ * Overflow callback for oprofile.
+ */
+static void op_overflow_handler(struct perf_event *event, int unused,
+ struct perf_sample_data *data, struct pt_regs *regs)
+{
+ int id;
+ u32 cpu = smp_processor_id();
+
+ for (id = 0; id < num_counters; ++id)
+ if (perf_events[cpu][id] == event)
+ break;
+
+ if (id != num_counters)
+ oprofile_add_sample(regs, id);
+ else
+ pr_warning("oprofile: ignoring spurious overflow "
+ "on cpu %u\n", cpu);
+}
+
+/*
+ * Called by oprofile_perf_setup to create perf attributes to mirror the oprofile
+ * settings in counter_config. Attributes are created as `pinned' events and
+ * so are permanently scheduled on the PMU.
+ */
+static void op_perf_setup(void)
+{
+ int i;
+ u32 size = sizeof(struct perf_event_attr);
+ struct perf_event_attr *attr;
+
+ for (i = 0; i < num_counters; ++i) {
+ attr = &counter_config[i].attr;
+ memset(attr, 0, size);
+ attr->type = PERF_TYPE_RAW;
+ attr->size = size;
+ attr->config = counter_config[i].event;
+ attr->sample_period = counter_config[i].count;
+ attr->pinned = 1;
+ }
+}
+
+static int op_create_counter(int cpu, int event)
+{
+ struct perf_event *pevent;
+
+ if (!counter_config[event].enabled || perf_events[cpu][event])
+ return 0;
+
+ pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
+ cpu, NULL,
+ op_overflow_handler);
+
+ if (IS_ERR(pevent))
+ return PTR_ERR(pevent);
+
+ if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
+ perf_event_release_kernel(pevent);
+ pr_warning("oprofile: failed to enable event %d "
+ "on CPU %d\n", event, cpu);
+ return -EBUSY;
+ }
+
+ perf_events[cpu][event] = pevent;
+
+ return 0;
+}
+
+static void op_destroy_counter(int cpu, int event)
+{
+ struct perf_event *pevent = perf_events[cpu][event];
+
+ if (pevent) {
+ perf_event_release_kernel(pevent);
+ perf_events[cpu][event] = NULL;
+ }
+}
+
+/*
+ * Called by oprofile_perf_start to create active perf events based on the
+ * perviously configured attributes.
+ */
+static int op_perf_start(void)
+{
+ int cpu, event, ret = 0;
+
+ for_each_online_cpu(cpu) {
+ for (event = 0; event < num_counters; ++event) {
+ ret = op_create_counter(cpu, event);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Called by oprofile_perf_stop at the end of a profiling run.
+ */
+static void op_perf_stop(void)
+{
+ int cpu, event;
+
+ for_each_online_cpu(cpu)
+ for (event = 0; event < num_counters; ++event)
+ op_destroy_counter(cpu, event);
+}
+
+static int oprofile_perf_create_files(struct super_block *sb, struct dentry *root)
+{
+ unsigned int i;
+
+ for (i = 0; i < num_counters; i++) {
+ struct dentry *dir;
+ char buf[4];
+
+ snprintf(buf, sizeof buf, "%d", i);
+ dir = oprofilefs_mkdir(sb, root, buf);
+ oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
+ oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
+ oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
+ oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
+ oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
+ oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
+ }
+
+ return 0;
+}
+
+static int oprofile_perf_setup(void)
+{
+ spin_lock(&oprofilefs_lock);
+ op_perf_setup();
+ spin_unlock(&oprofilefs_lock);
+ return 0;
+}
+
+static int oprofile_perf_start(void)
+{
+ int ret = -EBUSY;
+
+ mutex_lock(&oprofile_perf_mutex);
+ if (!oprofile_perf_enabled) {
+ ret = 0;
+ op_perf_start();
+ oprofile_perf_enabled = 1;
+ }
+ mutex_unlock(&oprofile_perf_mutex);
+ return ret;
+}
+
+static void oprofile_perf_stop(void)
+{
+ mutex_lock(&oprofile_perf_mutex);
+ if (oprofile_perf_enabled)
+ op_perf_stop();
+ oprofile_perf_enabled = 0;
+ mutex_unlock(&oprofile_perf_mutex);
+}
+
+#ifdef CONFIG_PM
+
+static int oprofile_perf_suspend(struct platform_device *dev, pm_message_t state)
+{
+ mutex_lock(&oprofile_perf_mutex);
+ if (oprofile_perf_enabled)
+ op_perf_stop();
+ mutex_unlock(&oprofile_perf_mutex);
+ return 0;
+}
+
+static int oprofile_perf_resume(struct platform_device *dev)
+{
+ mutex_lock(&oprofile_perf_mutex);
+ if (oprofile_perf_enabled && op_perf_start())
+ oprofile_perf_enabled = 0;
+ mutex_unlock(&oprofile_perf_mutex);
+ return 0;
+}
+
+static struct platform_driver oprofile_driver = {
+ .driver = {
+ .name = "oprofile-perf",
+ },
+ .resume = oprofile_perf_resume,
+ .suspend = oprofile_perf_suspend,
+};
+
+static struct platform_device *oprofile_pdev;
+
+static int __init init_driverfs(void)
+{
+ int ret;
+
+ ret = platform_driver_register(&oprofile_driver);
+ if (ret)
+ return ret;
+
+ oprofile_pdev = platform_device_register_simple(
+ oprofile_driver.driver.name, 0, NULL, 0);
+ if (IS_ERR(oprofile_pdev)) {
+ ret = PTR_ERR(oprofile_pdev);
+ platform_driver_unregister(&oprofile_driver);
+ }
+
+ return ret;
+}
+
+static void exit_driverfs(void)
+{
+ platform_device_unregister(oprofile_pdev);
+ platform_driver_unregister(&oprofile_driver);
+}
+
+#else
+
+static inline int init_driverfs(void) { return 0; }
+static inline void exit_driverfs(void) { }
+
+#endif /* CONFIG_PM */
+
+void oprofile_perf_exit(void)
+{
+ int cpu, id;
+ struct perf_event *event;
+
+ for_each_possible_cpu(cpu) {
+ for (id = 0; id < num_counters; ++id) {
+ event = perf_events[cpu][id];
+ if (event)
+ perf_event_release_kernel(event);
+ }
+
+ kfree(perf_events[cpu]);
+ }
+
+ kfree(counter_config);
+ exit_driverfs();
+}
+
+int __init oprofile_perf_init(struct oprofile_operations *ops)
+{
+ int cpu, ret = 0;
+
+ ret = init_driverfs();
+ if (ret)
+ return ret;
+
+ memset(&perf_events, 0, sizeof(perf_events));
+
+ num_counters = perf_num_counters();
+ if (num_counters <= 0) {
+ pr_info("oprofile: no performance counters\n");
+ ret = -ENODEV;
+ goto out;
+ }
+
+ counter_config = kcalloc(num_counters,
+ sizeof(struct op_counter_config), GFP_KERNEL);
+
+ if (!counter_config) {
+ pr_info("oprofile: failed to allocate %d "
+ "counters\n", num_counters);
+ ret = -ENOMEM;
+ num_counters = 0;
+ goto out;
+ }
+
+ for_each_possible_cpu(cpu) {
+ perf_events[cpu] = kcalloc(num_counters,
+ sizeof(struct perf_event *), GFP_KERNEL);
+ if (!perf_events[cpu]) {
+ pr_info("oprofile: failed to allocate %d perf events "
+ "for cpu %d\n", num_counters, cpu);
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ ops->create_files = oprofile_perf_create_files;
+ ops->setup = oprofile_perf_setup;
+ ops->start = oprofile_perf_start;
+ ops->stop = oprofile_perf_stop;
+ ops->shutdown = oprofile_perf_stop;
+ ops->cpu_type = op_name_from_perf_id();
+
+ if (!ops->cpu_type)
+ ret = -ENODEV;
+ else
+ pr_info("oprofile: using %s\n", ops->cpu_type);
+
+out:
+ if (ret)
+ oprofile_perf_exit();
+
+ return ret;
+}
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c
index 2766a6d..1944621 100644
--- a/drivers/oprofile/oprofilefs.c
+++ b/drivers/oprofile/oprofilefs.c
@@ -91,16 +91,20 @@ static ssize_t ulong_read_file(struct file *file, char __user *buf, size_t count
static ssize_t ulong_write_file(struct file *file, char const __user *buf, size_t count, loff_t *offset)
{
- unsigned long *value = file->private_data;
+ unsigned long value;
int retval;
if (*offset)
return -EINVAL;
- retval = oprofilefs_ulong_from_user(value, buf, count);
+ retval = oprofilefs_ulong_from_user(&value, buf, count);
+ if (retval)
+ return retval;
+ retval = oprofile_set_ulong(file->private_data, value);
if (retval)
return retval;
+
return count;
}
@@ -126,50 +130,41 @@ static const struct file_operations ulong_ro_fops = {
};
-static struct dentry *__oprofilefs_create_file(struct super_block *sb,
+static int __oprofilefs_create_file(struct super_block *sb,
struct dentry *root, char const *name, const struct file_operations *fops,
- int perm)
+ int perm, void *priv)
{
struct dentry *dentry;
struct inode *inode;
dentry = d_alloc_name(root, name);
if (!dentry)
- return NULL;
+ return -ENOMEM;
inode = oprofilefs_get_inode(sb, S_IFREG | perm);
if (!inode) {
dput(dentry);
- return NULL;
+ return -ENOMEM;
}
inode->i_fop = fops;
d_add(dentry, inode);
- return dentry;
+ dentry->d_inode->i_private = priv;
+ return 0;
}
int oprofilefs_create_ulong(struct super_block *sb, struct dentry *root,
char const *name, unsigned long *val)
{
- struct dentry *d = __oprofilefs_create_file(sb, root, name,
- &ulong_fops, 0644);
- if (!d)
- return -EFAULT;
-
- d->d_inode->i_private = val;
- return 0;
+ return __oprofilefs_create_file(sb, root, name,
+ &ulong_fops, 0644, val);
}
int oprofilefs_create_ro_ulong(struct super_block *sb, struct dentry *root,
char const *name, unsigned long *val)
{
- struct dentry *d = __oprofilefs_create_file(sb, root, name,
- &ulong_ro_fops, 0444);
- if (!d)
- return -EFAULT;
-
- d->d_inode->i_private = val;
- return 0;
+ return __oprofilefs_create_file(sb, root, name,
+ &ulong_ro_fops, 0444, val);
}
@@ -189,31 +184,22 @@ static const struct file_operations atomic_ro_fops = {
int oprofilefs_create_ro_atomic(struct super_block *sb, struct dentry *root,
char const *name, atomic_t *val)
{
- struct dentry *d = __oprofilefs_create_file(sb, root, name,
- &atomic_ro_fops, 0444);
- if (!d)
- return -EFAULT;
-
- d->d_inode->i_private = val;
- return 0;
+ return __oprofilefs_create_file(sb, root, name,
+ &atomic_ro_fops, 0444, val);
}
int oprofilefs_create_file(struct super_block *sb, struct dentry *root,
char const *name, const struct file_operations *fops)
{
- if (!__oprofilefs_create_file(sb, root, name, fops, 0644))
- return -EFAULT;
- return 0;
+ return __oprofilefs_create_file(sb, root, name, fops, 0644, NULL);
}
int oprofilefs_create_file_perm(struct super_block *sb, struct dentry *root,
char const *name, const struct file_operations *fops, int perm)
{
- if (!__oprofilefs_create_file(sb, root, name, fops, perm))
- return -EFAULT;
- return 0;
+ return __oprofilefs_create_file(sb, root, name, fops, perm, NULL);
}
diff --git a/drivers/parport/share.c b/drivers/parport/share.c
index dffa5d4..a2d9d1e 100644
--- a/drivers/parport/share.c
+++ b/drivers/parport/share.c
@@ -306,7 +306,7 @@ struct parport *parport_register_port(unsigned long base, int irq, int dma,
spin_lock_init(&tmp->pardevice_lock);
tmp->ieee1284.mode = IEEE1284_MODE_COMPAT;
tmp->ieee1284.phase = IEEE1284_PH_FWD_IDLE;
- init_MUTEX_LOCKED (&tmp->ieee1284.irq); /* actually a semaphore at 0 */
+ sema_init(&tmp->ieee1284.irq, 0);
tmp->spintime = parport_default_spintime;
atomic_set (&tmp->ref_count, 1);
INIT_LIST_HEAD(&tmp->full_list);
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index ad0ed21..348fba0 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -1046,13 +1046,13 @@ int scsi_get_vpd_page(struct scsi_device *sdev, u8 page, unsigned char *buf,
/* If the user actually wanted this page, we can skip the rest */
if (page == 0)
- return -EINVAL;
+ return 0;
for (i = 0; i < min((int)buf[3], buf_len - 4); i++)
if (buf[i + 4] == page)
goto found;
- if (i < buf[3] && i > buf_len)
+ if (i < buf[3] && i >= buf_len - 4)
/* ran off the end of the buffer, give us benefit of doubt */
goto found;
/* The device claims it doesn't support the requested page */
diff --git a/drivers/serial/ioc3_serial.c b/drivers/serial/ioc3_serial.c
index 93de907..800c546 100644
--- a/drivers/serial/ioc3_serial.c
+++ b/drivers/serial/ioc3_serial.c
@@ -2044,6 +2044,7 @@ ioc3uart_probe(struct ioc3_submodule *is, struct ioc3_driver_data *idd)
if (!port) {
printk(KERN_WARNING
"IOC3 serial memory not available for port\n");
+ ret = -ENOMEM;
goto out4;
}
spin_lock_init(&port->ip_lock);
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 7c80082..17927b1 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -127,7 +127,10 @@ static void handle_tx(struct vhost_net *net)
size_t len, total_len = 0;
int err, wmem;
size_t hdr_size;
- struct socket *sock = rcu_dereference(vq->private_data);
+ struct socket *sock;
+
+ sock = rcu_dereference_check(vq->private_data,
+ lockdep_is_held(&vq->mutex));
if (!sock)
return;
@@ -582,7 +585,10 @@ static void vhost_net_disable_vq(struct vhost_net *n,
static void vhost_net_enable_vq(struct vhost_net *n,
struct vhost_virtqueue *vq)
{
- struct socket *sock = vq->private_data;
+ struct socket *sock;
+
+ sock = rcu_dereference_protected(vq->private_data,
+ lockdep_is_held(&vq->mutex));
if (!sock)
return;
if (vq == n->vqs + VHOST_NET_VQ_TX) {
@@ -598,7 +604,8 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n,
struct socket *sock;
mutex_lock(&vq->mutex);
- sock = vq->private_data;
+ sock = rcu_dereference_protected(vq->private_data,
+ lockdep_is_held(&vq->mutex));
vhost_net_disable_vq(n, vq);
rcu_assign_pointer(vq->private_data, NULL);
mutex_unlock(&vq->mutex);
@@ -736,7 +743,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
}
/* start polling new socket */
- oldsock = vq->private_data;
+ oldsock = rcu_dereference_protected(vq->private_data,
+ lockdep_is_held(&vq->mutex));
if (sock != oldsock) {
vhost_net_disable_vq(n, vq);
rcu_assign_pointer(vq->private_data, sock);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index dd3d6f7..8b5a1b3 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -320,7 +320,7 @@ long vhost_dev_reset_owner(struct vhost_dev *dev)
vhost_dev_cleanup(dev);
memory->nregions = 0;
- dev->memory = memory;
+ RCU_INIT_POINTER(dev->memory, memory);
return 0;
}
@@ -352,8 +352,9 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
fput(dev->log_file);
dev->log_file = NULL;
/* No one will access memory at this point */
- kfree(dev->memory);
- dev->memory = NULL;
+ kfree(rcu_dereference_protected(dev->memory,
+ lockdep_is_held(&dev->mutex)));
+ RCU_INIT_POINTER(dev->memory, NULL);
if (dev->mm)
mmput(dev->mm);
dev->mm = NULL;
@@ -440,14 +441,22 @@ static int vq_access_ok(unsigned int num,
/* Caller should have device mutex but not vq mutex */
int vhost_log_access_ok(struct vhost_dev *dev)
{
- return memory_access_ok(dev, dev->memory, 1);
+ struct vhost_memory *mp;
+
+ mp = rcu_dereference_protected(dev->memory,
+ lockdep_is_held(&dev->mutex));
+ return memory_access_ok(dev, mp, 1);
}
/* Verify access for write logging. */
/* Caller should have vq mutex and device mutex */
static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base)
{
- return vq_memory_access_ok(log_base, vq->dev->memory,
+ struct vhost_memory *mp;
+
+ mp = rcu_dereference_protected(vq->dev->memory,
+ lockdep_is_held(&vq->mutex));
+ return vq_memory_access_ok(log_base, mp,
vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) &&
(!vq->log_used || log_access_ok(log_base, vq->log_addr,
sizeof *vq->used +
@@ -487,7 +496,8 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
kfree(newmem);
return -EFAULT;
}
- oldmem = d->memory;
+ oldmem = rcu_dereference_protected(d->memory,
+ lockdep_is_held(&d->mutex));
rcu_assign_pointer(d->memory, newmem);
synchronize_rcu();
kfree(oldmem);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index afd7729..af3c11d 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -106,7 +106,7 @@ struct vhost_virtqueue {
* vhost_work execution acts instead of rcu_read_lock() and the end of
* vhost_work execution acts instead of rcu_read_lock().
* Writers use virtqueue mutex. */
- void *private_data;
+ void __rcu *private_data;
/* Log write descriptors */
void __user *log_base;
struct vhost_log log[VHOST_NET_MAX_SG];
@@ -116,7 +116,7 @@ struct vhost_dev {
/* Readers use RCU to access memory table pointer
* log base pointer and features.
* Writers use mutex below.*/
- struct vhost_memory *memory;
+ struct vhost_memory __rcu *memory;
struct mm_struct *mm;
struct mutex mutex;
unsigned acked_features;
@@ -173,7 +173,11 @@ enum {
static inline int vhost_has_feature(struct vhost_dev *dev, int bit)
{
- unsigned acked_features = rcu_dereference(dev->acked_features);
+ unsigned acked_features;
+
+ acked_features =
+ rcu_dereference_index_check(dev->acked_features,
+ lockdep_is_held(&dev->mutex));
return acked_features & (1 << bit);
}
OpenPOWER on IntegriCloud