diff options
93 files changed, 772 insertions, 435 deletions
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 3bae418..4303614 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -16,6 +16,8 @@ befs.txt - information about the BeOS filesystem for Linux. bfs.txt - info for the SCO UnixWare Boot Filesystem (BFS). +ceph.txt + - info for the Ceph Distributed File System cifs.txt - description of the CIFS filesystem. coda.txt diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt index 6e03917..0660c9f 100644 --- a/Documentation/filesystems/ceph.txt +++ b/Documentation/filesystems/ceph.txt @@ -8,7 +8,7 @@ Basic features include: * POSIX semantics * Seamless scaling from 1 to many thousands of nodes - * High availability and reliability. No single points of failure. + * High availability and reliability. No single point of failure. * N-way replication of data across storage nodes * Fast recovery from node failures * Automatic rebalancing of data on node addition/removal @@ -94,7 +94,7 @@ Mount Options wsize=X Specify the maximum write size in bytes. By default there is no - maximu. Ceph will normally size writes based on the file stripe + maximum. Ceph will normally size writes based on the file stripe size. rsize=X @@ -115,7 +115,7 @@ Mount Options number of entries in that directory. nocrc - Disable CRC32C calculation for data writes. If set, the OSD + Disable CRC32C calculation for data writes. If set, the storage node must rely on TCP's error correction to detect data corruption in the data payload. @@ -133,7 +133,8 @@ For more information on Ceph, see the home page at http://ceph.newdream.net/ The Linux kernel client source tree is available at - git://ceph.newdream.net/linux-ceph-client.git + git://ceph.newdream.net/git/ceph-client.git + git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git and the source for the full system is at - git://ceph.newdream.net/ceph.git + git://ceph.newdream.net/git/ceph.git diff --git a/MAINTAINERS b/MAINTAINERS index 28d4bf0..088bd41 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1443,7 +1443,7 @@ F: arch/powerpc/platforms/cell/ CEPH DISTRIBUTED FILE SYSTEM CLIENT M: Sage Weil <sage@newdream.net> -L: ceph-devel@lists.sourceforge.net +L: ceph-devel@vger.kernel.org W: http://ceph.newdream.net/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git S: Supported @@ -3083,6 +3083,7 @@ F: include/scsi/*iscsi* ISDN SUBSYSTEM M: Karsten Keil <isdn@linux-pingi.de> L: isdn4linux@listserv.isdn4linux.de (subscribers-only) +L: netdev@vger.kernel.org W: http://www.isdn4linux.de T: git git://git.kernel.org/pub/scm/linux/kernel/git/kkeil/isdn-2.6.git S: Maintained diff --git a/arch/cris/arch-v32/drivers/pci/bios.c b/arch/cris/arch-v32/drivers/pci/bios.c index d4b9c36..bc0cfda 100644 --- a/arch/cris/arch-v32/drivers/pci/bios.c +++ b/arch/cris/arch-v32/drivers/pci/bios.c @@ -50,7 +50,7 @@ pcibios_align_resource(void *data, const struct resource *res, if ((res->flags & IORESOURCE_IO) && (start & 0x300)) start = (start + 0x3ff) & ~0x3ff; - return start + return start; } int pcibios_enable_resources(struct pci_dev *dev, int mask) diff --git a/arch/frv/mb93090-mb00/pci-frv.c b/arch/frv/mb93090-mb00/pci-frv.c index 16bc2cb..6b4fb28 100644 --- a/arch/frv/mb93090-mb00/pci-frv.c +++ b/arch/frv/mb93090-mb00/pci-frv.c @@ -41,7 +41,7 @@ pcibios_align_resource(void *data, const struct resource *res, if ((res->flags & IORESOURCE_IO) && (start & 0x300)) start = (start + 0x3ff) & ~0x3ff; - return start + return start; } diff --git a/arch/sparc/include/asm/stat.h b/arch/sparc/include/asm/stat.h index 39327d6..a232e9e 100644 --- a/arch/sparc/include/asm/stat.h +++ b/arch/sparc/include/asm/stat.h @@ -53,8 +53,8 @@ struct stat { ino_t st_ino; mode_t st_mode; short st_nlink; - uid16_t st_uid; - gid16_t st_gid; + unsigned short st_uid; + unsigned short st_gid; unsigned short st_rdev; off_t st_size; time_t st_atime; diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 68cb9b4..e277193 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1337,7 +1337,7 @@ static void perf_callchain_user_32(struct pt_regs *regs, callchain_store(entry, PERF_CONTEXT_USER); callchain_store(entry, regs->tpc); - ufp = regs->u_regs[UREG_I6]; + ufp = regs->u_regs[UREG_I6] & 0xffffffffUL; do { struct sparc_stackf32 *usf, sf; unsigned long pc; diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c index ca39c60..1eb8b00 100644 --- a/arch/sparc/kernel/sysfs.c +++ b/arch/sparc/kernel/sysfs.c @@ -107,12 +107,12 @@ static unsigned long run_on_cpu(unsigned long cpu, unsigned long ret; /* should return -EINVAL to userspace */ - if (set_cpus_allowed(current, cpumask_of_cpu(cpu))) + if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) return 0; ret = func(arg); - set_cpus_allowed(current, old_affinity); + set_cpus_allowed_ptr(current, &old_affinity); return ret; } diff --git a/arch/sparc/kernel/us2e_cpufreq.c b/arch/sparc/kernel/us2e_cpufreq.c index 791c151..8f982b7 100644 --- a/arch/sparc/kernel/us2e_cpufreq.c +++ b/arch/sparc/kernel/us2e_cpufreq.c @@ -238,12 +238,12 @@ static unsigned int us2e_freq_get(unsigned int cpu) return 0; cpus_allowed = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, cpumask_of(cpu)); clock_tick = sparc64_get_clock_tick(cpu) / 1000; estar = read_hbreg(HBIRD_ESTAR_MODE_ADDR); - set_cpus_allowed(current, cpus_allowed); + set_cpus_allowed_ptr(current, &cpus_allowed); return clock_tick / estar_to_divisor(estar); } @@ -259,7 +259,7 @@ static void us2e_set_cpu_divider_index(unsigned int cpu, unsigned int index) return; cpus_allowed = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, cpumask_of(cpu)); new_freq = clock_tick = sparc64_get_clock_tick(cpu) / 1000; new_bits = index_to_estar_mode(index); @@ -281,7 +281,7 @@ static void us2e_set_cpu_divider_index(unsigned int cpu, unsigned int index) cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - set_cpus_allowed(current, cpus_allowed); + set_cpus_allowed_ptr(current, &cpus_allowed); } static int us2e_freq_target(struct cpufreq_policy *policy, diff --git a/arch/sparc/kernel/us3_cpufreq.c b/arch/sparc/kernel/us3_cpufreq.c index 365b646..f35d1e7 100644 --- a/arch/sparc/kernel/us3_cpufreq.c +++ b/arch/sparc/kernel/us3_cpufreq.c @@ -86,12 +86,12 @@ static unsigned int us3_freq_get(unsigned int cpu) return 0; cpus_allowed = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, cpumask_of(cpu)); reg = read_safari_cfg(); ret = get_current_freq(cpu, reg); - set_cpus_allowed(current, cpus_allowed); + set_cpus_allowed_ptr(current, &cpus_allowed); return ret; } @@ -106,7 +106,7 @@ static void us3_set_cpu_divider_index(unsigned int cpu, unsigned int index) return; cpus_allowed = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); + set_cpus_allowed_ptr(current, cpumask_of(cpu)); new_freq = sparc64_get_clock_tick(cpu) / 1000; switch (index) { @@ -140,7 +140,7 @@ static void us3_set_cpu_divider_index(unsigned int cpu, unsigned int index) cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - set_cpus_allowed(current, cpus_allowed); + set_cpus_allowed_ptr(current, &cpus_allowed); } static int us3_freq_target(struct cpufreq_policy *policy, diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index adedeef..b2e2460 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -7,6 +7,7 @@ #include <linux/init.h> #include <linux/start_kernel.h> +#include <linux/mm.h> #include <asm/setup.h> #include <asm/sections.h> @@ -44,9 +45,10 @@ void __init i386_start_kernel(void) #ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { + /* Assume only end is not page aligned */ u64 ramdisk_image = boot_params.hdr.ramdisk_image; u64 ramdisk_size = boot_params.hdr.ramdisk_size; - u64 ramdisk_end = ramdisk_image + ramdisk_size; + u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); } #endif diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index b5a9896..7147143 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -103,9 +103,10 @@ void __init x86_64_start_reservations(char *real_mode_data) #ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { + /* Assume only end is not page aligned */ unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; - unsigned long ramdisk_end = ramdisk_image + ramdisk_size; + unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); } #endif diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 5d7ba1a..d76e185 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -314,16 +314,17 @@ static void __init reserve_brk(void) #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) static void __init relocate_initrd(void) { - + /* Assume only end is not page aligned */ u64 ramdisk_image = boot_params.hdr.ramdisk_image; u64 ramdisk_size = boot_params.hdr.ramdisk_size; + u64 area_size = PAGE_ALIGN(ramdisk_size); u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; u64 ramdisk_here; unsigned long slop, clen, mapaddr; char *p, *q; /* We need to move the initrd down into lowmem */ - ramdisk_here = find_e820_area(0, end_of_lowmem, ramdisk_size, + ramdisk_here = find_e820_area(0, end_of_lowmem, area_size, PAGE_SIZE); if (ramdisk_here == -1ULL) @@ -332,7 +333,7 @@ static void __init relocate_initrd(void) /* Note: this includes all the lowmem currently occupied by the initrd, we rely on that fact to keep the data intact. */ - reserve_early(ramdisk_here, ramdisk_here + ramdisk_size, + reserve_early(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK"); initrd_start = ramdisk_here + PAGE_OFFSET; initrd_end = initrd_start + ramdisk_size; @@ -376,9 +377,10 @@ static void __init relocate_initrd(void) static void __init reserve_initrd(void) { + /* Assume only end is not page aligned */ u64 ramdisk_image = boot_params.hdr.ramdisk_image; u64 ramdisk_size = boot_params.hdr.ramdisk_size; - u64 ramdisk_end = ramdisk_image + ramdisk_size; + u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; if (!boot_params.hdr.type_of_loader || diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 44879df..2cc2497 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -291,8 +291,8 @@ SECTIONS .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { __smp_locks = .; *(.smp_locks) - __smp_locks_end = .; . = ALIGN(PAGE_SIZE); + __smp_locks_end = .; } #ifdef CONFIG_X86_64 diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index e71c5cb..452ee5b 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -331,11 +331,23 @@ int devmem_is_allowed(unsigned long pagenr) void free_init_pages(char *what, unsigned long begin, unsigned long end) { - unsigned long addr = begin; + unsigned long addr; + unsigned long begin_aligned, end_aligned; - if (addr >= end) + /* Make sure boundaries are page aligned */ + begin_aligned = PAGE_ALIGN(begin); + end_aligned = end & PAGE_MASK; + + if (WARN_ON(begin_aligned != begin || end_aligned != end)) { + begin = begin_aligned; + end = end_aligned; + } + + if (begin >= end) return; + addr = begin; + /* * If debugging page accesses then do not free this memory but * mark them not present - any buggy init-section access will @@ -343,7 +355,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) */ #ifdef CONFIG_DEBUG_PAGEALLOC printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", - begin, PAGE_ALIGN(end)); + begin, end); set_memory_np(begin, (end - begin) >> PAGE_SHIFT); #else /* @@ -358,8 +370,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) for (; addr < end; addr += PAGE_SIZE) { ClearPageReserved(virt_to_page(addr)); init_page_count(virt_to_page(addr)); - memset((void *)(addr & ~(PAGE_SIZE-1)), - POISON_FREE_INITMEM, PAGE_SIZE); + memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); free_page(addr); totalram_pages++; } @@ -376,6 +387,15 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_init_pages("initrd memory", start, end); + /* + * end could be not aligned, and We can not align that, + * decompresser could be confused by aligned initrd_end + * We already reserve the end partial page before in + * - i386_start_kernel() + * - x86_64_start_kernel() + * - relocate_initrd() + * So here We can do PAGE_ALIGN() safely to get partial page to be freed + */ + free_init_pages("initrd memory", start, PAGE_ALIGN(end)); } #endif diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c index 95d39c3..c59b4071 100644 --- a/drivers/ata/pata_via.c +++ b/drivers/ata/pata_via.c @@ -576,6 +576,10 @@ static int via_init_one(struct pci_dev *pdev, const struct pci_device_id *id) u8 rev = isa->revision; pci_dev_put(isa); + if ((id->device == 0x0415 || id->device == 0x3164) && + (config->id != id->device)) + continue; + if (rev >= config->rev_min && rev <= config->rev_max) break; } diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index e4595e6..9be8e17 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -217,8 +217,8 @@ config SENSORS_ASC7621 depends on HWMON && I2C help If you say yes here you get support for the aSC7621 - family of SMBus sensors chip found on most Intel X48, X38, 975, - 965 and 945 desktop boards. Currently supported chips: + family of SMBus sensors chip found on most Intel X38, X48, X58, + 945, 965 and 975 desktop boards. Currently supported chips: aSC7621 aSC7621a diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 2d7bcee..e9b7fbc 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -228,7 +228,7 @@ static int __devinit adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device * if (err) { dev_warn(dev, "Unable to access MSR 0xEE, for Tjmax, left" - " at default"); + " at default\n"); } else if (eax & 0x40000000) { tjmax = tjmax_ee; } @@ -466,7 +466,7 @@ static int __init coretemp_init(void) family 6 CPU */ if ((c->x86 == 0x6) && (c->x86_model > 0xf)) printk(KERN_WARNING DRVNAME ": Unknown CPU " - "model %x\n", c->x86_model); + "model 0x%x\n", c->x86_model); continue; } diff --git a/drivers/hwmon/w83793.c b/drivers/hwmon/w83793.c index 9de81a4..612807d 100644 --- a/drivers/hwmon/w83793.c +++ b/drivers/hwmon/w83793.c @@ -1294,7 +1294,7 @@ static int watchdog_close(struct inode *inode, struct file *filp) static ssize_t watchdog_write(struct file *filp, const char __user *buf, size_t count, loff_t *offset) { - size_t ret; + ssize_t ret; struct w83793_data *data = filp->private_data; if (count) { diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index fbedd35..4c3d1bfe 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -695,14 +695,8 @@ static int ide_probe_port(ide_hwif_t *hwif) if (irqd) disable_irq(hwif->irq); - rc = ide_port_wait_ready(hwif); - if (rc == -ENODEV) { - printk(KERN_INFO "%s: no devices on the port\n", hwif->name); - goto out; - } else if (rc == -EBUSY) - printk(KERN_ERR "%s: not ready before the probe\n", hwif->name); - else - rc = -ENODEV; + if (ide_port_wait_ready(hwif) == -EBUSY) + printk(KERN_DEBUG "%s: Wait for ready failed before probe !\n", hwif->name); /* * Second drive should only exist if first drive was found, @@ -713,7 +707,7 @@ static int ide_probe_port(ide_hwif_t *hwif) if (drive->dev_flags & IDE_DFLAG_PRESENT) rc = 0; } -out: + /* * Use cached IRQ number. It might be (and is...) changed by probe * code above diff --git a/drivers/ide/via82cxxx.c b/drivers/ide/via82cxxx.c index e65d010..48fd4ef 100644 --- a/drivers/ide/via82cxxx.c +++ b/drivers/ide/via82cxxx.c @@ -110,7 +110,6 @@ struct via82cxxx_dev { struct via_isa_bridge *via_config; unsigned int via_80w; - u8 cached_device[2]; }; /** @@ -403,66 +402,10 @@ static const struct ide_port_ops via_port_ops = { .cable_detect = via82cxxx_cable_detect, }; -static void via_write_devctl(ide_hwif_t *hwif, u8 ctl) -{ - struct via82cxxx_dev *vdev = hwif->host->host_priv; - - outb(ctl, hwif->io_ports.ctl_addr); - outb(vdev->cached_device[hwif->channel], hwif->io_ports.device_addr); -} - -static void __via_dev_select(ide_drive_t *drive, u8 select) -{ - ide_hwif_t *hwif = drive->hwif; - struct via82cxxx_dev *vdev = hwif->host->host_priv; - - outb(select, hwif->io_ports.device_addr); - vdev->cached_device[hwif->channel] = select; -} - -static void via_dev_select(ide_drive_t *drive) -{ - __via_dev_select(drive, drive->select | ATA_DEVICE_OBS); -} - -static void via_tf_load(ide_drive_t *drive, struct ide_taskfile *tf, u8 valid) -{ - ide_hwif_t *hwif = drive->hwif; - struct ide_io_ports *io_ports = &hwif->io_ports; - - if (valid & IDE_VALID_FEATURE) - outb(tf->feature, io_ports->feature_addr); - if (valid & IDE_VALID_NSECT) - outb(tf->nsect, io_ports->nsect_addr); - if (valid & IDE_VALID_LBAL) - outb(tf->lbal, io_ports->lbal_addr); - if (valid & IDE_VALID_LBAM) - outb(tf->lbam, io_ports->lbam_addr); - if (valid & IDE_VALID_LBAH) - outb(tf->lbah, io_ports->lbah_addr); - if (valid & IDE_VALID_DEVICE) - __via_dev_select(drive, tf->device); -} - -const struct ide_tp_ops via_tp_ops = { - .exec_command = ide_exec_command, - .read_status = ide_read_status, - .read_altstatus = ide_read_altstatus, - .write_devctl = via_write_devctl, - - .dev_select = via_dev_select, - .tf_load = via_tf_load, - .tf_read = ide_tf_read, - - .input_data = ide_input_data, - .output_data = ide_output_data, -}; - static const struct ide_port_info via82cxxx_chipset __devinitdata = { .name = DRV_NAME, .init_chipset = init_chipset_via82cxxx, .enablebits = { { 0x40, 0x02, 0x02 }, { 0x40, 0x01, 0x01 } }, - .tp_ops = &via_tp_ops, .port_ops = &via_port_ops, .host_flags = IDE_HFLAG_PIO_NO_BLACKLIST | IDE_HFLAG_POST_SET_MODE | diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c index 9ba5470..0ebd820 100644 --- a/drivers/net/atlx/atl1.c +++ b/drivers/net/atlx/atl1.c @@ -84,7 +84,7 @@ #define ATLX_DRIVER_VERSION "2.1.3" MODULE_AUTHOR("Xiong Huang <xiong.huang@atheros.com>, \ - Chris Snook <csnook@redhat.com>, Jay Cliburn <jcliburn@gmail.com>"); +Chris Snook <csnook@redhat.com>, Jay Cliburn <jcliburn@gmail.com>"); MODULE_LICENSE("GPL"); MODULE_VERSION(ATLX_DRIVER_VERSION); diff --git a/drivers/net/benet/be_ethtool.c b/drivers/net/benet/be_ethtool.c index 9560d48..51e1065 100644 --- a/drivers/net/benet/be_ethtool.c +++ b/drivers/net/benet/be_ethtool.c @@ -490,7 +490,7 @@ be_test_ddr_dma(struct be_adapter *adapter) { int ret, i; struct be_dma_mem ddrdma_cmd; - u64 pattern[2] = {0x5a5a5a5a5a5a5a5a, 0xa5a5a5a5a5a5a5a5}; + u64 pattern[2] = {0x5a5a5a5a5a5a5a5aULL, 0xa5a5a5a5a5a5a5a5ULL}; ddrdma_cmd.size = sizeof(struct be_cmd_req_ddrdma_test); ddrdma_cmd.va = pci_alloc_consistent(adapter->pdev, ddrdma_cmd.size, diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 430c022..5b92fbf 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1235,6 +1235,11 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) write_lock_bh(&bond->curr_slave_lock); } } + + /* resend IGMP joins since all were sent on curr_active_slave */ + if (bond->params.mode == BOND_MODE_ROUNDROBIN) { + bond_resend_igmp_join_requests(bond); + } } /** @@ -4138,22 +4143,41 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev struct bonding *bond = netdev_priv(bond_dev); struct slave *slave, *start_at; int i, slave_no, res = 1; + struct iphdr *iph = ip_hdr(skb); read_lock(&bond->lock); if (!BOND_IS_OK(bond)) goto out; - /* - * Concurrent TX may collide on rr_tx_counter; we accept that - * as being rare enough not to justify using an atomic op here + * Start with the curr_active_slave that joined the bond as the + * default for sending IGMP traffic. For failover purposes one + * needs to maintain some consistency for the interface that will + * send the join/membership reports. The curr_active_slave found + * will send all of this type of traffic. */ - slave_no = bond->rr_tx_counter++ % bond->slave_cnt; + if ((iph->protocol == htons(IPPROTO_IGMP)) && + (skb->protocol == htons(ETH_P_IP))) { - bond_for_each_slave(bond, slave, i) { - slave_no--; - if (slave_no < 0) - break; + read_lock(&bond->curr_slave_lock); + slave = bond->curr_active_slave; + read_unlock(&bond->curr_slave_lock); + + if (!slave) + goto out; + } else { + /* + * Concurrent TX may collide on rr_tx_counter; we accept + * that as being rare enough not to justify using an + * atomic op here. + */ + slave_no = bond->rr_tx_counter++ % bond->slave_cnt; + + bond_for_each_slave(bond, slave, i) { + slave_no--; + if (slave_no < 0) + break; + } } start_at = slave; diff --git a/drivers/net/e1000/e1000.h b/drivers/net/e1000/e1000.h index 9902b33..2f29c21 100644 --- a/drivers/net/e1000/e1000.h +++ b/drivers/net/e1000/e1000.h @@ -261,7 +261,6 @@ struct e1000_adapter { /* TX */ struct e1000_tx_ring *tx_ring; /* One per active queue */ unsigned int restart_queue; - unsigned long tx_queue_len; u32 txd_cmd; u32 tx_int_delay; u32 tx_abs_int_delay; diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 8be6fae..b15ece2 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -383,8 +383,6 @@ static void e1000_configure(struct e1000_adapter *adapter) adapter->alloc_rx_buf(adapter, ring, E1000_DESC_UNUSED(ring)); } - - adapter->tx_queue_len = netdev->tx_queue_len; } int e1000_up(struct e1000_adapter *adapter) @@ -503,7 +501,6 @@ void e1000_down(struct e1000_adapter *adapter) del_timer_sync(&adapter->watchdog_timer); del_timer_sync(&adapter->phy_info_timer); - netdev->tx_queue_len = adapter->tx_queue_len; adapter->link_speed = 0; adapter->link_duplex = 0; netif_carrier_off(netdev); @@ -2316,19 +2313,15 @@ static void e1000_watchdog(unsigned long data) E1000_CTRL_RFCE) ? "RX" : ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None" ))); - /* tweak tx_queue_len according to speed/duplex - * and adjust the timeout factor */ - netdev->tx_queue_len = adapter->tx_queue_len; + /* adjust timeout factor according to speed/duplex */ adapter->tx_timeout_factor = 1; switch (adapter->link_speed) { case SPEED_10: txb2b = false; - netdev->tx_queue_len = 10; adapter->tx_timeout_factor = 16; break; case SPEED_100: txb2b = false; - netdev->tx_queue_len = 100; /* maybe add some timeout factor ? */ break; } diff --git a/drivers/net/e1000e/e1000.h b/drivers/net/e1000e/e1000.h index c2ec095..118bdf4 100644 --- a/drivers/net/e1000e/e1000.h +++ b/drivers/net/e1000e/e1000.h @@ -279,7 +279,6 @@ struct e1000_adapter { struct napi_struct napi; - unsigned long tx_queue_len; unsigned int restart_queue; u32 txd_cmd; diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 88d54d3..e1cceb6 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -2289,8 +2289,6 @@ static void e1000_configure_tx(struct e1000_adapter *adapter) ew32(TCTL, tctl); e1000e_config_collision_dist(hw); - - adapter->tx_queue_len = adapter->netdev->tx_queue_len; } /** @@ -2877,7 +2875,6 @@ void e1000e_down(struct e1000_adapter *adapter) del_timer_sync(&adapter->watchdog_timer); del_timer_sync(&adapter->phy_info_timer); - netdev->tx_queue_len = adapter->tx_queue_len; netif_carrier_off(netdev); adapter->link_speed = 0; adapter->link_duplex = 0; @@ -3588,21 +3585,15 @@ static void e1000_watchdog_task(struct work_struct *work) "link gets many collisions.\n"); } - /* - * tweak tx_queue_len according to speed/duplex - * and adjust the timeout factor - */ - netdev->tx_queue_len = adapter->tx_queue_len; + /* adjust timeout factor according to speed/duplex */ adapter->tx_timeout_factor = 1; switch (adapter->link_speed) { case SPEED_10: txb2b = 0; - netdev->tx_queue_len = 10; adapter->tx_timeout_factor = 16; break; case SPEED_100: txb2b = 0; - netdev->tx_queue_len = 100; adapter->tx_timeout_factor = 10; break; } diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index b671555..669de02 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -2393,6 +2393,7 @@ struct sk_buff * gfar_new_skb(struct net_device *dev) * as many bytes as needed to align the data properly */ skb_reserve(skb, alignamount); + GFAR_CB(skb)->alignamount = alignamount; return skb; } @@ -2533,13 +2534,13 @@ int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit) newskb = skb; else if (skb) { /* - * We need to reset ->data to what it + * We need to un-reserve() the skb to what it * was before gfar_new_skb() re-aligned * it to an RXBUF_ALIGNMENT boundary * before we put the skb back on the * recycle list. */ - skb->data = skb->head + NET_SKB_PAD; + skb_reserve(skb, -GFAR_CB(skb)->alignamount); __skb_queue_head(&priv->rx_recycle, skb); } } else { diff --git a/drivers/net/gianfar.h b/drivers/net/gianfar.h index 3d72dc4..17d25e7 100644 --- a/drivers/net/gianfar.h +++ b/drivers/net/gianfar.h @@ -566,6 +566,12 @@ struct rxfcb { u16 vlctl; /* VLAN control word */ }; +struct gianfar_skb_cb { + int alignamount; +}; + +#define GFAR_CB(skb) ((struct gianfar_skb_cb *)((skb)->cb)) + struct rmon_mib { u32 tr64; /* 0x.680 - Transmit and Receive 64-byte Frame Counter */ diff --git a/drivers/net/igb/e1000_mac.c b/drivers/net/igb/e1000_mac.c index 2a8a886..be8d010 100644 --- a/drivers/net/igb/e1000_mac.c +++ b/drivers/net/igb/e1000_mac.c @@ -1367,7 +1367,8 @@ out: * igb_enable_mng_pass_thru - Enable processing of ARP's * @hw: pointer to the HW structure * - * Verifies the hardware needs to allow ARPs to be processed by the host. + * Verifies the hardware needs to leave interface enabled so that frames can + * be directed to and from the management interface. **/ bool igb_enable_mng_pass_thru(struct e1000_hw *hw) { @@ -1380,8 +1381,7 @@ bool igb_enable_mng_pass_thru(struct e1000_hw *hw) manc = rd32(E1000_MANC); - if (!(manc & E1000_MANC_RCV_TCO_EN) || - !(manc & E1000_MANC_EN_MAC_ADDR_FILTER)) + if (!(manc & E1000_MANC_RCV_TCO_EN)) goto out; if (hw->mac.arc_subsystem_valid) { diff --git a/drivers/net/igbvf/igbvf.h b/drivers/net/igbvf/igbvf.h index a1774b2..debeee2 100644 --- a/drivers/net/igbvf/igbvf.h +++ b/drivers/net/igbvf/igbvf.h @@ -198,7 +198,6 @@ struct igbvf_adapter { struct igbvf_ring *tx_ring /* One per active queue */ ____cacheline_aligned_in_smp; - unsigned long tx_queue_len; unsigned int restart_queue; u32 txd_cmd; diff --git a/drivers/net/igbvf/netdev.c b/drivers/net/igbvf/netdev.c index a77afd8..b41037e 100644 --- a/drivers/net/igbvf/netdev.c +++ b/drivers/net/igbvf/netdev.c @@ -1304,8 +1304,6 @@ static void igbvf_configure_tx(struct igbvf_adapter *adapter) /* enable Report Status bit */ adapter->txd_cmd |= E1000_ADVTXD_DCMD_RS; - - adapter->tx_queue_len = adapter->netdev->tx_queue_len; } /** @@ -1524,7 +1522,6 @@ void igbvf_down(struct igbvf_adapter *adapter) del_timer_sync(&adapter->watchdog_timer); - netdev->tx_queue_len = adapter->tx_queue_len; netif_carrier_off(netdev); /* record the stats before reset*/ @@ -1857,21 +1854,15 @@ static void igbvf_watchdog_task(struct work_struct *work) &adapter->link_duplex); igbvf_print_link_info(adapter); - /* - * tweak tx_queue_len according to speed/duplex - * and adjust the timeout factor - */ - netdev->tx_queue_len = adapter->tx_queue_len; + /* adjust timeout factor according to speed/duplex */ adapter->tx_timeout_factor = 1; switch (adapter->link_speed) { case SPEED_10: txb2b = 0; - netdev->tx_queue_len = 10; adapter->tx_timeout_factor = 16; break; case SPEED_100: txb2b = 0; - netdev->tx_queue_len = 100; /* maybe add some timeout factor ? */ break; } diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h index 19e94ee..79c35ae 100644 --- a/drivers/net/ixgbe/ixgbe.h +++ b/drivers/net/ixgbe/ixgbe.h @@ -204,14 +204,17 @@ enum ixgbe_ring_f_enum { #define IXGBE_MAX_FDIR_INDICES 64 #ifdef IXGBE_FCOE #define IXGBE_MAX_FCOE_INDICES 8 +#define MAX_RX_QUEUES (IXGBE_MAX_FDIR_INDICES + IXGBE_MAX_FCOE_INDICES) +#define MAX_TX_QUEUES (IXGBE_MAX_FDIR_INDICES + IXGBE_MAX_FCOE_INDICES) +#else +#define MAX_RX_QUEUES IXGBE_MAX_FDIR_INDICES +#define MAX_TX_QUEUES IXGBE_MAX_FDIR_INDICES #endif /* IXGBE_FCOE */ struct ixgbe_ring_feature { int indices; int mask; } ____cacheline_internodealigned_in_smp; -#define MAX_RX_QUEUES 128 -#define MAX_TX_QUEUES 128 #define MAX_RX_PACKET_BUFFERS ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) \ ? 8 : 1) diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c index 7949a44..1959ef7 100644 --- a/drivers/net/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ixgbe/ixgbe_ethtool.c @@ -1853,6 +1853,26 @@ static void ixgbe_diag_test(struct net_device *netdev, if (ixgbe_link_test(adapter, &data[4])) eth_test->flags |= ETH_TEST_FL_FAILED; + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) { + int i; + for (i = 0; i < adapter->num_vfs; i++) { + if (adapter->vfinfo[i].clear_to_send) { + netdev_warn(netdev, "%s", + "offline diagnostic is not " + "supported when VFs are " + "present\n"); + data[0] = 1; + data[1] = 1; + data[2] = 1; + data[3] = 1; + eth_test->flags |= ETH_TEST_FL_FAILED; + clear_bit(__IXGBE_TESTING, + &adapter->state); + goto skip_ol_tests; + } + } + } + if (if_running) /* indicate we're in test mode */ dev_close(netdev); @@ -1908,6 +1928,7 @@ skip_loopback: clear_bit(__IXGBE_TESTING, &adapter->state); } +skip_ol_tests: msleep_interruptible(4 * 1000); } diff --git a/drivers/net/ixgbe/ixgbe_fcoe.c b/drivers/net/ixgbe/ixgbe_fcoe.c index 700cfc0..9276d59 100644 --- a/drivers/net/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ixgbe/ixgbe_fcoe.c @@ -202,6 +202,15 @@ int ixgbe_fcoe_ddp_get(struct net_device *netdev, u16 xid, addr = sg_dma_address(sg); len = sg_dma_len(sg); while (len) { + /* max number of buffers allowed in one DDP context */ + if (j >= IXGBE_BUFFCNT_MAX) { + netif_err(adapter, drv, adapter->netdev, + "xid=%x:%d,%d,%d:addr=%llx " + "not enough descriptors\n", + xid, i, j, dmacount, (u64)addr); + goto out_noddp_free; + } + /* get the offset of length of current buffer */ thisoff = addr & ((dma_addr_t)bufflen - 1); thislen = min((bufflen - thisoff), len); @@ -227,20 +236,13 @@ int ixgbe_fcoe_ddp_get(struct net_device *netdev, u16 xid, len -= thislen; addr += thislen; j++; - /* max number of buffers allowed in one DDP context */ - if (j > IXGBE_BUFFCNT_MAX) { - DPRINTK(DRV, ERR, "xid=%x:%d,%d,%d:addr=%llx " - "not enough descriptors\n", - xid, i, j, dmacount, (u64)addr); - goto out_noddp_free; - } } } /* only the last buffer may have non-full bufflen */ lastsize = thisoff + thislen; fcbuff = (IXGBE_FCBUFF_4KB << IXGBE_FCBUFF_BUFFSIZE_SHIFT); - fcbuff |= (j << IXGBE_FCBUFF_BUFFCNT_SHIFT); + fcbuff |= ((j & 0xff) << IXGBE_FCBUFF_BUFFCNT_SHIFT); fcbuff |= (firstoff << IXGBE_FCBUFF_OFFSET_SHIFT); fcbuff |= (IXGBE_FCBUFF_VALID); @@ -520,6 +522,9 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter) /* Enable L2 eth type filter for FCoE */ IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_FCOE), (ETH_P_FCOE | IXGBE_ETQF_FCOE | IXGBE_ETQF_FILTER_EN)); + /* Enable L2 eth type filter for FIP */ + IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_FIP), + (ETH_P_FIP | IXGBE_ETQF_FILTER_EN)); if (adapter->ring_feature[RING_F_FCOE].indices) { /* Use multiple rx queues for FCoE by redirection table */ for (i = 0; i < IXGBE_FCRETA_SIZE; i++) { @@ -530,6 +535,12 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter) } IXGBE_WRITE_REG(hw, IXGBE_FCRECTL, IXGBE_FCRECTL_ENA); IXGBE_WRITE_REG(hw, IXGBE_ETQS(IXGBE_ETQF_FILTER_FCOE), 0); + fcoe_i = f->mask; + fcoe_i &= IXGBE_FCRETA_ENTRY_MASK; + fcoe_q = adapter->rx_ring[fcoe_i]->reg_idx; + IXGBE_WRITE_REG(hw, IXGBE_ETQS(IXGBE_ETQF_FILTER_FIP), + IXGBE_ETQS_QUEUE_EN | + (fcoe_q << IXGBE_ETQS_RX_QUEUE_SHIFT)); } else { /* Use single rx queue for FCoE */ fcoe_i = f->mask; @@ -539,6 +550,12 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter) IXGBE_ETQS_QUEUE_EN | (fcoe_q << IXGBE_ETQS_RX_QUEUE_SHIFT)); } + /* send FIP frames to the first FCoE queue */ + fcoe_i = f->mask; + fcoe_q = adapter->rx_ring[fcoe_i]->reg_idx; + IXGBE_WRITE_REG(hw, IXGBE_ETQS(IXGBE_ETQF_FILTER_FIP), + IXGBE_ETQS_QUEUE_EN | + (fcoe_q << IXGBE_ETQS_RX_QUEUE_SHIFT)); IXGBE_WRITE_REG(hw, IXGBE_FCRXCTRL, IXGBE_FCRXCTRL_FCOELLI | diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index d75c46f..0c553f6 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -3056,6 +3056,14 @@ void ixgbe_reinit_locked(struct ixgbe_adapter *adapter) while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state)) msleep(1); ixgbe_down(adapter); + /* + * If SR-IOV enabled then wait a bit before bringing the adapter + * back up to give the VFs time to respond to the reset. The + * two second wait is based upon the watchdog timer cycle in + * the VF driver. + */ + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) + msleep(2000); ixgbe_up(adapter); clear_bit(__IXGBE_RESETTING, &adapter->state); } @@ -3236,13 +3244,15 @@ void ixgbe_down(struct ixgbe_adapter *adapter) /* disable receive for all VFs and wait one second */ if (adapter->num_vfs) { - for (i = 0 ; i < adapter->num_vfs; i++) - adapter->vfinfo[i].clear_to_send = 0; - /* ping all the active vfs to let them know we are going down */ ixgbe_ping_all_vfs(adapter); + /* Disable all VFTE/VFRE TX/RX */ ixgbe_disable_tx_rx(adapter); + + /* Mark all the VFs as inactive */ + for (i = 0 ; i < adapter->num_vfs; i++) + adapter->vfinfo[i].clear_to_send = 0; } /* disable receives */ @@ -5638,7 +5648,8 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb) #ifdef IXGBE_FCOE if ((adapter->flags & IXGBE_FLAG_FCOE_ENABLED) && - (skb->protocol == htons(ETH_P_FCOE))) { + ((skb->protocol == htons(ETH_P_FCOE)) || + (skb->protocol == htons(ETH_P_FIP)))) { txq &= (adapter->ring_feature[RING_F_FCOE].indices - 1); txq += adapter->ring_feature[RING_F_FCOE].mask; return txq; @@ -5685,18 +5696,25 @@ static netdev_tx_t ixgbe_xmit_frame(struct sk_buff *skb, tx_ring = adapter->tx_ring[skb->queue_mapping]; - if ((adapter->flags & IXGBE_FLAG_FCOE_ENABLED) && - (skb->protocol == htons(ETH_P_FCOE))) { - tx_flags |= IXGBE_TX_FLAGS_FCOE; #ifdef IXGBE_FCOE + if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) { #ifdef CONFIG_IXGBE_DCB - tx_flags &= ~(IXGBE_TX_FLAGS_VLAN_PRIO_MASK - << IXGBE_TX_FLAGS_VLAN_SHIFT); - tx_flags |= ((adapter->fcoe.up << 13) - << IXGBE_TX_FLAGS_VLAN_SHIFT); -#endif + /* for FCoE with DCB, we force the priority to what + * was specified by the switch */ + if ((skb->protocol == htons(ETH_P_FCOE)) || + (skb->protocol == htons(ETH_P_FIP))) { + tx_flags &= ~(IXGBE_TX_FLAGS_VLAN_PRIO_MASK + << IXGBE_TX_FLAGS_VLAN_SHIFT); + tx_flags |= ((adapter->fcoe.up << 13) + << IXGBE_TX_FLAGS_VLAN_SHIFT); + } #endif + /* flag for FCoE offloads */ + if (skb->protocol == htons(ETH_P_FCOE)) + tx_flags |= IXGBE_TX_FLAGS_FCOE; } +#endif + /* four things can cause us to need a context descriptor */ if (skb_is_gso(skb) || (skb->ip_summed == CHECKSUM_PARTIAL) || @@ -6051,7 +6069,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, indices += min_t(unsigned int, num_possible_cpus(), IXGBE_MAX_FCOE_INDICES); #endif - indices = min_t(unsigned int, indices, MAX_TX_QUEUES); netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices); if (!netdev) { err = -ENOMEM; diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h index 0ed5ab3..4ec6dc1 100644 --- a/drivers/net/ixgbe/ixgbe_type.h +++ b/drivers/net/ixgbe/ixgbe_type.h @@ -1298,6 +1298,7 @@ #define IXGBE_ETQF_FILTER_BCN 1 #define IXGBE_ETQF_FILTER_FCOE 2 #define IXGBE_ETQF_FILTER_1588 3 +#define IXGBE_ETQF_FILTER_FIP 4 /* VLAN Control Bit Masks */ #define IXGBE_VLNCTRL_VET 0x0000FFFF /* bits 0-15 */ #define IXGBE_VLNCTRL_CFI 0x10000000 /* bit 28 */ diff --git a/drivers/net/ixgbevf/ixgbevf_main.c b/drivers/net/ixgbevf/ixgbevf_main.c index d6cbd94..1bbbef3 100644 --- a/drivers/net/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ixgbevf/ixgbevf_main.c @@ -2943,9 +2943,10 @@ static int ixgbevf_tx_map(struct ixgbevf_adapter *adapter, struct ixgbevf_tx_buffer *tx_buffer_info; unsigned int len; unsigned int total = skb->len; - unsigned int offset = 0, size, count = 0, i; + unsigned int offset = 0, size, count = 0; unsigned int nr_frags = skb_shinfo(skb)->nr_frags; unsigned int f; + int i; i = tx_ring->next_to_use; diff --git a/drivers/net/netxen/netxen_nic.h b/drivers/net/netxen/netxen_nic.h index 144d2e8..0f70383 100644 --- a/drivers/net/netxen/netxen_nic.h +++ b/drivers/net/netxen/netxen_nic.h @@ -53,8 +53,8 @@ #define _NETXEN_NIC_LINUX_MAJOR 4 #define _NETXEN_NIC_LINUX_MINOR 0 -#define _NETXEN_NIC_LINUX_SUBVERSION 72 -#define NETXEN_NIC_LINUX_VERSIONID "4.0.72" +#define _NETXEN_NIC_LINUX_SUBVERSION 73 +#define NETXEN_NIC_LINUX_VERSIONID "4.0.73" #define NETXEN_VERSION_CODE(a, b, c) (((a) << 24) + ((b) << 16) + (c)) #define _major(v) (((v) >> 24) & 0xff) diff --git a/drivers/net/netxen/netxen_nic_ctx.c b/drivers/net/netxen/netxen_nic_ctx.c index 2a8ef5f..f26e547 100644 --- a/drivers/net/netxen/netxen_nic_ctx.c +++ b/drivers/net/netxen/netxen_nic_ctx.c @@ -669,13 +669,15 @@ int netxen_alloc_hw_resources(struct netxen_adapter *adapter) } sds_ring->desc_head = (struct status_desc *)addr; - sds_ring->crb_sts_consumer = - netxen_get_ioaddr(adapter, - recv_crb_registers[port].crb_sts_consumer[ring]); + if (NX_IS_REVISION_P2(adapter->ahw.revision_id)) { + sds_ring->crb_sts_consumer = + netxen_get_ioaddr(adapter, + recv_crb_registers[port].crb_sts_consumer[ring]); - sds_ring->crb_intr_mask = - netxen_get_ioaddr(adapter, - recv_crb_registers[port].sw_int_mask[ring]); + sds_ring->crb_intr_mask = + netxen_get_ioaddr(adapter, + recv_crb_registers[port].sw_int_mask[ring]); + } } diff --git a/drivers/net/netxen/netxen_nic_init.c b/drivers/net/netxen/netxen_nic_init.c index 1c63610e..7eb925a 100644 --- a/drivers/net/netxen/netxen_nic_init.c +++ b/drivers/net/netxen/netxen_nic_init.c @@ -761,7 +761,7 @@ nx_get_bios_version(struct netxen_adapter *adapter) if (adapter->fw_type == NX_UNIFIED_ROMIMAGE) { bios_ver = cpu_to_le32(*((u32 *) (&fw->data[prd_off]) + NX_UNI_BIOS_VERSION_OFF)); - return (bios_ver << 24) + ((bios_ver >> 8) & 0xff00) + + return (bios_ver << 16) + ((bios_ver >> 8) & 0xff00) + (bios_ver >> 24); } else return cpu_to_le32(*(u32 *)&fw->data[NX_BIOS_VERSION_OFFSET]); diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index 9a7a0f3..01808b2 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -604,16 +604,14 @@ netxen_cleanup_pci_map(struct netxen_adapter *adapter) static int netxen_setup_pci_map(struct netxen_adapter *adapter) { - void __iomem *mem_ptr0 = NULL; - void __iomem *mem_ptr1 = NULL; - void __iomem *mem_ptr2 = NULL; void __iomem *db_ptr = NULL; resource_size_t mem_base, db_base; - unsigned long mem_len, db_len = 0, pci_len0 = 0; + unsigned long mem_len, db_len = 0; struct pci_dev *pdev = adapter->pdev; int pci_func = adapter->ahw.pci_func; + struct netxen_hardware_context *ahw = &adapter->ahw; int err = 0; @@ -630,24 +628,40 @@ netxen_setup_pci_map(struct netxen_adapter *adapter) /* 128 Meg of memory */ if (mem_len == NETXEN_PCI_128MB_SIZE) { - mem_ptr0 = ioremap(mem_base, FIRST_PAGE_GROUP_SIZE); - mem_ptr1 = ioremap(mem_base + SECOND_PAGE_GROUP_START, + + ahw->pci_base0 = ioremap(mem_base, FIRST_PAGE_GROUP_SIZE); + ahw->pci_base1 = ioremap(mem_base + SECOND_PAGE_GROUP_START, SECOND_PAGE_GROUP_SIZE); - mem_ptr2 = ioremap(mem_base + THIRD_PAGE_GROUP_START, + ahw->pci_base2 = ioremap(mem_base + THIRD_PAGE_GROUP_START, THIRD_PAGE_GROUP_SIZE); - pci_len0 = FIRST_PAGE_GROUP_SIZE; + if (ahw->pci_base0 == NULL || ahw->pci_base1 == NULL || + ahw->pci_base2 == NULL) { + dev_err(&pdev->dev, "failed to map PCI bar 0\n"); + err = -EIO; + goto err_out; + } + + ahw->pci_len0 = FIRST_PAGE_GROUP_SIZE; + } else if (mem_len == NETXEN_PCI_32MB_SIZE) { - mem_ptr1 = ioremap(mem_base, SECOND_PAGE_GROUP_SIZE); - mem_ptr2 = ioremap(mem_base + THIRD_PAGE_GROUP_START - + + ahw->pci_base1 = ioremap(mem_base, SECOND_PAGE_GROUP_SIZE); + ahw->pci_base2 = ioremap(mem_base + THIRD_PAGE_GROUP_START - SECOND_PAGE_GROUP_START, THIRD_PAGE_GROUP_SIZE); + if (ahw->pci_base1 == NULL || ahw->pci_base2 == NULL) { + dev_err(&pdev->dev, "failed to map PCI bar 0\n"); + err = -EIO; + goto err_out; + } + } else if (mem_len == NETXEN_PCI_2MB_SIZE) { - mem_ptr0 = pci_ioremap_bar(pdev, 0); - if (mem_ptr0 == NULL) { + ahw->pci_base0 = pci_ioremap_bar(pdev, 0); + if (ahw->pci_base0 == NULL) { dev_err(&pdev->dev, "failed to map PCI bar 0\n"); return -EIO; } - pci_len0 = mem_len; + ahw->pci_len0 = mem_len; } else { return -EIO; } @@ -656,11 +670,6 @@ netxen_setup_pci_map(struct netxen_adapter *adapter) dev_info(&pdev->dev, "%dMB memory map\n", (int)(mem_len>>20)); - adapter->ahw.pci_base0 = mem_ptr0; - adapter->ahw.pci_len0 = pci_len0; - adapter->ahw.pci_base1 = mem_ptr1; - adapter->ahw.pci_base2 = mem_ptr2; - if (NX_IS_REVISION_P3P(adapter->ahw.revision_id)) { adapter->ahw.ocm_win_crb = netxen_get_ioaddr(adapter, NETXEN_PCIX_PS_REG(PCIX_OCM_WINDOW_REG(pci_func))); diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c index 776cad2..1028fcb 100644 --- a/drivers/net/pcmcia/pcnet_cs.c +++ b/drivers/net/pcmcia/pcnet_cs.c @@ -1549,6 +1549,7 @@ static struct pcmcia_device_id pcnet_ids[] = { PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x021b, 0x0101), PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x08a1, 0xc0ab), PCMCIA_PFC_DEVICE_PROD_ID12(0, "AnyCom", "Fast Ethernet + 56K COMBO", 0x578ba6e7, 0xb0ac62c4), + PCMCIA_PFC_DEVICE_PROD_ID12(0, "ATKK", "LM33-PCM-T", 0xba9eb7e2, 0x077c174e), PCMCIA_PFC_DEVICE_PROD_ID12(0, "D-Link", "DME336T", 0x1a424a1c, 0xb23897ff), PCMCIA_PFC_DEVICE_PROD_ID12(0, "Grey Cell", "GCS3000", 0x2a151fac, 0x48b932ae), PCMCIA_PFC_DEVICE_PROD_ID12(0, "Linksys", "EtherFast 10&100 + 56K PC Card (PCMLM56)", 0x0733cc81, 0xb3765033), @@ -1740,7 +1741,7 @@ static struct pcmcia_device_id pcnet_ids[] = { PCMCIA_MFC_DEVICE_CIS_PROD_ID12(0, "DAYNA COMMUNICATIONS", "LAN AND MODEM MULTIFUNCTION", 0x8fdf8f89, 0xdd5ed9e8, "cis/DP83903.cis"), PCMCIA_MFC_DEVICE_CIS_PROD_ID4(0, "NSC MF LAN/Modem", 0x58fc6056, "cis/DP83903.cis"), PCMCIA_MFC_DEVICE_CIS_MANF_CARD(0, 0x0175, 0x0000, "cis/DP83903.cis"), - PCMCIA_DEVICE_CIS_MANF_CARD(0xc00f, 0x0002, "cis/LA-PCM.cis"), + PCMCIA_DEVICE_CIS_PROD_ID12("Allied Telesis,K.K", "Ethernet LAN Card", 0x2ad62f3c, 0x9fd2f0a2, "cis/LA-PCM.cis"), PCMCIA_DEVICE_CIS_PROD_ID12("KTI", "PE520 PLUS", 0xad180345, 0x9d58d392, "cis/PE520.cis"), PCMCIA_DEVICE_CIS_PROD_ID12("NDC", "Ethernet", 0x01c43ae1, 0x00b2e941, "cis/NE2K.cis"), PCMCIA_DEVICE_CIS_PROD_ID12("PMX ", "PE-200", 0x34f3f1c8, 0x10b59f8c, "cis/PE-200.cis"), diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 9d3ebf3e97..9674005 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -186,8 +186,13 @@ static DEFINE_PCI_DEVICE_TABLE(rtl8169_pci_tbl) = { MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl); -static int rx_copybreak = 200; -static int use_dac = -1; +/* + * we set our copybreak very high so that we don't have + * to allocate 16k frames all the time (see note in + * rtl8169_open() + */ +static int rx_copybreak = 16383; +static int use_dac; static struct { u32 msg_enable; } debug = { -1 }; @@ -511,8 +516,7 @@ MODULE_DESCRIPTION("RealTek RTL-8169 Gigabit Ethernet driver"); module_param(rx_copybreak, int, 0); MODULE_PARM_DESC(rx_copybreak, "Copy breakpoint for copy-only-tiny-frames"); module_param(use_dac, int, 0); -MODULE_PARM_DESC(use_dac, "Enable PCI DAC. -1 defaults on for PCI Express only." -" Unsafe on 32 bit PCI slot."); +MODULE_PARM_DESC(use_dac, "Enable PCI DAC. Unsafe on 32 bit PCI slot."); module_param_named(debug, debug.msg_enable, int, 0); MODULE_PARM_DESC(debug, "Debug verbosity level (0=none, ..., 16=all)"); MODULE_LICENSE("GPL"); @@ -2821,8 +2825,8 @@ static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr) spin_lock_irq(&tp->lock); RTL_W8(Cfg9346, Cfg9346_Unlock); - RTL_W32(MAC0, low); RTL_W32(MAC4, high); + RTL_W32(MAC0, low); RTL_W8(Cfg9346, Cfg9346_Lock); spin_unlock_irq(&tp->lock); @@ -2974,7 +2978,6 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) void __iomem *ioaddr; unsigned int i; int rc; - int this_use_dac = use_dac; if (netif_msg_drv(&debug)) { printk(KERN_INFO "%s Gigabit Ethernet driver %s loaded\n", @@ -3040,17 +3043,8 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->cp_cmd = PCIMulRW | RxChkSum; - tp->pcie_cap = pci_find_capability(pdev, PCI_CAP_ID_EXP); - if (!tp->pcie_cap) - netif_info(tp, probe, dev, "no PCI Express capability\n"); - - if (this_use_dac < 0) - this_use_dac = tp->pcie_cap != 0; - if ((sizeof(dma_addr_t) > 4) && - this_use_dac && - !pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - netif_info(tp, probe, dev, "using 64-bit DMA\n"); + !pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) && use_dac) { tp->cp_cmd |= PCIDAC; dev->features |= NETIF_F_HIGHDMA; } else { @@ -3069,6 +3063,10 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_free_res_4; } + tp->pcie_cap = pci_find_capability(pdev, PCI_CAP_ID_EXP); + if (!tp->pcie_cap) + netif_info(tp, probe, dev, "no PCI Express capability\n"); + RTL_W16(IntrMask, 0x0000); /* Soft reset the chip. */ @@ -3224,9 +3222,13 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev) } static void rtl8169_set_rxbufsize(struct rtl8169_private *tp, - struct net_device *dev) + unsigned int mtu) { - unsigned int max_frame = dev->mtu + VLAN_ETH_HLEN + ETH_FCS_LEN; + unsigned int max_frame = mtu + VLAN_ETH_HLEN + ETH_FCS_LEN; + + if (max_frame != 16383) + printk(KERN_WARNING "WARNING! Changing of MTU on this NIC" + "May lead to frame reception errors!\n"); tp->rx_buf_sz = (max_frame > RX_BUF_SIZE) ? max_frame : RX_BUF_SIZE; } @@ -3238,7 +3240,17 @@ static int rtl8169_open(struct net_device *dev) int retval = -ENOMEM; - rtl8169_set_rxbufsize(tp, dev); + /* + * Note that we use a magic value here, its wierd I know + * its done because, some subset of rtl8169 hardware suffers from + * a problem in which frames received that are longer than + * the size set in RxMaxSize register return garbage sizes + * when received. To avoid this we need to turn off filtering, + * which is done by setting a value of 16383 in the RxMaxSize register + * and allocating 16k frames to handle the largest possible rx value + * thats what the magic math below does. + */ + rtl8169_set_rxbufsize(tp, 16383 - VLAN_ETH_HLEN - ETH_FCS_LEN); /* * Rx and Tx desscriptors needs 256 bytes alignment. @@ -3891,7 +3903,7 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu) rtl8169_down(dev); - rtl8169_set_rxbufsize(tp, dev); + rtl8169_set_rxbufsize(tp, dev->mtu); ret = rtl8169_init_ring(dev); if (ret < 0) @@ -4754,8 +4766,8 @@ static void rtl_set_rx_mode(struct net_device *dev) mc_filter[1] = swab32(data); } - RTL_W32(MAR0 + 0, mc_filter[0]); RTL_W32(MAR0 + 4, mc_filter[1]); + RTL_W32(MAR0 + 0, mc_filter[0]); RTL_W32(RxConfig, tmp); diff --git a/drivers/net/tulip/uli526x.c b/drivers/net/tulip/uli526x.c index 0ab05af..a4f09d4 100644 --- a/drivers/net/tulip/uli526x.c +++ b/drivers/net/tulip/uli526x.c @@ -851,13 +851,15 @@ static void uli526x_rx_packet(struct net_device *dev, struct uli526x_board_info if ( !(rdes0 & 0x8000) || ((db->cr6_data & CR6_PM) && (rxlen>6)) ) { + struct sk_buff *new_skb = NULL; + skb = rxptr->rx_skb_ptr; /* Good packet, send to upper layer */ /* Shorst packet used new SKB */ - if ( (rxlen < RX_COPY_SIZE) && - ( (skb = dev_alloc_skb(rxlen + 2) ) - != NULL) ) { + if ((rxlen < RX_COPY_SIZE) && + (((new_skb = dev_alloc_skb(rxlen + 2)) != NULL))) { + skb = new_skb; /* size less than COPY_SIZE, allocate a rxlen SKB */ skb_reserve(skb, 2); /* 16byte align */ memcpy(skb_put(skb, rxlen), diff --git a/drivers/serial/serial_cs.c b/drivers/serial/serial_cs.c index e91db4b..175d202 100644 --- a/drivers/serial/serial_cs.c +++ b/drivers/serial/serial_cs.c @@ -745,6 +745,7 @@ static struct pcmcia_device_id serial_ids[] = { PCMCIA_PFC_DEVICE_PROD_ID13(1, "Xircom", "REM10", 0x2e3ee845, 0x76df1d29), PCMCIA_PFC_DEVICE_PROD_ID13(1, "Xircom", "XEM5600", 0x2e3ee845, 0xf1403719), PCMCIA_PFC_DEVICE_PROD_ID12(1, "AnyCom", "Fast Ethernet + 56K COMBO", 0x578ba6e7, 0xb0ac62c4), + PCMCIA_PFC_DEVICE_PROD_ID12(1, "ATKK", "LM33-PCM-T", 0xba9eb7e2, 0x077c174e), PCMCIA_PFC_DEVICE_PROD_ID12(1, "D-Link", "DME336T", 0x1a424a1c, 0xb23897ff), PCMCIA_PFC_DEVICE_PROD_ID12(1, "Gateway 2000", "XJEM3336", 0xdd9989be, 0x662c394c), PCMCIA_PFC_DEVICE_PROD_ID12(1, "Grey Cell", "GCS3000", 0x2a151fac, 0x48b932ae), diff --git a/drivers/staging/et131x/et1310_mac.c b/drivers/staging/et131x/et1310_mac.c index a292b1e..737a9f54 100644 --- a/drivers/staging/et131x/et1310_mac.c +++ b/drivers/staging/et131x/et1310_mac.c @@ -226,7 +226,7 @@ void ConfigMACRegs2(struct et131x_adapter *etdev) } /* Enable TXMAC */ - ctl |= 0x05; /* TX mac enable, FC disable */ + ctl |= 0x09; /* TX mac enable, FC disable */ writel(ctl, &etdev->regs->txmac.ctl); /* Ready to start the RXDMA/TXDMA engine */ diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 23bb0ce..ce8ef61 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -919,6 +919,10 @@ static int context_is_writeable_or_written(struct inode *inode, /* * We are only allowed to write into/dirty the page if the page is * clean, or already dirty within the same snap context. + * + * called with page locked. + * return success with page locked, + * or any failure (incl -EAGAIN) with page unlocked. */ static int ceph_update_writeable_page(struct file *file, loff_t pos, unsigned len, @@ -961,9 +965,11 @@ retry_locked: snapc = ceph_get_snap_context((void *)page->private); unlock_page(page); ceph_queue_writeback(inode); - wait_event_interruptible(ci->i_cap_wq, + r = wait_event_interruptible(ci->i_cap_wq, context_is_writeable_or_written(inode, snapc)); ceph_put_snap_context(snapc); + if (r == -ERESTARTSYS) + return r; return -EAGAIN; } @@ -1035,7 +1041,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, int r; do { - /* get a page*/ + /* get a page */ page = grab_cache_page_write_begin(mapping, index, 0); if (!page) return -ENOMEM; diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index f031842..8d8a849 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c @@ -28,6 +28,12 @@ static int ceph_x_is_authenticated(struct ceph_auth_client *ac) return (ac->want_keys & xi->have_keys) == ac->want_keys; } +static int ceph_x_encrypt_buflen(int ilen) +{ + return sizeof(struct ceph_x_encrypt_header) + ilen + 16 + + sizeof(u32); +} + static int ceph_x_encrypt(struct ceph_crypto_key *secret, void *ibuf, int ilen, void *obuf, size_t olen) { @@ -150,6 +156,11 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, struct timespec validity; struct ceph_crypto_key old_key; void *tp, *tpend; + struct ceph_timespec new_validity; + struct ceph_crypto_key new_session_key; + struct ceph_buffer *new_ticket_blob; + unsigned long new_expires, new_renew_after; + u64 new_secret_id; ceph_decode_need(&p, end, sizeof(u32) + 1, bad); @@ -182,16 +193,16 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, goto bad; memcpy(&old_key, &th->session_key, sizeof(old_key)); - ret = ceph_crypto_key_decode(&th->session_key, &dp, dend); + ret = ceph_crypto_key_decode(&new_session_key, &dp, dend); if (ret) goto out; - ceph_decode_copy(&dp, &th->validity, sizeof(th->validity)); - ceph_decode_timespec(&validity, &th->validity); - th->expires = get_seconds() + validity.tv_sec; - th->renew_after = th->expires - (validity.tv_sec / 4); - dout(" expires=%lu renew_after=%lu\n", th->expires, - th->renew_after); + ceph_decode_copy(&dp, &new_validity, sizeof(new_validity)); + ceph_decode_timespec(&validity, &new_validity); + new_expires = get_seconds() + validity.tv_sec; + new_renew_after = new_expires - (validity.tv_sec / 4); + dout(" expires=%lu renew_after=%lu\n", new_expires, + new_renew_after); /* ticket blob for service */ ceph_decode_8_safe(&p, end, is_enc, bad); @@ -216,10 +227,21 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, dout(" ticket blob is %d bytes\n", dlen); ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); struct_v = ceph_decode_8(&tp); - th->secret_id = ceph_decode_64(&tp); - ret = ceph_decode_buffer(&th->ticket_blob, &tp, tpend); + new_secret_id = ceph_decode_64(&tp); + ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); if (ret) goto out; + + /* all is well, update our ticket */ + ceph_crypto_key_destroy(&th->session_key); + if (th->ticket_blob) + ceph_buffer_put(th->ticket_blob); + th->session_key = new_session_key; + th->ticket_blob = new_ticket_blob; + th->validity = new_validity; + th->secret_id = new_secret_id; + th->expires = new_expires; + th->renew_after = new_renew_after; dout(" got ticket service %d (%s) secret_id %lld len %d\n", type, ceph_entity_type_name(type), th->secret_id, (int)th->ticket_blob->vec.iov_len); @@ -242,7 +264,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, struct ceph_x_ticket_handler *th, struct ceph_x_authorizer *au) { - int len; + int maxlen; struct ceph_x_authorize_a *msg_a; struct ceph_x_authorize_b msg_b; void *p, *end; @@ -253,15 +275,15 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, dout("build_authorizer for %s %p\n", ceph_entity_type_name(th->service), au); - len = sizeof(*msg_a) + sizeof(msg_b) + sizeof(u32) + - ticket_blob_len + 16; - dout(" need len %d\n", len); - if (au->buf && au->buf->alloc_len < len) { + maxlen = sizeof(*msg_a) + sizeof(msg_b) + + ceph_x_encrypt_buflen(ticket_blob_len); + dout(" need len %d\n", maxlen); + if (au->buf && au->buf->alloc_len < maxlen) { ceph_buffer_put(au->buf); au->buf = NULL; } if (!au->buf) { - au->buf = ceph_buffer_new(len, GFP_NOFS); + au->buf = ceph_buffer_new(maxlen, GFP_NOFS); if (!au->buf) return -ENOMEM; } @@ -296,6 +318,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, au->buf->vec.iov_len = p - au->buf->vec.iov_base; dout(" built authorizer nonce %llx len %d\n", au->nonce, (int)au->buf->vec.iov_len); + BUG_ON(au->buf->vec.iov_len > maxlen); return 0; out_buf: diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index db122bb..7d0a0d0 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1407,6 +1407,7 @@ static int try_nonblocking_invalidate(struct inode *inode) */ void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_session *session) + __releases(session->s_mutex) { struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); struct ceph_mds_client *mdsc = &client->mdsc; @@ -1414,7 +1415,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_cap *cap; int file_wanted, used; int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ - int drop_session_lock = session ? 0 : 1; int issued, implemented, want, retain, revoking, flushing = 0; int mds = -1; /* keep track of how far we've gone through i_caps list to avoid an infinite loop on retry */ @@ -1639,7 +1639,7 @@ ack: if (queue_invalidate) ceph_queue_invalidate(inode); - if (session && drop_session_lock) + if (session) mutex_unlock(&session->s_mutex); if (took_snap_rwsem) up_read(&mdsc->snap_rwsem); @@ -2195,18 +2195,19 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, * Handle a cap GRANT message from the MDS. (Note that a GRANT may * actually be a revocation if it specifies a smaller cap set.) * - * caller holds s_mutex. + * caller holds s_mutex and i_lock, we drop both. + * * return value: * 0 - ok * 1 - check_caps on auth cap only (writeback) * 2 - check_caps (ack revoke) */ -static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, - struct ceph_mds_session *session, - struct ceph_cap *cap, - struct ceph_buffer *xattr_buf) +static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, + struct ceph_mds_session *session, + struct ceph_cap *cap, + struct ceph_buffer *xattr_buf) __releases(inode->i_lock) - + __releases(session->s_mutex) { struct ceph_inode_info *ci = ceph_inode(inode); int mds = session->s_mds; @@ -2216,7 +2217,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, u64 size = le64_to_cpu(grant->size); u64 max_size = le64_to_cpu(grant->max_size); struct timespec mtime, atime, ctime; - int reply = 0; + int check_caps = 0; int wake = 0; int writeback = 0; int revoked_rdcache = 0; @@ -2329,11 +2330,12 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER) writeback = 1; /* will delay ack */ else if (dirty & ~newcaps) - reply = 1; /* initiate writeback in check_caps */ + check_caps = 1; /* initiate writeback in check_caps */ else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 || revoked_rdcache) - reply = 2; /* send revoke ack in check_caps */ + check_caps = 2; /* send revoke ack in check_caps */ cap->issued = newcaps; + cap->implemented |= newcaps; } else if (cap->issued == newcaps) { dout("caps unchanged: %s -> %s\n", ceph_cap_string(cap->issued), ceph_cap_string(newcaps)); @@ -2346,6 +2348,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, * pending revocation */ wake = 1; } + BUG_ON(cap->issued & ~cap->implemented); spin_unlock(&inode->i_lock); if (writeback) @@ -2359,7 +2362,14 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, ceph_queue_invalidate(inode); if (wake) wake_up(&ci->i_cap_wq); - return reply; + + if (check_caps == 1) + ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY, + session); + else if (check_caps == 2) + ceph_check_caps(ci, CHECK_CAPS_NODELAY, session); + else + mutex_unlock(&session->s_mutex); } /* @@ -2548,9 +2558,8 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, ci->i_cap_exporting_issued = cap->issued; } __ceph_remove_cap(cap); - } else { - WARN_ON(!cap); } + /* else, we already released it */ spin_unlock(&inode->i_lock); } @@ -2621,9 +2630,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, u64 cap_id; u64 size, max_size; u64 tid; - int check_caps = 0; void *snaptrace; - int r; dout("handle_caps from mds%d\n", mds); @@ -2668,8 +2675,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, case CEPH_CAP_OP_IMPORT: handle_cap_import(mdsc, inode, h, session, snaptrace, le32_to_cpu(h->snap_trace_len)); - check_caps = 1; /* we may have sent a RELEASE to the old auth */ - goto done; + ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, + session); + goto done_unlocked; } /* the rest require a cap */ @@ -2686,16 +2694,8 @@ void ceph_handle_caps(struct ceph_mds_session *session, switch (op) { case CEPH_CAP_OP_REVOKE: case CEPH_CAP_OP_GRANT: - r = handle_cap_grant(inode, h, session, cap, msg->middle); - if (r == 1) - ceph_check_caps(ceph_inode(inode), - CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY, - session); - else if (r == 2) - ceph_check_caps(ceph_inode(inode), - CHECK_CAPS_NODELAY, - session); - break; + handle_cap_grant(inode, h, session, cap, msg->middle); + goto done_unlocked; case CEPH_CAP_OP_FLUSH_ACK: handle_cap_flush_ack(inode, tid, h, session, cap); @@ -2713,9 +2713,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, done: mutex_unlock(&session->s_mutex); - - if (check_caps) - ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, NULL); +done_unlocked: if (inode) iput(inode); return; @@ -2838,11 +2836,18 @@ int ceph_encode_inode_release(void **p, struct inode *inode, struct ceph_cap *cap; struct ceph_mds_request_release *rel = *p; int ret = 0; - - dout("encode_inode_release %p mds%d drop %s unless %s\n", inode, - mds, ceph_cap_string(drop), ceph_cap_string(unless)); + int used = 0; spin_lock(&inode->i_lock); + used = __ceph_caps_used(ci); + + dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode, + mds, ceph_cap_string(used), ceph_cap_string(drop), + ceph_cap_string(unless)); + + /* only drop unused caps */ + drop &= ~used; + cap = __get_cap_for_mds(ci, mds); if (cap && __cap_is_valid(cap)) { if (force || diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 5107384..8a9116e 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -288,8 +288,10 @@ more: CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR; /* discard old result, if any */ - if (fi->last_readdir) + if (fi->last_readdir) { ceph_mdsc_put_request(fi->last_readdir); + fi->last_readdir = NULL; + } /* requery frag tree, as the frag topology may have changed */ frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 7abe1ae..aca82d5 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -378,6 +378,22 @@ void ceph_destroy_inode(struct inode *inode) ceph_queue_caps_release(inode); + /* + * we may still have a snap_realm reference if there are stray + * caps in i_cap_exporting_issued or i_snap_caps. + */ + if (ci->i_snap_realm) { + struct ceph_mds_client *mdsc = + &ceph_client(ci->vfs_inode.i_sb)->mdsc; + struct ceph_snap_realm *realm = ci->i_snap_realm; + + dout(" dropping residual ref to snap realm %p\n", realm); + spin_lock(&realm->inodes_with_caps_lock); + list_del_init(&ci->i_snap_realm_item); + spin_unlock(&realm->inodes_with_caps_lock); + ceph_put_snap_realm(mdsc, realm); + } + kfree(ci->i_symlink); while ((n = rb_first(&ci->i_fragtree)) != NULL) { frag = rb_entry(n, struct ceph_inode_frag, node); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a260010..5c7920b 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -328,6 +328,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, struct ceph_mds_session *s; s = kzalloc(sizeof(*s), GFP_NOFS); + if (!s) + return ERR_PTR(-ENOMEM); s->s_mdsc = mdsc; s->s_mds = mds; s->s_state = CEPH_MDS_SESSION_NEW; @@ -529,7 +531,7 @@ static void __unregister_request(struct ceph_mds_client *mdsc, { dout("__unregister_request %p tid %lld\n", req, req->r_tid); rb_erase(&req->r_node, &mdsc->request_tree); - ceph_mdsc_put_request(req); + RB_CLEAR_NODE(&req->r_node); if (req->r_unsafe_dir) { struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir); @@ -538,6 +540,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc, list_del_init(&req->r_unsafe_dir_item); spin_unlock(&ci->i_unsafe_lock); } + + ceph_mdsc_put_request(req); } /* @@ -862,6 +866,7 @@ static int send_renew_caps(struct ceph_mds_client *mdsc, if (time_after_eq(jiffies, session->s_cap_ttl) && time_after_eq(session->s_cap_ttl, session->s_renew_requested)) pr_info("mds%d caps stale\n", session->s_mds); + session->s_renew_requested = jiffies; /* do not try to renew caps until a recovering mds has reconnected * with its clients. */ @@ -874,7 +879,6 @@ static int send_renew_caps(struct ceph_mds_client *mdsc, dout("send_renew_caps to mds%d (%s)\n", session->s_mds, ceph_mds_state_name(state)); - session->s_renew_requested = jiffies; msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, ++session->s_renew_seq); if (IS_ERR(msg)) @@ -1566,8 +1570,13 @@ static int __do_request(struct ceph_mds_client *mdsc, /* get, open session */ session = __ceph_lookup_mds_session(mdsc, mds); - if (!session) + if (!session) { session = register_session(mdsc, mds); + if (IS_ERR(session)) { + err = PTR_ERR(session); + goto finish; + } + } dout("do_request mds%d session %p state %s\n", mds, session, session_state_name(session->s_state)); if (session->s_state != CEPH_MDS_SESSION_OPEN && @@ -1770,7 +1779,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) dout("handle_reply %p\n", req); /* correct session? */ - if (!req->r_session && req->r_session != session) { + if (req->r_session != session) { pr_err("mdsc_handle_reply got %llu on session mds%d" " not mds%d\n", tid, session->s_mds, req->r_session ? req->r_session->s_mds : -1); @@ -2682,29 +2691,41 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) */ static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) { - struct ceph_mds_request *req = NULL; + struct ceph_mds_request *req = NULL, *nextreq; struct rb_node *n; mutex_lock(&mdsc->mutex); dout("wait_unsafe_requests want %lld\n", want_tid); +restart: req = __get_oldest_req(mdsc); while (req && req->r_tid <= want_tid) { + /* find next request */ + n = rb_next(&req->r_node); + if (n) + nextreq = rb_entry(n, struct ceph_mds_request, r_node); + else + nextreq = NULL; if ((req->r_op & CEPH_MDS_OP_WRITE)) { /* write op */ ceph_mdsc_get_request(req); + if (nextreq) + ceph_mdsc_get_request(nextreq); mutex_unlock(&mdsc->mutex); dout("wait_unsafe_requests wait on %llu (want %llu)\n", req->r_tid, want_tid); wait_for_completion(&req->r_safe_completion); mutex_lock(&mdsc->mutex); - n = rb_next(&req->r_node); ceph_mdsc_put_request(req); - } else { - n = rb_next(&req->r_node); + if (!nextreq) + break; /* next dne before, so we're done! */ + if (RB_EMPTY_NODE(&nextreq->r_node)) { + /* next request was removed from tree */ + ceph_mdsc_put_request(nextreq); + goto restart; + } + ceph_mdsc_put_request(nextreq); /* won't go away */ } - if (!n) - break; - req = rb_entry(n, struct ceph_mds_request, r_node); + req = nextreq; } mutex_unlock(&mdsc->mutex); dout("wait_unsafe_requests done\n"); diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 781656a..a32f0f8 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -366,6 +366,14 @@ void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) } /* + * return true if this connection ever successfully opened + */ +bool ceph_con_opened(struct ceph_connection *con) +{ + return con->connect_seq > 0; +} + +/* * generic get/put */ struct ceph_connection *ceph_con_get(struct ceph_connection *con) @@ -830,13 +838,6 @@ static void prepare_read_connect(struct ceph_connection *con) con->in_base_pos = 0; } -static void prepare_read_connect_retry(struct ceph_connection *con) -{ - dout("prepare_read_connect_retry %p\n", con); - con->in_base_pos = strlen(CEPH_BANNER) + sizeof(con->actual_peer_addr) - + sizeof(con->peer_addr_for_me); -} - static void prepare_read_ack(struct ceph_connection *con) { dout("prepare_read_ack %p\n", con); @@ -1146,7 +1147,7 @@ static int process_connect(struct ceph_connection *con) } con->auth_retry = 1; prepare_write_connect(con->msgr, con, 0); - prepare_read_connect_retry(con); + prepare_read_connect(con); break; case CEPH_MSGR_TAG_RESETSESSION: @@ -1843,8 +1844,6 @@ static void ceph_fault(struct ceph_connection *con) goto out; } - clear_bit(BUSY, &con->state); /* to avoid an improbable race */ - mutex_lock(&con->mutex); if (test_bit(CLOSED, &con->state)) goto out_unlock; diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index 4caaa59..a343dae 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h @@ -223,6 +223,7 @@ extern void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con); extern void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr); +extern bool ceph_con_opened(struct ceph_connection *con); extern void ceph_con_close(struct ceph_connection *con); extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg); extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg); diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index dbe63db9..c7b4ded 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -413,11 +413,22 @@ static void remove_old_osds(struct ceph_osd_client *osdc, int remove_all) */ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) { + struct ceph_osd_request *req; int ret = 0; dout("__reset_osd %p osd%d\n", osd, osd->o_osd); if (list_empty(&osd->o_requests)) { __remove_osd(osdc, osd); + } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd], + &osd->o_con.peer_addr, + sizeof(osd->o_con.peer_addr)) == 0 && + !ceph_con_opened(&osd->o_con)) { + dout(" osd addr hasn't changed and connection never opened," + " letting msgr retry"); + /* touch each r_stamp for handle_timeout()'s benfit */ + list_for_each_entry(req, &osd->o_requests, r_osd_item) + req->r_stamp = jiffies; + ret = -EAGAIN; } else { ceph_con_close(&osd->o_con); ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]); @@ -633,7 +644,7 @@ static int __send_request(struct ceph_osd_client *osdc, reqhead->flags |= cpu_to_le32(req->r_flags); /* e.g., RETRY */ reqhead->reassert_version = req->r_reassert_version; - req->r_sent_stamp = jiffies; + req->r_stamp = jiffies; list_move_tail(&osdc->req_lru, &req->r_req_lru_item); ceph_msg_get(req->r_request); /* send consumes a ref */ @@ -660,7 +671,7 @@ static void handle_timeout(struct work_struct *work) unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ; unsigned long keepalive = osdc->client->mount_args->osd_keepalive_timeout * HZ; - unsigned long last_sent = 0; + unsigned long last_stamp = 0; struct rb_node *p; struct list_head slow_osds; @@ -697,12 +708,12 @@ static void handle_timeout(struct work_struct *work) req = list_entry(osdc->req_lru.next, struct ceph_osd_request, r_req_lru_item); - if (time_before(jiffies, req->r_sent_stamp + timeout)) + if (time_before(jiffies, req->r_stamp + timeout)) break; - BUG_ON(req == last_req && req->r_sent_stamp == last_sent); + BUG_ON(req == last_req && req->r_stamp == last_stamp); last_req = req; - last_sent = req->r_sent_stamp; + last_stamp = req->r_stamp; osd = req->r_osd; BUG_ON(!osd); @@ -718,7 +729,7 @@ static void handle_timeout(struct work_struct *work) */ INIT_LIST_HEAD(&slow_osds); list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) { - if (time_before(jiffies, req->r_sent_stamp + keepalive)) + if (time_before(jiffies, req->r_stamp + keepalive)) break; osd = req->r_osd; @@ -862,7 +873,9 @@ static int __kick_requests(struct ceph_osd_client *osdc, dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1); if (kickosd) { - __reset_osd(osdc, kickosd); + err = __reset_osd(osdc, kickosd); + if (err == -EAGAIN) + return 1; } else { for (p = rb_first(&osdc->osds); p; p = n) { struct ceph_osd *osd = @@ -913,7 +926,7 @@ static int __kick_requests(struct ceph_osd_client *osdc, kick: dout("kicking %p tid %llu osd%d\n", req, req->r_tid, - req->r_osd->o_osd); + req->r_osd ? req->r_osd->o_osd : -1); req->r_flags |= CEPH_OSD_FLAG_RETRY; err = __send_request(osdc, req); if (err) { diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h index 1b1a3ca..b075991 100644 --- a/fs/ceph/osd_client.h +++ b/fs/ceph/osd_client.h @@ -70,7 +70,7 @@ struct ceph_osd_request { char r_oid[40]; /* object name */ int r_oid_len; - unsigned long r_sent_stamp; + unsigned long r_stamp; /* send OR check time */ bool r_resend; /* msg send failed, needs retry */ struct ceph_file_layout r_file_layout; diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index b83f269..d82fe87 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c @@ -480,6 +480,14 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) return NULL; } +void __decode_pool(void **p, struct ceph_pg_pool_info *pi) +{ + ceph_decode_copy(p, &pi->v, sizeof(pi->v)); + calc_pg_masks(pi); + *p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64); + *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; +} + /* * decode a full map. */ @@ -526,12 +534,8 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ev, CEPH_PG_POOL_VERSION); goto bad; } - ceph_decode_copy(p, &pi->v, sizeof(pi->v)); + __decode_pool(p, pi); __insert_pg_pool(&map->pg_pools, pi); - calc_pg_masks(pi); - *p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64); - *p += le32_to_cpu(pi->v.num_removed_snap_intervals) - * sizeof(u64) * 2; } ceph_decode_32_safe(p, end, map->pool_max, bad); @@ -714,8 +718,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, pi->id = pool; __insert_pg_pool(&map->pg_pools, pi); } - ceph_decode_copy(p, &pi->v, sizeof(pi->v)); - calc_pg_masks(pi); + __decode_pool(p, pi); } /* old_pool */ diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index bf2a5f3..df04e21 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -314,9 +314,9 @@ static int build_snap_context(struct ceph_snap_realm *realm) because we rebuild_snap_realms() works _downward_ in hierarchy after each update.) */ if (realm->cached_context && - realm->cached_context->seq <= realm->seq && + realm->cached_context->seq == realm->seq && (!parent || - realm->cached_context->seq <= parent->cached_context->seq)) { + realm->cached_context->seq >= parent->cached_context->seq)) { dout("build_snap_context %llx %p: %p seq %lld (%d snaps)" " (unchanged)\n", realm->ino, realm, realm->cached_context, @@ -818,7 +818,9 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, * queued (again) by ceph_update_snap_trace() * below. Queue it _now_, under the old context. */ + spin_lock(&realm->inodes_with_caps_lock); list_del_init(&ci->i_snap_realm_item); + spin_unlock(&realm->inodes_with_caps_lock); spin_unlock(&inode->i_lock); ceph_queue_cap_snap(ci, diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index ef9008b..0d0e97e 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -582,7 +582,9 @@ got: inode->i_generation = sbi->s_next_generation++; spin_unlock(&sbi->s_next_gen_lock); - ei->i_state = EXT3_STATE_NEW; + ei->i_state_flags = 0; + ext3_set_inode_state(inode, EXT3_STATE_NEW); + ei->i_extra_isize = (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ? sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 7f920b7..ea33bdf 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2811,7 +2811,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino) inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime); inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; - ei->i_state = 0; + ei->i_state_flags = 0; ei->i_dir_start_lookup = 0; ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); /* We now have enough fields to check if the inode was active or not. diff --git a/fs/fscache/object.c b/fs/fscache/object.c index e513ac5..0b589a9 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -53,7 +53,7 @@ const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = { static void fscache_object_slow_work_put_ref(struct slow_work *); static int fscache_object_slow_work_get_ref(struct slow_work *); static void fscache_object_slow_work_execute(struct slow_work *); -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG static void fscache_object_slow_work_desc(struct slow_work *, struct seq_file *); #endif static void fscache_initialise_object(struct fscache_object *); @@ -69,7 +69,7 @@ const struct slow_work_ops fscache_object_slow_work_ops = { .get_ref = fscache_object_slow_work_get_ref, .put_ref = fscache_object_slow_work_put_ref, .execute = fscache_object_slow_work_execute, -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG .desc = fscache_object_slow_work_desc, #endif }; @@ -364,7 +364,7 @@ static void fscache_object_slow_work_execute(struct slow_work *work) /* * describe an object for slow-work debugging */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG static void fscache_object_slow_work_desc(struct slow_work *work, struct seq_file *m) { diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 313e79a..9f6c928d 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -500,7 +500,7 @@ static void fscache_op_execute(struct slow_work *work) /* * describe an operation for slow-work debugging */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG static void fscache_op_desc(struct slow_work *work, struct seq_file *m) { struct fscache_operation *op = @@ -517,7 +517,7 @@ const struct slow_work_ops fscache_op_slow_work_ops = { .get_ref = fscache_op_get_ref, .put_ref = fscache_op_put_ref, .execute = fscache_op_execute, -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG .desc = fscache_op_desc, #endif }; diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 0501974..8ccf0f8 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -30,6 +30,8 @@ #include "alloc.h" #include "dlmglue.h" #include "file.h" +#include "inode.h" +#include "journal.h" #include "ocfs2_fs.h" #include "xattr.h" @@ -166,6 +168,60 @@ static struct posix_acl *ocfs2_get_acl(struct inode *inode, int type) } /* + * Helper function to set i_mode in memory and disk. Some call paths + * will not have di_bh or a journal handle to pass, in which case it + * will create it's own. + */ +static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh, + handle_t *handle, umode_t new_mode) +{ + int ret, commit_handle = 0; + struct ocfs2_dinode *di; + + if (di_bh == NULL) { + ret = ocfs2_read_inode_block(inode, &di_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + } else + get_bh(di_bh); + + if (handle == NULL) { + handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), + OCFS2_INODE_UPDATE_CREDITS); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + goto out_brelse; + } + + commit_handle = 1; + } + + di = (struct ocfs2_dinode *)di_bh->b_data; + ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + + inode->i_mode = new_mode; + di->i_mode = cpu_to_le16(inode->i_mode); + + ocfs2_journal_dirty(handle, di_bh); + +out_commit: + if (commit_handle) + ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); +out_brelse: + brelse(di_bh); +out: + return ret; +} + +/* * Set the access or default ACL of an inode. */ static int ocfs2_set_acl(handle_t *handle, @@ -193,9 +249,14 @@ static int ocfs2_set_acl(handle_t *handle, if (ret < 0) return ret; else { - inode->i_mode = mode; if (ret == 0) acl = NULL; + + ret = ocfs2_acl_set_mode(inode, di_bh, + handle, mode); + if (ret) + return ret; + } } break; @@ -283,6 +344,7 @@ int ocfs2_init_acl(handle_t *handle, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct posix_acl *acl = NULL; int ret = 0; + mode_t mode; if (!S_ISLNK(inode->i_mode)) { if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { @@ -291,12 +353,17 @@ int ocfs2_init_acl(handle_t *handle, if (IS_ERR(acl)) return PTR_ERR(acl); } - if (!acl) - inode->i_mode &= ~current_umask(); + if (!acl) { + mode = inode->i_mode & ~current_umask(); + ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode); + if (ret) { + mlog_errno(ret); + goto cleanup; + } + } } if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) { struct posix_acl *clone; - mode_t mode; if (S_ISDIR(inode->i_mode)) { ret = ocfs2_set_acl(handle, inode, di_bh, @@ -313,7 +380,7 @@ int ocfs2_init_acl(handle_t *handle, mode = inode->i_mode; ret = posix_acl_create_masq(clone, &mode); if (ret >= 0) { - inode->i_mode = mode; + ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode); if (ret > 0) { ret = ocfs2_set_acl(handle, inode, di_bh, ACL_TYPE_ACCESS, diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index a659606..9289b43 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -1875,7 +1875,6 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data, ok: spin_unlock(&res->spinlock); } - spin_unlock(&dlm->spinlock); // mlog(0, "woo! got an assert_master from node %u!\n", // assert->node_idx); @@ -1926,7 +1925,6 @@ ok: /* master is known, detach if not already detached. * ensures that only one assert_master call will happen * on this mle. */ - spin_lock(&dlm->spinlock); spin_lock(&dlm->master_lock); rr = atomic_read(&mle->mle_refs.refcount); @@ -1959,7 +1957,6 @@ ok: __dlm_put_mle(mle); } spin_unlock(&dlm->master_lock); - spin_unlock(&dlm->spinlock); } else if (res) { if (res->owner != assert->node_idx) { mlog(0, "assert_master from %u, but current " @@ -1967,6 +1964,7 @@ ok: res->owner, namelen, name); } } + spin_unlock(&dlm->spinlock); done: ret = 0; diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 278a223..ab20790 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -891,6 +891,21 @@ static int ocfs2_query_inode_wipe(struct inode *inode, /* Do some basic inode verification... */ di = (struct ocfs2_dinode *) di_bh->b_data; if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) { + /* + * Inodes in the orphan dir must have ORPHANED_FL. The only + * inodes that come back out of the orphan dir are reflink + * targets. A reflink target may be moved out of the orphan + * dir between the time we scan the directory and the time we + * process it. This would lead to HAS_REFCOUNT_FL being set but + * ORPHANED_FL not. + */ + if (di->i_dyn_features & cpu_to_le16(OCFS2_HAS_REFCOUNT_FL)) { + mlog(0, "Reflinked inode %llu is no longer orphaned. " + "it shouldn't be deleted\n", + (unsigned long long)oi->ip_blkno); + goto bail; + } + /* for lack of a better error? */ status = -EEXIST; mlog(ML_ERROR, diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index ca992d9..c983715 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -872,8 +872,10 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, (unsigned long long)la_start_blk, (unsigned long long)blkno); - status = ocfs2_free_clusters(handle, main_bm_inode, - main_bm_bh, blkno, count); + status = ocfs2_release_clusters(handle, + main_bm_inode, + main_bm_bh, blkno, + count); if (status < 0) { mlog_errno(status); goto bail; @@ -984,8 +986,7 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, } retry_enospc: - (*ac)->ac_bits_wanted = osb->local_alloc_bits; - + (*ac)->ac_bits_wanted = osb->local_alloc_default_bits; status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); if (status == -ENOSPC) { if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == @@ -1061,6 +1062,7 @@ retry_enospc: OCFS2_LA_DISABLED) goto bail; + ac->ac_bits_wanted = osb->local_alloc_default_bits; status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits, &cluster_off, diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c index 544ac62..b5cb3ed 100644 --- a/fs/ocfs2/locks.c +++ b/fs/ocfs2/locks.c @@ -133,7 +133,7 @@ int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl) if (!(fl->fl_flags & FL_POSIX)) return -ENOLCK; - if (__mandatory_lock(inode)) + if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) return -ENOLCK; return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index d9cd4e3..b1eb50a 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -84,7 +84,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, static int ocfs2_orphan_add(struct ocfs2_super *osb, handle_t *handle, struct inode *inode, - struct ocfs2_dinode *fe, + struct buffer_head *fe_bh, char *name, struct ocfs2_dir_lookup_result *lookup, struct inode *orphan_dir_inode); @@ -879,7 +879,7 @@ static int ocfs2_unlink(struct inode *dir, fe = (struct ocfs2_dinode *) fe_bh->b_data; if (inode_is_unlinkable(inode)) { - status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name, + status = ocfs2_orphan_add(osb, handle, inode, fe_bh, orphan_name, &orphan_insert, orphan_dir); if (status < 0) { mlog_errno(status); @@ -1300,7 +1300,7 @@ static int ocfs2_rename(struct inode *old_dir, if (S_ISDIR(new_inode->i_mode) || (ocfs2_read_links_count(newfe) == 1)) { status = ocfs2_orphan_add(osb, handle, new_inode, - newfe, orphan_name, + newfe_bh, orphan_name, &orphan_insert, orphan_dir); if (status < 0) { mlog_errno(status); @@ -1911,7 +1911,7 @@ leave: static int ocfs2_orphan_add(struct ocfs2_super *osb, handle_t *handle, struct inode *inode, - struct ocfs2_dinode *fe, + struct buffer_head *fe_bh, char *name, struct ocfs2_dir_lookup_result *lookup, struct inode *orphan_dir_inode) @@ -1919,6 +1919,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, struct buffer_head *orphan_dir_bh = NULL; int status = 0; struct ocfs2_dinode *orphan_fe; + struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); @@ -1959,6 +1960,21 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, goto leave; } + /* + * We're going to journal the change of i_flags and i_orphaned_slot. + * It's safe anyway, though some callers may duplicate the journaling. + * Journaling within the func just make the logic look more + * straightforward. + */ + status = ocfs2_journal_access_di(handle, + INODE_CACHE(inode), + fe_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto leave; + } + le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL); /* Record which orphan dir our inode now resides @@ -1966,6 +1982,8 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, * dir to lock. */ fe->i_orphaned_slot = cpu_to_le16(osb->slot_num); + ocfs2_journal_dirty(handle, fe_bh); + mlog(0, "Inode %llu orphaned in slot %d\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); @@ -2123,7 +2141,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, } di = (struct ocfs2_dinode *)new_di_bh->b_data; - status = ocfs2_orphan_add(osb, handle, inode, di, orphan_name, + status = ocfs2_orphan_add(osb, handle, inode, new_di_bh, orphan_name, &orphan_insert, orphan_dir); if (status < 0) { mlog_errno(status); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 1238b49..adf5e2e 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -763,8 +763,18 @@ static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb, return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits); } -#define ocfs2_set_bit ext2_set_bit -#define ocfs2_clear_bit ext2_clear_bit +static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap) +{ + ext2_set_bit(bit, bitmap); +} +#define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr)) + +static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap) +{ + ext2_clear_bit(bit, bitmap); +} +#define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr)) + #define ocfs2_test_bit ext2_test_bit #define ocfs2_find_next_zero_bit ext2_find_next_zero_bit #define ocfs2_find_next_bit ext2_find_next_bit diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 9e96921..29405f2 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4075,6 +4075,7 @@ static int ocfs2_complete_reflink(struct inode *s_inode, OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features; spin_unlock(&OCFS2_I(t_inode)->ip_lock); i_size_write(t_inode, size); + t_inode->i_blocks = s_inode->i_blocks; di->i_xattr_inline_size = s_di->i_xattr_inline_size; di->i_clusters = s_di->i_clusters; diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index c3c60bc..19ba00f 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -95,13 +95,6 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle, struct buffer_head *group_bh, unsigned int bit_off, unsigned int num_bits); -static inline int ocfs2_block_group_clear_bits(handle_t *handle, - struct inode *alloc_inode, - struct ocfs2_group_desc *bg, - struct buffer_head *group_bh, - unsigned int bit_off, - unsigned int num_bits); - static int ocfs2_relink_block_group(handle_t *handle, struct inode *alloc_inode, struct buffer_head *fe_bh, @@ -152,7 +145,7 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) #define do_error(fmt, ...) \ do{ \ - if (clean_error) \ + if (resize) \ mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__); \ else \ ocfs2_error(sb, fmt, ##__VA_ARGS__); \ @@ -160,7 +153,7 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) static int ocfs2_validate_gd_self(struct super_block *sb, struct buffer_head *bh, - int clean_error) + int resize) { struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; @@ -211,7 +204,7 @@ static int ocfs2_validate_gd_self(struct super_block *sb, static int ocfs2_validate_gd_parent(struct super_block *sb, struct ocfs2_dinode *di, struct buffer_head *bh, - int clean_error) + int resize) { unsigned int max_bits; struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; @@ -233,8 +226,11 @@ static int ocfs2_validate_gd_parent(struct super_block *sb, return -EINVAL; } - if (le16_to_cpu(gd->bg_chain) >= - le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) { + /* In resize, we may meet the case bg_chain == cl_next_free_rec. */ + if ((le16_to_cpu(gd->bg_chain) > + le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) || + ((le16_to_cpu(gd->bg_chain) == + le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) && !resize)) { do_error("Group descriptor #%llu has bad chain %u", (unsigned long long)bh->b_blocknr, le16_to_cpu(gd->bg_chain)); @@ -1975,18 +1971,18 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb, bits_wanted, cluster_start, num_clusters); } -static inline int ocfs2_block_group_clear_bits(handle_t *handle, - struct inode *alloc_inode, - struct ocfs2_group_desc *bg, - struct buffer_head *group_bh, - unsigned int bit_off, - unsigned int num_bits) +static int ocfs2_block_group_clear_bits(handle_t *handle, + struct inode *alloc_inode, + struct ocfs2_group_desc *bg, + struct buffer_head *group_bh, + unsigned int bit_off, + unsigned int num_bits, + void (*undo_fn)(unsigned int bit, + unsigned long *bmap)) { int status; unsigned int tmp; - int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; struct ocfs2_group_desc *undo_bg = NULL; - int cluster_bitmap = 0; mlog_entry_void(); @@ -1996,20 +1992,18 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, mlog(0, "off = %u, num = %u\n", bit_off, num_bits); - if (ocfs2_is_cluster_bitmap(alloc_inode)) - journal_type = OCFS2_JOURNAL_ACCESS_UNDO; - + BUG_ON(undo_fn && !ocfs2_is_cluster_bitmap(alloc_inode)); status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), - group_bh, journal_type); + group_bh, + undo_fn ? + OCFS2_JOURNAL_ACCESS_UNDO : + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail; } - if (ocfs2_is_cluster_bitmap(alloc_inode)) - cluster_bitmap = 1; - - if (cluster_bitmap) { + if (undo_fn) { jbd_lock_bh_state(group_bh); undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data; @@ -2020,13 +2014,13 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, while(tmp--) { ocfs2_clear_bit((bit_off + tmp), (unsigned long *) bg->bg_bitmap); - if (cluster_bitmap) - ocfs2_set_bit(bit_off + tmp, - (unsigned long *) undo_bg->bg_bitmap); + if (undo_fn) + undo_fn(bit_off + tmp, + (unsigned long *) undo_bg->bg_bitmap); } le16_add_cpu(&bg->bg_free_bits_count, num_bits); - if (cluster_bitmap) + if (undo_fn) jbd_unlock_bh_state(group_bh); status = ocfs2_journal_dirty(handle, group_bh); @@ -2039,12 +2033,14 @@ bail: /* * expects the suballoc inode to already be locked. */ -int ocfs2_free_suballoc_bits(handle_t *handle, - struct inode *alloc_inode, - struct buffer_head *alloc_bh, - unsigned int start_bit, - u64 bg_blkno, - unsigned int count) +static int _ocfs2_free_suballoc_bits(handle_t *handle, + struct inode *alloc_inode, + struct buffer_head *alloc_bh, + unsigned int start_bit, + u64 bg_blkno, + unsigned int count, + void (*undo_fn)(unsigned int bit, + unsigned long *bitmap)) { int status = 0; u32 tmp_used; @@ -2079,7 +2075,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle, status = ocfs2_block_group_clear_bits(handle, alloc_inode, group, group_bh, - start_bit, count); + start_bit, count, undo_fn); if (status < 0) { mlog_errno(status); goto bail; @@ -2110,6 +2106,17 @@ bail: return status; } +int ocfs2_free_suballoc_bits(handle_t *handle, + struct inode *alloc_inode, + struct buffer_head *alloc_bh, + unsigned int start_bit, + u64 bg_blkno, + unsigned int count) +{ + return _ocfs2_free_suballoc_bits(handle, alloc_inode, alloc_bh, + start_bit, bg_blkno, count, NULL); +} + int ocfs2_free_dinode(handle_t *handle, struct inode *inode_alloc_inode, struct buffer_head *inode_alloc_bh, @@ -2123,11 +2130,13 @@ int ocfs2_free_dinode(handle_t *handle, inode_alloc_bh, bit, bg_blkno, 1); } -int ocfs2_free_clusters(handle_t *handle, - struct inode *bitmap_inode, - struct buffer_head *bitmap_bh, - u64 start_blk, - unsigned int num_clusters) +static int _ocfs2_free_clusters(handle_t *handle, + struct inode *bitmap_inode, + struct buffer_head *bitmap_bh, + u64 start_blk, + unsigned int num_clusters, + void (*undo_fn)(unsigned int bit, + unsigned long *bitmap)) { int status; u16 bg_start_bit; @@ -2154,9 +2163,9 @@ int ocfs2_free_clusters(handle_t *handle, mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n", (unsigned long long)bg_blkno, bg_start_bit); - status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, - bg_start_bit, bg_blkno, - num_clusters); + status = _ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, + bg_start_bit, bg_blkno, + num_clusters, undo_fn); if (status < 0) { mlog_errno(status); goto out; @@ -2170,6 +2179,32 @@ out: return status; } +int ocfs2_free_clusters(handle_t *handle, + struct inode *bitmap_inode, + struct buffer_head *bitmap_bh, + u64 start_blk, + unsigned int num_clusters) +{ + return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh, + start_blk, num_clusters, + _ocfs2_set_bit); +} + +/* + * Give never-used clusters back to the global bitmap. We don't need + * to protect these bits in the undo buffer. + */ +int ocfs2_release_clusters(handle_t *handle, + struct inode *bitmap_inode, + struct buffer_head *bitmap_bh, + u64 start_blk, + unsigned int num_clusters) +{ + return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh, + start_blk, num_clusters, + _ocfs2_clear_bit); +} + static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg) { printk("Block Group:\n"); diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index fa60723..e0f46df 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -127,6 +127,11 @@ int ocfs2_free_clusters(handle_t *handle, struct buffer_head *bitmap_bh, u64 start_blk, unsigned int num_clusters); +int ocfs2_release_clusters(handle_t *handle, + struct inode *bitmap_inode, + struct buffer_head *bitmap_bh, + u64 start_blk, + unsigned int num_clusters); static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) { diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index d1b0d38..3e77730 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1622,7 +1622,7 @@ static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc) /* Now tell xh->xh_entries about it */ for (i = 0; i < count; i++) { offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset); - if (offset < namevalue_offset) + if (offset <= namevalue_offset) le16_add_cpu(&xh->xh_entries[i].xe_name_offset, namevalue_size); } @@ -6528,13 +6528,11 @@ static int ocfs2_create_empty_xattr_block(struct inode *inode, int indexed) { int ret; - struct ocfs2_alloc_context *meta_ac; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - struct ocfs2_xattr_set_ctxt ctxt = { - .meta_ac = meta_ac, - }; + struct ocfs2_xattr_set_ctxt ctxt; - ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); + memset(&ctxt, 0, sizeof(ctxt)); + ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac); if (ret < 0) { mlog_errno(ret); return ret; @@ -6556,7 +6554,7 @@ static int ocfs2_create_empty_xattr_block(struct inode *inode, ocfs2_commit_trans(osb, ctxt.handle); out: - ocfs2_free_alloc_context(meta_ac); + ocfs2_free_alloc_context(ctxt.meta_ac); return ret; } diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index cac84b0..5f494b4 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -565,17 +565,17 @@ enum { static inline int ext3_test_inode_state(struct inode *inode, int bit) { - return test_bit(bit, &EXT3_I(inode)->i_state); + return test_bit(bit, &EXT3_I(inode)->i_state_flags); } static inline void ext3_set_inode_state(struct inode *inode, int bit) { - set_bit(bit, &EXT3_I(inode)->i_state); + set_bit(bit, &EXT3_I(inode)->i_state_flags); } static inline void ext3_clear_inode_state(struct inode *inode, int bit) { - clear_bit(bit, &EXT3_I(inode)->i_state); + clear_bit(bit, &EXT3_I(inode)->i_state_flags); } #else /* Assume that user mode programs are passing in an ext3fs superblock, not diff --git a/include/linux/ext3_fs_i.h b/include/linux/ext3_fs_i.h index 7679acd..f42c098 100644 --- a/include/linux/ext3_fs_i.h +++ b/include/linux/ext3_fs_i.h @@ -87,7 +87,7 @@ struct ext3_inode_info { * near to their parent directory's inode. */ __u32 i_block_group; - unsigned long i_state; /* Dynamic state flags for ext3 */ + unsigned long i_state_flags; /* Dynamic state flags for ext3 */ /* block reservation info */ struct ext3_block_alloc_info *i_block_alloc_info; diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 7be0c6f..c57db27 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -105,7 +105,7 @@ struct fscache_operation { /* operation releaser */ fscache_operation_release_t release; -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG const char *name; /* operation name */ const char *state; /* operation state */ #define fscache_set_op_name(OP, N) do { (OP)->name = (N); } while(0) diff --git a/include/linux/socket.h b/include/linux/socket.h index 7b3aae2..354cc56 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -255,6 +255,7 @@ struct ucred { #define MSG_ERRQUEUE 0x2000 /* Fetch message from error queue */ #define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */ #define MSG_MORE 0x8000 /* Sender will send more */ +#define MSG_WAITFORONE 0x10000 /* recvmmsg(): block until 1+ packets avail */ #define MSG_EOF MSG_FIN diff --git a/kernel/early_res.c b/kernel/early_res.c index 3cb2c66..31aa933 100644 --- a/kernel/early_res.c +++ b/kernel/early_res.c @@ -333,6 +333,12 @@ void __init free_early_partial(u64 start, u64 end) struct early_res *r; int i; + if (start == end) + return; + + if (WARN_ONCE(start > end, " wrong range [%#llx, %#llx]\n", start, end)) + return; + try_next: i = find_overlapped_early(start, end); if (i >= max_early_res) diff --git a/kernel/slow-work.c b/kernel/slow-work.c index 7494bbf..7d3f4fa 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -637,7 +637,7 @@ int delayed_slow_work_enqueue(struct delayed_slow_work *dwork, goto cancelled; /* the timer holds a reference whilst it is pending */ - ret = work->ops->get_ref(work); + ret = slow_work_get_ref(work); if (ret < 0) goto cant_get_ref; diff --git a/kernel/slow-work.h b/kernel/slow-work.h index 321f3c5..a29ebd1 100644 --- a/kernel/slow-work.h +++ b/kernel/slow-work.h @@ -43,28 +43,28 @@ extern void slow_work_new_thread_desc(struct slow_work *, struct seq_file *); */ static inline void slow_work_set_thread_pid(int id, pid_t pid) { -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG slow_work_pids[id] = pid; #endif } static inline void slow_work_mark_time(struct slow_work *work) { -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG work->mark = CURRENT_TIME; #endif } static inline void slow_work_begin_exec(int id, struct slow_work *work) { -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG slow_work_execs[id] = work; #endif } static inline void slow_work_end_exec(int id, struct slow_work *work) { -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG write_lock(&slow_work_execs_lock); slow_work_execs[id] = NULL; write_unlock(&slow_work_execs_lock); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 51ca946..3feb2b3 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1194,7 +1194,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) hlist_for_each_entry_rcu(dev, node, head, index_hlist) { if (idx < s_idx) goto cont; - if (idx > s_idx) + if (h > s_h || idx > s_idx) s_ip_idx = 0; in_dev = __in_dev_get_rcu(dev); if (!in_dev) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 0b9d03c..d0a6092 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1616,17 +1616,20 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) int ct; struct rtnexthop *nhp; struct net *net = mfc_net(c); - struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev; u8 *b = skb_tail_pointer(skb); struct rtattr *mp_head; - if (dev) - RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex); + /* If cache is unresolved, don't try to parse IIF and OIF */ + if (c->mfc_parent > MAXVIFS) + return -ENOENT; + + if (VIF_EXISTS(net, c->mfc_parent)) + RTA_PUT(skb, RTA_IIF, 4, &net->ipv4.vif_table[c->mfc_parent].dev->ifindex); mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { - if (c->mfc_un.res.ttls[ct] < 255) { + if (VIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) { if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) goto rtattr_failure; nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 54fd68c..d413b57 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1097,7 +1097,7 @@ static int slow_chain_length(const struct rtable *head) } static int rt_intern_hash(unsigned hash, struct rtable *rt, - struct rtable **rp, struct sk_buff *skb) + struct rtable **rp, struct sk_buff *skb, int ifindex) { struct rtable *rth, **rthp; unsigned long now; @@ -1212,11 +1212,16 @@ restart: slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { struct net *net = dev_net(rt->u.dst.dev); int num = ++net->ipv4.current_rt_cache_rebuild_count; - if (!rt_caching(dev_net(rt->u.dst.dev))) { + if (!rt_caching(net)) { printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", rt->u.dst.dev->name, num); } - rt_emergency_hash_rebuild(dev_net(rt->u.dst.dev)); + rt_emergency_hash_rebuild(net); + spin_unlock_bh(rt_hash_lock_addr(hash)); + + hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, + ifindex, rt_genid(net)); + goto restart; } } @@ -1477,7 +1482,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, &netevent); rt_del(hash, rth); - if (!rt_intern_hash(hash, rt, &rt, NULL)) + if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif)) ip_rt_put(rt); goto do_next; } @@ -1931,7 +1936,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, in_dev_put(in_dev); hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); - return rt_intern_hash(hash, rth, NULL, skb); + return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); e_nobufs: in_dev_put(in_dev); @@ -2098,7 +2103,7 @@ static int ip_mkroute_input(struct sk_buff *skb, /* put it into the cache */ hash = rt_hash(daddr, saddr, fl->iif, rt_genid(dev_net(rth->u.dst.dev))); - return rt_intern_hash(hash, rth, NULL, skb); + return rt_intern_hash(hash, rth, NULL, skb, fl->iif); } /* @@ -2255,7 +2260,7 @@ local_input: } rth->rt_type = res.type; hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); - err = rt_intern_hash(hash, rth, NULL, skb); + err = rt_intern_hash(hash, rth, NULL, skb, fl.iif); goto done; no_route: @@ -2502,7 +2507,7 @@ static int ip_mkroute_output(struct rtable **rp, if (err == 0) { hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, rt_genid(dev_net(dev_out))); - err = rt_intern_hash(hash, rth, rp, NULL); + err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif); } return err; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3381b43..7e567ae 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3610,7 +3610,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, hlist_for_each_entry_rcu(dev, node, head, index_hlist) { if (idx < s_idx) goto cont; - if (idx > s_idx) + if (h > s_h || idx > s_idx) s_ip_idx = 0; ip_idx = 0; if ((idev = __in6_dev_get(dev)) == NULL) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 23e4ac0..27acfb5 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1695,17 +1695,20 @@ ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm) int ct; struct rtnexthop *nhp; struct net *net = mfc6_net(c); - struct net_device *dev = net->ipv6.vif6_table[c->mf6c_parent].dev; u8 *b = skb_tail_pointer(skb); struct rtattr *mp_head; - if (dev) - RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex); + /* If cache is unresolved, don't try to parse IIF and OIF */ + if (c->mf6c_parent > MAXMIFS) + return -ENOENT; + + if (MIF_EXISTS(net, c->mf6c_parent)) + RTA_PUT(skb, RTA_IIF, 4, &net->ipv6.vif6_table[c->mf6c_parent].dev->ifindex); mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { - if (c->mfc_un.res.ttls[ct] < 255) { + if (MIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) { if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) goto rtattr_failure; nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7fcb0e5..0d7713c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -890,12 +890,17 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) struct rt6_info *rt = (struct rt6_info *) dst; if (rt) { - if (rt->rt6i_flags & RTF_CACHE) - ip6_del_rt(rt); - else + if (rt->rt6i_flags & RTF_CACHE) { + if (rt6_check_expired(rt)) { + ip6_del_rt(rt); + dst = NULL; + } + } else { dst_release(dst); + dst = NULL; + } } - return NULL; + return dst; } static void ip6_link_failure(struct sk_buff *skb) diff --git a/net/socket.c b/net/socket.c index 769c386..f55ffe9 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2135,6 +2135,10 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, break; ++datagrams; + /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */ + if (flags & MSG_WAITFORONE) + flags |= MSG_DONTWAIT; + if (timeout) { ktime_get_ts(timeout); *timeout = timespec_sub(end_time, *timeout); diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index b546ac2..a2ff861 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -148,6 +148,9 @@ static void pcm_debug_name(struct snd_pcm_substream *substream, #define xrun_debug(substream, mask) \ ((substream)->pstr->xrun_debug & (mask)) +#else +#define xrun_debug(substream, mask) 0 +#endif #define dump_stack_on_xrun(substream) do { \ if (xrun_debug(substream, XRUN_DEBUG_STACK)) \ @@ -169,6 +172,7 @@ static void xrun(struct snd_pcm_substream *substream) } } +#ifdef CONFIG_SND_PCM_XRUN_DEBUG #define hw_ptr_error(substream, fmt, args...) \ do { \ if (xrun_debug(substream, XRUN_DEBUG_BASIC)) { \ @@ -255,8 +259,6 @@ static void xrun_log_show(struct snd_pcm_substream *substream) #else /* ! CONFIG_SND_PCM_XRUN_DEBUG */ -#define xrun_debug(substream, mask) 0 -#define xrun(substream) do { } while (0) #define hw_ptr_error(substream, fmt, args...) do { } while (0) #define xrun_log(substream, pos) do { } while (0) #define xrun_log_show(substream) do { } while (0) diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c index 1caf5e3..e68c98e 100644 --- a/sound/pci/ac97/ac97_patch.c +++ b/sound/pci/ac97/ac97_patch.c @@ -1852,12 +1852,14 @@ static unsigned int ad1981_jacks_blacklist[] = { 0x10140523, /* Thinkpad R40 */ 0x10140534, /* Thinkpad X31 */ 0x10140537, /* Thinkpad T41p */ + 0x1014053e, /* Thinkpad R40e */ 0x10140554, /* Thinkpad T42p/R50p */ 0x10140567, /* Thinkpad T43p 2668-G7U */ 0x10140581, /* Thinkpad X41-2527 */ 0x10280160, /* Dell Dimension 2400 */ 0x104380b0, /* Asus A7V8X-MX */ 0x11790241, /* Toshiba Satellite A-15 S127 */ + 0x1179ff10, /* Toshiba P500 */ 0x144dc01a, /* Samsung NP-X20C004/SEG */ 0 /* end */ }; diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 8b29156..4bb9067 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2269,6 +2269,7 @@ static struct snd_pci_quirk position_fix_list[] __devinitdata = { SND_PCI_QUIRK(0x103c, 0x306d, "HP dv3", POS_FIX_LPIB), SND_PCI_QUIRK(0x1106, 0x3288, "ASUS M2V-MX SE", POS_FIX_LPIB), SND_PCI_QUIRK(0x1043, 0x813d, "ASUS P5AD2", POS_FIX_LPIB), + SND_PCI_QUIRK(0x1458, 0xa022, "ga-ma770-ud3", POS_FIX_LPIB), SND_PCI_QUIRK(0x1462, 0x1002, "MSI Wind U115", POS_FIX_LPIB), SND_PCI_QUIRK(0x1565, 0x820f, "Biostar Microtech", POS_FIX_LPIB), SND_PCI_QUIRK(0x8086, 0xd601, "eMachines T5212", POS_FIX_LPIB), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 053d53d..9a23444 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10043,8 +10043,11 @@ static void alc882_auto_set_output_and_unmute(struct hda_codec *codec, alc_set_pin_output(codec, nid, pin_type); if (spec->multiout.dac_nids[dac_idx] == 0x25) idx = 4; - else + else { + if (spec->multiout.num_dacs >= dac_idx) + return; idx = spec->multiout.dac_nids[dac_idx] - 2; + } snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_CONNECT_SEL, idx); } |