diff options
100 files changed, 2204 insertions, 965 deletions
diff --git a/Documentation/acpi/method-customizing.txt b/Documentation/acpi/method-customizing.txt index 3e1d25a..5f55373 100644 --- a/Documentation/acpi/method-customizing.txt +++ b/Documentation/acpi/method-customizing.txt @@ -66,3 +66,8 @@ Note: We can use a kernel with multiple custom ACPI method running, But each individual write to debugfs can implement a SINGLE method override. i.e. if we want to insert/override multiple ACPI methods, we need to redo step c) ~ g) for multiple times. + +Note: Be aware that root can mis-use this driver to modify arbitrary + memory and gain additional rights, if root's privileges got + restricted (for example if root is not allowed to load additional + modules after boot). diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index ff31b1c..1a9446b 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -6,6 +6,42 @@ be removed from this file. --------------------------- +What: x86 floppy disable_hlt +When: 2012 +Why: ancient workaround of dubious utility clutters the + code used by everybody else. +Who: Len Brown <len.brown@intel.com> + +--------------------------- + +What: CONFIG_APM_CPU_IDLE, and its ability to call APM BIOS in idle +When: 2012 +Why: This optional sub-feature of APM is of dubious reliability, + and ancient APM laptops are likely better served by calling HLT. + Deleting CONFIG_APM_CPU_IDLE allows x86 to stop exporting + the pm_idle function pointer to modules. +Who: Len Brown <len.brown@intel.com> + +---------------------------- + +What: x86_32 "no-hlt" cmdline param +When: 2012 +Why: remove a branch from idle path, simplify code used by everybody. + This option disabled the use of HLT in idle and machine_halt() + for hardware that was flakey 15-years ago. Today we have + "idle=poll" that removed HLT from idle, and so if such a machine + is still running the upstream kernel, "idle=poll" is likely sufficient. +Who: Len Brown <len.brown@intel.com> + +---------------------------- + +What: x86 "idle=mwait" cmdline param +When: 2012 +Why: simplify x86 idle code +Who: Len Brown <len.brown@intel.com> + +---------------------------- + What: PRISM54 When: 2.6.34 diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index e1e5010..0249b8b 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -11,6 +11,7 @@ config TILE select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP select GENERIC_IRQ_SHOW + select SYS_HYPERVISOR # FIXME: investigate whether we need/want these options. # select HAVE_IOREMAP_PROT diff --git a/arch/tile/include/asm/hardwall.h b/arch/tile/include/asm/hardwall.h index 0bed3ec..2ac4228 100644 --- a/arch/tile/include/asm/hardwall.h +++ b/arch/tile/include/asm/hardwall.h @@ -40,6 +40,10 @@ #define HARDWALL_DEACTIVATE \ _IO(HARDWALL_IOCTL_BASE, _HARDWALL_DEACTIVATE) +#define _HARDWALL_GET_ID 4 +#define HARDWALL_GET_ID \ + _IO(HARDWALL_IOCTL_BASE, _HARDWALL_GET_ID) + #ifndef __KERNEL__ /* This is the canonical name expected by userspace. */ @@ -47,9 +51,14 @@ #else -/* Hook for /proc/tile/hardwall. */ -struct seq_file; -int proc_tile_hardwall_show(struct seq_file *sf, void *v); +/* /proc hooks for hardwall. */ +struct proc_dir_entry; +#ifdef CONFIG_HARDWALL +void proc_tile_hardwall_init(struct proc_dir_entry *root); +int proc_pid_hardwall(struct task_struct *task, char *buffer); +#else +static inline void proc_tile_hardwall_init(struct proc_dir_entry *root) {} +#endif #endif diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index b4c8e8e..b4dbc05 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile @@ -5,7 +5,7 @@ extra-y := vmlinux.lds head_$(BITS).o obj-y := backtrace.o entry.o init_task.o irq.o messaging.o \ pci-dma.o proc.o process.o ptrace.o reboot.o \ - setup.o signal.o single_step.o stack.o sys.o time.o traps.o \ + setup.o signal.o single_step.o stack.o sys.o sysfs.o time.o traps.o \ intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o obj-$(CONFIG_HARDWALL) += hardwall.o diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c index 3bddef7..8c41891 100644 --- a/arch/tile/kernel/hardwall.c +++ b/arch/tile/kernel/hardwall.c @@ -40,16 +40,25 @@ struct hardwall_info { struct list_head list; /* "rectangles" list */ struct list_head task_head; /* head of tasks in this hardwall */ + struct cpumask cpumask; /* cpus in the rectangle */ int ulhc_x; /* upper left hand corner x coord */ int ulhc_y; /* upper left hand corner y coord */ int width; /* rectangle width */ int height; /* rectangle height */ + int id; /* integer id for this hardwall */ int teardown_in_progress; /* are we tearing this one down? */ }; /* Currently allocated hardwall rectangles */ static LIST_HEAD(rectangles); +/* /proc/tile/hardwall */ +static struct proc_dir_entry *hardwall_proc_dir; + +/* Functions to manage files in /proc/tile/hardwall. */ +static void hardwall_add_proc(struct hardwall_info *rect); +static void hardwall_remove_proc(struct hardwall_info *rect); + /* * Guard changes to the hardwall data structures. * This could be finer grained (e.g. one lock for the list of hardwall @@ -105,6 +114,8 @@ static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask) r->ulhc_y = cpu_y(ulhc); r->width = cpu_x(lrhc) - r->ulhc_x + 1; r->height = cpu_y(lrhc) - r->ulhc_y + 1; + cpumask_copy(&r->cpumask, mask); + r->id = ulhc; /* The ulhc cpu id can be the hardwall id. */ /* Width and height must be positive */ if (r->width <= 0 || r->height <= 0) @@ -388,6 +399,9 @@ static struct hardwall_info *hardwall_create( /* Set up appropriate hardwalling on all affected cpus. */ hardwall_setup(rect); + /* Create a /proc/tile/hardwall entry. */ + hardwall_add_proc(rect); + return rect; } @@ -645,6 +659,9 @@ static void hardwall_destroy(struct hardwall_info *rect) /* Restart switch and disable firewall. */ on_each_cpu_mask(&mask, restart_udn_switch, NULL, 1); + /* Remove the /proc/tile/hardwall entry. */ + hardwall_remove_proc(rect); + /* Now free the rectangle from the list. */ spin_lock_irqsave(&hardwall_lock, flags); BUG_ON(!list_empty(&rect->task_head)); @@ -654,35 +671,57 @@ static void hardwall_destroy(struct hardwall_info *rect) } -/* - * Dump hardwall state via /proc; initialized in arch/tile/sys/proc.c. - */ -int proc_tile_hardwall_show(struct seq_file *sf, void *v) +static int hardwall_proc_show(struct seq_file *sf, void *v) { - struct hardwall_info *r; + struct hardwall_info *rect = sf->private; + char buf[256]; - if (udn_disabled) { - seq_printf(sf, "%dx%d 0,0 pids:\n", smp_width, smp_height); - return 0; - } - - spin_lock_irq(&hardwall_lock); - list_for_each_entry(r, &rectangles, list) { - struct task_struct *p; - seq_printf(sf, "%dx%d %d,%d pids:", - r->width, r->height, r->ulhc_x, r->ulhc_y); - list_for_each_entry(p, &r->task_head, thread.hardwall_list) { - unsigned int cpu = cpumask_first(&p->cpus_allowed); - unsigned int x = cpu % smp_width; - unsigned int y = cpu / smp_width; - seq_printf(sf, " %d@%d,%d", p->pid, x, y); - } - seq_printf(sf, "\n"); - } - spin_unlock_irq(&hardwall_lock); + int rc = cpulist_scnprintf(buf, sizeof(buf), &rect->cpumask); + buf[rc++] = '\n'; + seq_write(sf, buf, rc); return 0; } +static int hardwall_proc_open(struct inode *inode, + struct file *file) +{ + return single_open(file, hardwall_proc_show, PDE(inode)->data); +} + +static const struct file_operations hardwall_proc_fops = { + .open = hardwall_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void hardwall_add_proc(struct hardwall_info *rect) +{ + char buf[64]; + snprintf(buf, sizeof(buf), "%d", rect->id); + proc_create_data(buf, 0444, hardwall_proc_dir, + &hardwall_proc_fops, rect); +} + +static void hardwall_remove_proc(struct hardwall_info *rect) +{ + char buf[64]; + snprintf(buf, sizeof(buf), "%d", rect->id); + remove_proc_entry(buf, hardwall_proc_dir); +} + +int proc_pid_hardwall(struct task_struct *task, char *buffer) +{ + struct hardwall_info *rect = task->thread.hardwall; + return rect ? sprintf(buffer, "%d\n", rect->id) : 0; +} + +void proc_tile_hardwall_init(struct proc_dir_entry *root) +{ + if (!udn_disabled) + hardwall_proc_dir = proc_mkdir("hardwall", root); +} + /* * Character device support via ioctl/close. @@ -716,6 +755,9 @@ static long hardwall_ioctl(struct file *file, unsigned int a, unsigned long b) return -EINVAL; return hardwall_deactivate(current); + case _HARDWALL_GET_ID: + return rect ? rect->id : -EINVAL; + default: return -EINVAL; } diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c index 2e02c41..62d8208 100644 --- a/arch/tile/kernel/proc.c +++ b/arch/tile/kernel/proc.c @@ -27,6 +27,7 @@ #include <asm/processor.h> #include <asm/sections.h> #include <asm/homecache.h> +#include <asm/hardwall.h> #include <arch/chip.h> @@ -88,3 +89,75 @@ const struct seq_operations cpuinfo_op = { .stop = c_stop, .show = show_cpuinfo, }; + +/* + * Support /proc/tile directory + */ + +static int __init proc_tile_init(void) +{ + struct proc_dir_entry *root = proc_mkdir("tile", NULL); + if (root == NULL) + return 0; + + proc_tile_hardwall_init(root); + + return 0; +} + +arch_initcall(proc_tile_init); + +/* + * Support /proc/sys/tile directory + */ + +#ifndef __tilegx__ /* FIXME: GX: no support for unaligned access yet */ +static ctl_table unaligned_subtable[] = { + { + .procname = "enabled", + .data = &unaligned_fixup, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .procname = "printk", + .data = &unaligned_printk, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .procname = "count", + .data = &unaligned_fixup_count, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + {} +}; + +static ctl_table unaligned_table[] = { + { + .procname = "unaligned_fixup", + .mode = 0555, + .child = unaligned_subtable + }, + {} +}; +#endif + +static struct ctl_path tile_path[] = { + { .procname = "tile" }, + { } +}; + +static int __init proc_sys_tile_init(void) +{ +#ifndef __tilegx__ /* FIXME: GX: no support for unaligned access yet */ + register_sysctl_paths(tile_path, unaligned_table); +#endif + return 0; +} + +arch_initcall(proc_sys_tile_init); diff --git a/arch/tile/kernel/sysfs.c b/arch/tile/kernel/sysfs.c new file mode 100644 index 0000000..b671a86 --- /dev/null +++ b/arch/tile/kernel/sysfs.c @@ -0,0 +1,185 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * /sys entry support. + */ + +#include <linux/sysdev.h> +#include <linux/cpu.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <hv/hypervisor.h> + +/* Return a string queried from the hypervisor, truncated to page size. */ +static ssize_t get_hv_confstr(char *page, int query) +{ + ssize_t n = hv_confstr(query, (unsigned long)page, PAGE_SIZE - 1); + n = n < 0 ? 0 : min(n, (ssize_t)PAGE_SIZE - 1) - 1; + if (n) + page[n++] = '\n'; + page[n] = '\0'; + return n; +} + +static ssize_t chip_width_show(struct sysdev_class *dev, + struct sysdev_class_attribute *attr, + char *page) +{ + return sprintf(page, "%u\n", smp_width); +} +static SYSDEV_CLASS_ATTR(chip_width, 0444, chip_width_show, NULL); + +static ssize_t chip_height_show(struct sysdev_class *dev, + struct sysdev_class_attribute *attr, + char *page) +{ + return sprintf(page, "%u\n", smp_height); +} +static SYSDEV_CLASS_ATTR(chip_height, 0444, chip_height_show, NULL); + +static ssize_t chip_serial_show(struct sysdev_class *dev, + struct sysdev_class_attribute *attr, + char *page) +{ + return get_hv_confstr(page, HV_CONFSTR_CHIP_SERIAL_NUM); +} +static SYSDEV_CLASS_ATTR(chip_serial, 0444, chip_serial_show, NULL); + +static ssize_t chip_revision_show(struct sysdev_class *dev, + struct sysdev_class_attribute *attr, + char *page) +{ + return get_hv_confstr(page, HV_CONFSTR_CHIP_REV); +} +static SYSDEV_CLASS_ATTR(chip_revision, 0444, chip_revision_show, NULL); + + +static ssize_t type_show(struct sysdev_class *dev, + struct sysdev_class_attribute *attr, + char *page) +{ + return sprintf(page, "tilera\n"); +} +static SYSDEV_CLASS_ATTR(type, 0444, type_show, NULL); + +#define HV_CONF_ATTR(name, conf) \ + static ssize_t name ## _show(struct sysdev_class *dev, \ + struct sysdev_class_attribute *attr, \ + char *page) \ + { \ + return get_hv_confstr(page, conf); \ + } \ + static SYSDEV_CLASS_ATTR(name, 0444, name ## _show, NULL); + +HV_CONF_ATTR(version, HV_CONFSTR_HV_SW_VER) +HV_CONF_ATTR(config_version, HV_CONFSTR_HV_CONFIG_VER) + +HV_CONF_ATTR(board_part, HV_CONFSTR_BOARD_PART_NUM) +HV_CONF_ATTR(board_serial, HV_CONFSTR_BOARD_SERIAL_NUM) +HV_CONF_ATTR(board_revision, HV_CONFSTR_BOARD_REV) +HV_CONF_ATTR(board_description, HV_CONFSTR_BOARD_DESC) +HV_CONF_ATTR(mezz_part, HV_CONFSTR_MEZZ_PART_NUM) +HV_CONF_ATTR(mezz_serial, HV_CONFSTR_MEZZ_SERIAL_NUM) +HV_CONF_ATTR(mezz_revision, HV_CONFSTR_MEZZ_REV) +HV_CONF_ATTR(mezz_description, HV_CONFSTR_MEZZ_DESC) +HV_CONF_ATTR(switch_control, HV_CONFSTR_SWITCH_CONTROL) + +static struct attribute *board_attrs[] = { + &attr_board_part.attr, + &attr_board_serial.attr, + &attr_board_revision.attr, + &attr_board_description.attr, + &attr_mezz_part.attr, + &attr_mezz_serial.attr, + &attr_mezz_revision.attr, + &attr_mezz_description.attr, + &attr_switch_control.attr, + NULL +}; + +static struct attribute_group board_attr_group = { + .name = "board", + .attrs = board_attrs, +}; + + +static struct bin_attribute hvconfig_bin; + +static ssize_t +hvconfig_bin_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + static size_t size; + + /* Lazily learn the true size (minus the trailing NUL). */ + if (size == 0) + size = hv_confstr(HV_CONFSTR_HV_CONFIG, 0, 0) - 1; + + /* Check and adjust input parameters. */ + if (off > size) + return -EINVAL; + if (count > size - off) + count = size - off; + + if (count) { + /* Get a copy of the hvc and copy out the relevant portion. */ + char *hvc; + + size = off + count; + hvc = kmalloc(size, GFP_KERNEL); + if (hvc == NULL) + return -ENOMEM; + hv_confstr(HV_CONFSTR_HV_CONFIG, (unsigned long)hvc, size); + memcpy(buf, hvc + off, count); + kfree(hvc); + } + + return count; +} + +static int __init create_sysfs_entries(void) +{ + struct sysdev_class *cls = &cpu_sysdev_class; + int err = 0; + +#define create_cpu_attr(name) \ + if (!err) \ + err = sysfs_create_file(&cls->kset.kobj, &attr_##name.attr); + create_cpu_attr(chip_width); + create_cpu_attr(chip_height); + create_cpu_attr(chip_serial); + create_cpu_attr(chip_revision); + +#define create_hv_attr(name) \ + if (!err) \ + err = sysfs_create_file(hypervisor_kobj, &attr_##name.attr); + create_hv_attr(type); + create_hv_attr(version); + create_hv_attr(config_version); + + if (!err) + err = sysfs_create_group(hypervisor_kobj, &board_attr_group); + + if (!err) { + sysfs_bin_attr_init(&hvconfig_bin); + hvconfig_bin.attr.name = "hvconfig"; + hvconfig_bin.attr.mode = S_IRUGO; + hvconfig_bin.read = hvconfig_bin_read; + hvconfig_bin.size = PAGE_SIZE; + err = sysfs_create_bin_file(hypervisor_kobj, &hvconfig_bin); + } + + return err; +} +subsys_initcall(create_sysfs_entries); diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 416d865..610001d 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -139,7 +139,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) boot_cpu_data.x86_model <= 0x05 && boot_cpu_data.x86_mask < 0x0A) return 1; - else if (c1e_detected) + else if (amd_e400_c1e_detected) return 1; else return max_cstate; diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h index 38d8737..f49253d7 100644 --- a/arch/x86/include/asm/idle.h +++ b/arch/x86/include/asm/idle.h @@ -16,6 +16,6 @@ static inline void enter_idle(void) { } static inline void exit_idle(void) { } #endif /* CONFIG_X86_64 */ -void c1e_remove_cpu(int cpu); +void amd_e400_remove_cpu(int cpu); #endif /* _ASM_X86_IDLE_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 4c25ab4..2193715 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -754,10 +754,10 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx) extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); extern void select_idle_routine(const struct cpuinfo_x86 *c); -extern void init_c1e_mask(void); +extern void init_amd_e400_c1e_mask(void); extern unsigned long boot_option_idle_override; -extern bool c1e_detected; +extern bool amd_e400_c1e_detected; enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, IDLE_POLL, IDLE_FORCE_MWAIT}; diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 3bfa022..965a766 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -361,6 +361,7 @@ struct apm_user { * idle percentage above which bios idle calls are done */ #ifdef CONFIG_APM_CPU_IDLE +#warning deprecated CONFIG_APM_CPU_IDLE will be deleted in 2012 #define DEFAULT_IDLE_THRESHOLD 95 #else #define DEFAULT_IDLE_THRESHOLD 100 @@ -904,6 +905,7 @@ static void apm_cpu_idle(void) unsigned int jiffies_since_last_check = jiffies - last_jiffies; unsigned int bucket; + WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012"); recalc: if (jiffies_since_last_check > IDLE_CALC_LIMIT) { use_apm_idle = 0; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c39576c..525514c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -19,6 +19,7 @@ static int __init no_halt(char *s) { + WARN_ONCE(1, "\"no-hlt\" is deprecated, please use \"idle=poll\"\n"); boot_cpu_data.hlt_works_ok = 0; return 1; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 53f02f5..22a073d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -902,7 +902,7 @@ static void vgetcpu_set_mode(void) void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); - init_c1e_mask(); + init_amd_e400_c1e_mask(); #ifdef CONFIG_X86_32 sysenter_setup(); enable_sep_cpu(); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 88a90a9..426a5b6 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -337,7 +337,9 @@ EXPORT_SYMBOL(boot_option_idle_override); * Powermanagement idle function, if any.. */ void (*pm_idle)(void); +#if defined(CONFIG_APM_MODULE) && defined(CONFIG_APM_CPU_IDLE) EXPORT_SYMBOL(pm_idle); +#endif #ifdef CONFIG_X86_32 /* @@ -397,7 +399,7 @@ void default_idle(void) cpu_relax(); } } -#ifdef CONFIG_APM_MODULE +#if defined(CONFIG_APM_MODULE) && defined(CONFIG_APM_CPU_IDLE) EXPORT_SYMBOL(default_idle); #endif @@ -535,45 +537,45 @@ int mwait_usable(const struct cpuinfo_x86 *c) return (edx & MWAIT_EDX_C1); } -bool c1e_detected; -EXPORT_SYMBOL(c1e_detected); +bool amd_e400_c1e_detected; +EXPORT_SYMBOL(amd_e400_c1e_detected); -static cpumask_var_t c1e_mask; +static cpumask_var_t amd_e400_c1e_mask; -void c1e_remove_cpu(int cpu) +void amd_e400_remove_cpu(int cpu) { - if (c1e_mask != NULL) - cpumask_clear_cpu(cpu, c1e_mask); + if (amd_e400_c1e_mask != NULL) + cpumask_clear_cpu(cpu, amd_e400_c1e_mask); } /* - * C1E aware idle routine. We check for C1E active in the interrupt + * AMD Erratum 400 aware idle routine. We check for C1E active in the interrupt * pending message MSR. If we detect C1E, then we handle it the same * way as C3 power states (local apic timer and TSC stop) */ -static void c1e_idle(void) +static void amd_e400_idle(void) { if (need_resched()) return; - if (!c1e_detected) { + if (!amd_e400_c1e_detected) { u32 lo, hi; rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); if (lo & K8_INTP_C1E_ACTIVE_MASK) { - c1e_detected = true; + amd_e400_c1e_detected = true; if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) mark_tsc_unstable("TSC halt in AMD C1E"); printk(KERN_INFO "System has AMD C1E enabled\n"); } } - if (c1e_detected) { + if (amd_e400_c1e_detected) { int cpu = smp_processor_id(); - if (!cpumask_test_cpu(cpu, c1e_mask)) { - cpumask_set_cpu(cpu, c1e_mask); + if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) { + cpumask_set_cpu(cpu, amd_e400_c1e_mask); /* * Force broadcast so ACPI can not interfere. */ @@ -616,17 +618,17 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) pm_idle = mwait_idle; } else if (cpu_has_amd_erratum(amd_erratum_400)) { /* E400: APIC timer interrupt does not wake up CPU from C1e */ - printk(KERN_INFO "using C1E aware idle routine\n"); - pm_idle = c1e_idle; + printk(KERN_INFO "using AMD E400 aware idle routine\n"); + pm_idle = amd_e400_idle; } else pm_idle = default_idle; } -void __init init_c1e_mask(void) +void __init init_amd_e400_c1e_mask(void) { - /* If we're using c1e_idle, we need to allocate c1e_mask. */ - if (pm_idle == c1e_idle) - zalloc_cpumask_var(&c1e_mask, GFP_KERNEL); + /* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */ + if (pm_idle == amd_e400_idle) + zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL); } static int __init idle_setup(char *str) @@ -640,6 +642,7 @@ static int __init idle_setup(char *str) boot_option_idle_override = IDLE_POLL; } else if (!strcmp(str, "mwait")) { boot_option_idle_override = IDLE_FORCE_MWAIT; + WARN_ONCE(1, "\idle=mwait\" will be removed in 2012\"\n"); } else if (!strcmp(str, "halt")) { /* * When the boot option of idle=halt is added, halt is diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a3c430b..eefd967 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1307,7 +1307,7 @@ void play_dead_common(void) { idle_task_exit(); reset_lazy_tlbstate(); - c1e_remove_cpu(raw_smp_processor_id()); + amd_e400_remove_cpu(raw_smp_processor_id()); mb(); /* Ack it */ diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index bc2218d..de0e3df 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -369,6 +369,21 @@ config ACPI_HED which is used to report some hardware errors notified via SCI, mainly the corrected errors. +config ACPI_CUSTOM_METHOD + tristate "Allow ACPI methods to be inserted/replaced at run time" + depends on DEBUG_FS + default n + help + This debug facility allows ACPI AML methods to me inserted and/or + replaced without rebooting the system. For details refer to: + Documentation/acpi/method-customizing.txt. + + NOTE: This option is security sensitive, because it allows arbitrary + kernel memory to be written to by root (uid=0) users, allowing them + to bypass certain security measures (e.g. if root is not allowed to + load additional kernel modules after boot, this feature may be used + to override that restriction). + source "drivers/acpi/apei/Kconfig" endif # ACPI diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index b66fbb2..ecb26b4 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -61,6 +61,7 @@ obj-$(CONFIG_ACPI_SBS) += sbshc.o obj-$(CONFIG_ACPI_SBS) += sbs.o obj-$(CONFIG_ACPI_HED) += hed.o obj-$(CONFIG_ACPI_EC_DEBUGFS) += ec_sys.o +obj-$(CONFIG_ACPI_CUSTOM_METHOD)+= custom_method.o # processor has its own "processor." module_param namespace processor-y := processor_driver.o processor_throttling.o diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile index a122471..301bd2d 100644 --- a/drivers/acpi/acpica/Makefile +++ b/drivers/acpi/acpica/Makefile @@ -14,7 +14,7 @@ acpi-y := dsfield.o dsmthdat.o dsopcode.o dswexec.o dswscope.o \ acpi-y += evevent.o evregion.o evsci.o evxfevnt.o \ evmisc.o evrgnini.o evxface.o evxfregn.o \ - evgpe.o evgpeblk.o evgpeinit.o evgpeutil.o evxfgpe.o + evgpe.o evgpeblk.o evgpeinit.o evgpeutil.o evxfgpe.o evglock.o acpi-y += exconfig.o exfield.o exnames.o exoparg6.o exresolv.o exstorob.o\ exconvrt.o exfldio.o exoparg1.o exprep.o exresop.o exsystem.o\ diff --git a/drivers/acpi/acpica/acconfig.h b/drivers/acpi/acpica/acconfig.h index ab87396..bc533dd 100644 --- a/drivers/acpi/acpica/acconfig.h +++ b/drivers/acpi/acpica/acconfig.h @@ -187,7 +187,6 @@ /* Operation regions */ -#define ACPI_NUM_PREDEFINED_REGIONS 9 #define ACPI_USER_REGION_BEGIN 0x80 /* Maximum space_ids for Operation Regions */ diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h index 41d247d..bea3b48 100644 --- a/drivers/acpi/acpica/acevents.h +++ b/drivers/acpi/acpica/acevents.h @@ -58,12 +58,6 @@ u32 acpi_ev_fixed_event_detect(void); */ u8 acpi_ev_is_notify_object(struct acpi_namespace_node *node); -acpi_status acpi_ev_acquire_global_lock(u16 timeout); - -acpi_status acpi_ev_release_global_lock(void); - -acpi_status acpi_ev_init_global_lock_handler(void); - u32 acpi_ev_get_gpe_number_index(u32 gpe_number); acpi_status @@ -71,6 +65,17 @@ acpi_ev_queue_notify_request(struct acpi_namespace_node *node, u32 notify_value); /* + * evglock - Global Lock support + */ +acpi_status acpi_ev_init_global_lock_handler(void); + +acpi_status acpi_ev_acquire_global_lock(u16 timeout); + +acpi_status acpi_ev_release_global_lock(void); + +acpi_status acpi_ev_remove_global_lock_handler(void); + +/* * evgpe - Low-level GPE support */ u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info *gpe_xrupt_list); diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index d69750b..73863d86 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -214,24 +214,23 @@ ACPI_EXTERN struct acpi_mutex_info acpi_gbl_mutex_info[ACPI_NUM_MUTEX]; /* * Global lock mutex is an actual AML mutex object - * Global lock semaphore works in conjunction with the HW global lock + * Global lock semaphore works in conjunction with the actual global lock + * Global lock spinlock is used for "pending" handshake */ ACPI_EXTERN union acpi_operand_object *acpi_gbl_global_lock_mutex; ACPI_EXTERN acpi_semaphore acpi_gbl_global_lock_semaphore; +ACPI_EXTERN acpi_spinlock acpi_gbl_global_lock_pending_lock; ACPI_EXTERN u16 acpi_gbl_global_lock_handle; ACPI_EXTERN u8 acpi_gbl_global_lock_acquired; ACPI_EXTERN u8 acpi_gbl_global_lock_present; +ACPI_EXTERN u8 acpi_gbl_global_lock_pending; /* * Spinlocks are used for interfaces that can be possibly called at * interrupt level */ -ACPI_EXTERN spinlock_t _acpi_gbl_gpe_lock; /* For GPE data structs and registers */ -ACPI_EXTERN spinlock_t _acpi_gbl_hardware_lock; /* For ACPI H/W except GPE registers */ -ACPI_EXTERN spinlock_t _acpi_ev_global_lock_pending_lock; /* For global lock */ -#define acpi_gbl_gpe_lock &_acpi_gbl_gpe_lock -#define acpi_gbl_hardware_lock &_acpi_gbl_hardware_lock -#define acpi_ev_global_lock_pending_lock &_acpi_ev_global_lock_pending_lock +ACPI_EXTERN acpi_spinlock acpi_gbl_gpe_lock; /* For GPE data structs and registers */ +ACPI_EXTERN acpi_spinlock acpi_gbl_hardware_lock; /* For ACPI H/W except GPE registers */ /***************************************************************************** * diff --git a/drivers/acpi/acpica/amlcode.h b/drivers/acpi/acpica/amlcode.h index f4f0998..1077f17 100644 --- a/drivers/acpi/acpica/amlcode.h +++ b/drivers/acpi/acpica/amlcode.h @@ -394,21 +394,6 @@ #define AML_CLASS_METHOD_CALL 0x09 #define AML_CLASS_UNKNOWN 0x0A -/* Predefined Operation Region space_iDs */ - -typedef enum { - REGION_MEMORY = 0, - REGION_IO, - REGION_PCI_CONFIG, - REGION_EC, - REGION_SMBUS, - REGION_CMOS, - REGION_PCI_BAR, - REGION_IPMI, - REGION_DATA_TABLE, /* Internal use only */ - REGION_FIXED_HW = 0x7F -} AML_REGION_TYPES; - /* Comparison operation codes for match_op operator */ typedef enum { diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c index 23a3b1a..324acec 100644 --- a/drivers/acpi/acpica/dswload.c +++ b/drivers/acpi/acpica/dswload.c @@ -450,7 +450,7 @@ acpi_status acpi_ds_load1_end_op(struct acpi_walk_state *walk_state) status = acpi_ex_create_region(op->named.data, op->named.length, - REGION_DATA_TABLE, + ACPI_ADR_SPACE_DATA_TABLE, walk_state); if (ACPI_FAILURE(status)) { return_ACPI_STATUS(status); diff --git a/drivers/acpi/acpica/dswload2.c b/drivers/acpi/acpica/dswload2.c index 4be4e92..9763181 100644 --- a/drivers/acpi/acpica/dswload2.c +++ b/drivers/acpi/acpica/dswload2.c @@ -562,7 +562,7 @@ acpi_status acpi_ds_load2_end_op(struct acpi_walk_state *walk_state) ((op->common.value.arg)->common.value. integer); } else { - region_space = REGION_DATA_TABLE; + region_space = ACPI_ADR_SPACE_DATA_TABLE; } /* diff --git a/drivers/acpi/acpica/evglock.c b/drivers/acpi/acpica/evglock.c new file mode 100644 index 0000000..56a562a --- /dev/null +++ b/drivers/acpi/acpica/evglock.c @@ -0,0 +1,335 @@ +/****************************************************************************** + * + * Module Name: evglock - Global Lock support + * + *****************************************************************************/ + +/* + * Copyright (C) 2000 - 2011, Intel Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + */ + +#include <acpi/acpi.h> +#include "accommon.h" +#include "acevents.h" +#include "acinterp.h" + +#define _COMPONENT ACPI_EVENTS +ACPI_MODULE_NAME("evglock") + +/* Local prototypes */ +static u32 acpi_ev_global_lock_handler(void *context); + +/******************************************************************************* + * + * FUNCTION: acpi_ev_init_global_lock_handler + * + * PARAMETERS: None + * + * RETURN: Status + * + * DESCRIPTION: Install a handler for the global lock release event + * + ******************************************************************************/ + +acpi_status acpi_ev_init_global_lock_handler(void) +{ + acpi_status status; + + ACPI_FUNCTION_TRACE(ev_init_global_lock_handler); + + /* Attempt installation of the global lock handler */ + + status = acpi_install_fixed_event_handler(ACPI_EVENT_GLOBAL, + acpi_ev_global_lock_handler, + NULL); + + /* + * If the global lock does not exist on this platform, the attempt to + * enable GBL_STATUS will fail (the GBL_ENABLE bit will not stick). + * Map to AE_OK, but mark global lock as not present. Any attempt to + * actually use the global lock will be flagged with an error. + */ + acpi_gbl_global_lock_present = FALSE; + if (status == AE_NO_HARDWARE_RESPONSE) { + ACPI_ERROR((AE_INFO, + "No response from Global Lock hardware, disabling lock")); + + return_ACPI_STATUS(AE_OK); + } + + status = acpi_os_create_lock(&acpi_gbl_global_lock_pending_lock); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + acpi_gbl_global_lock_pending = FALSE; + acpi_gbl_global_lock_present = TRUE; + return_ACPI_STATUS(status); +} + +/******************************************************************************* + * + * FUNCTION: acpi_ev_remove_global_lock_handler + * + * PARAMETERS: None + * + * RETURN: Status + * + * DESCRIPTION: Remove the handler for the Global Lock + * + ******************************************************************************/ + +acpi_status acpi_ev_remove_global_lock_handler(void) +{ + acpi_status status; + + ACPI_FUNCTION_TRACE(ev_remove_global_lock_handler); + + acpi_gbl_global_lock_present = FALSE; + status = acpi_remove_fixed_event_handler(ACPI_EVENT_GLOBAL, + acpi_ev_global_lock_handler); + + return_ACPI_STATUS(status); +} + +/******************************************************************************* + * + * FUNCTION: acpi_ev_global_lock_handler + * + * PARAMETERS: Context - From thread interface, not used + * + * RETURN: ACPI_INTERRUPT_HANDLED + * + * DESCRIPTION: Invoked directly from the SCI handler when a global lock + * release interrupt occurs. If there is actually a pending + * request for the lock, signal the waiting thread. + * + ******************************************************************************/ + +static u32 acpi_ev_global_lock_handler(void *context) +{ + acpi_status status; + acpi_cpu_flags flags; + + flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock); + + /* + * If a request for the global lock is not actually pending, + * we are done. This handles "spurious" global lock interrupts + * which are possible (and have been seen) with bad BIOSs. + */ + if (!acpi_gbl_global_lock_pending) { + goto cleanup_and_exit; + } + + /* + * Send a unit to the global lock semaphore. The actual acquisition + * of the global lock will be performed by the waiting thread. + */ + status = acpi_os_signal_semaphore(acpi_gbl_global_lock_semaphore, 1); + if (ACPI_FAILURE(status)) { + ACPI_ERROR((AE_INFO, "Could not signal Global Lock semaphore")); + } + + acpi_gbl_global_lock_pending = FALSE; + + cleanup_and_exit: + + acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags); + return (ACPI_INTERRUPT_HANDLED); +} + +/****************************************************************************** + * + * FUNCTION: acpi_ev_acquire_global_lock + * + * PARAMETERS: Timeout - Max time to wait for the lock, in millisec. + * + * RETURN: Status + * + * DESCRIPTION: Attempt to gain ownership of the Global Lock. + * + * MUTEX: Interpreter must be locked + * + * Note: The original implementation allowed multiple threads to "acquire" the + * Global Lock, and the OS would hold the lock until the last thread had + * released it. However, this could potentially starve the BIOS out of the + * lock, especially in the case where there is a tight handshake between the + * Embedded Controller driver and the BIOS. Therefore, this implementation + * allows only one thread to acquire the HW Global Lock at a time, and makes + * the global lock appear as a standard mutex on the OS side. + * + *****************************************************************************/ + +acpi_status acpi_ev_acquire_global_lock(u16 timeout) +{ + acpi_cpu_flags flags; + acpi_status status; + u8 acquired = FALSE; + + ACPI_FUNCTION_TRACE(ev_acquire_global_lock); + + /* + * Only one thread can acquire the GL at a time, the global_lock_mutex + * enforces this. This interface releases the interpreter if we must wait. + */ + status = + acpi_ex_system_wait_mutex(acpi_gbl_global_lock_mutex->mutex. + os_mutex, timeout); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* + * Update the global lock handle and check for wraparound. The handle is + * only used for the external global lock interfaces, but it is updated + * here to properly handle the case where a single thread may acquire the + * lock via both the AML and the acpi_acquire_global_lock interfaces. The + * handle is therefore updated on the first acquire from a given thread + * regardless of where the acquisition request originated. + */ + acpi_gbl_global_lock_handle++; + if (acpi_gbl_global_lock_handle == 0) { + acpi_gbl_global_lock_handle = 1; + } + + /* + * Make sure that a global lock actually exists. If not, just + * treat the lock as a standard mutex. + */ + if (!acpi_gbl_global_lock_present) { + acpi_gbl_global_lock_acquired = TRUE; + return_ACPI_STATUS(AE_OK); + } + + flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock); + + do { + + /* Attempt to acquire the actual hardware lock */ + + ACPI_ACQUIRE_GLOBAL_LOCK(acpi_gbl_FACS, acquired); + if (acquired) { + acpi_gbl_global_lock_acquired = TRUE; + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, + "Acquired hardware Global Lock\n")); + break; + } + + /* + * Did not get the lock. The pending bit was set above, and + * we must now wait until we receive the global lock + * released interrupt. + */ + acpi_gbl_global_lock_pending = TRUE; + acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags); + + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, + "Waiting for hardware Global Lock\n")); + + /* + * Wait for handshake with the global lock interrupt handler. + * This interface releases the interpreter if we must wait. + */ + status = + acpi_ex_system_wait_semaphore + (acpi_gbl_global_lock_semaphore, ACPI_WAIT_FOREVER); + + flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock); + + } while (ACPI_SUCCESS(status)); + + acpi_gbl_global_lock_pending = FALSE; + acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags); + + return_ACPI_STATUS(status); +} + +/******************************************************************************* + * + * FUNCTION: acpi_ev_release_global_lock + * + * PARAMETERS: None + * + * RETURN: Status + * + * DESCRIPTION: Releases ownership of the Global Lock. + * + ******************************************************************************/ + +acpi_status acpi_ev_release_global_lock(void) +{ + u8 pending = FALSE; + acpi_status status = AE_OK; + + ACPI_FUNCTION_TRACE(ev_release_global_lock); + + /* Lock must be already acquired */ + + if (!acpi_gbl_global_lock_acquired) { + ACPI_WARNING((AE_INFO, + "Cannot release the ACPI Global Lock, it has not been acquired")); + return_ACPI_STATUS(AE_NOT_ACQUIRED); + } + + if (acpi_gbl_global_lock_present) { + + /* Allow any thread to release the lock */ + + ACPI_RELEASE_GLOBAL_LOCK(acpi_gbl_FACS, pending); + + /* + * If the pending bit was set, we must write GBL_RLS to the control + * register + */ + if (pending) { + status = + acpi_write_bit_register + (ACPI_BITREG_GLOBAL_LOCK_RELEASE, + ACPI_ENABLE_EVENT); + } + + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, + "Released hardware Global Lock\n")); + } + + acpi_gbl_global_lock_acquired = FALSE; + + /* Release the local GL mutex */ + + acpi_os_release_mutex(acpi_gbl_global_lock_mutex->mutex.os_mutex); + return_ACPI_STATUS(status); +} diff --git a/drivers/acpi/acpica/evmisc.c b/drivers/acpi/acpica/evmisc.c index 7dc8094..d0b3318 100644 --- a/drivers/acpi/acpica/evmisc.c +++ b/drivers/acpi/acpica/evmisc.c @@ -45,7 +45,6 @@ #include "accommon.h" #include "acevents.h" #include "acnamesp.h" -#include "acinterp.h" #define _COMPONENT ACPI_EVENTS ACPI_MODULE_NAME("evmisc") @@ -53,10 +52,6 @@ ACPI_MODULE_NAME("evmisc") /* Local prototypes */ static void ACPI_SYSTEM_XFACE acpi_ev_notify_dispatch(void *context); -static u32 acpi_ev_global_lock_handler(void *context); - -static acpi_status acpi_ev_remove_global_lock_handler(void); - /******************************************************************************* * * FUNCTION: acpi_ev_is_notify_object @@ -275,304 +270,6 @@ static void ACPI_SYSTEM_XFACE acpi_ev_notify_dispatch(void *context) acpi_ut_delete_generic_state(notify_info); } -/******************************************************************************* - * - * FUNCTION: acpi_ev_global_lock_handler - * - * PARAMETERS: Context - From thread interface, not used - * - * RETURN: ACPI_INTERRUPT_HANDLED - * - * DESCRIPTION: Invoked directly from the SCI handler when a global lock - * release interrupt occurs. If there's a thread waiting for - * the global lock, signal it. - * - * NOTE: Assumes that the semaphore can be signaled from interrupt level. If - * this is not possible for some reason, a separate thread will have to be - * scheduled to do this. - * - ******************************************************************************/ -static u8 acpi_ev_global_lock_pending; - -static u32 acpi_ev_global_lock_handler(void *context) -{ - acpi_status status; - acpi_cpu_flags flags; - - flags = acpi_os_acquire_lock(acpi_ev_global_lock_pending_lock); - - if (!acpi_ev_global_lock_pending) { - goto out; - } - - /* Send a unit to the semaphore */ - - status = acpi_os_signal_semaphore(acpi_gbl_global_lock_semaphore, 1); - if (ACPI_FAILURE(status)) { - ACPI_ERROR((AE_INFO, "Could not signal Global Lock semaphore")); - } - - acpi_ev_global_lock_pending = FALSE; - - out: - acpi_os_release_lock(acpi_ev_global_lock_pending_lock, flags); - - return (ACPI_INTERRUPT_HANDLED); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ev_init_global_lock_handler - * - * PARAMETERS: None - * - * RETURN: Status - * - * DESCRIPTION: Install a handler for the global lock release event - * - ******************************************************************************/ - -acpi_status acpi_ev_init_global_lock_handler(void) -{ - acpi_status status; - - ACPI_FUNCTION_TRACE(ev_init_global_lock_handler); - - /* Attempt installation of the global lock handler */ - - status = acpi_install_fixed_event_handler(ACPI_EVENT_GLOBAL, - acpi_ev_global_lock_handler, - NULL); - - /* - * If the global lock does not exist on this platform, the attempt to - * enable GBL_STATUS will fail (the GBL_ENABLE bit will not stick). - * Map to AE_OK, but mark global lock as not present. Any attempt to - * actually use the global lock will be flagged with an error. - */ - if (status == AE_NO_HARDWARE_RESPONSE) { - ACPI_ERROR((AE_INFO, - "No response from Global Lock hardware, disabling lock")); - - acpi_gbl_global_lock_present = FALSE; - return_ACPI_STATUS(AE_OK); - } - - acpi_gbl_global_lock_present = TRUE; - return_ACPI_STATUS(status); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ev_remove_global_lock_handler - * - * PARAMETERS: None - * - * RETURN: Status - * - * DESCRIPTION: Remove the handler for the Global Lock - * - ******************************************************************************/ - -static acpi_status acpi_ev_remove_global_lock_handler(void) -{ - acpi_status status; - - ACPI_FUNCTION_TRACE(ev_remove_global_lock_handler); - - acpi_gbl_global_lock_present = FALSE; - status = acpi_remove_fixed_event_handler(ACPI_EVENT_GLOBAL, - acpi_ev_global_lock_handler); - - return_ACPI_STATUS(status); -} - -/****************************************************************************** - * - * FUNCTION: acpi_ev_acquire_global_lock - * - * PARAMETERS: Timeout - Max time to wait for the lock, in millisec. - * - * RETURN: Status - * - * DESCRIPTION: Attempt to gain ownership of the Global Lock. - * - * MUTEX: Interpreter must be locked - * - * Note: The original implementation allowed multiple threads to "acquire" the - * Global Lock, and the OS would hold the lock until the last thread had - * released it. However, this could potentially starve the BIOS out of the - * lock, especially in the case where there is a tight handshake between the - * Embedded Controller driver and the BIOS. Therefore, this implementation - * allows only one thread to acquire the HW Global Lock at a time, and makes - * the global lock appear as a standard mutex on the OS side. - * - *****************************************************************************/ -static acpi_thread_id acpi_ev_global_lock_thread_id; -static int acpi_ev_global_lock_acquired; - -acpi_status acpi_ev_acquire_global_lock(u16 timeout) -{ - acpi_cpu_flags flags; - acpi_status status = AE_OK; - u8 acquired = FALSE; - - ACPI_FUNCTION_TRACE(ev_acquire_global_lock); - - /* - * Only one thread can acquire the GL at a time, the global_lock_mutex - * enforces this. This interface releases the interpreter if we must wait. - */ - status = acpi_ex_system_wait_mutex( - acpi_gbl_global_lock_mutex->mutex.os_mutex, 0); - if (status == AE_TIME) { - if (acpi_ev_global_lock_thread_id == acpi_os_get_thread_id()) { - acpi_ev_global_lock_acquired++; - return AE_OK; - } - } - - if (ACPI_FAILURE(status)) { - status = acpi_ex_system_wait_mutex( - acpi_gbl_global_lock_mutex->mutex.os_mutex, - timeout); - } - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - acpi_ev_global_lock_thread_id = acpi_os_get_thread_id(); - acpi_ev_global_lock_acquired++; - - /* - * Update the global lock handle and check for wraparound. The handle is - * only used for the external global lock interfaces, but it is updated - * here to properly handle the case where a single thread may acquire the - * lock via both the AML and the acpi_acquire_global_lock interfaces. The - * handle is therefore updated on the first acquire from a given thread - * regardless of where the acquisition request originated. - */ - acpi_gbl_global_lock_handle++; - if (acpi_gbl_global_lock_handle == 0) { - acpi_gbl_global_lock_handle = 1; - } - - /* - * Make sure that a global lock actually exists. If not, just treat the - * lock as a standard mutex. - */ - if (!acpi_gbl_global_lock_present) { - acpi_gbl_global_lock_acquired = TRUE; - return_ACPI_STATUS(AE_OK); - } - - flags = acpi_os_acquire_lock(acpi_ev_global_lock_pending_lock); - - do { - - /* Attempt to acquire the actual hardware lock */ - - ACPI_ACQUIRE_GLOBAL_LOCK(acpi_gbl_FACS, acquired); - if (acquired) { - acpi_gbl_global_lock_acquired = TRUE; - - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, - "Acquired hardware Global Lock\n")); - break; - } - - acpi_ev_global_lock_pending = TRUE; - - acpi_os_release_lock(acpi_ev_global_lock_pending_lock, flags); - - /* - * Did not get the lock. The pending bit was set above, and we - * must wait until we get the global lock released interrupt. - */ - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, - "Waiting for hardware Global Lock\n")); - - /* - * Wait for handshake with the global lock interrupt handler. - * This interface releases the interpreter if we must wait. - */ - status = acpi_ex_system_wait_semaphore( - acpi_gbl_global_lock_semaphore, - ACPI_WAIT_FOREVER); - - flags = acpi_os_acquire_lock(acpi_ev_global_lock_pending_lock); - - } while (ACPI_SUCCESS(status)); - - acpi_ev_global_lock_pending = FALSE; - - acpi_os_release_lock(acpi_ev_global_lock_pending_lock, flags); - - return_ACPI_STATUS(status); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ev_release_global_lock - * - * PARAMETERS: None - * - * RETURN: Status - * - * DESCRIPTION: Releases ownership of the Global Lock. - * - ******************************************************************************/ - -acpi_status acpi_ev_release_global_lock(void) -{ - u8 pending = FALSE; - acpi_status status = AE_OK; - - ACPI_FUNCTION_TRACE(ev_release_global_lock); - - /* Lock must be already acquired */ - - if (!acpi_gbl_global_lock_acquired) { - ACPI_WARNING((AE_INFO, - "Cannot release the ACPI Global Lock, it has not been acquired")); - return_ACPI_STATUS(AE_NOT_ACQUIRED); - } - - acpi_ev_global_lock_acquired--; - if (acpi_ev_global_lock_acquired > 0) { - return AE_OK; - } - - if (acpi_gbl_global_lock_present) { - - /* Allow any thread to release the lock */ - - ACPI_RELEASE_GLOBAL_LOCK(acpi_gbl_FACS, pending); - - /* - * If the pending bit was set, we must write GBL_RLS to the control - * register - */ - if (pending) { - status = - acpi_write_bit_register - (ACPI_BITREG_GLOBAL_LOCK_RELEASE, - ACPI_ENABLE_EVENT); - } - - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, - "Released hardware Global Lock\n")); - } - - acpi_gbl_global_lock_acquired = FALSE; - - /* Release the local GL mutex */ - acpi_ev_global_lock_thread_id = 0; - acpi_ev_global_lock_acquired = 0; - acpi_os_release_mutex(acpi_gbl_global_lock_mutex->mutex.os_mutex); - return_ACPI_STATUS(status); -} - /****************************************************************************** * * FUNCTION: acpi_ev_terminate diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c index bea7223..f0edf5c 100644 --- a/drivers/acpi/acpica/evregion.c +++ b/drivers/acpi/acpica/evregion.c @@ -55,6 +55,8 @@ static u8 acpi_ev_has_default_handler(struct acpi_namespace_node *node, acpi_adr_space_type space_id); +static void acpi_ev_orphan_ec_reg_method(void); + static acpi_status acpi_ev_reg_run(acpi_handle obj_handle, u32 level, void *context, void **return_value); @@ -561,7 +563,9 @@ acpi_ev_detach_region(union acpi_operand_object *region_obj, /* Now stop region accesses by executing the _REG method */ - status = acpi_ev_execute_reg_method(region_obj, 0); + status = + acpi_ev_execute_reg_method(region_obj, + ACPI_REG_DISCONNECT); if (ACPI_FAILURE(status)) { ACPI_EXCEPTION((AE_INFO, status, "from region _REG, [%s]", @@ -1062,6 +1066,12 @@ acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, ACPI_NS_WALK_UNLOCK, acpi_ev_reg_run, NULL, &space_id, NULL); + /* Special case for EC: handle "orphan" _REG methods with no region */ + + if (space_id == ACPI_ADR_SPACE_EC) { + acpi_ev_orphan_ec_reg_method(); + } + return_ACPI_STATUS(status); } @@ -1120,6 +1130,113 @@ acpi_ev_reg_run(acpi_handle obj_handle, return (AE_OK); } - status = acpi_ev_execute_reg_method(obj_desc, 1); + status = acpi_ev_execute_reg_method(obj_desc, ACPI_REG_CONNECT); return (status); } + +/******************************************************************************* + * + * FUNCTION: acpi_ev_orphan_ec_reg_method + * + * PARAMETERS: None + * + * RETURN: None + * + * DESCRIPTION: Execute an "orphan" _REG method that appears under the EC + * device. This is a _REG method that has no corresponding region + * within the EC device scope. The orphan _REG method appears to + * have been enabled by the description of the ECDT in the ACPI + * specification: "The availability of the region space can be + * detected by providing a _REG method object underneath the + * Embedded Controller device." + * + * To quickly access the EC device, we use the EC_ID that appears + * within the ECDT. Otherwise, we would need to perform a time- + * consuming namespace walk, executing _HID methods to find the + * EC device. + * + ******************************************************************************/ + +static void acpi_ev_orphan_ec_reg_method(void) +{ + struct acpi_table_ecdt *table; + acpi_status status; + struct acpi_object_list args; + union acpi_object objects[2]; + struct acpi_namespace_node *ec_device_node; + struct acpi_namespace_node *reg_method; + struct acpi_namespace_node *next_node; + + ACPI_FUNCTION_TRACE(ev_orphan_ec_reg_method); + + /* Get the ECDT (if present in system) */ + + status = acpi_get_table(ACPI_SIG_ECDT, 0, + ACPI_CAST_INDIRECT_PTR(struct acpi_table_header, + &table)); + if (ACPI_FAILURE(status)) { + return_VOID; + } + + /* We need a valid EC_ID string */ + + if (!(*table->id)) { + return_VOID; + } + + /* Namespace is currently locked, must release */ + + (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); + + /* Get a handle to the EC device referenced in the ECDT */ + + status = acpi_get_handle(NULL, + ACPI_CAST_PTR(char, table->id), + ACPI_CAST_PTR(acpi_handle, &ec_device_node)); + if (ACPI_FAILURE(status)) { + goto exit; + } + + /* Get a handle to a _REG method immediately under the EC device */ + + status = acpi_get_handle(ec_device_node, + METHOD_NAME__REG, ACPI_CAST_PTR(acpi_handle, + ®_method)); + if (ACPI_FAILURE(status)) { + goto exit; + } + + /* + * Execute the _REG method only if there is no Operation Region in + * this scope with the Embedded Controller space ID. Otherwise, it + * will already have been executed. Note, this allows for Regions + * with other space IDs to be present; but the code below will then + * execute the _REG method with the EC space ID argument. + */ + next_node = acpi_ns_get_next_node(ec_device_node, NULL); + while (next_node) { + if ((next_node->type == ACPI_TYPE_REGION) && + (next_node->object) && + (next_node->object->region.space_id == ACPI_ADR_SPACE_EC)) { + goto exit; /* Do not execute _REG */ + } + next_node = acpi_ns_get_next_node(ec_device_node, next_node); + } + + /* Evaluate the _REG(EC,Connect) method */ + + args.count = 2; + args.pointer = objects; + objects[0].type = ACPI_TYPE_INTEGER; + objects[0].integer.value = ACPI_ADR_SPACE_EC; + objects[1].type = ACPI_TYPE_INTEGER; + objects[1].integer.value = ACPI_REG_CONNECT; + + status = acpi_evaluate_object(reg_method, NULL, &args, NULL); + + exit: + /* We ignore all errors from above, don't care */ + + status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); + return_VOID; +} diff --git a/drivers/acpi/acpica/evrgnini.c b/drivers/acpi/acpica/evrgnini.c index 9659cee..55a5d35 100644 --- a/drivers/acpi/acpica/evrgnini.c +++ b/drivers/acpi/acpica/evrgnini.c @@ -637,7 +637,7 @@ acpi_ev_initialize_region(union acpi_operand_object *region_obj, status = acpi_ev_execute_reg_method - (region_obj, 1); + (region_obj, ACPI_REG_CONNECT); if (acpi_ns_locked) { status = diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c index c85c8c4..00cd956 100644 --- a/drivers/acpi/acpica/evxfregn.c +++ b/drivers/acpi/acpica/evxfregn.c @@ -130,20 +130,21 @@ acpi_install_address_space_handler(acpi_handle device, case ACPI_ADR_SPACE_PCI_CONFIG: case ACPI_ADR_SPACE_DATA_TABLE: - if (acpi_gbl_reg_methods_executed) { + if (!acpi_gbl_reg_methods_executed) { - /* Run all _REG methods for this address space */ - - status = acpi_ev_execute_reg_methods(node, space_id); + /* We will defer execution of the _REG methods for this space */ + goto unlock_and_exit; } break; default: - - status = acpi_ev_execute_reg_methods(node, space_id); break; } + /* Run all _REG methods for this address space */ + + status = acpi_ev_execute_reg_methods(node, space_id); + unlock_and_exit: (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); return_ACPI_STATUS(status); diff --git a/drivers/acpi/acpica/excreate.c b/drivers/acpi/acpica/excreate.c index e7b372d..110711a 100644 --- a/drivers/acpi/acpica/excreate.c +++ b/drivers/acpi/acpica/excreate.c @@ -305,7 +305,8 @@ acpi_ex_create_region(u8 * aml_start, * range */ if ((region_space >= ACPI_NUM_PREDEFINED_REGIONS) && - (region_space < ACPI_USER_REGION_BEGIN)) { + (region_space < ACPI_USER_REGION_BEGIN) && + (region_space != ACPI_ADR_SPACE_DATA_TABLE)) { ACPI_ERROR((AE_INFO, "Invalid AddressSpace type 0x%X", region_space)); return_ACPI_STATUS(AE_AML_INVALID_SPACE_ID); diff --git a/drivers/acpi/acpica/nsrepair.c b/drivers/acpi/acpica/nsrepair.c index 1d76ac8..ac7b854 100644 --- a/drivers/acpi/acpica/nsrepair.c +++ b/drivers/acpi/acpica/nsrepair.c @@ -74,7 +74,6 @@ ACPI_MODULE_NAME("nsrepair") * * Additional possible repairs: * - * Optional/unnecessary NULL package elements removed * Required package elements that are NULL replaced by Integer/String/Buffer * Incorrect standalone package wrapped with required outer package * @@ -623,16 +622,12 @@ acpi_ns_remove_null_elements(struct acpi_predefined_data *data, ACPI_FUNCTION_NAME(ns_remove_null_elements); /* - * PTYPE1 packages contain no subpackages. - * PTYPE2 packages contain a variable number of sub-packages. We can - * safely remove all NULL elements from the PTYPE2 packages. + * We can safely remove all NULL elements from these package types: + * PTYPE1_VAR packages contain a variable number of simple data types. + * PTYPE2 packages contain a variable number of sub-packages. */ switch (package_type) { - case ACPI_PTYPE1_FIXED: case ACPI_PTYPE1_VAR: - case ACPI_PTYPE1_OPTION: - return; - case ACPI_PTYPE2: case ACPI_PTYPE2_COUNT: case ACPI_PTYPE2_PKG_COUNT: @@ -642,6 +637,8 @@ acpi_ns_remove_null_elements(struct acpi_predefined_data *data, break; default: + case ACPI_PTYPE1_FIXED: + case ACPI_PTYPE1_OPTION: return; } diff --git a/drivers/acpi/acpica/utdecode.c b/drivers/acpi/acpica/utdecode.c index 136a814..97cb36f 100644 --- a/drivers/acpi/acpica/utdecode.c +++ b/drivers/acpi/acpica/utdecode.c @@ -170,8 +170,7 @@ const char *acpi_gbl_region_types[ACPI_NUM_PREDEFINED_REGIONS] = { "SMBus", "SystemCMOS", "PCIBARTarget", - "IPMI", - "DataTable" + "IPMI" }; char *acpi_ut_get_region_name(u8 space_id) @@ -179,6 +178,8 @@ char *acpi_ut_get_region_name(u8 space_id) if (space_id >= ACPI_USER_REGION_BEGIN) { return ("UserDefinedRegion"); + } else if (space_id == ACPI_ADR_SPACE_DATA_TABLE) { + return ("DataTable"); } else if (space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) { return ("FunctionalFixedHW"); } else if (space_id >= ACPI_NUM_PREDEFINED_REGIONS) { diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c index a946c68..7d797e2 100644 --- a/drivers/acpi/acpica/utmutex.c +++ b/drivers/acpi/acpica/utmutex.c @@ -83,9 +83,15 @@ acpi_status acpi_ut_mutex_initialize(void) /* Create the spinlocks for use at interrupt level */ - spin_lock_init(acpi_gbl_gpe_lock); - spin_lock_init(acpi_gbl_hardware_lock); - spin_lock_init(acpi_ev_global_lock_pending_lock); + status = acpi_os_create_lock (&acpi_gbl_gpe_lock); + if (ACPI_FAILURE (status)) { + return_ACPI_STATUS (status); + } + + status = acpi_os_create_lock (&acpi_gbl_hardware_lock); + if (ACPI_FAILURE (status)) { + return_ACPI_STATUS (status); + } /* Mutex for _OSI support */ status = acpi_os_create_mutex(&acpi_gbl_osi_mutex); diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 9749980..d1e06c1 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -227,7 +227,7 @@ static int __acpi_bus_set_power(struct acpi_device *device, int state) acpi_status status = AE_OK; char object_name[5] = { '_', 'P', 'S', '0' + state, '\0' }; - if (!device || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3)) + if (!device || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3_COLD)) return -EINVAL; /* Make sure this is a valid target state */ diff --git a/drivers/acpi/custom_method.c b/drivers/acpi/custom_method.c new file mode 100644 index 0000000..5d42c24 --- /dev/null +++ b/drivers/acpi/custom_method.c @@ -0,0 +1,100 @@ +/* + * debugfs.c - ACPI debugfs interface to userspace. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/uaccess.h> +#include <linux/debugfs.h> +#include <acpi/acpi_drivers.h> + +#include "internal.h" + +#define _COMPONENT ACPI_SYSTEM_COMPONENT +ACPI_MODULE_NAME("custom_method"); +MODULE_LICENSE("GPL"); + +static struct dentry *cm_dentry; + +/* /sys/kernel/debug/acpi/custom_method */ + +static ssize_t cm_write(struct file *file, const char __user * user_buf, + size_t count, loff_t *ppos) +{ + static char *buf; + static u32 max_size; + static u32 uncopied_bytes; + + struct acpi_table_header table; + acpi_status status; + + if (!(*ppos)) { + /* parse the table header to get the table length */ + if (count <= sizeof(struct acpi_table_header)) + return -EINVAL; + if (copy_from_user(&table, user_buf, + sizeof(struct acpi_table_header))) + return -EFAULT; + uncopied_bytes = max_size = table.length; + buf = kzalloc(max_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + } + + if (buf == NULL) + return -EINVAL; + + if ((*ppos > max_size) || + (*ppos + count > max_size) || + (*ppos + count < count) || + (count > uncopied_bytes)) + return -EINVAL; + + if (copy_from_user(buf + (*ppos), user_buf, count)) { + kfree(buf); + buf = NULL; + return -EFAULT; + } + + uncopied_bytes -= count; + *ppos += count; + + if (!uncopied_bytes) { + status = acpi_install_method(buf); + kfree(buf); + buf = NULL; + if (ACPI_FAILURE(status)) + return -EINVAL; + add_taint(TAINT_OVERRIDDEN_ACPI_TABLE); + } + + return count; +} + +static const struct file_operations cm_fops = { + .write = cm_write, + .llseek = default_llseek, +}; + +static int __init acpi_custom_method_init(void) +{ + if (acpi_debugfs_dir == NULL) + return -ENOENT; + + cm_dentry = debugfs_create_file("custom_method", S_IWUSR, + acpi_debugfs_dir, NULL, &cm_fops); + if (cm_dentry == NULL) + return -ENODEV; + + return 0; +} + +static void __exit acpi_custom_method_exit(void) +{ + if (cm_dentry) + debugfs_remove(cm_dentry); + } + +module_init(acpi_custom_method_init); +module_exit(acpi_custom_method_exit); diff --git a/drivers/acpi/debugfs.c b/drivers/acpi/debugfs.c index 384f7ab..182a9fc 100644 --- a/drivers/acpi/debugfs.c +++ b/drivers/acpi/debugfs.c @@ -3,100 +3,16 @@ */ #include <linux/init.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/uaccess.h> #include <linux/debugfs.h> #include <acpi/acpi_drivers.h> #define _COMPONENT ACPI_SYSTEM_COMPONENT ACPI_MODULE_NAME("debugfs"); +struct dentry *acpi_debugfs_dir; +EXPORT_SYMBOL_GPL(acpi_debugfs_dir); -/* /sys/modules/acpi/parameters/aml_debug_output */ - -module_param_named(aml_debug_output, acpi_gbl_enable_aml_debug_object, - bool, 0644); -MODULE_PARM_DESC(aml_debug_output, - "To enable/disable the ACPI Debug Object output."); - -/* /sys/kernel/debug/acpi/custom_method */ - -static ssize_t cm_write(struct file *file, const char __user * user_buf, - size_t count, loff_t *ppos) +void __init acpi_debugfs_init(void) { - static char *buf; - static u32 max_size; - static u32 uncopied_bytes; - - struct acpi_table_header table; - acpi_status status; - - if (!(*ppos)) { - /* parse the table header to get the table length */ - if (count <= sizeof(struct acpi_table_header)) - return -EINVAL; - if (copy_from_user(&table, user_buf, - sizeof(struct acpi_table_header))) - return -EFAULT; - uncopied_bytes = max_size = table.length; - buf = kzalloc(max_size, GFP_KERNEL); - if (!buf) - return -ENOMEM; - } - - if (buf == NULL) - return -EINVAL; - - if ((*ppos > max_size) || - (*ppos + count > max_size) || - (*ppos + count < count) || - (count > uncopied_bytes)) - return -EINVAL; - - if (copy_from_user(buf + (*ppos), user_buf, count)) { - kfree(buf); - buf = NULL; - return -EFAULT; - } - - uncopied_bytes -= count; - *ppos += count; - - if (!uncopied_bytes) { - status = acpi_install_method(buf); - kfree(buf); - buf = NULL; - if (ACPI_FAILURE(status)) - return -EINVAL; - add_taint(TAINT_OVERRIDDEN_ACPI_TABLE); - } - - return count; -} - -static const struct file_operations cm_fops = { - .write = cm_write, - .llseek = default_llseek, -}; - -int __init acpi_debugfs_init(void) -{ - struct dentry *acpi_dir, *cm_dentry; - - acpi_dir = debugfs_create_dir("acpi", NULL); - if (!acpi_dir) - goto err; - - cm_dentry = debugfs_create_file("custom_method", S_IWUSR, - acpi_dir, NULL, &cm_fops); - if (!cm_dentry) - goto err; - - return 0; - -err: - if (acpi_dir) - debugfs_remove(acpi_dir); - return -EINVAL; + acpi_debugfs_dir = debugfs_create_dir("acpi", NULL); } diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index fa848c4..b19a18d 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -69,7 +69,6 @@ enum ec_command { #define ACPI_EC_DELAY 500 /* Wait 500ms max. during EC ops */ #define ACPI_EC_UDELAY_GLK 1000 /* Wait 1ms max. to get global lock */ -#define ACPI_EC_CDELAY 10 /* Wait 10us before polling EC */ #define ACPI_EC_MSI_UDELAY 550 /* Wait 550us for MSI EC */ #define ACPI_EC_STORM_THRESHOLD 8 /* number of false interrupts @@ -433,8 +432,7 @@ EXPORT_SYMBOL(ec_write); int ec_transaction(u8 command, const u8 * wdata, unsigned wdata_len, - u8 * rdata, unsigned rdata_len, - int force_poll) + u8 * rdata, unsigned rdata_len) { struct transaction t = {.command = command, .wdata = wdata, .rdata = rdata, @@ -592,8 +590,6 @@ static void acpi_ec_gpe_query(void *ec_cxt) mutex_unlock(&ec->lock); } -static void acpi_ec_gpe_query(void *ec_cxt); - static int ec_check_sci(struct acpi_ec *ec, u8 state) { if (state & ACPI_EC_FLAG_SCI) { @@ -808,8 +804,6 @@ static int acpi_ec_add(struct acpi_device *device) return -EINVAL; } - ec->handle = device->handle; - /* Find and register all query methods */ acpi_walk_namespace(ACPI_TYPE_METHOD, ec->handle, 1, acpi_ec_register_query_methods, NULL, ec, NULL); @@ -938,8 +932,19 @@ static struct dmi_system_id __initdata ec_dmi_table[] = { ec_flag_msi, "MSI hardware", { DMI_MATCH(DMI_CHASSIS_VENDOR, "MICRO-STAR")}, NULL}, { + ec_flag_msi, "Quanta hardware", { + DMI_MATCH(DMI_SYS_VENDOR, "Quanta"), + DMI_MATCH(DMI_PRODUCT_NAME, "TW8/SW8/DW8"),}, NULL}, + { + ec_flag_msi, "Quanta hardware", { + DMI_MATCH(DMI_SYS_VENDOR, "Quanta"), + DMI_MATCH(DMI_PRODUCT_NAME, "TW9/SW9"),}, NULL}, + { ec_validate_ecdt, "ASUS hardware", { DMI_MATCH(DMI_BIOS_VENDOR, "ASUS") }, NULL}, + { + ec_validate_ecdt, "ASUS hardware", { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer Inc.") }, NULL}, {}, }; diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 4bfb759d..ca75b9c 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -28,9 +28,10 @@ int acpi_scan_init(void); int acpi_sysfs_init(void); #ifdef CONFIG_DEBUG_FS +extern struct dentry *acpi_debugfs_dir; int acpi_debugfs_init(void); #else -static inline int acpi_debugfs_init(void) { return 0; } +static inline void acpi_debugfs_init(void) { return; } #endif /* -------------------------------------------------------------------------- diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 45ad4ff..52ca964 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -902,14 +902,6 @@ void acpi_os_wait_events_complete(void *context) EXPORT_SYMBOL(acpi_os_wait_events_complete); -/* - * Deallocate the memory for a spinlock. - */ -void acpi_os_delete_lock(acpi_spinlock handle) -{ - return; -} - acpi_status acpi_os_create_semaphore(u32 max_units, u32 initial_units, acpi_handle * handle) { @@ -1341,6 +1333,31 @@ int acpi_resources_are_enforced(void) EXPORT_SYMBOL(acpi_resources_are_enforced); /* + * Create and initialize a spinlock. + */ +acpi_status +acpi_os_create_lock(acpi_spinlock *out_handle) +{ + spinlock_t *lock; + + lock = ACPI_ALLOCATE(sizeof(spinlock_t)); + if (!lock) + return AE_NO_MEMORY; + spin_lock_init(lock); + *out_handle = lock; + + return AE_OK; +} + +/* + * Deallocate the memory for a spinlock. + */ +void acpi_os_delete_lock(acpi_spinlock handle) +{ + ACPI_FREE(handle); +} + +/* * Acquire a spinlock. * * handle is a pointer to the spinlock_t. diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 25bf17d..02d2a4c 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -37,7 +37,6 @@ static struct dmi_system_id __initdata processor_idle_dmi_table[] = { {}, }; -#ifdef CONFIG_SMP static int map_lapic_id(struct acpi_subtable_header *entry, u32 acpi_id, int *apic_id) { @@ -165,7 +164,9 @@ exit: int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id) { +#ifdef CONFIG_SMP int i; +#endif int apic_id = -1; apic_id = map_mat_entry(handle, type, acpi_id); @@ -174,14 +175,19 @@ int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id) if (apic_id == -1) return apic_id; +#ifdef CONFIG_SMP for_each_possible_cpu(i) { if (cpu_physical_id(i) == apic_id) return i; } +#else + /* In UP kernel, only processor 0 is valid */ + if (apic_id == 0) + return apic_id; +#endif return -1; } EXPORT_SYMBOL_GPL(acpi_get_cpuid); -#endif static bool __init processor_physically_present(acpi_handle handle) { @@ -217,7 +223,7 @@ static bool __init processor_physically_present(acpi_handle handle) type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0; cpuid = acpi_get_cpuid(handle, type, acpi_id); - if ((cpuid == -1) && (num_possible_cpus() > 1)) + if (cpuid == -1) return false; return true; diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index d615b7d..431ab11 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -161,7 +161,7 @@ static void lapic_timer_check_state(int state, struct acpi_processor *pr, if (cpu_has(&cpu_data(pr->id), X86_FEATURE_ARAT)) return; - if (c1e_detected) + if (amd_e400_c1e_detected) type = ACPI_STATE_C1; /* diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index 61891e7..77255f2 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -220,6 +220,14 @@ module_param_call(trace_state, param_set_trace_state, param_get_trace_state, NULL, 0644); #endif /* CONFIG_ACPI_DEBUG */ + +/* /sys/modules/acpi/parameters/aml_debug_output */ + +module_param_named(aml_debug_output, acpi_gbl_enable_aml_debug_object, + bool, 0644); +MODULE_PARM_DESC(aml_debug_output, + "To enable/disable the ACPI Debug Object output."); + /* /sys/module/acpi/parameters/acpica_version */ static int param_get_acpica_version(char *buffer, struct kernel_param *kp) { diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index db8f885..98de8f4 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1038,6 +1038,7 @@ static void floppy_disable_hlt(void) { unsigned long flags; + WARN_ONCE(1, "floppy_disable_hlt() scheduled for removal in 2012"); spin_lock_irqsave(&floppy_hlt_lock, flags); if (!hlt_disabled) { hlt_disabled = 1; diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index f508690..c47f3d0 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -237,6 +237,7 @@ static int menu_select(struct cpuidle_device *dev) unsigned int power_usage = -1; int i; int multiplier; + struct timespec t; if (data->needs_update) { menu_update(dev); @@ -251,8 +252,9 @@ static int menu_select(struct cpuidle_device *dev) return 0; /* determine the expected residency time, round up */ + t = ktime_to_timespec(tick_nohz_get_sleep_length()); data->expected_us = - DIV_ROUND_UP((u32)ktime_to_ns(tick_nohz_get_sleep_length()), 1000); + t.tv_sec * USEC_PER_SEC + t.tv_nsec / NSEC_PER_USEC; data->bucket = which_bucket(data->expected_us); diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 76a5af0..2067288 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -19,6 +19,8 @@ #define DM_MSG_PREFIX "io" #define DM_IO_MAX_REGIONS BITS_PER_LONG +#define MIN_IOS 16 +#define MIN_BIOS 16 struct dm_io_client { mempool_t *pool; @@ -41,33 +43,21 @@ struct io { static struct kmem_cache *_dm_io_cache; /* - * io contexts are only dynamically allocated for asynchronous - * io. Since async io is likely to be the majority of io we'll - * have the same number of io contexts as bios! (FIXME: must reduce this). - */ - -static unsigned int pages_to_ios(unsigned int pages) -{ - return 4 * pages; /* too many ? */ -} - -/* * Create a client with mempool and bioset. */ -struct dm_io_client *dm_io_client_create(unsigned num_pages) +struct dm_io_client *dm_io_client_create(void) { - unsigned ios = pages_to_ios(num_pages); struct dm_io_client *client; client = kmalloc(sizeof(*client), GFP_KERNEL); if (!client) return ERR_PTR(-ENOMEM); - client->pool = mempool_create_slab_pool(ios, _dm_io_cache); + client->pool = mempool_create_slab_pool(MIN_IOS, _dm_io_cache); if (!client->pool) goto bad; - client->bios = bioset_create(16, 0); + client->bios = bioset_create(MIN_BIOS, 0); if (!client->bios) goto bad; @@ -81,13 +71,6 @@ struct dm_io_client *dm_io_client_create(unsigned num_pages) } EXPORT_SYMBOL(dm_io_client_create); -int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client) -{ - return mempool_resize(client->pool, pages_to_ios(num_pages), - GFP_KERNEL); -} -EXPORT_SYMBOL(dm_io_client_resize); - void dm_io_client_destroy(struct dm_io_client *client) { mempool_destroy(client->pool); diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 1bb73a1..819e37e 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -27,15 +27,19 @@ #include "dm.h" +#define SUB_JOB_SIZE 128 +#define SPLIT_COUNT 8 +#define MIN_JOBS 8 +#define RESERVE_PAGES (DIV_ROUND_UP(SUB_JOB_SIZE << SECTOR_SHIFT, PAGE_SIZE)) + /*----------------------------------------------------------------- * Each kcopyd client has its own little pool of preallocated * pages for kcopyd io. *---------------------------------------------------------------*/ struct dm_kcopyd_client { - spinlock_t lock; struct page_list *pages; - unsigned int nr_pages; - unsigned int nr_free_pages; + unsigned nr_reserved_pages; + unsigned nr_free_pages; struct dm_io_client *io_client; @@ -67,15 +71,18 @@ static void wake(struct dm_kcopyd_client *kc) queue_work(kc->kcopyd_wq, &kc->kcopyd_work); } -static struct page_list *alloc_pl(void) +/* + * Obtain one page for the use of kcopyd. + */ +static struct page_list *alloc_pl(gfp_t gfp) { struct page_list *pl; - pl = kmalloc(sizeof(*pl), GFP_KERNEL); + pl = kmalloc(sizeof(*pl), gfp); if (!pl) return NULL; - pl->page = alloc_page(GFP_KERNEL); + pl->page = alloc_page(gfp); if (!pl->page) { kfree(pl); return NULL; @@ -90,41 +97,56 @@ static void free_pl(struct page_list *pl) kfree(pl); } -static int kcopyd_get_pages(struct dm_kcopyd_client *kc, - unsigned int nr, struct page_list **pages) +/* + * Add the provided pages to a client's free page list, releasing + * back to the system any beyond the reserved_pages limit. + */ +static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl) { - struct page_list *pl; - - spin_lock(&kc->lock); - if (kc->nr_free_pages < nr) { - spin_unlock(&kc->lock); - return -ENOMEM; - } - - kc->nr_free_pages -= nr; - for (*pages = pl = kc->pages; --nr; pl = pl->next) - ; + struct page_list *next; - kc->pages = pl->next; - pl->next = NULL; + do { + next = pl->next; - spin_unlock(&kc->lock); + if (kc->nr_free_pages >= kc->nr_reserved_pages) + free_pl(pl); + else { + pl->next = kc->pages; + kc->pages = pl; + kc->nr_free_pages++; + } - return 0; + pl = next; + } while (pl); } -static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl) +static int kcopyd_get_pages(struct dm_kcopyd_client *kc, + unsigned int nr, struct page_list **pages) { - struct page_list *cursor; + struct page_list *pl; + + *pages = NULL; + + do { + pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY); + if (unlikely(!pl)) { + /* Use reserved pages */ + pl = kc->pages; + if (unlikely(!pl)) + goto out_of_memory; + kc->pages = pl->next; + kc->nr_free_pages--; + } + pl->next = *pages; + *pages = pl; + } while (--nr); - spin_lock(&kc->lock); - for (cursor = pl; cursor->next; cursor = cursor->next) - kc->nr_free_pages++; + return 0; - kc->nr_free_pages++; - cursor->next = kc->pages; - kc->pages = pl; - spin_unlock(&kc->lock); +out_of_memory: + if (*pages) + kcopyd_put_pages(kc, *pages); + return -ENOMEM; } /* @@ -141,13 +163,16 @@ static void drop_pages(struct page_list *pl) } } -static int client_alloc_pages(struct dm_kcopyd_client *kc, unsigned int nr) +/* + * Allocate and reserve nr_pages for the use of a specific client. + */ +static int client_reserve_pages(struct dm_kcopyd_client *kc, unsigned nr_pages) { - unsigned int i; + unsigned i; struct page_list *pl = NULL, *next; - for (i = 0; i < nr; i++) { - next = alloc_pl(); + for (i = 0; i < nr_pages; i++) { + next = alloc_pl(GFP_KERNEL); if (!next) { if (pl) drop_pages(pl); @@ -157,17 +182,18 @@ static int client_alloc_pages(struct dm_kcopyd_client *kc, unsigned int nr) pl = next; } + kc->nr_reserved_pages += nr_pages; kcopyd_put_pages(kc, pl); - kc->nr_pages += nr; + return 0; } static void client_free_pages(struct dm_kcopyd_client *kc) { - BUG_ON(kc->nr_free_pages != kc->nr_pages); + BUG_ON(kc->nr_free_pages != kc->nr_reserved_pages); drop_pages(kc->pages); kc->pages = NULL; - kc->nr_free_pages = kc->nr_pages = 0; + kc->nr_free_pages = kc->nr_reserved_pages = 0; } /*----------------------------------------------------------------- @@ -216,16 +242,17 @@ struct kcopyd_job { struct mutex lock; atomic_t sub_jobs; sector_t progress; -}; -/* FIXME: this should scale with the number of pages */ -#define MIN_JOBS 512 + struct kcopyd_job *master_job; +}; static struct kmem_cache *_job_cache; int __init dm_kcopyd_init(void) { - _job_cache = KMEM_CACHE(kcopyd_job, 0); + _job_cache = kmem_cache_create("kcopyd_job", + sizeof(struct kcopyd_job) * (SPLIT_COUNT + 1), + __alignof__(struct kcopyd_job), 0, NULL); if (!_job_cache) return -ENOMEM; @@ -299,7 +326,12 @@ static int run_complete_job(struct kcopyd_job *job) if (job->pages) kcopyd_put_pages(kc, job->pages); - mempool_free(job, kc->job_pool); + /* + * If this is the master job, the sub jobs have already + * completed so we can free everything. + */ + if (job->master_job == job) + mempool_free(job, kc->job_pool); fn(read_err, write_err, context); if (atomic_dec_and_test(&kc->nr_jobs)) @@ -460,14 +492,14 @@ static void dispatch_job(struct kcopyd_job *job) wake(kc); } -#define SUB_JOB_SIZE 128 static void segment_complete(int read_err, unsigned long write_err, void *context) { /* FIXME: tidy this function */ sector_t progress = 0; sector_t count = 0; - struct kcopyd_job *job = (struct kcopyd_job *) context; + struct kcopyd_job *sub_job = (struct kcopyd_job *) context; + struct kcopyd_job *job = sub_job->master_job; struct dm_kcopyd_client *kc = job->kc; mutex_lock(&job->lock); @@ -498,8 +530,6 @@ static void segment_complete(int read_err, unsigned long write_err, if (count) { int i; - struct kcopyd_job *sub_job = mempool_alloc(kc->job_pool, - GFP_NOIO); *sub_job = *job; sub_job->source.sector += progress; @@ -511,7 +541,7 @@ static void segment_complete(int read_err, unsigned long write_err, } sub_job->fn = segment_complete; - sub_job->context = job; + sub_job->context = sub_job; dispatch_job(sub_job); } else if (atomic_dec_and_test(&job->sub_jobs)) { @@ -531,19 +561,19 @@ static void segment_complete(int read_err, unsigned long write_err, } /* - * Create some little jobs that will do the move between - * them. + * Create some sub jobs to share the work between them. */ -#define SPLIT_COUNT 8 -static void split_job(struct kcopyd_job *job) +static void split_job(struct kcopyd_job *master_job) { int i; - atomic_inc(&job->kc->nr_jobs); + atomic_inc(&master_job->kc->nr_jobs); - atomic_set(&job->sub_jobs, SPLIT_COUNT); - for (i = 0; i < SPLIT_COUNT; i++) - segment_complete(0, 0u, job); + atomic_set(&master_job->sub_jobs, SPLIT_COUNT); + for (i = 0; i < SPLIT_COUNT; i++) { + master_job[i + 1].master_job = master_job; + segment_complete(0, 0u, &master_job[i + 1]); + } } int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, @@ -553,7 +583,8 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, struct kcopyd_job *job; /* - * Allocate a new job. + * Allocate an array of jobs consisting of one master job + * followed by SPLIT_COUNT sub jobs. */ job = mempool_alloc(kc->job_pool, GFP_NOIO); @@ -577,10 +608,10 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, job->fn = fn; job->context = context; + job->master_job = job; - if (job->source.count < SUB_JOB_SIZE) + if (job->source.count <= SUB_JOB_SIZE) dispatch_job(job); - else { mutex_init(&job->lock); job->progress = 0; @@ -606,17 +637,15 @@ int kcopyd_cancel(struct kcopyd_job *job, int block) /*----------------------------------------------------------------- * Client setup *---------------------------------------------------------------*/ -int dm_kcopyd_client_create(unsigned int nr_pages, - struct dm_kcopyd_client **result) +struct dm_kcopyd_client *dm_kcopyd_client_create(void) { int r = -ENOMEM; struct dm_kcopyd_client *kc; kc = kmalloc(sizeof(*kc), GFP_KERNEL); if (!kc) - return -ENOMEM; + return ERR_PTR(-ENOMEM); - spin_lock_init(&kc->lock); spin_lock_init(&kc->job_lock); INIT_LIST_HEAD(&kc->complete_jobs); INIT_LIST_HEAD(&kc->io_jobs); @@ -633,12 +662,12 @@ int dm_kcopyd_client_create(unsigned int nr_pages, goto bad_workqueue; kc->pages = NULL; - kc->nr_pages = kc->nr_free_pages = 0; - r = client_alloc_pages(kc, nr_pages); + kc->nr_reserved_pages = kc->nr_free_pages = 0; + r = client_reserve_pages(kc, RESERVE_PAGES); if (r) goto bad_client_pages; - kc->io_client = dm_io_client_create(nr_pages); + kc->io_client = dm_io_client_create(); if (IS_ERR(kc->io_client)) { r = PTR_ERR(kc->io_client); goto bad_io_client; @@ -647,8 +676,7 @@ int dm_kcopyd_client_create(unsigned int nr_pages, init_waitqueue_head(&kc->destroyq); atomic_set(&kc->nr_jobs, 0); - *result = kc; - return 0; + return kc; bad_io_client: client_free_pages(kc); @@ -659,7 +687,7 @@ bad_workqueue: bad_slab: kfree(kc); - return r; + return ERR_PTR(r); } EXPORT_SYMBOL(dm_kcopyd_client_create); diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index a1f3218..948e3f4 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -449,8 +449,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, lc->io_req.mem.type = DM_IO_VMA; lc->io_req.notify.fn = NULL; - lc->io_req.client = dm_io_client_create(dm_div_up(buf_size, - PAGE_SIZE)); + lc->io_req.client = dm_io_client_create(); if (IS_ERR(lc->io_req.client)) { r = PTR_ERR(lc->io_req.client); DMWARN("couldn't allocate disk io client"); diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index a550a05..aa4e570 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1290,7 +1290,7 @@ static int do_end_io(struct multipath *m, struct request *clone, if (!error && !clone->errors) return 0; /* I/O complete */ - if (error == -EOPNOTSUPP || error == -EREMOTEIO) + if (error == -EOPNOTSUPP || error == -EREMOTEIO || error == -EILSEQ) return error; if (mpio->pgpath) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 976ad46..9bfd057 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -22,8 +22,6 @@ #define DM_MSG_PREFIX "raid1" #define MAX_RECOVERY 1 /* Maximum number of regions recovered in parallel. */ -#define DM_IO_PAGES 64 -#define DM_KCOPYD_PAGES 64 #define DM_RAID1_HANDLE_ERRORS 0x01 #define errors_handled(p) ((p)->features & DM_RAID1_HANDLE_ERRORS) @@ -887,7 +885,7 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, return NULL; } - ms->io_client = dm_io_client_create(DM_IO_PAGES); + ms->io_client = dm_io_client_create(); if (IS_ERR(ms->io_client)) { ti->error = "Error creating dm_io client"; mempool_destroy(ms->read_record_pool); @@ -1117,9 +1115,11 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto err_destroy_wq; } - r = dm_kcopyd_client_create(DM_KCOPYD_PAGES, &ms->kcopyd_client); - if (r) + ms->kcopyd_client = dm_kcopyd_client_create(); + if (IS_ERR(ms->kcopyd_client)) { + r = PTR_ERR(ms->kcopyd_client); goto err_destroy_wq; + } wakeup_mirrord(ms); return 0; diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 95891df..135c2f1 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -154,11 +154,6 @@ struct pstore { struct workqueue_struct *metadata_wq; }; -static unsigned sectors_to_pages(unsigned sectors) -{ - return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9); -} - static int alloc_area(struct pstore *ps) { int r = -ENOMEM; @@ -318,8 +313,7 @@ static int read_header(struct pstore *ps, int *new_snapshot) chunk_size_supplied = 0; } - ps->io_client = dm_io_client_create(sectors_to_pages(ps->store-> - chunk_size)); + ps->io_client = dm_io_client_create(); if (IS_ERR(ps->io_client)) return PTR_ERR(ps->io_client); @@ -368,11 +362,6 @@ static int read_header(struct pstore *ps, int *new_snapshot) return r; } - r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size), - ps->io_client); - if (r) - return r; - r = alloc_area(ps); return r; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index a2d3309..9ecff5f 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -40,11 +40,6 @@ static const char dm_snapshot_merge_target_name[] = "snapshot-merge"; #define SNAPSHOT_COPY_PRIORITY 2 /* - * Reserve 1MB for each snapshot initially (with minimum of 1 page). - */ -#define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1) - -/* * The size of the mempool used to track chunks in use. */ #define MIN_IOS 256 @@ -1116,8 +1111,9 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_hash_tables; } - r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client); - if (r) { + s->kcopyd_client = dm_kcopyd_client_create(); + if (IS_ERR(s->kcopyd_client)) { + r = PTR_ERR(s->kcopyd_client); ti->error = "Could not create kcopyd client"; goto bad_kcopyd; } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index cb8380c..451c3bb 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -362,6 +362,7 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { + struct request_queue *q; struct queue_limits *limits = data; struct block_device *bdev = dev->bdev; sector_t dev_size = @@ -370,6 +371,22 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, limits->logical_block_size >> SECTOR_SHIFT; char b[BDEVNAME_SIZE]; + /* + * Some devices exist without request functions, + * such as loop devices not yet bound to backing files. + * Forbid the use of such devices. + */ + q = bdev_get_queue(bdev); + if (!q || !q->make_request_fn) { + DMWARN("%s: %s is not yet initialised: " + "start=%llu, len=%llu, dev_size=%llu", + dm_device_name(ti->table->md), bdevname(bdev, b), + (unsigned long long)start, + (unsigned long long)len, + (unsigned long long)dev_size); + return 1; + } + if (!dev_size) return 0; @@ -1346,7 +1363,8 @@ bool dm_table_supports_discards(struct dm_table *t) return 0; /* - * Ensure that at least one underlying device supports discards. + * Unless any target used by the table set discards_supported, + * require at least one underlying device to support discards. * t->devices includes internal dm devices such as mirror logs * so we need to use iterate_devices here, which targets * supporting discard must provide. @@ -1354,6 +1372,9 @@ bool dm_table_supports_discards(struct dm_table *t) while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); + if (ti->discards_supported) + return 1; + if (ti->type->iterate_devices && ti->type->iterate_devices(ti, device_discard_capable, NULL)) return 1; diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 7c3b18e..d36f41e 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -195,6 +195,8 @@ static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev) return PCI_D2; case ACPI_STATE_D3: return PCI_D3hot; + case ACPI_STATE_D3_COLD: + return PCI_D3cold; } return PCI_POWER_ERROR; } diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c index c16a276..9b3afb6 100644 --- a/drivers/platform/x86/compal-laptop.c +++ b/drivers/platform/x86/compal-laptop.c @@ -200,8 +200,8 @@ static bool extra_features; * watching the output of address 0x4F (do an ec_transaction writing 0x33 * into 0x4F and read a few bytes from the output, like so: * u8 writeData = 0x33; - * ec_transaction(0x4F, &writeData, 1, buffer, 32, 0); - * That address is labelled "fan1 table information" in the service manual. + * ec_transaction(0x4F, &writeData, 1, buffer, 32); + * That address is labeled "fan1 table information" in the service manual. * It should be clear which value in 'buffer' changes). This seems to be * related to fan speed. It isn't a proper 'realtime' fan speed value * though, because physically stopping or speeding up the fan doesn't @@ -286,7 +286,7 @@ static int get_backlight_level(void) static void set_backlight_state(bool on) { u8 data = on ? BACKLIGHT_STATE_ON_DATA : BACKLIGHT_STATE_OFF_DATA; - ec_transaction(BACKLIGHT_STATE_ADDR, &data, 1, NULL, 0, 0); + ec_transaction(BACKLIGHT_STATE_ADDR, &data, 1, NULL, 0); } @@ -294,24 +294,24 @@ static void set_backlight_state(bool on) static void pwm_enable_control(void) { unsigned char writeData = PWM_ENABLE_DATA; - ec_transaction(PWM_ENABLE_ADDR, &writeData, 1, NULL, 0, 0); + ec_transaction(PWM_ENABLE_ADDR, &writeData, 1, NULL, 0); } static void pwm_disable_control(void) { unsigned char writeData = PWM_DISABLE_DATA; - ec_transaction(PWM_DISABLE_ADDR, &writeData, 1, NULL, 0, 0); + ec_transaction(PWM_DISABLE_ADDR, &writeData, 1, NULL, 0); } static void set_pwm(int pwm) { - ec_transaction(PWM_ADDRESS, &pwm_lookup_table[pwm], 1, NULL, 0, 0); + ec_transaction(PWM_ADDRESS, &pwm_lookup_table[pwm], 1, NULL, 0); } static int get_fan_rpm(void) { u8 value, data = FAN_DATA; - ec_transaction(FAN_ADDRESS, &data, 1, &value, 1, 0); + ec_transaction(FAN_ADDRESS, &data, 1, &value, 1); return 100 * (int)value; } diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c index 23fb2af..821ad7b 100644 --- a/drivers/platform/x86/msi-laptop.c +++ b/drivers/platform/x86/msi-laptop.c @@ -135,7 +135,7 @@ static int set_lcd_level(int level) buf[1] = (u8) (level*31); return ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, buf, sizeof(buf), - NULL, 0, 1); + NULL, 0); } static int get_lcd_level(void) @@ -144,7 +144,7 @@ static int get_lcd_level(void) int result; result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, &wdata, 1, - &rdata, 1, 1); + &rdata, 1); if (result < 0) return result; @@ -157,7 +157,7 @@ static int get_auto_brightness(void) int result; result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, &wdata, 1, - &rdata, 1, 1); + &rdata, 1); if (result < 0) return result; @@ -172,7 +172,7 @@ static int set_auto_brightness(int enable) wdata[0] = 4; result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, wdata, 1, - &rdata, 1, 1); + &rdata, 1); if (result < 0) return result; @@ -180,7 +180,7 @@ static int set_auto_brightness(int enable) wdata[1] = (rdata & 0xF7) | (enable ? 8 : 0); return ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, wdata, 2, - NULL, 0, 1); + NULL, 0); } static ssize_t set_device_state(const char *buf, size_t count, u8 mask) @@ -217,7 +217,7 @@ static int get_wireless_state(int *wlan, int *bluetooth) u8 wdata = 0, rdata; int result; - result = ec_transaction(MSI_EC_COMMAND_WIRELESS, &wdata, 1, &rdata, 1, 1); + result = ec_transaction(MSI_EC_COMMAND_WIRELESS, &wdata, 1, &rdata, 1); if (result < 0) return -1; diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index fc6f2a5..0b1c82a 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -499,7 +499,7 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) dev_set_drvdata(hwmon->device, hwmon); result = device_create_file(hwmon->device, &dev_attr_name); if (result) - goto unregister_hwmon_device; + goto free_mem; register_sys_interface: tz->hwmon = hwmon; @@ -513,7 +513,7 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) sysfs_attr_init(&tz->temp_input.attr.attr); result = device_create_file(hwmon->device, &tz->temp_input.attr); if (result) - goto unregister_hwmon_device; + goto unregister_name; if (tz->ops->get_crit_temp) { unsigned long temperature; @@ -527,7 +527,7 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) result = device_create_file(hwmon->device, &tz->temp_crit.attr); if (result) - goto unregister_hwmon_device; + goto unregister_input; } } @@ -539,9 +539,9 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) return 0; - unregister_hwmon_device: - device_remove_file(hwmon->device, &tz->temp_crit.attr); + unregister_input: device_remove_file(hwmon->device, &tz->temp_input.attr); + unregister_name: if (new_hwmon_device) { device_remove_file(hwmon->device, &dev_attr_name); hwmon_device_unregister(hwmon->device); diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 8f17006..21de1d6 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -74,8 +74,9 @@ shrink_idmap_tree(struct rb_root *root, int nr_to_scan, int *nr_rem, * Run idmap cache shrinker. */ static int -cifs_idmap_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) +cifs_idmap_shrinker(struct shrinker *shrink, struct shrink_control *sc) { + int nr_to_scan = sc->nr_to_scan; int nr_del = 0; int nr_rem = 0; struct rb_root *root; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index bbbc6bf..dd25c2a 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -21,25 +21,13 @@ #include "delegation.h" #include "internal.h" -static void nfs_do_free_delegation(struct nfs_delegation *delegation) -{ - kfree(delegation); -} - -static void nfs_free_delegation_callback(struct rcu_head *head) -{ - struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu); - - nfs_do_free_delegation(delegation); -} - static void nfs_free_delegation(struct nfs_delegation *delegation) { if (delegation->cred) { put_rpccred(delegation->cred); delegation->cred = NULL; } - call_rcu(&delegation->rcu, nfs_free_delegation_callback); + kfree_rcu(delegation, rcu); } /** diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 57bb31a..873c6fa 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1298,8 +1298,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) i_size_write(inode, new_isize); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } - dprintk("NFS: isize change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); + dprintk("NFS: isize change on server for file %s/%ld " + "(%Ld to %Ld)\n", + inode->i_sb->s_id, + inode->i_ino, + (long long)cur_isize, + (long long)new_isize); } } else invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cf1b339..d0e15db 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -267,9 +267,11 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc break; nfs4_schedule_stateid_recovery(server, state); goto wait_on_recovery; + case -NFS4ERR_EXPIRED: + if (state != NULL) + nfs4_schedule_stateid_recovery(server, state); case -NFS4ERR_STALE_STATEID: case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_EXPIRED: nfs4_schedule_lease_recovery(clp); goto wait_on_recovery; #if defined(CONFIG_NFS_V4_1) @@ -3670,9 +3672,11 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, break; nfs4_schedule_stateid_recovery(server, state); goto wait_on_recovery; + case -NFS4ERR_EXPIRED: + if (state != NULL) + nfs4_schedule_stateid_recovery(server, state); case -NFS4ERR_STALE_STATEID: case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_EXPIRED: nfs4_schedule_lease_recovery(clp); goto wait_on_recovery; #if defined(CONFIG_NFS_V4_1) @@ -4543,6 +4547,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) case -ESTALE: goto out; case -NFS4ERR_EXPIRED: + nfs4_schedule_stateid_recovery(server, state); case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: nfs4_schedule_lease_recovery(server->nfs_client); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 036f5ad..e97dd21 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1466,7 +1466,10 @@ static int nfs4_reclaim_lease(struct nfs_client *clp) #ifdef CONFIG_NFS_V4_1 void nfs4_schedule_session_recovery(struct nfs4_session *session) { - nfs4_schedule_lease_recovery(session->clp); + struct nfs_client *clp = session->clp; + + set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + nfs4_schedule_lease_recovery(clp); } EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery); @@ -1549,6 +1552,7 @@ static int nfs4_reset_session(struct nfs_client *clp) status = nfs4_recovery_handle_error(clp, status); goto out; } + clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); /* create_session negotiated new slot table */ clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index c541093..c4744e1 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -87,7 +87,7 @@ #define NFS_ROOT "/tftpboot/%s" /* Default NFSROOT mount options. */ -#define NFS_DEF_OPTIONS "udp" +#define NFS_DEF_OPTIONS "vers=2,udp,rsize=4096,wsize=4096" /* Parameters passed from the kernel command line */ static char nfs_root_parms[256] __initdata = ""; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index f57f528..101c85a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1009,7 +1009,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata) { struct nfs_inode *nfsi = NFS_I(wdata->inode); - loff_t end_pos = wdata->args.offset + wdata->res.count; + loff_t end_pos = wdata->mds_offset + wdata->res.count; bool mark_as_dirty = false; spin_lock(&nfsi->vfs_inode.i_lock); diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index ad000ae..b9566e4 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1354,12 +1354,6 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp) if (IS_ERR(exp)) return nfserrno(PTR_ERR(exp)); rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL); - if (rv) - goto out; - rv = check_nfsd_access(exp, rqstp); - if (rv) - fh_put(fhp); -out: exp_put(exp); return rv; } diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 2247fc9..9095f3c 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -245,7 +245,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, } /* Now create the file and set attributes */ - nfserr = nfsd_create_v3(rqstp, dirfhp, argp->name, argp->len, + nfserr = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len, attr, newfhp, argp->createmode, argp->verf, NULL, NULL); diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index ad48fac..08c6e36 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -842,7 +842,7 @@ out: return rv; } -__be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) +static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) { struct svc_fh fh; int err; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5fcb139..3a6dbd7 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -196,9 +196,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o /* * Note: create modes (UNCHECKED,GUARDED...) are the same - * in NFSv4 as in v3. + * in NFSv4 as in v3 except EXCLUSIVE4_1. */ - status = nfsd_create_v3(rqstp, current_fh, open->op_fname.data, + status = do_nfsd_create(rqstp, current_fh, open->op_fname.data, open->op_fname.len, &open->op_iattr, &resfh, open->op_createmode, (u32 *)open->op_verf.data, @@ -403,7 +403,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen; memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval, putfh->pf_fhlen); - return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); + return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS); } static __be32 @@ -762,6 +762,9 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 err; fh_init(&resfh, NFS4_FHSIZE); + err = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_EXEC); + if (err) + return err; err = nfsd_lookup_dentry(rqstp, &cstate->current_fh, secinfo->si_name, secinfo->si_namelen, &exp, &dentry); @@ -986,6 +989,9 @@ enum nfsd4_op_flags { ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */ ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */ + /* For rfc 5661 section 2.6.3.1.1: */ + OP_HANDLES_WRONGSEC = 1 << 3, + OP_IS_PUTFH_LIKE = 1 << 4, }; struct nfsd4_operation { @@ -1031,6 +1037,44 @@ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args) return nfs_ok; } +static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op) +{ + return &nfsd4_ops[op->opnum]; +} + +static bool need_wrongsec_check(struct svc_rqst *rqstp) +{ + struct nfsd4_compoundres *resp = rqstp->rq_resp; + struct nfsd4_compoundargs *argp = rqstp->rq_argp; + struct nfsd4_op *this = &argp->ops[resp->opcnt - 1]; + struct nfsd4_op *next = &argp->ops[resp->opcnt]; + struct nfsd4_operation *thisd; + struct nfsd4_operation *nextd; + + thisd = OPDESC(this); + /* + * Most ops check wronsec on our own; only the putfh-like ops + * have special rules. + */ + if (!(thisd->op_flags & OP_IS_PUTFH_LIKE)) + return false; + /* + * rfc 5661 2.6.3.1.1.6: don't bother erroring out a + * put-filehandle operation if we're not going to use the + * result: + */ + if (argp->opcnt == resp->opcnt) + return false; + + nextd = OPDESC(next); + /* + * Rest of 2.6.3.1.1: certain operations will return WRONGSEC + * errors themselves as necessary; others should check for them + * now: + */ + return !(nextd->op_flags & OP_HANDLES_WRONGSEC); +} + /* * COMPOUND call. */ @@ -1108,7 +1152,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, goto encode_op; } - opdesc = &nfsd4_ops[op->opnum]; + opdesc = OPDESC(op); if (!cstate->current_fh.fh_dentry) { if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) { @@ -1126,6 +1170,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, else BUG_ON(op->status == nfs_ok); + if (!op->status && need_wrongsec_check(rqstp)) + op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp); + encode_op: /* Only from SEQUENCE */ if (resp->cstate.status == nfserr_replay_cache) { @@ -1217,10 +1264,12 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_LOOKUP] = { .op_func = (nfsd4op_func)nfsd4_lookup, + .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_LOOKUP", }, [OP_LOOKUPP] = { .op_func = (nfsd4op_func)nfsd4_lookupp, + .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_LOOKUPP", }, [OP_NVERIFY] = { @@ -1229,6 +1278,7 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_OPEN] = { .op_func = (nfsd4op_func)nfsd4_open, + .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_OPEN", }, [OP_OPEN_CONFIRM] = { @@ -1241,17 +1291,20 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_PUTFH] = { .op_func = (nfsd4op_func)nfsd4_putfh, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_IS_PUTFH_LIKE, .op_name = "OP_PUTFH", }, [OP_PUTPUBFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_IS_PUTFH_LIKE, .op_name = "OP_PUTPUBFH", }, [OP_PUTROOTFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_IS_PUTFH_LIKE, .op_name = "OP_PUTROOTFH", }, [OP_READ] = { @@ -1281,15 +1334,18 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_RESTOREFH] = { .op_func = (nfsd4op_func)nfsd4_restorefh, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_IS_PUTFH_LIKE, .op_name = "OP_RESTOREFH", }, [OP_SAVEFH] = { .op_func = (nfsd4op_func)nfsd4_savefh, + .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SAVEFH", }, [OP_SECINFO] = { .op_func = (nfsd4op_func)nfsd4_secinfo, + .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SECINFO", }, [OP_SETATTR] = { @@ -1353,6 +1409,7 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_SECINFO_NO_NAME] = { .op_func = (nfsd4op_func)nfsd4_secinfo_no_name, + .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SECINFO_NO_NAME", }, }; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4cf04e1..e98f3c2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1519,6 +1519,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, bool confirm_me = false; int status = 0; + if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) + return nfserr_inval; + nfs4_lock_state(); unconf = find_unconfirmed_client(&cr_ses->clientid); conf = find_confirmed_client(&cr_ses->clientid); @@ -1637,8 +1640,9 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, return nfserr_badsession; status = nfsd4_map_bcts_dir(&bcts->dir); - nfsd4_new_conn(rqstp, cstate->session, bcts->dir); - return nfs_ok; + if (!status) + nfsd4_new_conn(rqstp, cstate->session, bcts->dir); + return status; } static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) @@ -1725,6 +1729,13 @@ static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi return; } +static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session) +{ + struct nfsd4_compoundargs *args = rqstp->rq_argp; + + return args->opcnt > session->se_fchannel.maxops; +} + __be32 nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -1753,6 +1764,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (!session) goto out; + status = nfserr_too_many_ops; + if (nfsd4_session_too_many_ops(rqstp, session)) + goto out; + status = nfserr_badslot; if (seq->slotid >= session->se_fchannel.maxreqs) goto out; @@ -1808,6 +1823,8 @@ out: __be32 nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc) { + int status = 0; + if (rc->rca_one_fs) { if (!cstate->current_fh.fh_dentry) return nfserr_nofilehandle; @@ -1817,9 +1834,14 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta */ return nfs_ok; } + nfs4_lock_state(); - if (is_client_expired(cstate->session->se_client)) { - nfs4_unlock_state(); + status = nfserr_complete_already; + if (cstate->session->se_client->cl_firststate) + goto out; + + status = nfserr_stale_clientid; + if (is_client_expired(cstate->session->se_client)) /* * The following error isn't really legal. * But we only get here if the client just explicitly @@ -1827,11 +1849,13 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta * error it gets back on an operation for the dead * client. */ - return nfserr_stale_clientid; - } + goto out; + + status = nfs_ok; nfsd4_create_clid_dir(cstate->session->se_client); +out: nfs4_unlock_state(); - return nfs_ok; + return status; } __be32 @@ -2462,7 +2486,7 @@ find_delegation_file(struct nfs4_file *fp, stateid_t *stid) return NULL; } -int share_access_to_flags(u32 share_access) +static int share_access_to_flags(u32 share_access) { share_access &= ~NFS4_SHARE_WANT_MASK; @@ -2882,7 +2906,7 @@ out: return status; } -struct lock_manager nfsd4_manager = { +static struct lock_manager nfsd4_manager = { }; static void diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c6766af..9901811 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -424,15 +424,12 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) { DECODE_HEAD; - u32 dummy; READ_BUF(NFS4_MAX_SESSIONID_LEN + 8); COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); READ32(bcts->dir); - /* XXX: Perhaps Tom Tucker could help us figure out how we - * should be using ctsa_use_conn_in_rdma_mode: */ - READ32(dummy); - + /* XXX: skipping ctsa_use_conn_in_rdma_mode. Perhaps Tom Tucker + * could help us figure out we should be using it. */ DECODE_TAIL; } @@ -588,8 +585,6 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) READ_BUF(lockt->lt_owner.len); READMEM(lockt->lt_owner.data, lockt->lt_owner.len); - if (argp->minorversion && !zero_clientid(&lockt->lt_clientid)) - return nfserr_inval; DECODE_TAIL; } @@ -3120,7 +3115,7 @@ nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, int nfserr, return nfserr; } -__be32 +static __be32 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_sequence *seq) { diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 55c8e63..90c6aa6 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -344,7 +344,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) * which clients virtually always use auth_sys for, * even while using RPCSEC_GSS for NFS. */ - if (access & NFSD_MAY_LOCK) + if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS) goto skip_pseudoflavor_check; /* * Clients may expect to be able to use auth_sys during mount, diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 129f3c9..d571827 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -181,16 +181,10 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, struct svc_export *exp; struct dentry *dparent; struct dentry *dentry; - __be32 err; int host_err; dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); - /* Obtain dentry and export. */ - err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); - if (err) - return err; - dparent = fhp->fh_dentry; exp = fhp->fh_export; exp_get(exp); @@ -254,6 +248,9 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, struct dentry *dentry; __be32 err; + err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); + if (err) + return err; err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry); if (err) return err; @@ -877,13 +874,11 @@ static __be32 nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long *count) { - struct inode *inode; mm_segment_t oldfs; __be32 err; int host_err; err = nfserr_perm; - inode = file->f_path.dentry->d_inode; if (file->f_op->splice_read && rqstp->rq_splice_ok) { struct splice_desc sd = { @@ -1340,11 +1335,18 @@ out_nfserr: } #ifdef CONFIG_NFSD_V3 + +static inline int nfsd_create_is_exclusive(int createmode) +{ + return createmode == NFS3_CREATE_EXCLUSIVE + || createmode == NFS4_CREATE_EXCLUSIVE4_1; +} + /* - * NFSv3 version of nfsd_create + * NFSv3 and NFSv4 version of nfsd_create */ __be32 -nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, +do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, char *fname, int flen, struct iattr *iap, struct svc_fh *resfhp, int createmode, u32 *verifier, int *truncp, int *created) @@ -1396,7 +1398,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, if (err) goto out; - if (createmode == NFS3_CREATE_EXCLUSIVE) { + if (nfsd_create_is_exclusive(createmode)) { /* solaris7 gets confused (bugid 4218508) if these have * the high bit set, so just clear the high bits. If this is * ever changed to use different attrs for storing the @@ -1437,6 +1439,11 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, && dchild->d_inode->i_atime.tv_sec == v_atime && dchild->d_inode->i_size == 0 ) break; + case NFS4_CREATE_EXCLUSIVE4_1: + if ( dchild->d_inode->i_mtime.tv_sec == v_mtime + && dchild->d_inode->i_atime.tv_sec == v_atime + && dchild->d_inode->i_size == 0 ) + goto set_attr; /* fallthru */ case NFS3_CREATE_GUARDED: err = nfserr_exist; @@ -1455,7 +1462,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_check_ignore_resizing(iap); - if (createmode == NFS3_CREATE_EXCLUSIVE) { + if (nfsd_create_is_exclusive(createmode)) { /* Cram the verifier into atime/mtime */ iap->ia_valid = ATTR_MTIME|ATTR_ATIME | ATTR_MTIME_SET|ATTR_ATIME_SET; @@ -2034,7 +2041,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, struct inode *inode = dentry->d_inode; int err; - if (acc == NFSD_MAY_NOP) + if ((acc & NFSD_MAY_MASK) == NFSD_MAY_NOP) return 0; #if 0 dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n", diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 9a370a5..e0bbac0 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -17,10 +17,14 @@ #define NFSD_MAY_SATTR 8 #define NFSD_MAY_TRUNC 16 #define NFSD_MAY_LOCK 32 +#define NFSD_MAY_MASK 63 + +/* extra hints to permission and open routines: */ #define NFSD_MAY_OWNER_OVERRIDE 64 #define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ #define NFSD_MAY_BYPASS_GSS_ON_ROOT 256 #define NFSD_MAY_NOT_BREAK_LEASE 512 +#define NFSD_MAY_BYPASS_GSS 1024 #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) @@ -54,7 +58,7 @@ __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, int type, dev_t rdev, struct svc_fh *res); #ifdef CONFIG_NFSD_V3 __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); -__be32 nfsd_create_v3(struct svc_rqst *, struct svc_fh *, +__be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, struct svc_fh *res, int createmode, u32 *verifier, int *truncp, int *created); diff --git a/fs/proc/base.c b/fs/proc/base.c index 4ede550..14def99 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -83,6 +83,9 @@ #include <linux/pid_namespace.h> #include <linux/fs_struct.h> #include <linux/slab.h> +#ifdef CONFIG_HARDWALL +#include <asm/hardwall.h> +#endif #include "internal.h" /* NOTE: @@ -2842,6 +2845,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_TASK_IO_ACCOUNTING INF("io", S_IRUGO, proc_tgid_io_accounting), #endif +#ifdef CONFIG_HARDWALL + INF("hardwall", S_IRUGO, proc_pid_hardwall), +#endif }; static int proc_tgid_base_readdir(struct file * filp, @@ -3181,6 +3187,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_TASK_IO_ACCOUNTING INF("io", S_IRUGO, proc_tid_io_accounting), #endif +#ifdef CONFIG_HARDWALL + INF("hardwall", S_IRUGO, proc_pid_hardwall), +#endif }; static int proc_tid_base_readdir(struct file * filp, diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c index 730c562..5e1101f 100644 --- a/fs/squashfs/export.c +++ b/fs/squashfs/export.c @@ -147,7 +147,7 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb, * table[0] points to the first inode lookup table metadata block, * this should be less than lookup_table_start */ - if (!IS_ERR(table) && table[0] >= lookup_table_start) { + if (!IS_ERR(table) && le64_to_cpu(table[0]) >= lookup_table_start) { kfree(table); return ERR_PTR(-EINVAL); } diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c index 1516a649..0ed6edb 100644 --- a/fs/squashfs/fragment.c +++ b/fs/squashfs/fragment.c @@ -90,7 +90,7 @@ __le64 *squashfs_read_fragment_index_table(struct super_block *sb, * table[0] points to the first fragment table metadata block, this * should be less than fragment_table_start */ - if (!IS_ERR(table) && table[0] >= fragment_table_start) { + if (!IS_ERR(table) && le64_to_cpu(table[0]) >= fragment_table_start) { kfree(table); return ERR_PTR(-EINVAL); } diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c index a70858e..d38ea3d 100644 --- a/fs/squashfs/id.c +++ b/fs/squashfs/id.c @@ -93,7 +93,7 @@ __le64 *squashfs_read_id_index_table(struct super_block *sb, * table[0] points to the first id lookup table metadata block, this * should be less than id_table_start */ - if (!IS_ERR(table) && table[0] >= id_table_start) { + if (!IS_ERR(table) && le64_to_cpu(table[0]) >= id_table_start) { kfree(table); return ERR_PTR(-EINVAL); } diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 6f26abe..7438850 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -245,7 +245,7 @@ allocate_id_index_table: msblk->id_table = NULL; goto failed_mount; } - next_table = msblk->id_table[0]; + next_table = le64_to_cpu(msblk->id_table[0]); /* Handle inode lookup table */ lookup_table_start = le64_to_cpu(sblk->lookup_table_start); @@ -261,7 +261,7 @@ allocate_id_index_table: msblk->inode_lookup_table = NULL; goto failed_mount; } - next_table = msblk->inode_lookup_table[0]; + next_table = le64_to_cpu(msblk->inode_lookup_table[0]); sb->s_export_op = &squashfs_export_ops; @@ -286,7 +286,7 @@ handle_fragments: msblk->fragment_index = NULL; goto failed_mount; } - next_table = msblk->fragment_index[0]; + next_table = le64_to_cpu(msblk->fragment_index[0]); check_directory_table: /* Sanity check directory_table */ diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index 46961c0..ca953a9 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c @@ -277,8 +277,9 @@ static int kick_a_thread(void) return 0; } -int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask) +int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc) { + int nr = sc->nr_to_scan; int freed, contention = 0; long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 93d1412..a70d7b4 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1614,7 +1614,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); int ubifs_tnc_end_commit(struct ubifs_info *c); /* shrinker.c */ -int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask); +int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc); /* commit.c */ int ubifs_bg_thread(void *info); diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index a3252a5..a756bc8 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -98,6 +98,9 @@ acpi_os_table_override(struct acpi_table_header *existing_table, /* * Spinlock primitives */ +acpi_status +acpi_os_create_lock(acpi_spinlock *out_handle); + void acpi_os_delete_lock(acpi_spinlock handle); acpi_cpu_flags acpi_os_acquire_lock(acpi_spinlock handle); diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index f6ad63d..2ed0a84 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -47,7 +47,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20110316 +#define ACPI_CA_VERSION 0x20110413 #include "actypes.h" #include "actbl.h" diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 64f838b..b67231b 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -501,8 +501,9 @@ typedef u64 acpi_integer; #define ACPI_STATE_D1 (u8) 1 #define ACPI_STATE_D2 (u8) 2 #define ACPI_STATE_D3 (u8) 3 -#define ACPI_D_STATES_MAX ACPI_STATE_D3 -#define ACPI_D_STATE_COUNT 4 +#define ACPI_STATE_D3_COLD (u8) 4 +#define ACPI_D_STATES_MAX ACPI_STATE_D3_COLD +#define ACPI_D_STATE_COUNT 5 #define ACPI_STATE_C0 (u8) 0 #define ACPI_STATE_C1 (u8) 1 @@ -712,8 +713,24 @@ typedef u8 acpi_adr_space_type; #define ACPI_ADR_SPACE_CMOS (acpi_adr_space_type) 5 #define ACPI_ADR_SPACE_PCI_BAR_TARGET (acpi_adr_space_type) 6 #define ACPI_ADR_SPACE_IPMI (acpi_adr_space_type) 7 -#define ACPI_ADR_SPACE_DATA_TABLE (acpi_adr_space_type) 8 -#define ACPI_ADR_SPACE_FIXED_HARDWARE (acpi_adr_space_type) 127 + +#define ACPI_NUM_PREDEFINED_REGIONS 8 + +/* + * Special Address Spaces + * + * Note: A Data Table region is a special type of operation region + * that has its own AML opcode. However, internally, the AML + * interpreter simply creates an operation region with an an address + * space type of ACPI_ADR_SPACE_DATA_TABLE. + */ +#define ACPI_ADR_SPACE_DATA_TABLE (acpi_adr_space_type) 0x7E /* Internal to ACPICA only */ +#define ACPI_ADR_SPACE_FIXED_HARDWARE (acpi_adr_space_type) 0x7F + +/* Values for _REG connection code */ + +#define ACPI_REG_DISCONNECT 0 +#define ACPI_REG_CONNECT 1 /* * bit_register IDs diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 55192ac..ba4928c 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -310,14 +310,7 @@ static inline int acpi_processor_get_bios_limit(int cpu, unsigned int *limit) /* in processor_core.c */ void acpi_processor_set_pdc(acpi_handle handle); -#ifdef CONFIG_SMP int acpi_get_cpuid(acpi_handle, int type, u32 acpi_id); -#else -static inline int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id) -{ - return -1; -} -#endif /* in processor_throttling.c */ int acpi_processor_tstate_has_changed(struct acpi_processor *pr); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a2e910e..1deb2a7 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -150,8 +150,7 @@ extern int ec_read(u8 addr, u8 *val); extern int ec_write(u8 addr, u8 val); extern int ec_transaction(u8 command, const u8 *wdata, unsigned wdata_len, - u8 *rdata, unsigned rdata_len, - int force_poll); + u8 *rdata, unsigned rdata_len); #if defined(CONFIG_ACPI_WMI) || defined(CONFIG_ACPI_WMI_MODULE) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 32a4423..4427e04 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -191,6 +191,12 @@ struct dm_target { /* Used to provide an error string from the ctr */ char *error; + + /* + * Set if this target needs to receive discards regardless of + * whether or not its underlying devices have support. + */ + unsigned discards_supported:1; }; /* Each target can link one of these into the table */ diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index 5c9186b..f4b0aa3 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -69,8 +69,7 @@ struct dm_io_request { * * Create/destroy may block. */ -struct dm_io_client *dm_io_client_create(unsigned num_pages); -int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client); +struct dm_io_client *dm_io_client_create(void); void dm_io_client_destroy(struct dm_io_client *client); /* diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h index 5db2163..298d587 100644 --- a/include/linux/dm-kcopyd.h +++ b/include/linux/dm-kcopyd.h @@ -25,8 +25,7 @@ * To use kcopyd you must first create a dm_kcopyd_client object. */ struct dm_kcopyd_client; -int dm_kcopyd_client_create(unsigned num_pages, - struct dm_kcopyd_client **result); +struct dm_kcopyd_client *dm_kcopyd_client_create(void); void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc); /* diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 178fafe..8e66c5c 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -570,9 +570,11 @@ struct nfs4_sessionid { }; /* Create Session Flags */ -#define SESSION4_PERSIST 0x001 -#define SESSION4_BACK_CHAN 0x002 -#define SESSION4_RDMA 0x004 +#define SESSION4_PERSIST 0x001 +#define SESSION4_BACK_CHAN 0x002 +#define SESSION4_RDMA 0x004 + +#define SESSION4_FLAG_MASK_A 0x007 enum state_protect_how4 { SP4_NONE = 0, diff --git a/include/linux/pm_qos_params.h b/include/linux/pm_qos_params.h index 77cbddb..a7d87f9 100644 --- a/include/linux/pm_qos_params.h +++ b/include/linux/pm_qos_params.h @@ -16,6 +16,10 @@ #define PM_QOS_NUM_CLASSES 4 #define PM_QOS_DEFAULT_VALUE -1 +#define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) +#define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) +#define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0 + struct pm_qos_request_list { struct plist_node list; int pm_qos_class; diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 77e6248..c68a147 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -145,6 +145,7 @@ typedef __be32 rpc_fraghdr; #define RPCBIND_NETID_TCP "tcp" #define RPCBIND_NETID_UDP6 "udp6" #define RPCBIND_NETID_TCP6 "tcp6" +#define RPCBIND_NETID_LOCAL "local" /* * Note that RFC 1833 does not put any size restrictions on the diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 04dba23..85c50b4 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -28,6 +28,7 @@ struct svc_sock { /* private TCP part */ u32 sk_reclen; /* length of record */ u32 sk_tcplen; /* current read length */ + struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ }; /* diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index a0f998c..81cce3b 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -141,7 +141,8 @@ enum xprt_transports { XPRT_TRANSPORT_UDP = IPPROTO_UDP, XPRT_TRANSPORT_TCP = IPPROTO_TCP, XPRT_TRANSPORT_BC_TCP = IPPROTO_TCP | XPRT_TRANSPORT_BC, - XPRT_TRANSPORT_RDMA = 256 + XPRT_TRANSPORT_RDMA = 256, + XPRT_TRANSPORT_LOCAL = 257, }; struct rpc_xprt { diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index fd8d1e0..6824ca7 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c @@ -54,11 +54,17 @@ enum pm_qos_type { PM_QOS_MIN /* return the smallest value */ }; +/* + * Note: The lockless read path depends on the CPU accessing + * target_value atomically. Atomic access is only guaranteed on all CPU + * types linux supports for 32 bit quantites + */ struct pm_qos_object { struct plist_head requests; struct blocking_notifier_head *notifiers; struct miscdevice pm_qos_power_miscdev; char *name; + s32 target_value; /* Do not change to 64 bit */ s32 default_value; enum pm_qos_type type; }; @@ -71,7 +77,8 @@ static struct pm_qos_object cpu_dma_pm_qos = { .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests, pm_qos_lock), .notifiers = &cpu_dma_lat_notifier, .name = "cpu_dma_latency", - .default_value = 2000 * USEC_PER_SEC, + .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, + .default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, .type = PM_QOS_MIN, }; @@ -80,7 +87,8 @@ static struct pm_qos_object network_lat_pm_qos = { .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests, pm_qos_lock), .notifiers = &network_lat_notifier, .name = "network_latency", - .default_value = 2000 * USEC_PER_SEC, + .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, + .default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, .type = PM_QOS_MIN }; @@ -90,7 +98,8 @@ static struct pm_qos_object network_throughput_pm_qos = { .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests, pm_qos_lock), .notifiers = &network_throughput_notifier, .name = "network_throughput", - .default_value = 0, + .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, + .default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, .type = PM_QOS_MAX, }; @@ -136,6 +145,16 @@ static inline int pm_qos_get_value(struct pm_qos_object *o) } } +static inline s32 pm_qos_read_value(struct pm_qos_object *o) +{ + return o->target_value; +} + +static inline void pm_qos_set_value(struct pm_qos_object *o, s32 value) +{ + o->target_value = value; +} + static void update_target(struct pm_qos_object *o, struct plist_node *node, int del, int value) { @@ -160,6 +179,7 @@ static void update_target(struct pm_qos_object *o, struct plist_node *node, plist_add(node, &o->requests); } curr_value = pm_qos_get_value(o); + pm_qos_set_value(o, curr_value); spin_unlock_irqrestore(&pm_qos_lock, flags); if (prev_value != curr_value) @@ -194,18 +214,11 @@ static int find_pm_qos_object_by_minor(int minor) * pm_qos_request - returns current system wide qos expectation * @pm_qos_class: identification of which qos value is requested * - * This function returns the current target value in an atomic manner. + * This function returns the current target value. */ int pm_qos_request(int pm_qos_class) { - unsigned long flags; - int value; - - spin_lock_irqsave(&pm_qos_lock, flags); - value = pm_qos_get_value(pm_qos_array[pm_qos_class]); - spin_unlock_irqrestore(&pm_qos_lock, flags); - - return value; + return pm_qos_read_value(pm_qos_array[pm_qos_class]); } EXPORT_SYMBOL_GPL(pm_qos_request); @@ -352,6 +352,11 @@ void __init anon_vma_init(void) * The page might have been remapped to a different anon_vma or the anon_vma * returned may already be freed (and even reused). * + * In case it was remapped to a different anon_vma, the new anon_vma will be a + * child of the old anon_vma, and the anon_vma lifetime rules will therefore + * ensure that any anon_vma obtained from the page will still be valid for as + * long as we observe page_mapped() [ hence all those page_mapped() tests ]. + * * All users of this function must be very careful when walking the anon_vma * chain and verify that the page in question is indeed mapped in it * [ something equivalent to page_mapped_in_vma() ]. @@ -421,7 +426,7 @@ struct anon_vma *page_lock_anon_vma(struct page *page) /* * If the page is still mapped, then this anon_vma is still * its anon_vma, and holding the mutex ensures that it will - * not go away, see __put_anon_vma(). + * not go away, see anon_vma_free(). */ if (!page_mapped(page)) { mutex_unlock(&root_anon_vma->mutex); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8d83f9d..b84d739 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -13,10 +13,6 @@ * and need to be refreshed, or when a packet was damaged in transit. * This may be have to be moved to the VFS layer. * - * NB: BSD uses a more intelligent approach to guessing when a request - * or reply has been lost by keeping the RTO estimate for each procedure. - * We currently make do with a constant timeout value. - * * Copyright (C) 1992,1993 Rick Sladkey <jrs@world.std.com> * Copyright (C) 1995,1996 Olaf Kirch <okir@monad.swb.de> */ @@ -32,7 +28,9 @@ #include <linux/slab.h> #include <linux/utsname.h> #include <linux/workqueue.h> +#include <linux/in.h> #include <linux/in6.h> +#include <linux/un.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/rpc_pipe_fs.h> @@ -298,22 +296,27 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) * up a string representation of the passed-in address. */ if (args->servername == NULL) { + struct sockaddr_un *sun = + (struct sockaddr_un *)args->address; + struct sockaddr_in *sin = + (struct sockaddr_in *)args->address; + struct sockaddr_in6 *sin6 = + (struct sockaddr_in6 *)args->address; + servername[0] = '\0'; switch (args->address->sa_family) { - case AF_INET: { - struct sockaddr_in *sin = - (struct sockaddr_in *)args->address; + case AF_LOCAL: + snprintf(servername, sizeof(servername), "%s", + sun->sun_path); + break; + case AF_INET: snprintf(servername, sizeof(servername), "%pI4", &sin->sin_addr.s_addr); break; - } - case AF_INET6: { - struct sockaddr_in6 *sin = - (struct sockaddr_in6 *)args->address; + case AF_INET6: snprintf(servername, sizeof(servername), "%pI6", - &sin->sin6_addr); + &sin6->sin6_addr); break; - } default: /* caller wants default server name, but * address family isn't recognized. */ diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index c652e4c..9a80a92 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -16,6 +16,7 @@ #include <linux/types.h> #include <linux/socket.h> +#include <linux/un.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/kernel.h> @@ -32,6 +33,8 @@ # define RPCDBG_FACILITY RPCDBG_BIND #endif +#define RPCBIND_SOCK_PATHNAME "/var/run/rpcbind.sock" + #define RPCBIND_PROGRAM (100000u) #define RPCBIND_PORT (111u) @@ -158,20 +161,69 @@ static void rpcb_map_release(void *data) kfree(map); } -static const struct sockaddr_in rpcb_inaddr_loopback = { - .sin_family = AF_INET, - .sin_addr.s_addr = htonl(INADDR_LOOPBACK), - .sin_port = htons(RPCBIND_PORT), -}; +/* + * Returns zero on success, otherwise a negative errno value + * is returned. + */ +static int rpcb_create_local_unix(void) +{ + static const struct sockaddr_un rpcb_localaddr_rpcbind = { + .sun_family = AF_LOCAL, + .sun_path = RPCBIND_SOCK_PATHNAME, + }; + struct rpc_create_args args = { + .net = &init_net, + .protocol = XPRT_TRANSPORT_LOCAL, + .address = (struct sockaddr *)&rpcb_localaddr_rpcbind, + .addrsize = sizeof(rpcb_localaddr_rpcbind), + .servername = "localhost", + .program = &rpcb_program, + .version = RPCBVERS_2, + .authflavor = RPC_AUTH_NULL, + }; + struct rpc_clnt *clnt, *clnt4; + int result = 0; + + /* + * Because we requested an RPC PING at transport creation time, + * this works only if the user space portmapper is rpcbind, and + * it's listening on AF_LOCAL on the named socket. + */ + clnt = rpc_create(&args); + if (IS_ERR(clnt)) { + dprintk("RPC: failed to create AF_LOCAL rpcbind " + "client (errno %ld).\n", PTR_ERR(clnt)); + result = -PTR_ERR(clnt); + goto out; + } + + clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); + if (IS_ERR(clnt4)) { + dprintk("RPC: failed to bind second program to " + "rpcbind v4 client (errno %ld).\n", + PTR_ERR(clnt4)); + clnt4 = NULL; + } + + /* Protected by rpcb_create_local_mutex */ + rpcb_local_clnt = clnt; + rpcb_local_clnt4 = clnt4; -static DEFINE_MUTEX(rpcb_create_local_mutex); +out: + return result; +} /* * Returns zero on success, otherwise a negative errno value * is returned. */ -static int rpcb_create_local(void) +static int rpcb_create_local_net(void) { + static const struct sockaddr_in rpcb_inaddr_loopback = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + .sin_port = htons(RPCBIND_PORT), + }; struct rpc_create_args args = { .net = &init_net, .protocol = XPRT_TRANSPORT_TCP, @@ -186,13 +238,6 @@ static int rpcb_create_local(void) struct rpc_clnt *clnt, *clnt4; int result = 0; - if (rpcb_local_clnt) - return result; - - mutex_lock(&rpcb_create_local_mutex); - if (rpcb_local_clnt) - goto out; - clnt = rpc_create(&args); if (IS_ERR(clnt)) { dprintk("RPC: failed to create local rpcbind " @@ -214,10 +259,34 @@ static int rpcb_create_local(void) clnt4 = NULL; } + /* Protected by rpcb_create_local_mutex */ rpcb_local_clnt = clnt; rpcb_local_clnt4 = clnt4; out: + return result; +} + +/* + * Returns zero on success, otherwise a negative errno value + * is returned. + */ +static int rpcb_create_local(void) +{ + static DEFINE_MUTEX(rpcb_create_local_mutex); + int result = 0; + + if (rpcb_local_clnt) + return result; + + mutex_lock(&rpcb_create_local_mutex); + if (rpcb_local_clnt) + goto out; + + if (rpcb_create_local_unix() != 0) + result = rpcb_create_local_net(); + +out: mutex_unlock(&rpcb_create_local_mutex); return result; } diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 08e05a8..2b90292 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -942,6 +942,8 @@ static void svc_unregister(const struct svc_serv *serv) if (progp->pg_vers[i]->vs_hidden) continue; + dprintk("svc: attempting to unregister %sv%u\n", + progp->pg_name, i); __svc_unregister(progp->pg_prog, i, progp->pg_name); } } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index b7d435c..af04f77 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -387,6 +387,33 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, return len; } +static int svc_partial_recvfrom(struct svc_rqst *rqstp, + struct kvec *iov, int nr, + int buflen, unsigned int base) +{ + size_t save_iovlen; + void __user *save_iovbase; + unsigned int i; + int ret; + + if (base == 0) + return svc_recvfrom(rqstp, iov, nr, buflen); + + for (i = 0; i < nr; i++) { + if (iov[i].iov_len > base) + break; + base -= iov[i].iov_len; + } + save_iovlen = iov[i].iov_len; + save_iovbase = iov[i].iov_base; + iov[i].iov_len -= base; + iov[i].iov_base += base; + ret = svc_recvfrom(rqstp, &iov[i], nr - i, buflen); + iov[i].iov_len = save_iovlen; + iov[i].iov_base = save_iovbase; + return ret; +} + /* * Set socket snd and rcv buffer lengths */ @@ -409,7 +436,6 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, lock_sock(sock->sk); sock->sk->sk_sndbuf = snd * 2; sock->sk->sk_rcvbuf = rcv * 2; - sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; sock->sk->sk_write_space(sock->sk); release_sock(sock->sk); #endif @@ -884,6 +910,56 @@ failed: return NULL; } +static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + return 0; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + if (rqstp->rq_pages[i] != NULL) + put_page(rqstp->rq_pages[i]); + BUG_ON(svsk->sk_pages[i] == NULL); + rqstp->rq_pages[i] = svsk->sk_pages[i]; + svsk->sk_pages[i] = NULL; + } + rqstp->rq_arg.head[0].iov_base = page_address(rqstp->rq_pages[0]); + return len; +} + +static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + return; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + svsk->sk_pages[i] = rqstp->rq_pages[i]; + rqstp->rq_pages[i] = NULL; + } +} + +static void svc_tcp_clear_pages(struct svc_sock *svsk) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + goto out; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + BUG_ON(svsk->sk_pages[i] == NULL); + put_page(svsk->sk_pages[i]); + svsk->sk_pages[i] = NULL; + } +out: + svsk->sk_tcplen = 0; +} + /* * Receive data. * If we haven't gotten the record length yet, get the next four bytes. @@ -893,31 +969,15 @@ failed: static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) { struct svc_serv *serv = svsk->sk_xprt.xpt_server; + unsigned int want; int len; - if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) - /* sndbuf needs to have room for one request - * per thread, otherwise we can stall even when the - * network isn't a bottleneck. - * - * We count all threads rather than threads in a - * particular pool, which provides an upper bound - * on the number of threads which will access the socket. - * - * rcvbuf just needs to be able to hold a few requests. - * Normally they will be removed from the queue - * as soon a a complete request arrives. - */ - svc_sock_setbufsize(svsk->sk_sock, - (serv->sv_nrthreads+3) * serv->sv_max_mesg, - 3 * serv->sv_max_mesg); - clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { - int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; struct kvec iov; + want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; iov.iov_len = want; if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) @@ -927,7 +987,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) if (len < want) { dprintk("svc: short recvfrom while reading record " "length (%d of %d)\n", len, want); - goto err_again; /* record header not complete */ + return -EAGAIN; } svsk->sk_reclen = ntohl(svsk->sk_reclen); @@ -954,83 +1014,75 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) } } - /* Check whether enough data is available */ - len = svc_recv_available(svsk); - if (len < 0) - goto error; + if (svsk->sk_reclen < 8) + goto err_delete; /* client is nuts. */ - if (len < svsk->sk_reclen) { - dprintk("svc: incomplete TCP record (%d of %d)\n", - len, svsk->sk_reclen); - goto err_again; /* record not complete */ - } len = svsk->sk_reclen; - set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); return len; - error: - if (len == -EAGAIN) - dprintk("RPC: TCP recv_record got EAGAIN\n"); +error: + dprintk("RPC: TCP recv_record got %d\n", len); return len; - err_delete: +err_delete: set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); - err_again: return -EAGAIN; } -static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, - struct rpc_rqst **reqpp, struct kvec *vec) +static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) { + struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt; struct rpc_rqst *req = NULL; - u32 *p; - u32 xid; - u32 calldir; - int len; - - len = svc_recvfrom(rqstp, vec, 1, 8); - if (len < 0) - goto error; + struct kvec *src, *dst; + __be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base; + __be32 xid; + __be32 calldir; - p = (u32 *)rqstp->rq_arg.head[0].iov_base; xid = *p++; calldir = *p; - if (calldir == 0) { - /* REQUEST is the most common case */ - vec[0] = rqstp->rq_arg.head[0]; - } else { - /* REPLY */ - struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt; - - if (bc_xprt) - req = xprt_lookup_rqst(bc_xprt, xid); - - if (!req) { - printk(KERN_NOTICE - "%s: Got unrecognized reply: " - "calldir 0x%x xpt_bc_xprt %p xid %08x\n", - __func__, ntohl(calldir), - bc_xprt, xid); - vec[0] = rqstp->rq_arg.head[0]; - goto out; - } + if (bc_xprt) + req = xprt_lookup_rqst(bc_xprt, xid); - memcpy(&req->rq_private_buf, &req->rq_rcv_buf, - sizeof(struct xdr_buf)); - /* copy the xid and call direction */ - memcpy(req->rq_private_buf.head[0].iov_base, - rqstp->rq_arg.head[0].iov_base, 8); - vec[0] = req->rq_private_buf.head[0]; + if (!req) { + printk(KERN_NOTICE + "%s: Got unrecognized reply: " + "calldir 0x%x xpt_bc_xprt %p xid %08x\n", + __func__, ntohl(calldir), + bc_xprt, xid); + return -EAGAIN; } - out: - vec[0].iov_base += 8; - vec[0].iov_len -= 8; - len = svsk->sk_reclen - 8; - error: - *reqpp = req; - return len; + + memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); + /* + * XXX!: cheating for now! Only copying HEAD. + * But we know this is good enough for now (in fact, for any + * callback reply in the forseeable future). + */ + dst = &req->rq_private_buf.head[0]; + src = &rqstp->rq_arg.head[0]; + if (dst->iov_len < src->iov_len) + return -EAGAIN; /* whatever; just giving up. */ + memcpy(dst->iov_base, src->iov_base, src->iov_len); + xprt_complete_rqst(req->rq_task, svsk->sk_reclen); + rqstp->rq_arg.len = 0; + return 0; } +static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) +{ + int i = 0; + int t = 0; + + while (t < len) { + vec[i].iov_base = page_address(pages[i]); + vec[i].iov_len = PAGE_SIZE; + i++; + t += PAGE_SIZE; + } + return i; +} + + /* * Receive data from a TCP socket. */ @@ -1041,8 +1093,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) struct svc_serv *serv = svsk->sk_xprt.xpt_server; int len; struct kvec *vec; - int pnum, vlen; - struct rpc_rqst *req = NULL; + unsigned int want, base; + __be32 *p; + __be32 calldir; + int pnum; dprintk("svc: tcp_recv %p data %d conn %d close %d\n", svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), @@ -1053,87 +1107,73 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (len < 0) goto error; + base = svc_tcp_restore_pages(svsk, rqstp); + want = svsk->sk_reclen - base; + vec = rqstp->rq_vec; - vec[0] = rqstp->rq_arg.head[0]; - vlen = PAGE_SIZE; - /* - * We have enough data for the whole tcp record. Let's try and read the - * first 8 bytes to get the xid and the call direction. We can use this - * to figure out if this is a call or a reply to a callback. If - * sk_reclen is < 8 (xid and calldir), then this is a malformed packet. - * In that case, don't bother with the calldir and just read the data. - * It will be rejected in svc_process. - */ - if (len >= 8) { - len = svc_process_calldir(svsk, rqstp, &req, vec); - if (len < 0) - goto err_again; - vlen -= 8; - } + pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], + svsk->sk_reclen); - pnum = 1; - while (vlen < len) { - vec[pnum].iov_base = (req) ? - page_address(req->rq_private_buf.pages[pnum - 1]) : - page_address(rqstp->rq_pages[pnum]); - vec[pnum].iov_len = PAGE_SIZE; - pnum++; - vlen += PAGE_SIZE; - } rqstp->rq_respages = &rqstp->rq_pages[pnum]; /* Now receive data */ - len = svc_recvfrom(rqstp, vec, pnum, len); - if (len < 0) - goto err_again; - - /* - * Account for the 8 bytes we read earlier - */ - len += 8; - - if (req) { - xprt_complete_rqst(req->rq_task, len); - len = 0; - goto out; + len = svc_partial_recvfrom(rqstp, vec, pnum, want, base); + if (len >= 0) + svsk->sk_tcplen += len; + if (len != want) { + if (len < 0 && len != -EAGAIN) + goto err_other; + svc_tcp_save_pages(svsk, rqstp); + dprintk("svc: incomplete TCP record (%d of %d)\n", + svsk->sk_tcplen, svsk->sk_reclen); + goto err_noclose; } - dprintk("svc: TCP complete record (%d bytes)\n", len); - rqstp->rq_arg.len = len; + + rqstp->rq_arg.len = svsk->sk_reclen; rqstp->rq_arg.page_base = 0; - if (len <= rqstp->rq_arg.head[0].iov_len) { - rqstp->rq_arg.head[0].iov_len = len; + if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) { + rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len; rqstp->rq_arg.page_len = 0; - } else { - rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; - } + } else + rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; -out: + p = (__be32 *)rqstp->rq_arg.head[0].iov_base; + calldir = p[1]; + if (calldir) + len = receive_cb_reply(svsk, rqstp); + /* Reset TCP read info */ svsk->sk_reclen = 0; svsk->sk_tcplen = 0; + /* If we have more data, signal svc_xprt_enqueue() to try again */ + if (svc_recv_available(svsk) > sizeof(rpc_fraghdr)) + set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + + if (len < 0) + goto error; svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); if (serv->sv_stats) serv->sv_stats->nettcpcnt++; - return len; + dprintk("svc: TCP complete record (%d bytes)\n", rqstp->rq_arg.len); + return rqstp->rq_arg.len; -err_again: - if (len == -EAGAIN) { - dprintk("RPC: TCP recvfrom got EAGAIN\n"); - return len; - } error: - if (len != -EAGAIN) { - printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", - svsk->sk_xprt.xpt_server->sv_name, -len); - set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); - } + if (len != -EAGAIN) + goto err_other; + dprintk("RPC: TCP recvfrom got EAGAIN\n"); return -EAGAIN; +err_other: + printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", + svsk->sk_xprt.xpt_server->sv_name, -len); + set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); +err_noclose: + return -EAGAIN; /* record not complete */ } /* @@ -1304,18 +1344,10 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) svsk->sk_reclen = 0; svsk->sk_tcplen = 0; + memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages)); tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; - /* initialise setting must have enough space to - * receive and respond to one request. - * svc_tcp_recvfrom will re-adjust if necessary - */ - svc_sock_setbufsize(svsk->sk_sock, - 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, - 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); - - set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); if (sk->sk_state != TCP_ESTABLISHED) set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); @@ -1379,8 +1411,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Initialize the socket */ if (sock->type == SOCK_DGRAM) svc_udp_init(svsk, serv); - else + else { + /* initialise setting must have enough space to + * receive and respond to one request. + */ + svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg, + 4 * serv->sv_max_mesg); svc_tcp_init(svsk, serv); + } dprintk("svc: svc_setup_socket created %p (inet %p)\n", svsk, svsk->sk_sk); @@ -1562,8 +1600,10 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt) svc_sock_detach(xprt); - if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) + if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) { + svc_tcp_clear_pages(svsk); kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR); + } } /* diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index bf005d3..72abb73 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -19,6 +19,7 @@ */ #include <linux/types.h> +#include <linux/string.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/capability.h> @@ -28,6 +29,7 @@ #include <linux/in.h> #include <linux/net.h> #include <linux/mm.h> +#include <linux/un.h> #include <linux/udp.h> #include <linux/tcp.h> #include <linux/sunrpc/clnt.h> @@ -45,6 +47,9 @@ #include <net/tcp.h> #include "sunrpc.h" + +static void xs_close(struct rpc_xprt *xprt); + /* * xprtsock tunables */ @@ -261,6 +266,11 @@ static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) return (struct sockaddr *) &xprt->addr; } +static inline struct sockaddr_un *xs_addr_un(struct rpc_xprt *xprt) +{ + return (struct sockaddr_un *) &xprt->addr; +} + static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt) { return (struct sockaddr_in *) &xprt->addr; @@ -276,23 +286,34 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) struct sockaddr *sap = xs_addr(xprt); struct sockaddr_in6 *sin6; struct sockaddr_in *sin; + struct sockaddr_un *sun; char buf[128]; - (void)rpc_ntop(sap, buf, sizeof(buf)); - xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); - switch (sap->sa_family) { + case AF_LOCAL: + sun = xs_addr_un(xprt); + strlcpy(buf, sun->sun_path, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = + kstrdup(buf, GFP_KERNEL); + break; case AF_INET: + (void)rpc_ntop(sap, buf, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = + kstrdup(buf, GFP_KERNEL); sin = xs_addr_in(xprt); snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); break; case AF_INET6: + (void)rpc_ntop(sap, buf, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = + kstrdup(buf, GFP_KERNEL); sin6 = xs_addr_in6(xprt); snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); break; default: BUG(); } + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); } @@ -495,6 +516,70 @@ static int xs_nospace(struct rpc_task *task) return ret; } +/* + * Construct a stream transport record marker in @buf. + */ +static inline void xs_encode_stream_record_marker(struct xdr_buf *buf) +{ + u32 reclen = buf->len - sizeof(rpc_fraghdr); + rpc_fraghdr *base = buf->head[0].iov_base; + *base = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | reclen); +} + +/** + * xs_local_send_request - write an RPC request to an AF_LOCAL socket + * @task: RPC task that manages the state of an RPC request + * + * Return values: + * 0: The request has been sent + * EAGAIN: The socket was blocked, please call again later to + * complete the request + * ENOTCONN: Caller needs to invoke connect logic then call again + * other: Some other error occured, the request was not sent + */ +static int xs_local_send_request(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + struct xdr_buf *xdr = &req->rq_snd_buf; + int status; + + xs_encode_stream_record_marker(&req->rq_snd_buf); + + xs_pktdump("packet data:", + req->rq_svec->iov_base, req->rq_svec->iov_len); + + status = xs_sendpages(transport->sock, NULL, 0, + xdr, req->rq_bytes_sent); + dprintk("RPC: %s(%u) = %d\n", + __func__, xdr->len - req->rq_bytes_sent, status); + if (likely(status >= 0)) { + req->rq_bytes_sent += status; + req->rq_xmit_bytes_sent += status; + if (likely(req->rq_bytes_sent >= req->rq_slen)) { + req->rq_bytes_sent = 0; + return 0; + } + status = -EAGAIN; + } + + switch (status) { + case -EAGAIN: + status = xs_nospace(task); + break; + default: + dprintk("RPC: sendmsg returned unrecognized error %d\n", + -status); + case -EPIPE: + xs_close(xprt); + status = -ENOTCONN; + } + + return status; +} + /** * xs_udp_send_request - write an RPC request to a UDP socket * @task: address of RPC task that manages the state of an RPC request @@ -574,13 +659,6 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt) kernel_sock_shutdown(sock, SHUT_WR); } -static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf) -{ - u32 reclen = buf->len - sizeof(rpc_fraghdr); - rpc_fraghdr *base = buf->head[0].iov_base; - *base = htonl(RPC_LAST_STREAM_FRAGMENT | reclen); -} - /** * xs_tcp_send_request - write an RPC request to a TCP socket * @task: address of RPC task that manages the state of an RPC request @@ -603,7 +681,7 @@ static int xs_tcp_send_request(struct rpc_task *task) struct xdr_buf *xdr = &req->rq_snd_buf; int status; - xs_encode_tcp_record_marker(&req->rq_snd_buf); + xs_encode_stream_record_marker(&req->rq_snd_buf); xs_pktdump("packet data:", req->rq_svec->iov_base, @@ -785,6 +863,88 @@ static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) return (struct rpc_xprt *) sk->sk_user_data; } +static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) +{ + struct xdr_skb_reader desc = { + .skb = skb, + .offset = sizeof(rpc_fraghdr), + .count = skb->len - sizeof(rpc_fraghdr), + }; + + if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0) + return -1; + if (desc.count) + return -1; + return 0; +} + +/** + * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets + * @sk: socket with data to read + * @len: how much data to read + * + * Currently this assumes we can read the whole reply in a single gulp. + */ +static void xs_local_data_ready(struct sock *sk, int len) +{ + struct rpc_task *task; + struct rpc_xprt *xprt; + struct rpc_rqst *rovr; + struct sk_buff *skb; + int err, repsize, copied; + u32 _xid; + __be32 *xp; + + read_lock_bh(&sk->sk_callback_lock); + dprintk("RPC: %s...\n", __func__); + xprt = xprt_from_sock(sk); + if (xprt == NULL) + goto out; + + skb = skb_recv_datagram(sk, 0, 1, &err); + if (skb == NULL) + goto out; + + if (xprt->shutdown) + goto dropit; + + repsize = skb->len - sizeof(rpc_fraghdr); + if (repsize < 4) { + dprintk("RPC: impossible RPC reply size %d\n", repsize); + goto dropit; + } + + /* Copy the XID from the skb... */ + xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid); + if (xp == NULL) + goto dropit; + + /* Look up and lock the request corresponding to the given XID */ + spin_lock(&xprt->transport_lock); + rovr = xprt_lookup_rqst(xprt, *xp); + if (!rovr) + goto out_unlock; + task = rovr->rq_task; + + copied = rovr->rq_private_buf.buflen; + if (copied > repsize) + copied = repsize; + + if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) { + dprintk("RPC: sk_buff copy failed\n"); + goto out_unlock; + } + + xprt_complete_rqst(task, copied); + + out_unlock: + spin_unlock(&xprt->transport_lock); + dropit: + skb_free_datagram(sk, skb); + out: + read_unlock_bh(&sk->sk_callback_lock); +} + /** * xs_udp_data_ready - "data ready" callback for UDP sockets * @sk: socket with data to read @@ -1344,7 +1504,6 @@ static void xs_tcp_state_change(struct sock *sk) case TCP_CLOSE_WAIT: /* The server initiated a shutdown of the socket */ xprt_force_disconnect(xprt); - case TCP_SYN_SENT: xprt->connect_cookie++; case TCP_CLOSING: /* @@ -1571,11 +1730,31 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) return err; } +/* + * We don't support autobind on AF_LOCAL sockets + */ +static void xs_local_rpcbind(struct rpc_task *task) +{ + xprt_set_bound(task->tk_xprt); +} + +static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port) +{ +} #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key xs_key[2]; static struct lock_class_key xs_slock_key[2]; +static inline void xs_reclassify_socketu(struct socket *sock) +{ + struct sock *sk = sock->sk; + + BUG_ON(sock_owned_by_user(sk)); + sock_lock_init_class_and_name(sk, "slock-AF_LOCAL-RPC", + &xs_slock_key[1], "sk_lock-AF_LOCAL-RPC", &xs_key[1]); +} + static inline void xs_reclassify_socket4(struct socket *sock) { struct sock *sk = sock->sk; @@ -1597,6 +1776,9 @@ static inline void xs_reclassify_socket6(struct socket *sock) static inline void xs_reclassify_socket(int family, struct socket *sock) { switch (family) { + case AF_LOCAL: + xs_reclassify_socketu(sock); + break; case AF_INET: xs_reclassify_socket4(sock); break; @@ -1606,6 +1788,10 @@ static inline void xs_reclassify_socket(int family, struct socket *sock) } } #else +static inline void xs_reclassify_socketu(struct socket *sock) +{ +} + static inline void xs_reclassify_socket4(struct socket *sock) { } @@ -1644,6 +1830,94 @@ out: return ERR_PTR(err); } +static int xs_local_finish_connecting(struct rpc_xprt *xprt, + struct socket *sock) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, + xprt); + + if (!transport->inet) { + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + xs_save_old_callbacks(transport, sk); + + sk->sk_user_data = xprt; + sk->sk_data_ready = xs_local_data_ready; + sk->sk_write_space = xs_udp_write_space; + sk->sk_error_report = xs_error_report; + sk->sk_allocation = GFP_ATOMIC; + + xprt_clear_connected(xprt); + + /* Reset to new socket */ + transport->sock = sock; + transport->inet = sk; + + write_unlock_bh(&sk->sk_callback_lock); + } + + /* Tell the socket layer to start connecting... */ + xprt->stat.connect_count++; + xprt->stat.connect_start = jiffies; + return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0); +} + +/** + * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint + * @xprt: RPC transport to connect + * @transport: socket transport to connect + * @create_sock: function to create a socket of the correct type + * + * Invoked by a work queue tasklet. + */ +static void xs_local_setup_socket(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; + struct socket *sock; + int status = -EIO; + + if (xprt->shutdown) + goto out; + + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); + status = __sock_create(xprt->xprt_net, AF_LOCAL, + SOCK_STREAM, 0, &sock, 1); + if (status < 0) { + dprintk("RPC: can't create AF_LOCAL " + "transport socket (%d).\n", -status); + goto out; + } + xs_reclassify_socketu(sock); + + dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + + status = xs_local_finish_connecting(xprt, sock); + switch (status) { + case 0: + dprintk("RPC: xprt %p connected to %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + xprt_set_connected(xprt); + break; + case -ENOENT: + dprintk("RPC: xprt %p: socket %s does not exist\n", + xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + break; + default: + printk(KERN_ERR "%s: unhandled error (%d) connecting to %s\n", + __func__, -status, + xprt->address_strings[RPC_DISPLAY_ADDR]); + } + +out: + xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); +} + static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1758,6 +2032,7 @@ static void xs_tcp_reuse_connection(struct sock_xprt *transport) static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + int ret = -ENOTCONN; if (!transport->inet) { struct sock *sk = sock->sk; @@ -1789,12 +2064,22 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) } if (!xprt_bound(xprt)) - return -ENOTCONN; + goto out; /* Tell the socket layer to start connecting... */ xprt->stat.connect_count++; xprt->stat.connect_start = jiffies; - return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); + ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); + switch (ret) { + case 0: + case -EINPROGRESS: + /* SYN_SENT! */ + xprt->connect_cookie++; + if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; + } +out: + return ret; } /** @@ -1917,6 +2202,32 @@ static void xs_connect(struct rpc_task *task) } /** + * xs_local_print_stats - display AF_LOCAL socket-specifc stats + * @xprt: rpc_xprt struct containing statistics + * @seq: output file + * + */ +static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) +{ + long idle_time = 0; + + if (xprt_connected(xprt)) + idle_time = (long)(jiffies - xprt->last_used) / HZ; + + seq_printf(seq, "\txprt:\tlocal %lu %lu %lu %ld %lu %lu %lu " + "%llu %llu\n", + xprt->stat.bind_count, + xprt->stat.connect_count, + xprt->stat.connect_time, + idle_time, + xprt->stat.sends, + xprt->stat.recvs, + xprt->stat.bad_xids, + xprt->stat.req_u, + xprt->stat.bklog_u); +} + +/** * xs_udp_print_stats - display UDP socket-specifc stats * @xprt: rpc_xprt struct containing statistics * @seq: output file @@ -2014,10 +2325,7 @@ static int bc_sendto(struct rpc_rqst *req) unsigned long headoff; unsigned long tailoff; - /* - * Set up the rpc header and record marker stuff - */ - xs_encode_tcp_record_marker(xbufp); + xs_encode_stream_record_marker(xbufp); tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK; headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK; @@ -2089,6 +2397,21 @@ static void bc_destroy(struct rpc_xprt *xprt) { } +static struct rpc_xprt_ops xs_local_ops = { + .reserve_xprt = xprt_reserve_xprt, + .release_xprt = xs_tcp_release_xprt, + .rpcbind = xs_local_rpcbind, + .set_port = xs_local_set_port, + .connect = xs_connect, + .buf_alloc = rpc_malloc, + .buf_free = rpc_free, + .send_request = xs_local_send_request, + .set_retrans_timeout = xprt_set_retrans_timeout_def, + .close = xs_close, + .destroy = xs_destroy, + .print_stats = xs_local_print_stats, +}; + static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, @@ -2150,6 +2473,8 @@ static int xs_init_anyaddr(const int family, struct sockaddr *sap) }; switch (family) { + case AF_LOCAL: + break; case AF_INET: memcpy(sap, &sin, sizeof(sin)); break; @@ -2197,6 +2522,70 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, return xprt; } +static const struct rpc_timeout xs_local_default_timeout = { + .to_initval = 10 * HZ, + .to_maxval = 10 * HZ, + .to_retries = 2, +}; + +/** + * xs_setup_local - Set up transport to use an AF_LOCAL socket + * @args: rpc transport creation arguments + * + * AF_LOCAL is a "tpi_cots_ord" transport, just like TCP + */ +static struct rpc_xprt *xs_setup_local(struct xprt_create *args) +{ + struct sockaddr_un *sun = (struct sockaddr_un *)args->dstaddr; + struct sock_xprt *transport; + struct rpc_xprt *xprt; + struct rpc_xprt *ret; + + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); + if (IS_ERR(xprt)) + return xprt; + transport = container_of(xprt, struct sock_xprt, xprt); + + xprt->prot = 0; + xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); + xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; + + xprt->bind_timeout = XS_BIND_TO; + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; + xprt->idle_timeout = XS_IDLE_DISC_TO; + + xprt->ops = &xs_local_ops; + xprt->timeout = &xs_local_default_timeout; + + switch (sun->sun_family) { + case AF_LOCAL: + if (sun->sun_path[0] != '/') { + dprintk("RPC: bad AF_LOCAL address: %s\n", + sun->sun_path); + ret = ERR_PTR(-EINVAL); + goto out_err; + } + xprt_set_bound(xprt); + INIT_DELAYED_WORK(&transport->connect_worker, + xs_local_setup_socket); + xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL); + break; + default: + ret = ERR_PTR(-EAFNOSUPPORT); + goto out_err; + } + + dprintk("RPC: set up xprt to %s via AF_LOCAL\n", + xprt->address_strings[RPC_DISPLAY_ADDR]); + + if (try_module_get(THIS_MODULE)) + return xprt; + ret = ERR_PTR(-EINVAL); +out_err: + xprt_free(xprt); + return ret; +} + static const struct rpc_timeout xs_udp_default_timeout = { .to_initval = 5 * HZ, .to_maxval = 30 * HZ, @@ -2438,6 +2827,14 @@ out_err: return ret; } +static struct xprt_class xs_local_transport = { + .list = LIST_HEAD_INIT(xs_local_transport.list), + .name = "named UNIX socket", + .owner = THIS_MODULE, + .ident = XPRT_TRANSPORT_LOCAL, + .setup = xs_setup_local, +}; + static struct xprt_class xs_udp_transport = { .list = LIST_HEAD_INIT(xs_udp_transport.list), .name = "udp", @@ -2473,6 +2870,7 @@ int init_socket_xprt(void) sunrpc_table_header = register_sysctl_table(sunrpc_table); #endif + xprt_register_transport(&xs_local_transport); xprt_register_transport(&xs_udp_transport); xprt_register_transport(&xs_tcp_transport); xprt_register_transport(&xs_bc_tcp_transport); @@ -2493,6 +2891,7 @@ void cleanup_socket_xprt(void) } #endif + xprt_unregister_transport(&xs_local_transport); xprt_unregister_transport(&xs_udp_transport); xprt_unregister_transport(&xs_tcp_transport); xprt_unregister_transport(&xs_bc_tcp_transport); |