diff options
32 files changed, 753 insertions, 133 deletions
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index e052780..e71bc6c 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt @@ -11,6 +11,8 @@ Joel Schopp <jschopp@austin.ibm.com> ia64/x86_64: Ashok Raj <ashok.raj@intel.com> + s390: + Heiko Carstens <heiko.carstens@de.ibm.com> Authors: Ashok Raj <ashok.raj@intel.com> Lots of feedback: Nathan Lynch <nathanl@austin.ibm.com>, @@ -44,11 +46,9 @@ maxcpus=n Restrict boot time cpus to n. Say if you have 4 cpus, using maxcpus=2 will only boot 2. You can choose to bring the other cpus later online, read FAQ's for more info. -additional_cpus*=n Use this to limit hotpluggable cpus. This option sets - cpu_possible_map = cpu_present_map + additional_cpus - -(*) Option valid only for following architectures -- x86_64, ia64 +additional_cpus=n [x86_64, s390 only] use this to limit hotpluggable cpus. + This option sets + cpu_possible_map = cpu_present_map + additional_cpus ia64 and x86_64 use the number of disabled local apics in ACPI tables MADT to determine the number of potentially hot-pluggable cpus. The implementation @@ -58,6 +58,12 @@ mark such hot-pluggable cpus as disabled entries, one could use this parameter "additional_cpus=x" to represent those cpus in the cpu_possible_map. +possible_cpus=n [s390 only] use this to set hotpluggable cpus. + This option sets possible_cpus bits in + cpu_possible_map. Thus keeping the numbers of bits set + constant even if the machine gets rebooted. + This option overrides additional_cpus. + CPU maps and such ----------------- [More on cpumaps and primitive to manipulate, please check @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 16 -EXTRAVERSION =-rc3 +EXTRAVERSION =-rc4 NAME=Sliding Snow Leopard # *DOCUMENTATION* diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b66602a..b7ca5bf 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -80,6 +80,10 @@ config HOTPLUG_CPU can be controlled through /sys/devices/system/cpu/cpu#. Say N if you want to disable CPU hotplug. +config DEFAULT_MIGRATION_COST + int + default "1000000" + config MATHEMU bool "IEEE FPU emulation" depends on MARCH_G5 diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 2d02162..cc058dc 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -905,8 +905,8 @@ asmlinkage long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * sta return ret; } -asmlinkage long sys32_fstatat(unsigned int dfd, char __user *filename, - struct stat64_emu31 __user* statbuf, int flag) +asmlinkage long sys32_fstatat64(unsigned int dfd, char __user *filename, + struct stat64_emu31 __user* statbuf, int flag) { struct kstat stat; int error = -EINVAL; diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index dd2d6c3..615964c 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1523,13 +1523,13 @@ compat_sys_futimesat_wrapper: llgtr %r4,%r4 # struct timeval * jg compat_sys_futimesat - .globl sys32_fstatat_wrapper -sys32_fstatat_wrapper: + .globl sys32_fstatat64_wrapper +sys32_fstatat64_wrapper: llgfr %r2,%r2 # unsigned int llgtr %r3,%r3 # char * llgtr %r4,%r4 # struct stat64 * lgfr %r5,%r5 # int - jg sys32_fstatat + jg sys32_fstatat64 .globl sys_unlinkat_wrapper sys_unlinkat_wrapper: diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 008c745..da6fbae 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -128,8 +128,10 @@ void default_idle(void) __ctl_set_bit(8, 15); #ifdef CONFIG_HOTPLUG_CPU - if (cpu_is_offline(cpu)) + if (cpu_is_offline(cpu)) { + preempt_enable_no_resched(); cpu_die(); + } #endif local_mcck_disable(); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index de87842..24f62f1 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -600,6 +600,7 @@ setup_arch(char **cmdline_p) init_mm.brk = (unsigned long) &_end; parse_cmdline_early(cmdline_p); + parse_early_param(); setup_memory(); setup_resources(); @@ -607,6 +608,7 @@ setup_arch(char **cmdline_p) cpu_init(); __cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr; + smp_setup_cpu_possible_map(); /* * Create kernel page tables and switch to virtual addressing. diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 0d1ad5db..7dbe00c 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1,8 +1,7 @@ /* * arch/s390/kernel/smp.c * - * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright (C) IBM Corp. 1999,2006 * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), * Martin Schwidefsky (schwidefsky@de.ibm.com) * Heiko Carstens (heiko.carstens@de.ibm.com) @@ -41,8 +40,6 @@ #include <asm/cpcmd.h> #include <asm/tlbflush.h> -/* prototypes */ - extern volatile int __cpu_logical_map[]; /* @@ -51,13 +48,11 @@ extern volatile int __cpu_logical_map[]; struct _lowcore *lowcore_ptr[NR_CPUS]; -cpumask_t cpu_online_map; -cpumask_t cpu_possible_map = CPU_MASK_ALL; +cpumask_t cpu_online_map = CPU_MASK_NONE; +cpumask_t cpu_possible_map = CPU_MASK_NONE; static struct task_struct *current_set[NR_CPUS]; -EXPORT_SYMBOL(cpu_online_map); - /* * Reboot, halt and power_off routines for SMP. */ @@ -490,10 +485,10 @@ void smp_ctl_clear_bit(int cr, int bit) { * Lets check how many CPUs we have. */ -void -__init smp_check_cpus(unsigned int max_cpus) +static unsigned int +__init smp_count_cpus(void) { - int cpu, num_cpus; + unsigned int cpu, num_cpus; __u16 boot_cpu_addr; /* @@ -503,19 +498,20 @@ __init smp_check_cpus(unsigned int max_cpus) boot_cpu_addr = S390_lowcore.cpu_data.cpu_addr; current_thread_info()->cpu = 0; num_cpus = 1; - for (cpu = 0; cpu <= 65535 && num_cpus < max_cpus; cpu++) { + for (cpu = 0; cpu <= 65535; cpu++) { if ((__u16) cpu == boot_cpu_addr) continue; - __cpu_logical_map[num_cpus] = (__u16) cpu; - if (signal_processor(num_cpus, sigp_sense) == + __cpu_logical_map[1] = (__u16) cpu; + if (signal_processor(1, sigp_sense) == sigp_not_operational) continue; - cpu_set(num_cpus, cpu_present_map); num_cpus++; } printk("Detected %d CPU's\n",(int) num_cpus); printk("Boot cpu address %2X\n", boot_cpu_addr); + + return num_cpus; } /* @@ -676,6 +672,44 @@ __cpu_up(unsigned int cpu) return 0; } +static unsigned int __initdata additional_cpus; +static unsigned int __initdata possible_cpus; + +void __init smp_setup_cpu_possible_map(void) +{ + unsigned int phy_cpus, pos_cpus, cpu; + + phy_cpus = smp_count_cpus(); + pos_cpus = min(phy_cpus + additional_cpus, (unsigned int) NR_CPUS); + + if (possible_cpus) + pos_cpus = min(possible_cpus, (unsigned int) NR_CPUS); + + for (cpu = 0; cpu < pos_cpus; cpu++) + cpu_set(cpu, cpu_possible_map); + + phy_cpus = min(phy_cpus, pos_cpus); + + for (cpu = 0; cpu < phy_cpus; cpu++) + cpu_set(cpu, cpu_present_map); +} + +#ifdef CONFIG_HOTPLUG_CPU + +static int __init setup_additional_cpus(char *s) +{ + additional_cpus = simple_strtoul(s, NULL, 0); + return 0; +} +early_param("additional_cpus", setup_additional_cpus); + +static int __init setup_possible_cpus(char *s) +{ + possible_cpus = simple_strtoul(s, NULL, 0); + return 0; +} +early_param("possible_cpus", setup_possible_cpus); + int __cpu_disable(void) { @@ -744,6 +778,8 @@ cpu_die(void) for(;;); } +#endif /* CONFIG_HOTPLUG_CPU */ + /* * Cycle through the processors and setup structures. */ @@ -757,7 +793,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* request the 0x1201 emergency signal external interrupt */ if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0) panic("Couldn't request external interrupt 0x1201"); - smp_check_cpus(max_cpus); memset(lowcore_ptr,0,sizeof(lowcore_ptr)); /* * Initialize prefix pages and stacks for all possible cpus @@ -806,7 +841,6 @@ void __devinit smp_prepare_boot_cpu(void) BUG_ON(smp_processor_id() != 0); cpu_set(0, cpu_online_map); - cpu_set(0, cpu_present_map); S390_lowcore.percpu_offset = __per_cpu_offset[0]; current_set[0] = current; } @@ -845,6 +879,7 @@ static int __init topology_init(void) subsys_initcall(topology_init); +EXPORT_SYMBOL(cpu_online_map); EXPORT_SYMBOL(cpu_possible_map); EXPORT_SYMBOL(lowcore_ptr); EXPORT_SYMBOL(smp_ctl_set_bit); diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 84921fe..7c88d85 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -301,7 +301,7 @@ SYSCALL(sys_mkdirat,sys_mkdirat,sys_mkdirat_wrapper) SYSCALL(sys_mknodat,sys_mknodat,sys_mknodat_wrapper) /* 290 */ SYSCALL(sys_fchownat,sys_fchownat,sys_fchownat_wrapper) SYSCALL(sys_futimesat,sys_futimesat,compat_sys_futimesat_wrapper) -SYSCALL(sys_fstatat64,sys_newfstatat,sys32_fstatat_wrapper) +SYSCALL(sys_fstatat64,sys_newfstatat,sys32_fstatat64_wrapper) SYSCALL(sys_unlinkat,sys_unlinkat,sys_unlinkat_wrapper) SYSCALL(sys_renameat,sys_renameat,sys_renameat_wrapper) /* 295 */ SYSCALL(sys_linkat,sys_linkat,sys_linkat_wrapper) diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 504d56f..e73621d 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -446,7 +446,7 @@ endmenu config ISA_DMA_API bool - depends on MPC1211 + depends on SH_MPC1211 default y menu "Kernel features" diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index dc49bfb..9013a90 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -288,9 +288,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) memcpy(str,mpc->mpc_productid,12); str[12]=0; - printk(KERN_INFO "Product ID: %s ",str); + printk("Product ID: %s ",str); - printk(KERN_INFO "APIC at: 0x%X\n",mpc->mpc_lapic); + printk("APIC at: 0x%X\n",mpc->mpc_lapic); /* save the local APIC address, it might be non-default */ if (!acpi_lapic) diff --git a/drivers/acpi/resources/rscalc.c b/drivers/acpi/resources/rscalc.c index 7d6481d..4038dbf 100644 --- a/drivers/acpi/resources/rscalc.c +++ b/drivers/acpi/resources/rscalc.c @@ -391,8 +391,7 @@ acpi_rs_get_list_length(u8 * aml_buffer, * Ensure a 32-bit boundary for the structure */ extra_struct_bytes = - ACPI_ROUND_UP_to_32_bITS(resource_length) - - resource_length; + ACPI_ROUND_UP_to_32_bITS(resource_length); break; case ACPI_RESOURCE_NAME_END_TAG: @@ -408,8 +407,7 @@ acpi_rs_get_list_length(u8 * aml_buffer, * Add vendor data and ensure a 32-bit boundary for the structure */ extra_struct_bytes = - ACPI_ROUND_UP_to_32_bITS(resource_length) - - resource_length; + ACPI_ROUND_UP_to_32_bITS(resource_length); break; case ACPI_RESOURCE_NAME_ADDRESS32: diff --git a/drivers/char/tpm/tpm_infineon.c b/drivers/char/tpm/tpm_infineon.c index ec75909..24095f6 100644 --- a/drivers/char/tpm/tpm_infineon.c +++ b/drivers/char/tpm/tpm_infineon.c @@ -33,6 +33,7 @@ static int TPM_INF_DATA; static int TPM_INF_ADDR; static int TPM_INF_BASE; +static int TPM_INF_ADDR_LEN; static int TPM_INF_PORT_LEN; /* TPM header definitions */ @@ -195,6 +196,7 @@ static int tpm_inf_recv(struct tpm_chip *chip, u8 * buf, size_t count) int i; int ret; u32 size = 0; + number_of_wtx = 0; recv_begin: /* start receiving header */ @@ -378,24 +380,35 @@ static int __devinit tpm_inf_pnp_probe(struct pnp_dev *dev, if (pnp_port_valid(dev, 0) && pnp_port_valid(dev, 1) && !(pnp_port_flags(dev, 0) & IORESOURCE_DISABLED)) { TPM_INF_ADDR = pnp_port_start(dev, 0); + TPM_INF_ADDR_LEN = pnp_port_len(dev, 0); TPM_INF_DATA = (TPM_INF_ADDR + 1); TPM_INF_BASE = pnp_port_start(dev, 1); TPM_INF_PORT_LEN = pnp_port_len(dev, 1); - if (!TPM_INF_PORT_LEN) - return -EINVAL; + if ((TPM_INF_PORT_LEN < 4) || (TPM_INF_ADDR_LEN < 2)) { + rc = -EINVAL; + goto err_last; + } dev_info(&dev->dev, "Found %s with ID %s\n", dev->name, dev_id->id); - if (!((TPM_INF_BASE >> 8) & 0xff)) - return -EINVAL; + if (!((TPM_INF_BASE >> 8) & 0xff)) { + rc = -EINVAL; + goto err_last; + } /* publish my base address and request region */ tpm_inf.base = TPM_INF_BASE; if (request_region (tpm_inf.base, TPM_INF_PORT_LEN, "tpm_infineon0") == NULL) { - release_region(tpm_inf.base, TPM_INF_PORT_LEN); - return -EINVAL; + rc = -EINVAL; + goto err_last; + } + if (request_region(TPM_INF_ADDR, TPM_INF_ADDR_LEN, + "tpm_infineon0") == NULL) { + rc = -EINVAL; + goto err_last; } } else { - return -EINVAL; + rc = -EINVAL; + goto err_last; } /* query chip for its vendor, its version number a.s.o. */ @@ -443,8 +456,8 @@ static int __devinit tpm_inf_pnp_probe(struct pnp_dev *dev, dev_err(&dev->dev, "Could not set IO-ports to 0x%lx\n", tpm_inf.base); - release_region(tpm_inf.base, TPM_INF_PORT_LEN); - return -EIO; + rc = -EIO; + goto err_release_region; } /* activate register */ @@ -471,14 +484,21 @@ static int __devinit tpm_inf_pnp_probe(struct pnp_dev *dev, rc = tpm_register_hardware(&dev->dev, &tpm_inf); if (rc < 0) { - release_region(tpm_inf.base, TPM_INF_PORT_LEN); - return -ENODEV; + rc = -ENODEV; + goto err_release_region; } return 0; } else { - dev_info(&dev->dev, "No Infineon TPM found!\n"); - return -ENODEV; + rc = -ENODEV; + goto err_release_region; } + +err_release_region: + release_region(tpm_inf.base, TPM_INF_PORT_LEN); + release_region(TPM_INF_ADDR, TPM_INF_ADDR_LEN); + +err_last: + return rc; } static __devexit void tpm_inf_pnp_remove(struct pnp_dev *dev) @@ -518,5 +538,5 @@ module_exit(cleanup_inf); MODULE_AUTHOR("Marcel Selhorst <selhorst@crypto.rub.de>"); MODULE_DESCRIPTION("Driver for Infineon TPM SLD 9630 TT 1.1 / SLB 9635 TT 1.2"); -MODULE_VERSION("1.6"); +MODULE_VERSION("1.7"); MODULE_LICENSE("GPL"); diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c index 3a32c59..24e51d5 100644 --- a/drivers/macintosh/windfarm_smu_sat.c +++ b/drivers/macintosh/windfarm_smu_sat.c @@ -151,6 +151,7 @@ struct smu_sdbp_header *smu_sat_get_sdb_partition(unsigned int sat_id, int id, kfree(buf); return NULL; } +EXPORT_SYMBOL_GPL(smu_sat_get_sdb_partition); /* refresh the cache */ static int wf_sat_read_cache(struct wf_sat *sat) diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 3682ec6..e7fc28b 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -102,6 +102,9 @@ * 0.47: 26 Oct 2005: Add phyaddr 0 in phy scan. * 0.48: 24 Dec 2005: Disable TSO, bugfix for pci_map_single * 0.49: 10 Dec 2005: Fix tso for large buffers. + * 0.50: 20 Jan 2006: Add 8021pq tagging support. + * 0.51: 20 Jan 2006: Add 64bit consistent memory allocation for rings. + * 0.52: 20 Jan 2006: Add MSI/MSIX support. * * Known bugs: * We suspect that on some hardware no TX done interrupts are generated. @@ -113,7 +116,7 @@ * DEV_NEED_TIMERIRQ will not harm you on sane hardware, only generating a few * superfluous timer interrupts from the nic. */ -#define FORCEDETH_VERSION "0.49" +#define FORCEDETH_VERSION "0.52" #define DRV_NAME "forcedeth" #include <linux/module.h> @@ -153,6 +156,9 @@ #define DEV_HAS_LARGEDESC 0x0004 /* device supports jumbo frames and needs packet format 2 */ #define DEV_HAS_HIGH_DMA 0x0008 /* device supports 64bit dma */ #define DEV_HAS_CHECKSUM 0x0010 /* device supports tx and rx checksum offloads */ +#define DEV_HAS_VLAN 0x0020 /* device supports vlan tagging and striping */ +#define DEV_HAS_MSI 0x0040 /* device supports MSI */ +#define DEV_HAS_MSI_X 0x0080 /* device supports MSI-X */ enum { NvRegIrqStatus = 0x000, @@ -166,14 +172,17 @@ enum { #define NVREG_IRQ_TX_OK 0x0010 #define NVREG_IRQ_TIMER 0x0020 #define NVREG_IRQ_LINK 0x0040 -#define NVREG_IRQ_TX_ERROR 0x0080 -#define NVREG_IRQ_TX1 0x0100 +#define NVREG_IRQ_RX_FORCED 0x0080 +#define NVREG_IRQ_TX_FORCED 0x0100 #define NVREG_IRQMASK_THROUGHPUT 0x00df #define NVREG_IRQMASK_CPU 0x0040 +#define NVREG_IRQ_TX_ALL (NVREG_IRQ_TX_ERR|NVREG_IRQ_TX_OK|NVREG_IRQ_TX_FORCED) +#define NVREG_IRQ_RX_ALL (NVREG_IRQ_RX_ERROR|NVREG_IRQ_RX|NVREG_IRQ_RX_NOBUF|NVREG_IRQ_RX_FORCED) +#define NVREG_IRQ_OTHER (NVREG_IRQ_TIMER|NVREG_IRQ_LINK) #define NVREG_IRQ_UNKNOWN (~(NVREG_IRQ_RX_ERROR|NVREG_IRQ_RX|NVREG_IRQ_RX_NOBUF|NVREG_IRQ_TX_ERR| \ - NVREG_IRQ_TX_OK|NVREG_IRQ_TIMER|NVREG_IRQ_LINK|NVREG_IRQ_TX_ERROR| \ - NVREG_IRQ_TX1)) + NVREG_IRQ_TX_OK|NVREG_IRQ_TIMER|NVREG_IRQ_LINK|NVREG_IRQ_RX_FORCED| \ + NVREG_IRQ_TX_FORCED)) NvRegUnknownSetupReg6 = 0x008, #define NVREG_UNKSETUP6_VAL 3 @@ -185,6 +194,10 @@ enum { NvRegPollingInterval = 0x00c, #define NVREG_POLL_DEFAULT_THROUGHPUT 970 #define NVREG_POLL_DEFAULT_CPU 13 + NvRegMSIMap0 = 0x020, + NvRegMSIMap1 = 0x024, + NvRegMSIIrqMask = 0x030, +#define NVREG_MSI_VECTOR_0_ENABLED 0x01 NvRegMisc1 = 0x080, #define NVREG_MISC1_HD 0x02 #define NVREG_MISC1_FORCE 0x3b0f3c @@ -254,6 +267,10 @@ enum { #define NVREG_TXRXCTL_DESC_1 0 #define NVREG_TXRXCTL_DESC_2 0x02100 #define NVREG_TXRXCTL_DESC_3 0x02200 +#define NVREG_TXRXCTL_VLANSTRIP 0x00040 +#define NVREG_TXRXCTL_VLANINS 0x00080 + NvRegTxRingPhysAddrHigh = 0x148, + NvRegRxRingPhysAddrHigh = 0x14C, NvRegMIIStatus = 0x180, #define NVREG_MIISTAT_ERROR 0x0001 #define NVREG_MIISTAT_LINKCHANGE 0x0008 @@ -303,6 +320,11 @@ enum { #define NVREG_POWERSTATE_D1 0x0001 #define NVREG_POWERSTATE_D2 0x0002 #define NVREG_POWERSTATE_D3 0x0003 + NvRegVlanControl = 0x300, +#define NVREG_VLANCONTROL_ENABLE 0x2000 + NvRegMSIXMap0 = 0x3e0, + NvRegMSIXMap1 = 0x3e4, + NvRegMSIXIrqStatus = 0x3f0, }; /* Big endian: should work, but is untested */ @@ -314,7 +336,7 @@ struct ring_desc { struct ring_desc_ex { u32 PacketBufferHigh; u32 PacketBufferLow; - u32 Reserved; + u32 TxVlan; u32 FlagLen; }; @@ -355,6 +377,8 @@ typedef union _ring_type { #define NV_TX2_CHECKSUM_L3 (1<<27) #define NV_TX2_CHECKSUM_L4 (1<<26) +#define NV_TX3_VLAN_TAG_PRESENT (1<<18) + #define NV_RX_DESCRIPTORVALID (1<<16) #define NV_RX_MISSEDFRAME (1<<17) #define NV_RX_SUBSTRACT1 (1<<18) @@ -385,6 +409,9 @@ typedef union _ring_type { #define NV_RX2_ERROR (1<<30) #define NV_RX2_AVAIL (1<<31) +#define NV_RX3_VLAN_TAG_PRESENT (1<<16) +#define NV_RX3_VLAN_TAG_MASK (0x0000FFFF) + /* Miscelaneous hardware related defines: */ #define NV_PCI_REGSZ 0x270 @@ -475,6 +502,18 @@ typedef union _ring_type { #define LPA_1000FULL 0x0800 #define LPA_1000HALF 0x0400 +/* MSI/MSI-X defines */ +#define NV_MSI_X_MAX_VECTORS 8 +#define NV_MSI_X_VECTORS_MASK 0x000f +#define NV_MSI_CAPABLE 0x0010 +#define NV_MSI_X_CAPABLE 0x0020 +#define NV_MSI_ENABLED 0x0040 +#define NV_MSI_X_ENABLED 0x0080 + +#define NV_MSI_X_VECTOR_ALL 0x0 +#define NV_MSI_X_VECTOR_RX 0x0 +#define NV_MSI_X_VECTOR_TX 0x1 +#define NV_MSI_X_VECTOR_OTHER 0x2 /* * SMP locking: @@ -511,6 +550,7 @@ struct fe_priv { u32 irqmask; u32 desc_ver; u32 txrxctl_bits; + u32 vlanctl_bits; void __iomem *base; @@ -525,6 +565,7 @@ struct fe_priv { unsigned int pkt_limit; struct timer_list oom_kick; struct timer_list nic_poll; + u32 nic_poll_irq; /* media detection workaround. * Locking: Within irq hander or disable_irq+spin_lock(&np->lock); @@ -540,6 +581,13 @@ struct fe_priv { dma_addr_t tx_dma[TX_RING]; unsigned int tx_dma_len[TX_RING]; u32 tx_flags; + + /* vlan fields */ + struct vlan_group *vlangrp; + + /* msi/msi-x fields */ + u32 msi_flags; + struct msix_entry msi_x_entry[NV_MSI_X_MAX_VECTORS]; }; /* @@ -567,6 +615,16 @@ static int optimization_mode = NV_OPTIMIZATION_MODE_THROUGHPUT; */ static int poll_interval = -1; +/* + * Disable MSI interrupts + */ +static int disable_msi = 0; + +/* + * Disable MSIX interrupts + */ +static int disable_msix = 0; + static inline struct fe_priv *get_nvpriv(struct net_device *dev) { return netdev_priv(dev); @@ -612,6 +670,33 @@ static int reg_delay(struct net_device *dev, int offset, u32 mask, u32 target, return 0; } +#define NV_SETUP_RX_RING 0x01 +#define NV_SETUP_TX_RING 0x02 + +static void setup_hw_rings(struct net_device *dev, int rxtx_flags) +{ + struct fe_priv *np = get_nvpriv(dev); + u8 __iomem *base = get_hwbase(dev); + + if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { + if (rxtx_flags & NV_SETUP_RX_RING) { + writel((u32) cpu_to_le64(np->ring_addr), base + NvRegRxRingPhysAddr); + } + if (rxtx_flags & NV_SETUP_TX_RING) { + writel((u32) cpu_to_le64(np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr); + } + } else { + if (rxtx_flags & NV_SETUP_RX_RING) { + writel((u32) cpu_to_le64(np->ring_addr), base + NvRegRxRingPhysAddr); + writel((u32) (cpu_to_le64(np->ring_addr) >> 32), base + NvRegRxRingPhysAddrHigh); + } + if (rxtx_flags & NV_SETUP_TX_RING) { + writel((u32) cpu_to_le64(np->ring_addr + RX_RING*sizeof(struct ring_desc_ex)), base + NvRegTxRingPhysAddr); + writel((u32) (cpu_to_le64(np->ring_addr + RX_RING*sizeof(struct ring_desc_ex)) >> 32), base + NvRegTxRingPhysAddrHigh); + } + } +} + #define MII_READ (-1) /* mii_rw: read/write a register on the PHY. * @@ -903,14 +988,27 @@ static void nv_do_rx_refill(unsigned long data) struct net_device *dev = (struct net_device *) data; struct fe_priv *np = netdev_priv(dev); - disable_irq(dev->irq); + + if (!(np->msi_flags & NV_MSI_X_ENABLED) || + ((np->msi_flags & NV_MSI_X_ENABLED) && + ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) { + disable_irq(dev->irq); + } else { + disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); + } if (nv_alloc_rx(dev)) { spin_lock(&np->lock); if (!np->in_shutdown) mod_timer(&np->oom_kick, jiffies + OOM_REFILL); spin_unlock(&np->lock); } - enable_irq(dev->irq); + if (!(np->msi_flags & NV_MSI_X_ENABLED) || + ((np->msi_flags & NV_MSI_X_ENABLED) && + ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) { + enable_irq(dev->irq); + } else { + enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); + } } static void nv_init_rx(struct net_device *dev) @@ -965,7 +1063,7 @@ static int nv_release_txskb(struct net_device *dev, unsigned int skbnr) } if (np->tx_skbuff[skbnr]) { - dev_kfree_skb_irq(np->tx_skbuff[skbnr]); + dev_kfree_skb_any(np->tx_skbuff[skbnr]); np->tx_skbuff[skbnr] = NULL; return 1; } else { @@ -1031,6 +1129,7 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev) u32 bcnt; u32 size = skb->len-skb->data_len; u32 entries = (size >> NV_TX2_TSO_MAX_SHIFT) + ((size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0); + u32 tx_flags_vlan = 0; /* add fragments to entries count */ for (i = 0; i < fragments; i++) { @@ -1111,10 +1210,16 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev) #endif tx_flags_extra = (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0); + /* vlan tag */ + if (np->vlangrp && vlan_tx_tag_present(skb)) { + tx_flags_vlan = NV_TX3_VLAN_TAG_PRESENT | vlan_tx_tag_get(skb); + } + /* set tx flags */ if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { np->tx_ring.orig[start_nr].FlagLen |= cpu_to_le32(tx_flags | tx_flags_extra); } else { + np->tx_ring.ex[start_nr].TxVlan = cpu_to_le32(tx_flags_vlan); np->tx_ring.ex[start_nr].FlagLen |= cpu_to_le32(tx_flags | tx_flags_extra); } @@ -1209,9 +1314,14 @@ static void nv_tx_timeout(struct net_device *dev) { struct fe_priv *np = netdev_priv(dev); u8 __iomem *base = get_hwbase(dev); + u32 status; + + if (np->msi_flags & NV_MSI_X_ENABLED) + status = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQSTAT_MASK; + else + status = readl(base + NvRegIrqStatus) & NVREG_IRQSTAT_MASK; - printk(KERN_INFO "%s: Got tx_timeout. irq: %08x\n", dev->name, - readl(base + NvRegIrqStatus) & NVREG_IRQSTAT_MASK); + printk(KERN_INFO "%s: Got tx_timeout. irq: %08x\n", dev->name, status); { int i; @@ -1273,10 +1383,7 @@ static void nv_tx_timeout(struct net_device *dev) printk(KERN_DEBUG "%s: tx_timeout: dead entries!\n", dev->name); nv_drain_tx(dev); np->next_tx = np->nic_tx = 0; - if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) - writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr); - else - writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc_ex)), base + NvRegTxRingPhysAddr); + setup_hw_rings(dev, NV_SETUP_TX_RING); netif_wake_queue(dev); } @@ -1342,6 +1449,8 @@ static void nv_rx_process(struct net_device *dev) { struct fe_priv *np = netdev_priv(dev); u32 Flags; + u32 vlanflags = 0; + for (;;) { struct sk_buff *skb; @@ -1357,6 +1466,7 @@ static void nv_rx_process(struct net_device *dev) } else { Flags = le32_to_cpu(np->rx_ring.ex[i].FlagLen); len = nv_descr_getlength_ex(&np->rx_ring.ex[i], np->desc_ver); + vlanflags = le32_to_cpu(np->rx_ring.ex[i].PacketBufferLow); } dprintk(KERN_DEBUG "%s: nv_rx_process: looking at packet %d, Flags 0x%x.\n", @@ -1474,7 +1584,11 @@ static void nv_rx_process(struct net_device *dev) skb->protocol = eth_type_trans(skb, dev); dprintk(KERN_DEBUG "%s: nv_rx_process: packet %d with %d bytes, proto %d accepted.\n", dev->name, np->cur_rx, len, skb->protocol); - netif_rx(skb); + if (np->vlangrp && (vlanflags & NV_RX3_VLAN_TAG_PRESENT)) { + vlan_hwaccel_rx(skb, np->vlangrp, vlanflags & NV_RX3_VLAN_TAG_MASK); + } else { + netif_rx(skb); + } dev->last_rx = jiffies; np->stats.rx_packets++; np->stats.rx_bytes += len; @@ -1523,7 +1637,15 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu) * guessed, there is probably a simpler approach. * Changing the MTU is a rare event, it shouldn't matter. */ - disable_irq(dev->irq); + if (!(np->msi_flags & NV_MSI_X_ENABLED) || + ((np->msi_flags & NV_MSI_X_ENABLED) && + ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) { + disable_irq(dev->irq); + } else { + disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); + disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector); + disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector); + } spin_lock_bh(&dev->xmit_lock); spin_lock(&np->lock); /* stop engines */ @@ -1544,11 +1666,7 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu) } /* reinit nic view of the rx queue */ writel(np->rx_buf_sz, base + NvRegOffloadConfig); - writel((u32) np->ring_addr, base + NvRegRxRingPhysAddr); - if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) - writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr); - else - writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc_ex)), base + NvRegTxRingPhysAddr); + setup_hw_rings(dev, NV_SETUP_RX_RING | NV_SETUP_TX_RING); writel( ((RX_RING-1) << NVREG_RINGSZ_RXSHIFT) + ((TX_RING-1) << NVREG_RINGSZ_TXSHIFT), base + NvRegRingSizes); pci_push(base); @@ -1560,7 +1678,15 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu) nv_start_tx(dev); spin_unlock(&np->lock); spin_unlock_bh(&dev->xmit_lock); - enable_irq(dev->irq); + if (!(np->msi_flags & NV_MSI_X_ENABLED) || + ((np->msi_flags & NV_MSI_X_ENABLED) && + ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) { + enable_irq(dev->irq); + } else { + enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); + enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector); + enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector); + } } return 0; } @@ -1866,8 +1992,13 @@ static irqreturn_t nv_nic_irq(int foo, void *data, struct pt_regs *regs) dprintk(KERN_DEBUG "%s: nv_nic_irq\n", dev->name); for (i=0; ; i++) { - events = readl(base + NvRegIrqStatus) & NVREG_IRQSTAT_MASK; - writel(NVREG_IRQSTAT_MASK, base + NvRegIrqStatus); + if (!(np->msi_flags & NV_MSI_X_ENABLED)) { + events = readl(base + NvRegIrqStatus) & NVREG_IRQSTAT_MASK; + writel(NVREG_IRQSTAT_MASK, base + NvRegIrqStatus); + } else { + events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQSTAT_MASK; + writel(NVREG_IRQSTAT_MASK, base + NvRegMSIXIrqStatus); + } pci_push(base); dprintk(KERN_DEBUG "%s: irq: %08x\n", dev->name, events); if (!(events & np->irqmask)) @@ -1907,11 +2038,16 @@ static irqreturn_t nv_nic_irq(int foo, void *data, struct pt_regs *regs) if (i > max_interrupt_work) { spin_lock(&np->lock); /* disable interrupts on the nic */ - writel(0, base + NvRegIrqMask); + if (!(np->msi_flags & NV_MSI_X_ENABLED)) + writel(0, base + NvRegIrqMask); + else + writel(np->irqmask, base + NvRegIrqMask); pci_push(base); - if (!np->in_shutdown) + if (!np->in_shutdown) { + np->nic_poll_irq = np->irqmask; mod_timer(&np->nic_poll, jiffies + POLL_WAIT); + } printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq.\n", dev->name, i); spin_unlock(&np->lock); break; @@ -1923,22 +2059,212 @@ static irqreturn_t nv_nic_irq(int foo, void *data, struct pt_regs *regs) return IRQ_RETVAL(i); } +static irqreturn_t nv_nic_irq_tx(int foo, void *data, struct pt_regs *regs) +{ + struct net_device *dev = (struct net_device *) data; + struct fe_priv *np = netdev_priv(dev); + u8 __iomem *base = get_hwbase(dev); + u32 events; + int i; + + dprintk(KERN_DEBUG "%s: nv_nic_irq_tx\n", dev->name); + + for (i=0; ; i++) { + events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQ_TX_ALL; + writel(NVREG_IRQ_TX_ALL, base + NvRegMSIXIrqStatus); + pci_push(base); + dprintk(KERN_DEBUG "%s: tx irq: %08x\n", dev->name, events); + if (!(events & np->irqmask)) + break; + + spin_lock(&np->lock); + nv_tx_done(dev); + spin_unlock(&np->lock); + + if (events & (NVREG_IRQ_TX_ERR)) { + dprintk(KERN_DEBUG "%s: received irq with events 0x%x. Probably TX fail.\n", + dev->name, events); + } + if (i > max_interrupt_work) { + spin_lock(&np->lock); + /* disable interrupts on the nic */ + writel(NVREG_IRQ_TX_ALL, base + NvRegIrqMask); + pci_push(base); + + if (!np->in_shutdown) { + np->nic_poll_irq |= NVREG_IRQ_TX_ALL; + mod_timer(&np->nic_poll, jiffies + POLL_WAIT); + } + printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_tx.\n", dev->name, i); + spin_unlock(&np->lock); + break; + } + + } + dprintk(KERN_DEBUG "%s: nv_nic_irq_tx completed\n", dev->name); + + return IRQ_RETVAL(i); +} + +static irqreturn_t nv_nic_irq_rx(int foo, void *data, struct pt_regs *regs) +{ + struct net_device *dev = (struct net_device *) data; + struct fe_priv *np = netdev_priv(dev); + u8 __iomem *base = get_hwbase(dev); + u32 events; + int i; + + dprintk(KERN_DEBUG "%s: nv_nic_irq_rx\n", dev->name); + + for (i=0; ; i++) { + events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQ_RX_ALL; + writel(NVREG_IRQ_RX_ALL, base + NvRegMSIXIrqStatus); + pci_push(base); + dprintk(KERN_DEBUG "%s: rx irq: %08x\n", dev->name, events); + if (!(events & np->irqmask)) + break; + + nv_rx_process(dev); + if (nv_alloc_rx(dev)) { + spin_lock(&np->lock); + if (!np->in_shutdown) + mod_timer(&np->oom_kick, jiffies + OOM_REFILL); + spin_unlock(&np->lock); + } + + if (i > max_interrupt_work) { + spin_lock(&np->lock); + /* disable interrupts on the nic */ + writel(NVREG_IRQ_RX_ALL, base + NvRegIrqMask); + pci_push(base); + + if (!np->in_shutdown) { + np->nic_poll_irq |= NVREG_IRQ_RX_ALL; + mod_timer(&np->nic_poll, jiffies + POLL_WAIT); + } + printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_rx.\n", dev->name, i); + spin_unlock(&np->lock); + break; + } + + } + dprintk(KERN_DEBUG "%s: nv_nic_irq_rx completed\n", dev->name); + + return IRQ_RETVAL(i); +} + +static irqreturn_t nv_nic_irq_other(int foo, void *data, struct pt_regs *regs) +{ + struct net_device *dev = (struct net_device *) data; + struct fe_priv *np = netdev_priv(dev); + u8 __iomem *base = get_hwbase(dev); + u32 events; + int i; + + dprintk(KERN_DEBUG "%s: nv_nic_irq_other\n", dev->name); + + for (i=0; ; i++) { + events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQ_OTHER; + writel(NVREG_IRQ_OTHER, base + NvRegMSIXIrqStatus); + pci_push(base); + dprintk(KERN_DEBUG "%s: irq: %08x\n", dev->name, events); + if (!(events & np->irqmask)) + break; + + if (events & NVREG_IRQ_LINK) { + spin_lock(&np->lock); + nv_link_irq(dev); + spin_unlock(&np->lock); + } + if (np->need_linktimer && time_after(jiffies, np->link_timeout)) { + spin_lock(&np->lock); + nv_linkchange(dev); + spin_unlock(&np->lock); + np->link_timeout = jiffies + LINK_TIMEOUT; + } + if (events & (NVREG_IRQ_UNKNOWN)) { + printk(KERN_DEBUG "%s: received irq with unknown events 0x%x. Please report\n", + dev->name, events); + } + if (i > max_interrupt_work) { + spin_lock(&np->lock); + /* disable interrupts on the nic */ + writel(NVREG_IRQ_OTHER, base + NvRegIrqMask); + pci_push(base); + + if (!np->in_shutdown) { + np->nic_poll_irq |= NVREG_IRQ_OTHER; + mod_timer(&np->nic_poll, jiffies + POLL_WAIT); + } + printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_other.\n", dev->name, i); + spin_unlock(&np->lock); + break; + } + + } + dprintk(KERN_DEBUG "%s: nv_nic_irq_other completed\n", dev->name); + + return IRQ_RETVAL(i); +} + static void nv_do_nic_poll(unsigned long data) { struct net_device *dev = (struct net_device *) data; struct fe_priv *np = netdev_priv(dev); u8 __iomem *base = get_hwbase(dev); + u32 mask = 0; - disable_irq(dev->irq); - /* FIXME: Do we need synchronize_irq(dev->irq) here? */ /* + * First disable irq(s) and then * reenable interrupts on the nic, we have to do this before calling * nv_nic_irq because that may decide to do otherwise */ - writel(np->irqmask, base + NvRegIrqMask); + + if (!(np->msi_flags & NV_MSI_X_ENABLED) || + ((np->msi_flags & NV_MSI_X_ENABLED) && + ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) { + disable_irq(dev->irq); + mask = np->irqmask; + } else { + if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) { + disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); + mask |= NVREG_IRQ_RX_ALL; + } + if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) { + disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector); + mask |= NVREG_IRQ_TX_ALL; + } + if (np->nic_poll_irq & NVREG_IRQ_OTHER) { + disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector); + mask |= NVREG_IRQ_OTHER; + } + } + np->nic_poll_irq = 0; + + /* FIXME: Do we need synchronize_irq(dev->irq) here? */ + + writel(mask, base + NvRegIrqMask); pci_push(base); - nv_nic_irq((int) 0, (void *) data, (struct pt_regs *) NULL); - enable_irq(dev->irq); + + if (!(np->msi_flags & NV_MSI_X_ENABLED) || + ((np->msi_flags & NV_MSI_X_ENABLED) && + ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) { + nv_nic_irq((int) 0, (void *) data, (struct pt_regs *) NULL); + enable_irq(dev->irq); + } else { + if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) { + nv_nic_irq_rx((int) 0, (void *) data, (struct pt_regs *) NULL); + enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector); + } + if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) { + nv_nic_irq_tx((int) 0, (void *) data, (struct pt_regs *) NULL); + enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector); + } + if (np->nic_poll_irq & NVREG_IRQ_OTHER) { + nv_nic_irq_other((int) 0, (void *) data, (struct pt_regs *) NULL); + enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector); + } + } } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -2217,11 +2543,66 @@ static struct ethtool_ops ops = { .get_perm_addr = ethtool_op_get_perm_addr, }; +static void nv_vlan_rx_register(struct net_device *dev, struct vlan_group *grp) +{ + struct fe_priv *np = get_nvpriv(dev); + + spin_lock_irq(&np->lock); + + /* save vlan group */ + np->vlangrp = grp; + + if (grp) { + /* enable vlan on MAC */ + np->txrxctl_bits |= NVREG_TXRXCTL_VLANSTRIP | NVREG_TXRXCTL_VLANINS; + } else { + /* disable vlan on MAC */ + np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANSTRIP; + np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANINS; + } + + writel(np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl); + + spin_unlock_irq(&np->lock); +}; + +static void nv_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +{ + /* nothing to do */ +}; + +static void set_msix_vector_map(struct net_device *dev, u32 vector, u32 irqmask) +{ + u8 __iomem *base = get_hwbase(dev); + int i; + u32 msixmap = 0; + + /* Each interrupt bit can be mapped to a MSIX vector (4 bits). + * MSIXMap0 represents the first 8 interrupts and MSIXMap1 represents + * the remaining 8 interrupts. + */ + for (i = 0; i < 8; i++) { + if ((irqmask >> i) & 0x1) { + msixmap |= vector << (i << 2); + } + } + writel(readl(base + NvRegMSIXMap0) | msixmap, base + NvRegMSIXMap0); + + msixmap = 0; + for (i = 0; i < 8; i++) { + if ((irqmask >> (i + 8)) & 0x1) { + msixmap |= vector << (i << 2); + } + } + writel(readl(base + NvRegMSIXMap1) | msixmap, base + NvRegMSIXMap1); +} + static int nv_open(struct net_device *dev) { struct fe_priv *np = netdev_priv(dev); u8 __iomem *base = get_hwbase(dev); - int ret, oom, i; + int ret = 1; + int oom, i; dprintk(KERN_DEBUG "nv_open: begin\n"); @@ -2253,11 +2634,7 @@ static int nv_open(struct net_device *dev) nv_copy_mac_to_hw(dev); /* 4) give hw rings */ - writel((u32) np->ring_addr, base + NvRegRxRingPhysAddr); - if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) - writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr); - else - writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc_ex)), base + NvRegTxRingPhysAddr); + setup_hw_rings(dev, NV_SETUP_RX_RING | NV_SETUP_TX_RING); writel( ((RX_RING-1) << NVREG_RINGSZ_RXSHIFT) + ((TX_RING-1) << NVREG_RINGSZ_TXSHIFT), base + NvRegRingSizes); @@ -2265,6 +2642,7 @@ static int nv_open(struct net_device *dev) writel(np->linkspeed, base + NvRegLinkSpeed); writel(NVREG_UNKSETUP3_VAL1, base + NvRegUnknownSetupReg3); writel(np->txrxctl_bits, base + NvRegTxRxControl); + writel(np->vlanctl_bits, base + NvRegVlanControl); pci_push(base); writel(NVREG_TXRXCTL_BIT1|np->txrxctl_bits, base + NvRegTxRxControl); reg_delay(dev, NvRegUnknownSetupReg5, NVREG_UNKSETUP5_BIT31, NVREG_UNKSETUP5_BIT31, @@ -2315,9 +2693,77 @@ static int nv_open(struct net_device *dev) writel(NVREG_IRQSTAT_MASK, base + NvRegIrqStatus); pci_push(base); - ret = request_irq(dev->irq, &nv_nic_irq, SA_SHIRQ, dev->name, dev); - if (ret) - goto out_drain; + if (np->msi_flags & NV_MSI_X_CAPABLE) { + for (i = 0; i < (np->msi_flags & NV_MSI_X_VECTORS_MASK); i++) { + np->msi_x_entry[i].entry = i; + } + if ((ret = pci_enable_msix(np->pci_dev, np->msi_x_entry, (np->msi_flags & NV_MSI_X_VECTORS_MASK))) == 0) { + np->msi_flags |= NV_MSI_X_ENABLED; + if (optimization_mode == NV_OPTIMIZATION_MODE_THROUGHPUT) { + /* Request irq for rx handling */ + if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector, &nv_nic_irq_rx, SA_SHIRQ, dev->name, dev) != 0) { + printk(KERN_INFO "forcedeth: request_irq failed for rx %d\n", ret); + pci_disable_msix(np->pci_dev); + np->msi_flags &= ~NV_MSI_X_ENABLED; + goto out_drain; + } + /* Request irq for tx handling */ + if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector, &nv_nic_irq_tx, SA_SHIRQ, dev->name, dev) != 0) { + printk(KERN_INFO "forcedeth: request_irq failed for tx %d\n", ret); + pci_disable_msix(np->pci_dev); + np->msi_flags &= ~NV_MSI_X_ENABLED; + goto out_drain; + } + /* Request irq for link and timer handling */ + if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector, &nv_nic_irq_other, SA_SHIRQ, dev->name, dev) != 0) { + printk(KERN_INFO "forcedeth: request_irq failed for link %d\n", ret); + pci_disable_msix(np->pci_dev); + np->msi_flags &= ~NV_MSI_X_ENABLED; + goto out_drain; + } + + /* map interrupts to their respective vector */ + writel(0, base + NvRegMSIXMap0); + writel(0, base + NvRegMSIXMap1); + set_msix_vector_map(dev, NV_MSI_X_VECTOR_RX, NVREG_IRQ_RX_ALL); + set_msix_vector_map(dev, NV_MSI_X_VECTOR_TX, NVREG_IRQ_TX_ALL); + set_msix_vector_map(dev, NV_MSI_X_VECTOR_OTHER, NVREG_IRQ_OTHER); + } else { + /* Request irq for all interrupts */ + if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector, &nv_nic_irq, SA_SHIRQ, dev->name, dev) != 0) { + printk(KERN_INFO "forcedeth: request_irq failed %d\n", ret); + pci_disable_msix(np->pci_dev); + np->msi_flags &= ~NV_MSI_X_ENABLED; + goto out_drain; + } + + /* map interrupts to vector 0 */ + writel(0, base + NvRegMSIXMap0); + writel(0, base + NvRegMSIXMap1); + } + } + } + if (ret != 0 && np->msi_flags & NV_MSI_CAPABLE) { + if ((ret = pci_enable_msi(np->pci_dev)) == 0) { + np->msi_flags |= NV_MSI_ENABLED; + if (request_irq(np->pci_dev->irq, &nv_nic_irq, SA_SHIRQ, dev->name, dev) != 0) { + printk(KERN_INFO "forcedeth: request_irq failed %d\n", ret); + pci_disable_msi(np->pci_dev); + np->msi_flags &= ~NV_MSI_ENABLED; + goto out_drain; + } + + /* map interrupts to vector 0 */ + writel(0, base + NvRegMSIMap0); + writel(0, base + NvRegMSIMap1); + /* enable msi vector 0 */ + writel(NVREG_MSI_VECTOR_0_ENABLED, base + NvRegMSIIrqMask); + } + } + if (ret != 0) { + if (request_irq(np->pci_dev->irq, &nv_nic_irq, SA_SHIRQ, dev->name, dev) != 0) + goto out_drain; + } /* ask for interrupts */ writel(np->irqmask, base + NvRegIrqMask); @@ -2364,6 +2810,7 @@ static int nv_close(struct net_device *dev) { struct fe_priv *np = netdev_priv(dev); u8 __iomem *base; + int i; spin_lock_irq(&np->lock); np->in_shutdown = 1; @@ -2381,13 +2828,31 @@ static int nv_close(struct net_device *dev) /* disable interrupts on the nic or we will lock up */ base = get_hwbase(dev); - writel(0, base + NvRegIrqMask); + if (np->msi_flags & NV_MSI_X_ENABLED) { + writel(np->irqmask, base + NvRegIrqMask); + } else { + if (np->msi_flags & NV_MSI_ENABLED) + writel(0, base + NvRegMSIIrqMask); + writel(0, base + NvRegIrqMask); + } pci_push(base); dprintk(KERN_INFO "%s: Irqmask is zero again\n", dev->name); spin_unlock_irq(&np->lock); - free_irq(dev->irq, dev); + if (np->msi_flags & NV_MSI_X_ENABLED) { + for (i = 0; i < (np->msi_flags & NV_MSI_X_VECTORS_MASK); i++) { + free_irq(np->msi_x_entry[i].vector, dev); + } + pci_disable_msix(np->pci_dev); + np->msi_flags &= ~NV_MSI_X_ENABLED; + } else { + free_irq(np->pci_dev->irq, dev); + if (np->msi_flags & NV_MSI_ENABLED) { + pci_disable_msi(np->pci_dev); + np->msi_flags &= ~NV_MSI_ENABLED; + } + } drain_ring(dev); @@ -2471,7 +2936,14 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i printk(KERN_INFO "forcedeth: 64-bit DMA failed, using 32-bit addressing for device %s.\n", pci_name(pci_dev)); } else { - dev->features |= NETIF_F_HIGHDMA; + if (pci_set_consistent_dma_mask(pci_dev, 0x0000007fffffffffULL)) { + printk(KERN_INFO "forcedeth: 64-bit DMA (consistent) failed for device %s.\n", + pci_name(pci_dev)); + goto out_relreg; + } else { + dev->features |= NETIF_F_HIGHDMA; + printk(KERN_INFO "forcedeth: using HIGHDMA\n"); + } } np->txrxctl_bits = NVREG_TXRXCTL_DESC_3; } else if (id->driver_data & DEV_HAS_LARGEDESC) { @@ -2496,6 +2968,22 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i #endif } + np->vlanctl_bits = 0; + if (id->driver_data & DEV_HAS_VLAN) { + np->vlanctl_bits = NVREG_VLANCONTROL_ENABLE; + dev->features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX; + dev->vlan_rx_register = nv_vlan_rx_register; + dev->vlan_rx_kill_vid = nv_vlan_rx_kill_vid; + } + + np->msi_flags = 0; + if ((id->driver_data & DEV_HAS_MSI) && !disable_msi) { + np->msi_flags |= NV_MSI_CAPABLE; + } + if ((id->driver_data & DEV_HAS_MSI_X) && !disable_msix) { + np->msi_flags |= NV_MSI_X_CAPABLE; + } + err = -ENOMEM; np->base = ioremap(addr, NV_PCI_REGSZ); if (!np->base) @@ -2578,10 +3066,15 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i } else { np->tx_flags = NV_TX2_VALID; } - if (optimization_mode == NV_OPTIMIZATION_MODE_THROUGHPUT) + if (optimization_mode == NV_OPTIMIZATION_MODE_THROUGHPUT) { np->irqmask = NVREG_IRQMASK_THROUGHPUT; - else + if (np->msi_flags & NV_MSI_X_CAPABLE) /* set number of vectors */ + np->msi_flags |= 0x0003; + } else { np->irqmask = NVREG_IRQMASK_CPU; + if (np->msi_flags & NV_MSI_X_CAPABLE) /* set number of vectors */ + np->msi_flags |= 0x0001; + } if (id->driver_data & DEV_NEED_TIMERIRQ) np->irqmask |= NVREG_IRQ_TIMER; @@ -2737,11 +3230,11 @@ static struct pci_device_id pci_tbl[] = { }, { /* MCP55 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_14), - .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_VLAN|DEV_HAS_MSI|DEV_HAS_MSI_X, }, { /* MCP55 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_15), - .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_VLAN|DEV_HAS_MSI|DEV_HAS_MSI_X, }, {0,}, }; @@ -2771,6 +3264,10 @@ module_param(optimization_mode, int, 0); MODULE_PARM_DESC(optimization_mode, "In throughput mode (0), every tx & rx packet will generate an interrupt. In CPU mode (1), interrupts are controlled by a timer."); module_param(poll_interval, int, 0); MODULE_PARM_DESC(poll_interval, "Interval determines how frequent timer interrupt is generated by [(time_in_micro_secs * 100) / (2^10)]. Min is 0 and Max is 65535."); +module_param(disable_msi, int, 0); +MODULE_PARM_DESC(disable_msi, "Disable MSI interrupts by setting to 1."); +module_param(disable_msix, int, 0); +MODULE_PARM_DESC(disable_msix, "Disable MSIX interrupts by setting to 1."); MODULE_AUTHOR("Manfred Spraul <manfred@colorfullife.com>"); MODULE_DESCRIPTION("Reverse Engineered nForce ethernet driver"); diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 062fb10..afc4e88 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -359,7 +359,7 @@ ccw_device_set_online(struct ccw_device *cdev) else pr_debug("ccw_device_offline returned %d, device %s\n", ret, cdev->dev.bus_id); - return (ret = 0) ? -ENODEV : ret; + return (ret == 0) ? -ENODEV : ret; } static ssize_t diff --git a/drivers/s390/cio/device_pgid.c b/drivers/s390/cio/device_pgid.c index d2a5b04..85b1020 100644 --- a/drivers/s390/cio/device_pgid.c +++ b/drivers/s390/cio/device_pgid.c @@ -405,7 +405,7 @@ __ccw_device_disband_start(struct ccw_device *cdev) cdev->private->iretry = 5; cdev->private->imask >>= 1; } - ccw_device_verify_done(cdev, (sch->lpm != 0) ? 0 : -ENODEV); + ccw_device_disband_done(cdev, (sch->lpm != 0) ? 0 : -ENODEV); } /* diff --git a/drivers/s390/cio/device_status.c b/drivers/s390/cio/device_status.c index dad4dd9..6c762b4 100644 --- a/drivers/s390/cio/device_status.c +++ b/drivers/s390/cio/device_status.c @@ -317,7 +317,6 @@ ccw_device_do_sense(struct ccw_device *cdev, struct irb *irb) /* * We have ending status but no sense information. Do a basic sense. */ - sch = to_subchannel(cdev->dev.parent); sch->sense_ccw.cmd_code = CCW_CMD_BASIC_SENSE; sch->sense_ccw.cda = (__u32) __pa(cdev->private->irb.ecw); sch->sense_ccw.count = SENSE_MAX_COUNT; diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 46c4cdb..7ddd5a6 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -614,7 +614,7 @@ int ata_rwcmd_protocol(struct ata_queued_cmd *qc) } else if (lba48 && (qc->ap->flags & ATA_FLAG_PIO_LBA48)) { /* Unable to use DMA due to host limitation */ tf->protocol = ATA_PROT_PIO; - index = dev->multi_count ? 0 : 4; + index = dev->multi_count ? 0 : 8; } else { tf->protocol = ATA_PROT_DMA; index = 16; @@ -3357,11 +3357,12 @@ static void ata_pio_error(struct ata_port *ap) { struct ata_queued_cmd *qc; - printk(KERN_WARNING "ata%u: PIO error\n", ap->id); - qc = ata_qc_from_tag(ap, ap->active_tag); assert(qc != NULL); + if (qc->tf.command != ATA_CMD_PACKET) + printk(KERN_WARNING "ata%u: PIO error\n", ap->id); + /* make sure qc->err_mask is available to * know what's wrong and recover */ diff --git a/drivers/scsi/sata_mv.c b/drivers/scsi/sata_mv.c index 6fddf17..2770005 100644 --- a/drivers/scsi/sata_mv.c +++ b/drivers/scsi/sata_mv.c @@ -997,6 +997,7 @@ static void mv_qc_prep(struct ata_queued_cmd *qc) case ATA_CMD_READ_EXT: case ATA_CMD_WRITE: case ATA_CMD_WRITE_EXT: + case ATA_CMD_WRITE_FUA_EXT: mv_crqb_pack_cmd(cw++, tf->hob_nsect, ATA_REG_NSECT, 0); break; #ifdef LIBATA_NCQ /* FIXME: remove this line when NCQ added */ diff --git a/drivers/scsi/sata_vsc.c b/drivers/scsi/sata_vsc.c index 2e2c3b7..e484e8d 100644 --- a/drivers/scsi/sata_vsc.c +++ b/drivers/scsi/sata_vsc.c @@ -81,6 +81,19 @@ /* Port stride */ #define VSC_SATA_PORT_OFFSET 0x200 +/* Error interrupt status bit offsets */ +#define VSC_SATA_INT_ERROR_E_OFFSET 2 +#define VSC_SATA_INT_ERROR_P_OFFSET 4 +#define VSC_SATA_INT_ERROR_T_OFFSET 5 +#define VSC_SATA_INT_ERROR_M_OFFSET 1 +#define is_vsc_sata_int_err(port_idx, int_status) \ + (int_status & ((1 << (VSC_SATA_INT_ERROR_E_OFFSET + (8 * port_idx))) | \ + (1 << (VSC_SATA_INT_ERROR_P_OFFSET + (8 * port_idx))) | \ + (1 << (VSC_SATA_INT_ERROR_T_OFFSET + (8 * port_idx))) | \ + (1 << (VSC_SATA_INT_ERROR_M_OFFSET + (8 * port_idx))) \ + )\ + ) + static u32 vsc_sata_scr_read (struct ata_port *ap, unsigned int sc_reg) { @@ -201,13 +214,28 @@ static irqreturn_t vsc_sata_interrupt (int irq, void *dev_instance, struct ata_port *ap; ap = host_set->ports[i]; + + if (is_vsc_sata_int_err(i, int_status)) { + u32 err_status; + printk(KERN_DEBUG "%s: ignoring interrupt(s)\n", __FUNCTION__); + err_status = ap ? vsc_sata_scr_read(ap, SCR_ERROR) : 0; + vsc_sata_scr_write(ap, SCR_ERROR, err_status); + handled++; + } + if (ap && !(ap->flags & (ATA_FLAG_PORT_DISABLED|ATA_FLAG_NOINTR))) { struct ata_queued_cmd *qc; qc = ata_qc_from_tag(ap, ap->active_tag); - if (qc && (!(qc->tf.ctl & ATA_NIEN))) + if (qc && (!(qc->tf.ctl & ATA_NIEN))) { handled += ata_host_intr(ap, qc); + } else { + printk(KERN_DEBUG "%s: ignoring interrupt(s)\n", __FUNCTION__); + ata_chk_status(ap); + handled++; + } + } } } diff --git a/fs/compat.c b/fs/compat.c index a2ba78b..5333c7d 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1757,7 +1757,7 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, goto sticky; rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); rtv.tv_sec = timeout; - if (compat_timeval_compare(&rtv, &tv) < 0) + if (compat_timeval_compare(&rtv, &tv) >= 0) rtv = tv; if (copy_to_user(tvp, &rtv, sizeof(rtv))) { sticky: @@ -1834,7 +1834,7 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, rts.tv_sec++; rts.tv_nsec -= NSEC_PER_SEC; } - if (compat_timespec_compare(&rts, &ts) < 0) + if (compat_timespec_compare(&rts, &ts) >= 0) rts = ts; copy_to_user(tsp, &rts, sizeof(rts)); } @@ -1934,7 +1934,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000; rts.tv_sec = timeout; - if (compat_timespec_compare(&rts, &ts) < 0) + if (compat_timespec_compare(&rts, &ts) >= 0) rts = ts; if (copy_to_user(tsp, &rts, sizeof(rts))) { sticky: diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index a2ca310..86ae8e9 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -792,18 +792,20 @@ ext2_xattr_delete_inode(struct inode *inode) ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1); get_bh(bh); bforget(bh); + unlock_buffer(bh); } else { HDR(bh)->h_refcount = cpu_to_le32( le32_to_cpu(HDR(bh)->h_refcount) - 1); if (ce) mb_cache_entry_release(ce); + ea_bdebug(bh, "refcount now=%d", + le32_to_cpu(HDR(bh)->h_refcount)); + unlock_buffer(bh); mark_buffer_dirty(bh); if (IS_SYNC(inode)) sync_dirty_buffer(bh); DQUOT_FREE_BLOCK(inode, 1); } - ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); - unlock_buffer(bh); EXT2_I(inode)->i_file_acl = 0; cleanup: diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f556a0d..0c9a2ee 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -66,6 +66,12 @@ static void restore_sigs(sigset_t *oldset) sigprocmask(SIG_SETMASK, oldset, NULL); } +/* + * Reset request, so that it can be reused + * + * The caller must be _very_ careful to make sure, that it is holding + * the only reference to req + */ void fuse_reset_request(struct fuse_req *req) { int preallocated = req->preallocated; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2963516..6f05379 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -116,9 +116,14 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir) /* Special case for failed iget in CREATE */ static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) { - u64 nodeid = req->in.h.nodeid; - fuse_reset_request(req); - fuse_send_forget(fc, req, nodeid, 1); + /* If called from end_io_requests(), req has more than one + reference and fuse_reset_request() cannot work */ + if (fc->connected) { + u64 nodeid = req->in.h.nodeid; + fuse_reset_request(req); + fuse_send_forget(fc, req, nodeid, 1); + } else + fuse_put_request(fc, req); } void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff, diff --git a/fs/select.c b/fs/select.c index 6ce68a9..1815a57 100644 --- a/fs/select.c +++ b/fs/select.c @@ -404,7 +404,7 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, goto sticky; rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); rtv.tv_sec = timeout; - if (timeval_compare(&rtv, &tv) < 0) + if (timeval_compare(&rtv, &tv) >= 0) rtv = tv; if (copy_to_user(tvp, &rtv, sizeof(rtv))) { sticky: @@ -471,7 +471,7 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000; rts.tv_sec = timeout; - if (timespec_compare(&rts, &ts) < 0) + if (timespec_compare(&rts, &ts) >= 0) rts = ts; if (copy_to_user(tsp, &rts, sizeof(rts))) { sticky: @@ -775,7 +775,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000; rts.tv_sec = timeout; - if (timespec_compare(&rts, &ts) < 0) + if (timespec_compare(&rts, &ts) >= 0) rts = ts; if (copy_to_user(tsp, &rts, sizeof(rts))) { sticky: diff --git a/include/asm-powerpc/pgalloc.h b/include/asm-powerpc/pgalloc.h index 9f5b052..a00ee00 100644 --- a/include/asm-powerpc/pgalloc.h +++ b/include/asm-powerpc/pgalloc.h @@ -146,7 +146,7 @@ extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf); pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \ PMD_CACHE_NUM, PMD_TABLE_SIZE-1)) #ifndef CONFIG_PPC_64K_PAGES -#define __pud_free_tlb(tlb, pmd) \ +#define __pud_free_tlb(tlb, pud) \ pgtable_free_tlb(tlb, pgtable_free_cache(pud, \ PUD_CACHE_NUM, PUD_TABLE_SIZE-1)) #endif /* CONFIG_PPC_64K_PAGES */ diff --git a/include/asm-s390/smp.h b/include/asm-s390/smp.h index 9c6e9c3..444dae5 100644 --- a/include/asm-s390/smp.h +++ b/include/asm-s390/smp.h @@ -31,6 +31,7 @@ typedef struct __u16 cpu; } sigp_info; +extern void smp_setup_cpu_possible_map(void); extern int smp_call_function_on(void (*func) (void *info), void *info, int nonatomic, int wait, int cpu); #define NO_PROC_ID 0xFF /* No processor magic marker */ @@ -104,6 +105,7 @@ smp_call_function_on(void (*func) (void *info), void *info, #define smp_cpu_not_running(cpu) 1 #define smp_get_cpu(cpu) ({ 0; }) #define smp_put_cpu(cpu) ({ 0; }) +#define smp_setup_cpu_possible_map() #endif #endif diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 4e90905..2d9d08f 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -153,13 +153,11 @@ static int swsusp_swap_check(void) /* This is called before saving image */ { int i; - if (!swsusp_resume_device) - return -ENODEV; spin_lock(&swap_lock); for (i = 0; i < MAX_SWAPFILES; i++) { if (!(swap_info[i].flags & SWP_WRITEOK)) continue; - if (is_resume_device(swap_info + i)) { + if (!swsusp_resume_device || is_resume_device(swap_info + i)) { spin_unlock(&swap_lock); root_swap = i; return 0; diff --git a/kernel/sched.c b/kernel/sched.c index 66d9572..12d291b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5058,7 +5058,18 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span, #define MAX_DOMAIN_DISTANCE 32 static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] = - { [ 0 ... MAX_DOMAIN_DISTANCE-1 ] = -1LL }; + { [ 0 ... MAX_DOMAIN_DISTANCE-1 ] = +/* + * Architectures may override the migration cost and thus avoid + * boot-time calibration. Unit is nanoseconds. Mostly useful for + * virtualized hardware: + */ +#ifdef CONFIG_DEFAULT_MIGRATION_COST + CONFIG_DEFAULT_MIGRATION_COST +#else + -1LL +#endif +}; /* * Allow override of migration cost - in units of microseconds. diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 323fdcf..bedfa4f 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -808,6 +808,8 @@ static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask, nodes_clear(*nodes); if (maxnode == 0 || !nmask) return 0; + if (maxnode > PAGE_SIZE) + return -EINVAL; nlongs = BITS_TO_LONGS(maxnode); if ((maxnode % BITS_PER_LONG) == 0) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 62c1225..208812b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1541,29 +1541,29 @@ static int __initdata node_load[MAX_NUMNODES]; */ static int __init find_next_best_node(int node, nodemask_t *used_node_mask) { - int i, n, val; + int n, val; int min_val = INT_MAX; int best_node = -1; - for_each_online_node(i) { - cpumask_t tmp; + /* Use the local node if we haven't already */ + if (!node_isset(node, *used_node_mask)) { + node_set(node, *used_node_mask); + return node; + } - /* Start from local node */ - n = (node+i) % num_online_nodes(); + for_each_online_node(n) { + cpumask_t tmp; /* Don't want a node to appear more than once */ if (node_isset(n, *used_node_mask)) continue; - /* Use the local node if we haven't already */ - if (!node_isset(node, *used_node_mask)) { - best_node = node; - break; - } - /* Use the distance array to find the distance */ val = node_distance(node, n); + /* Penalize nodes under us ("prefer the next node") */ + val += (n < node); + /* Give preference to headless and unused nodes */ tmp = node_to_cpumask(n); if (!cpus_empty(tmp)) |