diff options
Diffstat (limited to 'arch')
105 files changed, 3185 insertions, 2219 deletions
diff --git a/arch/frv/defconfig b/arch/frv/defconfig new file mode 100644 index 0000000..b6e4ca5 --- /dev/null +++ b/arch/frv/defconfig @@ -0,0 +1,627 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.11.8 +# Fri May 13 17:16:03 2005 +# +CONFIG_FRV=y +CONFIG_UID16=y +CONFIG_RWSEM_GENERIC_SPINLOCK=y +CONFIG_GENERIC_FIND_NEXT_BIT=y +# CONFIG_GENERIC_CALIBRATE_DELAY is not set +# CONFIG_GENERIC_HARDIRQS is not set + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_CLEAN_COMPILE=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 + +# +# General setup +# +CONFIG_LOCALVERSION="" +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +# CONFIG_AUDIT is not set +# CONFIG_HOTPLUG is not set +# CONFIG_KOBJECT_UEVENT is not set +# CONFIG_IKCONFIG is not set +CONFIG_EMBEDDED=y +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_ALL is not set +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_SHMEM=y +CONFIG_CC_ALIGN_FUNCTIONS=0 +CONFIG_CC_ALIGN_LABELS=0 +CONFIG_CC_ALIGN_LOOPS=0 +CONFIG_CC_ALIGN_JUMPS=0 +# CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 + +# +# Loadable module support +# +# CONFIG_MODULES is not set + +# +# Fujitsu FR-V system setup +# +CONFIG_MMU=y +CONFIG_FRV_OUTOFLINE_ATOMIC_OPS=y +CONFIG_HIGHMEM=y +CONFIG_HIGHPTE=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_FRV_DEFL_CACHE_WBACK is not set +# CONFIG_FRV_DEFL_CACHE_WBEHIND is not set +CONFIG_FRV_DEFL_CACHE_WTHRU=y +# CONFIG_FRV_DEFL_CACHE_DISABLED is not set + +# +# CPU core support +# +CONFIG_CPU_FR451=y +CONFIG_CPU_FR451_COMPILE=y +CONFIG_FRV_L1_CACHE_SHIFT=5 +CONFIG_MB93091_VDK=y +# CONFIG_MB93093_PDK is not set +CONFIG_MB93090_MB00=y +# CONFIG_MB93091_NO_MB is not set +# CONFIG_GPREL_DATA_8 is not set +CONFIG_GPREL_DATA_4=y +# CONFIG_GPREL_DATA_NONE is not set +CONFIG_PCI=y +# CONFIG_PCI_LEGACY_PROC is not set +# CONFIG_PCI_NAMES is not set +# CONFIG_PCI_DEBUG is not set +# CONFIG_PCMCIA is not set + +# +# Power management options +# +# CONFIG_PM is not set + +# +# Executable formats +# +# CONFIG_BINFMT_ELF is not set +CONFIG_BINFMT_ELF_FDPIC=y +# CONFIG_BINFMT_MISC is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FW_LOADER is not set +# CONFIG_DEBUG_DRIVER is not set + +# +# Connector - unified userspace <-> kernelspace linker +# +# CONFIG_CONNECTOR is not set +# CONFIG_FORK_CONNECTOR is not set + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play support +# + +# +# Block devices +# +# CONFIG_BLK_DEV_FD is not set +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +# CONFIG_BLK_DEV_LOOP is not set +# CONFIG_BLK_DEV_NBD is not set +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_RAM is not set +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_INITRAMFS_SOURCE="" +# CONFIG_CDROM_PKTCDVD is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +# CONFIG_ATA_OVER_ETH is not set + +# +# ATA/ATAPI/MFM/RLL support +# +# CONFIG_IDE is not set + +# +# SCSI device support +# +# CONFIG_SCSI is not set + +# +# Multi-device support (RAID and LVM) +# +# CONFIG_MD is not set + +# +# Fusion MPT device support +# +# CONFIG_FUSION is not set + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_IEEE1394 is not set + +# +# I2O device support +# +# CONFIG_I2O is not set + +# +# Networking support +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +CONFIG_UNIX=y +# CONFIG_NET_KEY is not set +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_PNP=y +# CONFIG_IP_PNP_DHCP is not set +# CONFIG_IP_PNP_BOOTP is not set +# CONFIG_IP_PNP_RARP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_TUNNEL is not set +# CONFIG_IP_TCPDIAG is not set +# CONFIG_IP_TCPDIAG_IPV6 is not set +# CONFIG_IPV6 is not set +# CONFIG_NETFILTER is not set + +# +# SCTP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_SCTP is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set +# CONFIG_NET_CLS_ROUTE is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_KGDBOE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NETPOLL_RX is not set +# CONFIG_NETPOLL_TRAP is not set +# CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_IEEE80211 is not set +CONFIG_NETDEVICES=y +# CONFIG_DUMMY is not set +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_NET_VENDOR_3COM is not set + +# +# Tulip family network device support +# +# CONFIG_NET_TULIP is not set +# CONFIG_HP100 is not set +CONFIG_NET_PCI=y +# CONFIG_PCNET32 is not set +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is not set +# CONFIG_B44 is not set +# CONFIG_FORCEDETH is not set +# CONFIG_DGRS is not set +# CONFIG_EEPRO100 is not set +# CONFIG_E100 is not set +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +CONFIG_NE2K_PCI=y +# CONFIG_8139CP is not set +# CONFIG_8139TOO is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_TLAN is not set +# CONFIG_VIA_RHINE is not set + +# +# Ethernet (1000 Mbit) +# +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +# CONFIG_E1000 is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SKGE is not set +# CONFIG_SK98LIN is not set +# CONFIG_VIA_VELOCITY is not set +# CONFIG_TIGON3 is not set + +# +# Ethernet (10000 Mbit) +# +# CONFIG_CHELSIO_T1 is not set +# CONFIG_IXGB is not set +# CONFIG_S2IO is not set + +# +# Token Ring devices +# +# CONFIG_TR is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_SHAPER is not set +# CONFIG_NETCONSOLE is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# Input device support +# +# CONFIG_INPUT is not set + +# +# Hardware I/O ports +# +# CONFIG_SERIO is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +# CONFIG_VT is not set +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=1 +CONFIG_SERIAL_8250_EXTENDED=y +# CONFIG_SERIAL_8250_MANY_PORTS is not set +CONFIG_SERIAL_8250_SHARE_IRQ=y +# CONFIG_SERIAL_8250_DETECT_IRQ is not set +# CONFIG_SERIAL_8250_MULTIPORT is not set +# CONFIG_SERIAL_8250_RSA is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set +CONFIG_UNIX98_PTYS=y +# CONFIG_LEGACY_PTYS is not set + +# +# IPMI +# +# CONFIG_IPMI_HANDLER is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_RTC is not set +# CONFIG_GEN_RTC is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_DRM is not set +# CONFIG_RAW_DRIVER is not set + +# +# TPM devices +# +# CONFIG_TCG_TPM is not set + +# +# I2C support +# +# CONFIG_I2C is not set + +# +# Dallas's 1-wire bus +# +# CONFIG_W1 is not set + +# +# Misc devices +# + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set + +# +# Digital Video Broadcasting Devices +# +# CONFIG_DVB is not set + +# +# Graphics support +# +# CONFIG_FB is not set + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# USB support +# +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +# CONFIG_USB is not set + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set + +# +# MMC/SD Card support +# +# CONFIG_MMC is not set + +# +# InfiniBand support +# +# CONFIG_INFINIBAND is not set + +# +# File systems +# +# CONFIG_EXT2_FS is not set +# CONFIG_EXT3_FS is not set +# CONFIG_JBD is not set +# CONFIG_REISER4_FS is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set + +# +# XFS support +# +# CONFIG_XFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_ROMFS_FS is not set +CONFIG_INOTIFY=y +# CONFIG_QUOTA is not set +CONFIG_DNOTIFY=y +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set + +# +# Caches +# +# CONFIG_FSCACHE is not set +# CONFIG_FUSE_FS is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_MSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +# CONFIG_PROC_KCORE is not set +CONFIG_SYSFS=y +# CONFIG_DEVFS_FS is not set +# CONFIG_DEVPTS_FS_XATTR is not set +CONFIG_TMPFS=y +# CONFIG_TMPFS_XATTR is not set +# CONFIG_HUGETLB_PAGE is not set +CONFIG_RAMFS=y +# CONFIG_RELAYFS_FS is not set + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=y +# CONFIG_NFS_V3 is not set +# CONFIG_NFS_V4 is not set +# CONFIG_NFS_DIRECTIO is not set +# CONFIG_NFSD is not set +CONFIG_ROOT_NFS=y +CONFIG_LOCKD=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +# CONFIG_RPCSEC_GSS_KRB5 is not set +# CONFIG_RPCSEC_GSS_SPKM3 is not set +# CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y + +# +# Native Language Support +# +# CONFIG_NLS is not set + +# +# Kernel hacking +# +# CONFIG_PRINTK_TIME is not set +CONFIG_DEBUG_KERNEL=y +# CONFIG_MAGIC_SYSRQ is not set +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_HIGHMEM is not set +# CONFIG_DEBUG_BUGVERBOSE is not set +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_FS is not set +# CONFIG_FRAME_POINTER is not set +# CONFIG_EARLY_PRINTK is not set +CONFIG_DEBUG_STACKOVERFLOW=y +# CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_GDBSTUB is not set + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set + +# +# Cryptographic options +# +# CONFIG_CRYPTO is not set + +# +# Hardware crypto devices +# + +# +# Library routines +# +# CONFIG_CRC_CCITT is not set +CONFIG_CRC32=y +# CONFIG_LIBCRC32C is not set diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 2203a9d..4553ffd 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -435,6 +435,11 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) if (c == &boot_cpu_data) sysenter_setup(); enable_sep_cpu(); + + if (c == &boot_cpu_data) + mtrr_bp_init(); + else + mtrr_ap_init(); } #ifdef CONFIG_X86_HT diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 96a75d0..a2c33c1 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -25,7 +25,7 @@ extern int trap_init_f00f_bug(void); /* * Alignment at which movsl is preferred for bulk memory copies. */ -struct movsl_mask movsl_mask; +struct movsl_mask movsl_mask __read_mostly; #endif void __devinit early_intel_workaround(struct cpuinfo_x86 *c) diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c index 64d91f7..169ac8e 100644 --- a/arch/i386/kernel/cpu/mtrr/generic.c +++ b/arch/i386/kernel/cpu/mtrr/generic.c @@ -67,13 +67,6 @@ void __init get_mtrr_state(void) mtrr_state.enabled = (lo & 0xc00) >> 10; } -/* Free resources associated with a struct mtrr_state */ -void __init finalize_mtrr_state(void) -{ - kfree(mtrr_state.var_ranges); - mtrr_state.var_ranges = NULL; -} - /* Some BIOS's are fucked and don't set all MTRRs the same! */ void __init mtrr_state_warn(void) { @@ -334,6 +327,9 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, */ { unsigned long flags; + struct mtrr_var_range *vr; + + vr = &mtrr_state.var_ranges[reg]; local_irq_save(flags); prepare_set(); @@ -342,11 +338,15 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, /* The invalid bit is kept in the mask, so we simply clear the relevant mask register to disable a range. */ mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0); + memset(vr, 0, sizeof(struct mtrr_var_range)); } else { - mtrr_wrmsr(MTRRphysBase_MSR(reg), base << PAGE_SHIFT | type, - (base & size_and_mask) >> (32 - PAGE_SHIFT)); - mtrr_wrmsr(MTRRphysMask_MSR(reg), -size << PAGE_SHIFT | 0x800, - (-size & size_and_mask) >> (32 - PAGE_SHIFT)); + vr->base_lo = base << PAGE_SHIFT | type; + vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT); + vr->mask_lo = -size << PAGE_SHIFT | 0x800; + vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT); + + mtrr_wrmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi); + mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi); } post_set(); diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index d66b09e..764cac6 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c @@ -332,6 +332,8 @@ int mtrr_add_page(unsigned long base, unsigned long size, error = -EINVAL; + /* No CPU hotplug when we change MTRR entries */ + lock_cpu_hotplug(); /* Search for existing MTRR */ down(&main_lock); for (i = 0; i < num_var_ranges; ++i) { @@ -372,6 +374,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, error = i; out: up(&main_lock); + unlock_cpu_hotplug(); return error; } @@ -461,6 +464,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) return -ENXIO; max = num_var_ranges; + /* No CPU hotplug when we change MTRR entries */ + lock_cpu_hotplug(); down(&main_lock); if (reg < 0) { /* Search for existing MTRR */ @@ -501,6 +506,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) error = reg; out: up(&main_lock); + unlock_cpu_hotplug(); return error; } /** @@ -544,21 +550,9 @@ static void __init init_ifs(void) centaur_init_mtrr(); } -static void __init init_other_cpus(void) -{ - if (use_intel()) - get_mtrr_state(); - - /* bring up the other processors */ - set_mtrr(~0U,0,0,0); - - if (use_intel()) { - finalize_mtrr_state(); - mtrr_state_warn(); - } -} - - +/* The suspend/resume methods are only for CPU without MTRR. CPU using generic + * MTRR driver doesn't require this + */ struct mtrr_value { mtrr_type ltype; unsigned long lbase; @@ -611,13 +605,13 @@ static struct sysdev_driver mtrr_sysdev_driver = { /** - * mtrr_init - initialize mtrrs on the boot CPU + * mtrr_bp_init - initialize mtrrs on the boot CPU * * This needs to be called early; before any of the other CPUs are * initialized (i.e. before smp_init()). * */ -static int __init mtrr_init(void) +void __init mtrr_bp_init(void) { init_ifs(); @@ -674,12 +668,48 @@ static int __init mtrr_init(void) if (mtrr_if) { set_num_var_ranges(); init_table(); - init_other_cpus(); - - return sysdev_driver_register(&cpu_sysdev_class, - &mtrr_sysdev_driver); + if (use_intel()) + get_mtrr_state(); } - return -ENXIO; } -subsys_initcall(mtrr_init); +void mtrr_ap_init(void) +{ + unsigned long flags; + + if (!mtrr_if || !use_intel()) + return; + /* + * Ideally we should hold main_lock here to avoid mtrr entries changed, + * but this routine will be called in cpu boot time, holding the lock + * breaks it. This routine is called in two cases: 1.very earily time + * of software resume, when there absolutely isn't mtrr entry changes; + * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to + * prevent mtrr entry changes + */ + local_irq_save(flags); + + mtrr_if->set_all(); + + local_irq_restore(flags); +} + +static int __init mtrr_init_finialize(void) +{ + if (!mtrr_if) + return 0; + if (use_intel()) + mtrr_state_warn(); + else { + /* The CPUs haven't MTRR and seemes not support SMP. They have + * specific drivers, we use a tricky method to support + * suspend/resume for them. + * TBD: is there any system with such CPU which supports + * suspend/resume? if no, we should remove the code. + */ + sysdev_driver_register(&cpu_sysdev_class, + &mtrr_sysdev_driver); + } + return 0; +} +subsys_initcall(mtrr_init_finialize); diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h index de135124..99c9f26 100644 --- a/arch/i386/kernel/cpu/mtrr/mtrr.h +++ b/arch/i386/kernel/cpu/mtrr/mtrr.h @@ -91,7 +91,6 @@ extern struct mtrr_ops * mtrr_if; extern unsigned int num_var_ranges; -void finalize_mtrr_state(void); void mtrr_state_warn(void); char *mtrr_attrib_to_str(int x); void mtrr_wrmsr(unsigned, unsigned, unsigned); diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index d66bf48..8ac8e9f 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -68,21 +68,21 @@ EXPORT_SYMBOL(smp_num_siblings); #endif /* Package ID of each logical CPU */ -int phys_proc_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID}; +int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; EXPORT_SYMBOL(phys_proc_id); /* Core ID of each logical CPU */ -int cpu_core_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID}; +int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; EXPORT_SYMBOL(cpu_core_id); -cpumask_t cpu_sibling_map[NR_CPUS]; +cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_sibling_map); -cpumask_t cpu_core_map[NR_CPUS]; +cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); /* bitmap of online cpus */ -cpumask_t cpu_online_map; +cpumask_t cpu_online_map __read_mostly; EXPORT_SYMBOL(cpu_online_map); cpumask_t cpu_callin_map; @@ -100,7 +100,7 @@ static int __devinitdata tsc_sync_disabled; struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; EXPORT_SYMBOL(cpu_data); -u8 x86_cpu_to_apicid[NR_CPUS] = +u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0xff }; EXPORT_SYMBOL(x86_cpu_to_apicid); @@ -550,10 +550,10 @@ extern struct { #ifdef CONFIG_NUMA /* which logical CPUs are on which nodes */ -cpumask_t node_2_cpu_mask[MAX_NUMNODES] = +cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly = { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE }; /* which node each logical CPU is on */ -int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 }; +int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 }; EXPORT_SYMBOL(cpu_2_node); /* set up a mapping between cpu and node. */ @@ -581,7 +581,7 @@ static inline void unmap_cpu_to_node(int cpu) #endif /* CONFIG_NUMA */ -u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; +u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; static void map_cpu_to_logical_apicid(void) { diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 2854c35..0ee9dee 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -91,7 +91,7 @@ EXPORT_SYMBOL(rtc_lock); DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); -struct timer_opts *cur_timer = &timer_none; +struct timer_opts *cur_timer __read_mostly = &timer_none; /* * This is a special lock that is owned by the CPU and holds the index diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index d766e09..ef8dac5 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c @@ -18,7 +18,7 @@ #include "mach_timer.h" #include <asm/hpet.h> -static unsigned long hpet_usec_quotient; /* convert hpet clks to usec */ +static unsigned long __read_mostly hpet_usec_quotient; /* convert hpet clks to usec */ static unsigned long tsc_hpet_quotient; /* convert tsc to hpet clks */ static unsigned long hpet_last; /* hpet counter value at last tick*/ static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ @@ -180,7 +180,7 @@ static int __init init_hpet(char* override) /************************************************************/ /* tsc timer_opts struct */ -static struct timer_opts timer_hpet = { +static struct timer_opts timer_hpet __read_mostly = { .name = "hpet", .mark_offset = mark_offset_hpet, .get_offset = get_offset_hpet, diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 7e01a52..761972f 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -57,6 +57,9 @@ SECTIONS *(.data.cacheline_aligned) } + /* rarely changed data like cpu maps */ + . = ALIGN(32); + .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } _edata = .; /* End of data section */ . = ALIGN(THREAD_SIZE); /* init_task */ diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c index 6b25afc..f379b8d 100644 --- a/arch/i386/mm/ioremap.c +++ b/arch/i386/mm/ioremap.c @@ -228,7 +228,8 @@ EXPORT_SYMBOL(ioremap_nocache); void iounmap(volatile void __iomem *addr) { struct vm_struct *p; - if ((void __force *) addr <= high_memory) + + if ((void __force *)addr <= high_memory) return; /* @@ -241,9 +242,10 @@ void iounmap(volatile void __iomem *addr) return; write_lock(&vmlist_lock); - p = __remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr)); + p = __remove_vm_area((void *)(PAGE_MASK & (unsigned long __force)addr)); if (!p) { printk(KERN_WARNING "iounmap: bad address %p\n", addr); + dump_stack(); goto out_unlock; } diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c index 0e6b45b..c547c1a 100644 --- a/arch/i386/power/cpu.c +++ b/arch/i386/power/cpu.c @@ -137,6 +137,7 @@ void __restore_processor_state(struct saved_context *ctxt) fix_processor_context(); do_fpu_end(); + mtrr_ap_init(); } void restore_processor_state(void) diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index b2e2f65..e1fb68d 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_IA64_PALINFO) += palinfo.o obj-$(CONFIG_IOSAPIC) += iosapic.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o +obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o obj-$(CONFIG_IA64_CYCLONE) += cyclone.o obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index cda06f8..542256e 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -640,9 +640,11 @@ acpi_boot_init (void) if (smp_boot_data.cpu_phys_id[cpu] != hard_smp_processor_id()) node_cpuid[i++].phys_id = smp_boot_data.cpu_phys_id[cpu]; } - build_cpu_to_node_map(); # endif #endif +#ifdef CONFIG_ACPI_NUMA + build_cpu_to_node_map(); +#endif /* Make boot-up look pretty */ printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, total_cpus); return 0; diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c new file mode 100644 index 0000000..a68ce66 --- /dev/null +++ b/arch/ia64/kernel/numa.c @@ -0,0 +1,57 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ia64 kernel NUMA specific stuff + * + * Copyright (C) 2002 Erich Focht <efocht@ess.nec.de> + * Copyright (C) 2004 Silicon Graphics, Inc. + * Jesse Barnes <jbarnes@sgi.com> + */ +#include <linux/config.h> +#include <linux/topology.h> +#include <linux/module.h> +#include <asm/processor.h> +#include <asm/smp.h> + +u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned; +EXPORT_SYMBOL(cpu_to_node_map); + +cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; + +/** + * build_cpu_to_node_map - setup cpu to node and node to cpumask arrays + * + * Build cpu to node mapping and initialize the per node cpu masks using + * info from the node_cpuid array handed to us by ACPI. + */ +void __init build_cpu_to_node_map(void) +{ + int cpu, i, node; + + for(node=0; node < MAX_NUMNODES; node++) + cpus_clear(node_to_cpu_mask[node]); + + for(cpu = 0; cpu < NR_CPUS; ++cpu) { + node = -1; + for (i = 0; i < NR_CPUS; ++i) + if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) { + node = node_cpuid[i].nid; + break; + } + cpu_to_node_map[cpu] = (node >= 0) ? node : 0; + if (node >= 0) + cpu_set(cpu, node_to_cpu_mask[node]); + } +} diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index edd9f07..b8a0a7d 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -143,6 +143,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr) __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16); psr->mfh = 0; /* drop signal handler's fph contents... */ + preempt_disable(); if (psr->dfh) ia64_drop_fpu(current); else { @@ -150,6 +151,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr) __ia64_load_fpu(current->thread.fph); ia64_set_local_fpu_owner(current); } + preempt_enable(); } return err; } diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 623b0a5..7d72c0d 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -525,47 +525,6 @@ smp_build_cpu_map (void) } } -#ifdef CONFIG_NUMA - -/* on which node is each logical CPU (one cacheline even for 64 CPUs) */ -u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_to_node_map); -/* which logical CPUs are on which nodes */ -cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; - -/* - * Build cpu to node mapping and initialize the per node cpu masks. - */ -void __init -build_cpu_to_node_map (void) -{ - int cpu, i, node; - - for(node=0; node<MAX_NUMNODES; node++) - cpus_clear(node_to_cpu_mask[node]); - for(cpu = 0; cpu < NR_CPUS; ++cpu) { - /* - * All Itanium NUMA platforms I know use ACPI, so maybe we - * can drop this ifdef completely. [EF] - */ -#ifdef CONFIG_ACPI_NUMA - node = -1; - for (i = 0; i < NR_CPUS; ++i) - if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) { - node = node_cpuid[i].nid; - break; - } -#else -# error Fixme: Dunno how to build CPU-to-node map. -#endif - cpu_to_node_map[cpu] = (node >= 0) ? node : 0; - if (node >= 0) - cpu_set(cpu, node_to_cpu_mask[node]); - } -} - -#endif /* CONFIG_NUMA */ - /* * Cycle through the APs sending Wakeup IPIs to boot each. */ diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index e7e520d..4440c83 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -90,14 +90,16 @@ die (const char *str, struct pt_regs *regs, long err) .lock_owner_depth = 0 }; static int die_counter; + int cpu = get_cpu(); - if (die.lock_owner != smp_processor_id()) { + if (die.lock_owner != cpu) { console_verbose(); spin_lock_irq(&die.lock); - die.lock_owner = smp_processor_id(); + die.lock_owner = cpu; die.lock_owner_depth = 0; bust_spinlocks(1); } + put_cpu(); if (++die.lock_owner_depth < 3) { printk("%s[%d]: %s %ld [%d]\n", diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index f3fd528..b5c90e5 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -44,150 +44,7 @@ struct early_node_data { }; static struct early_node_data mem_data[MAX_NUMNODES] __initdata; - -/** - * reassign_cpu_only_nodes - called from find_memory to move CPU-only nodes to a memory node - * - * This function will move nodes with only CPUs (no memory) - * to a node with memory which is at the minimum numa_slit distance. - * Any reassigments will result in the compression of the nodes - * and renumbering the nid values where appropriate. - * The static declarations below are to avoid large stack size which - * makes the code not re-entrant. - */ -static void __init reassign_cpu_only_nodes(void) -{ - struct node_memblk_s *p; - int i, j, k, nnode, nid, cpu, cpunid, pxm; - u8 cslit, slit; - static DECLARE_BITMAP(nodes_with_mem, MAX_NUMNODES) __initdata; - static u8 numa_slit_fix[MAX_NUMNODES * MAX_NUMNODES] __initdata; - static int node_flip[MAX_NUMNODES] __initdata; - static int old_nid_map[NR_CPUS] __initdata; - - for (nnode = 0, p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++) - if (!test_bit(p->nid, (void *) nodes_with_mem)) { - set_bit(p->nid, (void *) nodes_with_mem); - nnode++; - } - - /* - * All nids with memory. - */ - if (nnode == num_online_nodes()) - return; - - /* - * Change nids and attempt to migrate CPU-only nodes - * to the best numa_slit (closest neighbor) possible. - * For reassigned CPU nodes a nid can't be arrived at - * until after this loop because the target nid's new - * identity might not have been established yet. So - * new nid values are fabricated above num_online_nodes() and - * mapped back later to their true value. - */ - /* MCD - This code is a bit complicated, but may be unnecessary now. - * We can now handle much more interesting node-numbering. - * The old requirement that 0 <= nid <= numnodes <= MAX_NUMNODES - * and that there be no holes in the numbering 0..numnodes - * has become simply 0 <= nid <= MAX_NUMNODES. - */ - nid = 0; - for_each_online_node(i) { - if (test_bit(i, (void *) nodes_with_mem)) { - /* - * Save original nid value for numa_slit - * fixup and node_cpuid reassignments. - */ - node_flip[nid] = i; - - if (i == nid) { - nid++; - continue; - } - - for (p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++) - if (p->nid == i) - p->nid = nid; - - cpunid = nid; - nid++; - } else - cpunid = MAX_NUMNODES; - - for (cpu = 0; cpu < NR_CPUS; cpu++) - if (node_cpuid[cpu].nid == i) { - /* - * For nodes not being reassigned just - * fix the cpu's nid and reverse pxm map - */ - if (cpunid < MAX_NUMNODES) { - pxm = nid_to_pxm_map[i]; - pxm_to_nid_map[pxm] = - node_cpuid[cpu].nid = cpunid; - continue; - } - - /* - * For nodes being reassigned, find best node by - * numa_slit information and then make a temporary - * nid value based on current nid and num_online_nodes(). - */ - slit = 0xff; - k = 2*num_online_nodes(); - for_each_online_node(j) { - if (i == j) - continue; - else if (test_bit(j, (void *) nodes_with_mem)) { - cslit = numa_slit[i * num_online_nodes() + j]; - if (cslit < slit) { - k = num_online_nodes() + j; - slit = cslit; - } - } - } - - /* save old nid map so we can update the pxm */ - old_nid_map[cpu] = node_cpuid[cpu].nid; - node_cpuid[cpu].nid = k; - } - } - - /* - * Fixup temporary nid values for CPU-only nodes. - */ - for (cpu = 0; cpu < NR_CPUS; cpu++) - if (node_cpuid[cpu].nid == (2*num_online_nodes())) { - pxm = nid_to_pxm_map[old_nid_map[cpu]]; - pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = nnode - 1; - } else { - for (i = 0; i < nnode; i++) { - if (node_flip[i] != (node_cpuid[cpu].nid - num_online_nodes())) - continue; - - pxm = nid_to_pxm_map[old_nid_map[cpu]]; - pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = i; - break; - } - } - - /* - * Fix numa_slit by compressing from larger - * nid array to reduced nid array. - */ - for (i = 0; i < nnode; i++) - for (j = 0; j < nnode; j++) - numa_slit_fix[i * nnode + j] = - numa_slit[node_flip[i] * num_online_nodes() + node_flip[j]]; - - memcpy(numa_slit, numa_slit_fix, sizeof (numa_slit)); - - nodes_clear(node_online_map); - for (i = 0; i < nnode; i++) - node_set_online(i); - - return; -} +static nodemask_t memory_less_mask __initdata; /* * To prevent cache aliasing effects, align per-node structures so that they @@ -233,44 +90,101 @@ static int __init build_node_maps(unsigned long start, unsigned long len, } /** - * early_nr_phys_cpus_node - return number of physical cpus on a given node + * early_nr_cpus_node - return number of cpus on a given node * @node: node to check * - * Count the number of physical cpus on @node. These are cpus that actually - * exist. We can't use nr_cpus_node() yet because + * Count the number of cpus on @node. We can't use nr_cpus_node() yet because * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been - * called yet. + * called yet. Note that node 0 will also count all non-existent cpus. */ -static int early_nr_phys_cpus_node(int node) +static int __init early_nr_cpus_node(int node) { int cpu, n = 0; for (cpu = 0; cpu < NR_CPUS; cpu++) if (node == node_cpuid[cpu].nid) - if ((cpu == 0) || node_cpuid[cpu].phys_id) - n++; + n++; return n; } +/** + * compute_pernodesize - compute size of pernode data + * @node: the node id. + */ +static unsigned long __init compute_pernodesize(int node) +{ + unsigned long pernodesize = 0, cpus; + + cpus = early_nr_cpus_node(node); + pernodesize += PERCPU_PAGE_SIZE * cpus; + pernodesize += node * L1_CACHE_BYTES; + pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t)); + pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); + pernodesize = PAGE_ALIGN(pernodesize); + return pernodesize; +} /** - * early_nr_cpus_node - return number of cpus on a given node - * @node: node to check + * per_cpu_node_setup - setup per-cpu areas on each node + * @cpu_data: per-cpu area on this node + * @node: node to setup * - * Count the number of cpus on @node. We can't use nr_cpus_node() yet because - * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been - * called yet. Note that node 0 will also count all non-existent cpus. + * Copy the static per-cpu data into the region we just set aside and then + * setup __per_cpu_offset for each CPU on this node. Return a pointer to + * the end of the area. */ -static int early_nr_cpus_node(int node) +static void *per_cpu_node_setup(void *cpu_data, int node) { - int cpu, n = 0; +#ifdef CONFIG_SMP + int cpu; - for (cpu = 0; cpu < NR_CPUS; cpu++) - if (node == node_cpuid[cpu].nid) - n++; + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (node == node_cpuid[cpu].nid) { + memcpy(__va(cpu_data), __phys_per_cpu_start, + __per_cpu_end - __per_cpu_start); + __per_cpu_offset[cpu] = (char*)__va(cpu_data) - + __per_cpu_start; + cpu_data += PERCPU_PAGE_SIZE; + } + } +#endif + return cpu_data; +} - return n; +/** + * fill_pernode - initialize pernode data. + * @node: the node id. + * @pernode: physical address of pernode data + * @pernodesize: size of the pernode data + */ +static void __init fill_pernode(int node, unsigned long pernode, + unsigned long pernodesize) +{ + void *cpu_data; + int cpus = early_nr_cpus_node(node); + struct bootmem_data *bdp = &mem_data[node].bootmem_data; + + mem_data[node].pernode_addr = pernode; + mem_data[node].pernode_size = pernodesize; + memset(__va(pernode), 0, pernodesize); + + cpu_data = (void *)pernode; + pernode += PERCPU_PAGE_SIZE * cpus; + pernode += node * L1_CACHE_BYTES; + + mem_data[node].pgdat = __va(pernode); + pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); + + mem_data[node].node_data = __va(pernode); + pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); + + mem_data[node].pgdat->bdata = bdp; + pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); + + cpu_data = per_cpu_node_setup(cpu_data, node); + + return; } /** @@ -304,9 +218,8 @@ static int early_nr_cpus_node(int node) static int __init find_pernode_space(unsigned long start, unsigned long len, int node) { - unsigned long epfn, cpu, cpus, phys_cpus; + unsigned long epfn; unsigned long pernodesize = 0, pernode, pages, mapsize; - void *cpu_data; struct bootmem_data *bdp = &mem_data[node].bootmem_data; epfn = (start + len) >> PAGE_SHIFT; @@ -329,49 +242,12 @@ static int __init find_pernode_space(unsigned long start, unsigned long len, * Calculate total size needed, incl. what's necessary * for good alignment and alias prevention. */ - cpus = early_nr_cpus_node(node); - phys_cpus = early_nr_phys_cpus_node(node); - pernodesize += PERCPU_PAGE_SIZE * cpus; - pernodesize += node * L1_CACHE_BYTES; - pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t)); - pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); - pernodesize = PAGE_ALIGN(pernodesize); + pernodesize = compute_pernodesize(node); pernode = NODEDATA_ALIGN(start, node); /* Is this range big enough for what we want to store here? */ - if (start + len > (pernode + pernodesize + mapsize)) { - mem_data[node].pernode_addr = pernode; - mem_data[node].pernode_size = pernodesize; - memset(__va(pernode), 0, pernodesize); - - cpu_data = (void *)pernode; - pernode += PERCPU_PAGE_SIZE * cpus; - pernode += node * L1_CACHE_BYTES; - - mem_data[node].pgdat = __va(pernode); - pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); - - mem_data[node].node_data = __va(pernode); - pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); - - mem_data[node].pgdat->bdata = bdp; - pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); - - /* - * Copy the static per-cpu data into the region we - * just set aside and then setup __per_cpu_offset - * for each CPU on this node. - */ - for (cpu = 0; cpu < NR_CPUS; cpu++) { - if (node == node_cpuid[cpu].nid) { - memcpy(__va(cpu_data), __phys_per_cpu_start, - __per_cpu_end - __per_cpu_start); - __per_cpu_offset[cpu] = (char*)__va(cpu_data) - - __per_cpu_start; - cpu_data += PERCPU_PAGE_SIZE; - } - } - } + if (start + len > (pernode + pernodesize + mapsize)) + fill_pernode(node, pernode, pernodesize); return 0; } @@ -411,6 +287,9 @@ static void __init reserve_pernode_space(void) for_each_online_node(node) { pg_data_t *pdp = mem_data[node].pgdat; + if (node_isset(node, memory_less_mask)) + continue; + bdp = pdp->bdata; /* First the bootmem_map itself */ @@ -436,8 +315,8 @@ static void __init reserve_pernode_space(void) */ static void __init initialize_pernode_data(void) { - int cpu, node; pg_data_t *pgdat_list[MAX_NUMNODES]; + int cpu, node; for_each_online_node(node) pgdat_list[node] = mem_data[node].pgdat; @@ -447,12 +326,99 @@ static void __init initialize_pernode_data(void) memcpy(mem_data[node].node_data->pg_data_ptrs, pgdat_list, sizeof(pgdat_list)); } - +#ifdef CONFIG_SMP /* Set the node_data pointer for each per-cpu struct */ for (cpu = 0; cpu < NR_CPUS; cpu++) { node = node_cpuid[cpu].nid; per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data; } +#else + { + struct cpuinfo_ia64 *cpu0_cpu_info; + cpu = 0; + node = node_cpuid[cpu].nid; + cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start + + ((char *)&per_cpu__cpu_info - __per_cpu_start)); + cpu0_cpu_info->node_data = mem_data[node].node_data; + } +#endif /* CONFIG_SMP */ +} + +/** + * memory_less_node_alloc - * attempt to allocate memory on the best NUMA slit + * node but fall back to any other node when __alloc_bootmem_node fails + * for best. + * @nid: node id + * @pernodesize: size of this node's pernode data + * @align: alignment to use for this node's pernode data + */ +static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize, + unsigned long align) +{ + void *ptr = NULL; + u8 best = 0xff; + int bestnode = -1, node; + + for_each_online_node(node) { + if (node_isset(node, memory_less_mask)) + continue; + else if (node_distance(nid, node) < best) { + best = node_distance(nid, node); + bestnode = node; + } + } + + ptr = __alloc_bootmem_node(mem_data[bestnode].pgdat, + pernodesize, align, __pa(MAX_DMA_ADDRESS)); + + if (!ptr) + panic("NO memory for memory less node\n"); + return ptr; +} + +/** + * pgdat_insert - insert the pgdat into global pgdat_list + * @pgdat: the pgdat for a node. + */ +static void __init pgdat_insert(pg_data_t *pgdat) +{ + pg_data_t *prev = NULL, *next; + + for_each_pgdat(next) + if (pgdat->node_id < next->node_id) + break; + else + prev = next; + + if (prev) { + prev->pgdat_next = pgdat; + pgdat->pgdat_next = next; + } else { + pgdat->pgdat_next = pgdat_list; + pgdat_list = pgdat; + } + + return; +} + +/** + * memory_less_nodes - allocate and initialize CPU only nodes pernode + * information. + */ +static void __init memory_less_nodes(void) +{ + unsigned long pernodesize; + void *pernode; + int node; + + for_each_node_mask(node, memory_less_mask) { + pernodesize = compute_pernodesize(node); + pernode = memory_less_node_alloc(node, pernodesize, + (node) ? (node * PERCPU_PAGE_SIZE) : (1024*1024)); + fill_pernode(node, __pa(pernode), pernodesize); + } + + return; } /** @@ -472,16 +438,19 @@ void __init find_memory(void) node_set_online(0); } + nodes_or(memory_less_mask, memory_less_mask, node_online_map); min_low_pfn = -1; max_low_pfn = 0; - if (num_online_nodes() > 1) - reassign_cpu_only_nodes(); - /* These actually end up getting called by call_pernode_memory() */ efi_memmap_walk(filter_rsvd_memory, build_node_maps); efi_memmap_walk(filter_rsvd_memory, find_pernode_space); + for_each_online_node(node) + if (mem_data[node].bootmem_data.node_low_pfn) { + node_clear(node, memory_less_mask); + mem_data[node].min_pfn = ~0UL; + } /* * Initialize the boot memory maps in reverse order since that's * what the bootmem allocator expects @@ -492,17 +461,14 @@ void __init find_memory(void) if (!node_online(node)) continue; + else if (node_isset(node, memory_less_mask)) + continue; bdp = &mem_data[node].bootmem_data; pernode = mem_data[node].pernode_addr; pernodesize = mem_data[node].pernode_size; map = pernode + pernodesize; - /* Sanity check... */ - if (!pernode) - panic("pernode space for node %d " - "could not be allocated!", node); - init_bootmem_node(mem_data[node].pgdat, map>>PAGE_SHIFT, bdp->node_boot_start>>PAGE_SHIFT, @@ -512,6 +478,7 @@ void __init find_memory(void) efi_memmap_walk(filter_rsvd_memory, free_node_bootmem); reserve_pernode_space(); + memory_less_nodes(); initialize_pernode_data(); max_pfn = max_low_pfn; @@ -519,6 +486,7 @@ void __init find_memory(void) find_initrd(); } +#ifdef CONFIG_SMP /** * per_cpu_init - setup per-cpu variables * @@ -529,15 +497,15 @@ void *per_cpu_init(void) { int cpu; - if (smp_processor_id() == 0) { - for (cpu = 0; cpu < NR_CPUS; cpu++) { - per_cpu(local_per_cpu_offset, cpu) = - __per_cpu_offset[cpu]; - } - } + if (smp_processor_id() != 0) + return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; + + for (cpu = 0; cpu < NR_CPUS; cpu++) + per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; } +#endif /* CONFIG_SMP */ /** * show_mem - give short summary of memory stats @@ -680,12 +648,13 @@ void __init paging_init(void) max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; - /* so min() will work in count_node_pages */ - for_each_online_node(node) - mem_data[node].min_pfn = ~0UL; - efi_memmap_walk(filter_rsvd_memory, count_node_pages); + vmalloc_end -= PAGE_ALIGN(max_low_pfn * sizeof(struct page)); + vmem_map = (struct page *) vmalloc_end; + efi_memmap_walk(create_mem_map_page_table, NULL); + printk("Virtual mem_map starts at 0x%p\n", vmem_map); + for_each_online_node(node) { memset(zones_size, 0, sizeof(zones_size)); memset(zholes_size, 0, sizeof(zholes_size)); @@ -719,15 +688,6 @@ void __init paging_init(void) mem_data[node].num_dma_physpages); } - if (node == 0) { - vmalloc_end -= - PAGE_ALIGN(max_low_pfn * sizeof(struct page)); - vmem_map = (struct page *) vmalloc_end; - - efi_memmap_walk(create_mem_map_page_table, NULL); - printk("Virtual mem_map starts at 0x%p\n", vmem_map); - } - pfn_offset = mem_data[node].min_pfn; NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset; @@ -735,5 +695,11 @@ void __init paging_init(void) pfn_offset, zholes_size); } + /* + * Make memory less nodes become a member of the known nodes. + */ + for_each_node_mask(node, memory_less_mask) + pgdat_insert(mem_data[node].pgdat); + zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); } diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 4eb2f52..65f9958 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -597,7 +597,8 @@ mem_init (void) kclist_add(&kcore_kernel, _stext, _end - _stext); for_each_pgdat(pgdat) - totalram_pages += free_all_bootmem_node(pgdat); + if (pgdat->bdata->node_bootmem_map) + totalram_pages += free_all_bootmem_node(pgdat); reserved_pages = 0; efi_memmap_walk(count_reserved_pages, &reserved_pages); diff --git a/arch/ia64/sn/include/pci/pcibr_provider.h b/arch/ia64/sn/include/pci/pcibr_provider.h deleted file mode 100644 index 1cd291d..0000000 --- a/arch/ia64/sn/include/pci/pcibr_provider.h +++ /dev/null @@ -1,151 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1992-1997,2000-2004 Silicon Graphics, Inc. All rights reserved. - */ -#ifndef _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H -#define _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H - -/* Workarounds */ -#define PV907516 (1 << 1) /* TIOCP: Don't write the write buffer flush reg */ - -#define BUSTYPE_MASK 0x1 - -/* Macros given a pcibus structure */ -#define IS_PCIX(ps) ((ps)->pbi_bridge_mode & BUSTYPE_MASK) -#define IS_PCI_BRIDGE_ASIC(asic) (asic == PCIIO_ASIC_TYPE_PIC || \ - asic == PCIIO_ASIC_TYPE_TIOCP) -#define IS_PIC_SOFT(ps) (ps->pbi_bridge_type == PCIBR_BRIDGETYPE_PIC) - - -/* - * The different PCI Bridge types supported on the SGI Altix platforms - */ -#define PCIBR_BRIDGETYPE_UNKNOWN -1 -#define PCIBR_BRIDGETYPE_PIC 2 -#define PCIBR_BRIDGETYPE_TIOCP 3 - -/* - * Bridge 64bit Direct Map Attributes - */ -#define PCI64_ATTR_PREF (1ull << 59) -#define PCI64_ATTR_PREC (1ull << 58) -#define PCI64_ATTR_VIRTUAL (1ull << 57) -#define PCI64_ATTR_BAR (1ull << 56) -#define PCI64_ATTR_SWAP (1ull << 55) -#define PCI64_ATTR_VIRTUAL1 (1ull << 54) - -#define PCI32_LOCAL_BASE 0 -#define PCI32_MAPPED_BASE 0x40000000 -#define PCI32_DIRECT_BASE 0x80000000 - -#define IS_PCI32_MAPPED(x) ((uint64_t)(x) < PCI32_DIRECT_BASE && \ - (uint64_t)(x) >= PCI32_MAPPED_BASE) -#define IS_PCI32_DIRECT(x) ((uint64_t)(x) >= PCI32_MAPPED_BASE) - - -/* - * Bridge PMU Address Transaltion Entry Attibutes - */ -#define PCI32_ATE_V (0x1 << 0) -#define PCI32_ATE_CO (0x1 << 1) -#define PCI32_ATE_PREC (0x1 << 2) -#define PCI32_ATE_PREF (0x1 << 3) -#define PCI32_ATE_BAR (0x1 << 4) -#define PCI32_ATE_ADDR_SHFT 12 - -#define MINIMAL_ATES_REQUIRED(addr, size) \ - (IOPG(IOPGOFF(addr) + (size) - 1) == IOPG((size) - 1)) - -#define MINIMAL_ATE_FLAG(addr, size) \ - (MINIMAL_ATES_REQUIRED((uint64_t)addr, size) ? 1 : 0) - -/* bit 29 of the pci address is the SWAP bit */ -#define ATE_SWAPSHIFT 29 -#define ATE_SWAP_ON(x) ((x) |= (1 << ATE_SWAPSHIFT)) -#define ATE_SWAP_OFF(x) ((x) &= ~(1 << ATE_SWAPSHIFT)) - -/* - * I/O page size - */ -#if PAGE_SIZE < 16384 -#define IOPFNSHIFT 12 /* 4K per mapped page */ -#else -#define IOPFNSHIFT 14 /* 16K per mapped page */ -#endif - -#define IOPGSIZE (1 << IOPFNSHIFT) -#define IOPG(x) ((x) >> IOPFNSHIFT) -#define IOPGOFF(x) ((x) & (IOPGSIZE-1)) - -#define PCIBR_DEV_SWAP_DIR (1ull << 19) -#define PCIBR_CTRL_PAGE_SIZE (0x1 << 21) - -/* - * PMU resources. - */ -struct ate_resource{ - uint64_t *ate; - uint64_t num_ate; - uint64_t lowest_free_index; -}; - -struct pcibus_info { - struct pcibus_bussoft pbi_buscommon; /* common header */ - uint32_t pbi_moduleid; - short pbi_bridge_type; - short pbi_bridge_mode; - - struct ate_resource pbi_int_ate_resource; - uint64_t pbi_int_ate_size; - - uint64_t pbi_dir_xbase; - char pbi_hub_xid; - - uint64_t pbi_devreg[8]; - spinlock_t pbi_lock; - - uint32_t pbi_valid_devices; - uint32_t pbi_enabled_devices; -}; - -/* - * pcibus_info structure locking macros - */ -inline static unsigned long -pcibr_lock(struct pcibus_info *pcibus_info) -{ - unsigned long flag; - spin_lock_irqsave(&pcibus_info->pbi_lock, flag); - return(flag); -} -#define pcibr_unlock(pcibus_info, flag) spin_unlock_irqrestore(&pcibus_info->pbi_lock, flag) - -extern int pcibr_init_provider(void); -extern void *pcibr_bus_fixup(struct pcibus_bussoft *); -extern dma_addr_t pcibr_dma_map(struct pci_dev *, unsigned long, size_t); -extern dma_addr_t pcibr_dma_map_consistent(struct pci_dev *, unsigned long, size_t); -extern void pcibr_dma_unmap(struct pci_dev *, dma_addr_t, int); - -/* - * prototypes for the bridge asic register access routines in pcibr_reg.c - */ -extern void pcireg_control_bit_clr(struct pcibus_info *, uint64_t); -extern void pcireg_control_bit_set(struct pcibus_info *, uint64_t); -extern uint64_t pcireg_tflush_get(struct pcibus_info *); -extern uint64_t pcireg_intr_status_get(struct pcibus_info *); -extern void pcireg_intr_enable_bit_clr(struct pcibus_info *, uint64_t); -extern void pcireg_intr_enable_bit_set(struct pcibus_info *, uint64_t); -extern void pcireg_intr_addr_addr_set(struct pcibus_info *, int, uint64_t); -extern void pcireg_force_intr_set(struct pcibus_info *, int); -extern uint64_t pcireg_wrb_flush_get(struct pcibus_info *, int); -extern void pcireg_int_ate_set(struct pcibus_info *, int, uint64_t); -extern uint64_t * pcireg_int_ate_addr(struct pcibus_info *, int); -extern void pcibr_force_interrupt(struct sn_irq_info *sn_irq_info); -extern void pcibr_change_devices_irq(struct sn_irq_info *sn_irq_info); -extern int pcibr_ate_alloc(struct pcibus_info *, int); -extern void pcibr_ate_free(struct pcibus_info *, int); -extern void ate_write(struct pcibus_info *, int, int, uint64_t); -#endif diff --git a/arch/ia64/sn/include/pci/pic.h b/arch/ia64/sn/include/pci/pic.h deleted file mode 100644 index fd18ace..0000000 --- a/arch/ia64/sn/include/pci/pic.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1992 - 1997, 2000-2003 Silicon Graphics, Inc. All rights reserved. - */ -#ifndef _ASM_IA64_SN_PCI_PIC_H -#define _ASM_IA64_SN_PCI_PIC_H - -/* - * PIC AS DEVICE ZERO - * ------------------ - * - * PIC handles PCI/X busses. PCI/X requires that the 'bridge' (i.e. PIC) - * be designated as 'device 0'. That is a departure from earlier SGI - * PCI bridges. Because of that we use config space 1 to access the - * config space of the first actual PCI device on the bus. - * Here's what the PIC manual says: - * - * The current PCI-X bus specification now defines that the parent - * hosts bus bridge (PIC for example) must be device 0 on bus 0. PIC - * reduced the total number of devices from 8 to 4 and removed the - * device registers and windows, now only supporting devices 0,1,2, and - * 3. PIC did leave all 8 configuration space windows. The reason was - * there was nothing to gain by removing them. Here in lies the problem. - * The device numbering we do using 0 through 3 is unrelated to the device - * numbering which PCI-X requires in configuration space. In the past we - * correlated Configs pace and our device space 0 <-> 0, 1 <-> 1, etc. - * PCI-X requires we start a 1, not 0 and currently the PX brick - * does associate our: - * - * device 0 with configuration space window 1, - * device 1 with configuration space window 2, - * device 2 with configuration space window 3, - * device 3 with configuration space window 4. - * - * The net effect is that all config space access are off-by-one with - * relation to other per-slot accesses on the PIC. - * Here is a table that shows some of that: - * - * Internal Slot# - * | - * | 0 1 2 3 - * ----------|--------------------------------------- - * config | 0x21000 0x22000 0x23000 0x24000 - * | - * even rrb | 0[0] n/a 1[0] n/a [] == implied even/odd - * | - * odd rrb | n/a 0[1] n/a 1[1] - * | - * int dev | 00 01 10 11 - * | - * ext slot# | 1 2 3 4 - * ----------|--------------------------------------- - */ - -#define PIC_ATE_TARGETID_SHFT 8 -#define PIC_HOST_INTR_ADDR 0x0000FFFFFFFFFFFFUL -#define PIC_PCI64_ATTR_TARG_SHFT 60 - - -/***************************************************************************** - *********************** PIC MMR structure mapping *************************** - *****************************************************************************/ - -/* NOTE: PIC WAR. PV#854697. PIC does not allow writes just to [31:0] - * of a 64-bit register. When writing PIC registers, always write the - * entire 64 bits. - */ - -struct pic { - - /* 0x000000-0x00FFFF -- Local Registers */ - - /* 0x000000-0x000057 -- Standard Widget Configuration */ - uint64_t p_wid_id; /* 0x000000 */ - uint64_t p_wid_stat; /* 0x000008 */ - uint64_t p_wid_err_upper; /* 0x000010 */ - uint64_t p_wid_err_lower; /* 0x000018 */ - #define p_wid_err p_wid_err_lower - uint64_t p_wid_control; /* 0x000020 */ - uint64_t p_wid_req_timeout; /* 0x000028 */ - uint64_t p_wid_int_upper; /* 0x000030 */ - uint64_t p_wid_int_lower; /* 0x000038 */ - #define p_wid_int p_wid_int_lower - uint64_t p_wid_err_cmdword; /* 0x000040 */ - uint64_t p_wid_llp; /* 0x000048 */ - uint64_t p_wid_tflush; /* 0x000050 */ - - /* 0x000058-0x00007F -- Bridge-specific Widget Configuration */ - uint64_t p_wid_aux_err; /* 0x000058 */ - uint64_t p_wid_resp_upper; /* 0x000060 */ - uint64_t p_wid_resp_lower; /* 0x000068 */ - #define p_wid_resp p_wid_resp_lower - uint64_t p_wid_tst_pin_ctrl; /* 0x000070 */ - uint64_t p_wid_addr_lkerr; /* 0x000078 */ - - /* 0x000080-0x00008F -- PMU & MAP */ - uint64_t p_dir_map; /* 0x000080 */ - uint64_t _pad_000088; /* 0x000088 */ - - /* 0x000090-0x00009F -- SSRAM */ - uint64_t p_map_fault; /* 0x000090 */ - uint64_t _pad_000098; /* 0x000098 */ - - /* 0x0000A0-0x0000AF -- Arbitration */ - uint64_t p_arb; /* 0x0000A0 */ - uint64_t _pad_0000A8; /* 0x0000A8 */ - - /* 0x0000B0-0x0000BF -- Number In A Can or ATE Parity Error */ - uint64_t p_ate_parity_err; /* 0x0000B0 */ - uint64_t _pad_0000B8; /* 0x0000B8 */ - - /* 0x0000C0-0x0000FF -- PCI/GIO */ - uint64_t p_bus_timeout; /* 0x0000C0 */ - uint64_t p_pci_cfg; /* 0x0000C8 */ - uint64_t p_pci_err_upper; /* 0x0000D0 */ - uint64_t p_pci_err_lower; /* 0x0000D8 */ - #define p_pci_err p_pci_err_lower - uint64_t _pad_0000E0[4]; /* 0x0000{E0..F8} */ - - /* 0x000100-0x0001FF -- Interrupt */ - uint64_t p_int_status; /* 0x000100 */ - uint64_t p_int_enable; /* 0x000108 */ - uint64_t p_int_rst_stat; /* 0x000110 */ - uint64_t p_int_mode; /* 0x000118 */ - uint64_t p_int_device; /* 0x000120 */ - uint64_t p_int_host_err; /* 0x000128 */ - uint64_t p_int_addr[8]; /* 0x0001{30,,,68} */ - uint64_t p_err_int_view; /* 0x000170 */ - uint64_t p_mult_int; /* 0x000178 */ - uint64_t p_force_always[8]; /* 0x0001{80,,,B8} */ - uint64_t p_force_pin[8]; /* 0x0001{C0,,,F8} */ - - /* 0x000200-0x000298 -- Device */ - uint64_t p_device[4]; /* 0x0002{00,,,18} */ - uint64_t _pad_000220[4]; /* 0x0002{20,,,38} */ - uint64_t p_wr_req_buf[4]; /* 0x0002{40,,,58} */ - uint64_t _pad_000260[4]; /* 0x0002{60,,,78} */ - uint64_t p_rrb_map[2]; /* 0x0002{80,,,88} */ - #define p_even_resp p_rrb_map[0] /* 0x000280 */ - #define p_odd_resp p_rrb_map[1] /* 0x000288 */ - uint64_t p_resp_status; /* 0x000290 */ - uint64_t p_resp_clear; /* 0x000298 */ - - uint64_t _pad_0002A0[12]; /* 0x0002{A0..F8} */ - - /* 0x000300-0x0003F8 -- Buffer Address Match Registers */ - struct { - uint64_t upper; /* 0x0003{00,,,F0} */ - uint64_t lower; /* 0x0003{08,,,F8} */ - } p_buf_addr_match[16]; - - /* 0x000400-0x0005FF -- Performance Monitor Registers (even only) */ - struct { - uint64_t flush_w_touch; /* 0x000{400,,,5C0} */ - uint64_t flush_wo_touch; /* 0x000{408,,,5C8} */ - uint64_t inflight; /* 0x000{410,,,5D0} */ - uint64_t prefetch; /* 0x000{418,,,5D8} */ - uint64_t total_pci_retry; /* 0x000{420,,,5E0} */ - uint64_t max_pci_retry; /* 0x000{428,,,5E8} */ - uint64_t max_latency; /* 0x000{430,,,5F0} */ - uint64_t clear_all; /* 0x000{438,,,5F8} */ - } p_buf_count[8]; - - - /* 0x000600-0x0009FF -- PCI/X registers */ - uint64_t p_pcix_bus_err_addr; /* 0x000600 */ - uint64_t p_pcix_bus_err_attr; /* 0x000608 */ - uint64_t p_pcix_bus_err_data; /* 0x000610 */ - uint64_t p_pcix_pio_split_addr; /* 0x000618 */ - uint64_t p_pcix_pio_split_attr; /* 0x000620 */ - uint64_t p_pcix_dma_req_err_attr; /* 0x000628 */ - uint64_t p_pcix_dma_req_err_addr; /* 0x000630 */ - uint64_t p_pcix_timeout; /* 0x000638 */ - - uint64_t _pad_000640[120]; /* 0x000{640,,,9F8} */ - - /* 0x000A00-0x000BFF -- PCI/X Read&Write Buffer */ - struct { - uint64_t p_buf_addr; /* 0x000{A00,,,AF0} */ - uint64_t p_buf_attr; /* 0X000{A08,,,AF8} */ - } p_pcix_read_buf_64[16]; - - struct { - uint64_t p_buf_addr; /* 0x000{B00,,,BE0} */ - uint64_t p_buf_attr; /* 0x000{B08,,,BE8} */ - uint64_t p_buf_valid; /* 0x000{B10,,,BF0} */ - uint64_t __pad1; /* 0x000{B18,,,BF8} */ - } p_pcix_write_buf_64[8]; - - /* End of Local Registers -- Start of Address Map space */ - - char _pad_000c00[0x010000 - 0x000c00]; - - /* 0x010000-0x011fff -- Internal ATE RAM (Auto Parity Generation) */ - uint64_t p_int_ate_ram[1024]; /* 0x010000-0x011fff */ - - /* 0x012000-0x013fff -- Internal ATE RAM (Manual Parity Generation) */ - uint64_t p_int_ate_ram_mp[1024]; /* 0x012000-0x013fff */ - - char _pad_014000[0x18000 - 0x014000]; - - /* 0x18000-0x197F8 -- PIC Write Request Ram */ - uint64_t p_wr_req_lower[256]; /* 0x18000 - 0x187F8 */ - uint64_t p_wr_req_upper[256]; /* 0x18800 - 0x18FF8 */ - uint64_t p_wr_req_parity[256]; /* 0x19000 - 0x197F8 */ - - char _pad_019800[0x20000 - 0x019800]; - - /* 0x020000-0x027FFF -- PCI Device Configuration Spaces */ - union { - uint8_t c[0x1000 / 1]; /* 0x02{0000,,,7FFF} */ - uint16_t s[0x1000 / 2]; /* 0x02{0000,,,7FFF} */ - uint32_t l[0x1000 / 4]; /* 0x02{0000,,,7FFF} */ - uint64_t d[0x1000 / 8]; /* 0x02{0000,,,7FFF} */ - union { - uint8_t c[0x100 / 1]; - uint16_t s[0x100 / 2]; - uint32_t l[0x100 / 4]; - uint64_t d[0x100 / 8]; - } f[8]; - } p_type0_cfg_dev[8]; /* 0x02{0000,,,7FFF} */ - - /* 0x028000-0x028FFF -- PCI Type 1 Configuration Space */ - union { - uint8_t c[0x1000 / 1]; /* 0x028000-0x029000 */ - uint16_t s[0x1000 / 2]; /* 0x028000-0x029000 */ - uint32_t l[0x1000 / 4]; /* 0x028000-0x029000 */ - uint64_t d[0x1000 / 8]; /* 0x028000-0x029000 */ - union { - uint8_t c[0x100 / 1]; - uint16_t s[0x100 / 2]; - uint32_t l[0x100 / 4]; - uint64_t d[0x100 / 8]; - } f[8]; - } p_type1_cfg; /* 0x028000-0x029000 */ - - char _pad_029000[0x030000-0x029000]; - - /* 0x030000-0x030007 -- PCI Interrupt Acknowledge Cycle */ - union { - uint8_t c[8 / 1]; - uint16_t s[8 / 2]; - uint32_t l[8 / 4]; - uint64_t d[8 / 8]; - } p_pci_iack; /* 0x030000-0x030007 */ - - char _pad_030007[0x040000-0x030008]; - - /* 0x040000-0x030007 -- PCIX Special Cycle */ - union { - uint8_t c[8 / 1]; - uint16_t s[8 / 2]; - uint32_t l[8 / 4]; - uint64_t d[8 / 8]; - } p_pcix_cycle; /* 0x040000-0x040007 */ -}; - -#endif /* _ASM_IA64_SN_PCI_PIC_H */ diff --git a/arch/ia64/sn/include/pci/tiocp.h b/arch/ia64/sn/include/pci/tiocp.h deleted file mode 100644 index f07c83b..0000000 --- a/arch/ia64/sn/include/pci/tiocp.h +++ /dev/null @@ -1,256 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2003-2004 Silicon Graphics, Inc. All rights reserved. - */ -#ifndef _ASM_IA64_SN_PCI_TIOCP_H -#define _ASM_IA64_SN_PCI_TIOCP_H - -#define TIOCP_HOST_INTR_ADDR 0x003FFFFFFFFFFFFFUL -#define TIOCP_PCI64_CMDTYPE_MEM (0x1ull << 60) - - -/***************************************************************************** - *********************** TIOCP MMR structure mapping *************************** - *****************************************************************************/ - -struct tiocp{ - - /* 0x000000-0x00FFFF -- Local Registers */ - - /* 0x000000-0x000057 -- (Legacy Widget Space) Configuration */ - uint64_t cp_id; /* 0x000000 */ - uint64_t cp_stat; /* 0x000008 */ - uint64_t cp_err_upper; /* 0x000010 */ - uint64_t cp_err_lower; /* 0x000018 */ - #define cp_err cp_err_lower - uint64_t cp_control; /* 0x000020 */ - uint64_t cp_req_timeout; /* 0x000028 */ - uint64_t cp_intr_upper; /* 0x000030 */ - uint64_t cp_intr_lower; /* 0x000038 */ - #define cp_intr cp_intr_lower - uint64_t cp_err_cmdword; /* 0x000040 */ - uint64_t _pad_000048; /* 0x000048 */ - uint64_t cp_tflush; /* 0x000050 */ - - /* 0x000058-0x00007F -- Bridge-specific Configuration */ - uint64_t cp_aux_err; /* 0x000058 */ - uint64_t cp_resp_upper; /* 0x000060 */ - uint64_t cp_resp_lower; /* 0x000068 */ - #define cp_resp cp_resp_lower - uint64_t cp_tst_pin_ctrl; /* 0x000070 */ - uint64_t cp_addr_lkerr; /* 0x000078 */ - - /* 0x000080-0x00008F -- PMU & MAP */ - uint64_t cp_dir_map; /* 0x000080 */ - uint64_t _pad_000088; /* 0x000088 */ - - /* 0x000090-0x00009F -- SSRAM */ - uint64_t cp_map_fault; /* 0x000090 */ - uint64_t _pad_000098; /* 0x000098 */ - - /* 0x0000A0-0x0000AF -- Arbitration */ - uint64_t cp_arb; /* 0x0000A0 */ - uint64_t _pad_0000A8; /* 0x0000A8 */ - - /* 0x0000B0-0x0000BF -- Number In A Can or ATE Parity Error */ - uint64_t cp_ate_parity_err; /* 0x0000B0 */ - uint64_t _pad_0000B8; /* 0x0000B8 */ - - /* 0x0000C0-0x0000FF -- PCI/GIO */ - uint64_t cp_bus_timeout; /* 0x0000C0 */ - uint64_t cp_pci_cfg; /* 0x0000C8 */ - uint64_t cp_pci_err_upper; /* 0x0000D0 */ - uint64_t cp_pci_err_lower; /* 0x0000D8 */ - #define cp_pci_err cp_pci_err_lower - uint64_t _pad_0000E0[4]; /* 0x0000{E0..F8} */ - - /* 0x000100-0x0001FF -- Interrupt */ - uint64_t cp_int_status; /* 0x000100 */ - uint64_t cp_int_enable; /* 0x000108 */ - uint64_t cp_int_rst_stat; /* 0x000110 */ - uint64_t cp_int_mode; /* 0x000118 */ - uint64_t cp_int_device; /* 0x000120 */ - uint64_t cp_int_host_err; /* 0x000128 */ - uint64_t cp_int_addr[8]; /* 0x0001{30,,,68} */ - uint64_t cp_err_int_view; /* 0x000170 */ - uint64_t cp_mult_int; /* 0x000178 */ - uint64_t cp_force_always[8]; /* 0x0001{80,,,B8} */ - uint64_t cp_force_pin[8]; /* 0x0001{C0,,,F8} */ - - /* 0x000200-0x000298 -- Device */ - uint64_t cp_device[4]; /* 0x0002{00,,,18} */ - uint64_t _pad_000220[4]; /* 0x0002{20,,,38} */ - uint64_t cp_wr_req_buf[4]; /* 0x0002{40,,,58} */ - uint64_t _pad_000260[4]; /* 0x0002{60,,,78} */ - uint64_t cp_rrb_map[2]; /* 0x0002{80,,,88} */ - #define cp_even_resp cp_rrb_map[0] /* 0x000280 */ - #define cp_odd_resp cp_rrb_map[1] /* 0x000288 */ - uint64_t cp_resp_status; /* 0x000290 */ - uint64_t cp_resp_clear; /* 0x000298 */ - - uint64_t _pad_0002A0[12]; /* 0x0002{A0..F8} */ - - /* 0x000300-0x0003F8 -- Buffer Address Match Registers */ - struct { - uint64_t upper; /* 0x0003{00,,,F0} */ - uint64_t lower; /* 0x0003{08,,,F8} */ - } cp_buf_addr_match[16]; - - /* 0x000400-0x0005FF -- Performance Monitor Registers (even only) */ - struct { - uint64_t flush_w_touch; /* 0x000{400,,,5C0} */ - uint64_t flush_wo_touch; /* 0x000{408,,,5C8} */ - uint64_t inflight; /* 0x000{410,,,5D0} */ - uint64_t prefetch; /* 0x000{418,,,5D8} */ - uint64_t total_pci_retry; /* 0x000{420,,,5E0} */ - uint64_t max_pci_retry; /* 0x000{428,,,5E8} */ - uint64_t max_latency; /* 0x000{430,,,5F0} */ - uint64_t clear_all; /* 0x000{438,,,5F8} */ - } cp_buf_count[8]; - - - /* 0x000600-0x0009FF -- PCI/X registers */ - uint64_t cp_pcix_bus_err_addr; /* 0x000600 */ - uint64_t cp_pcix_bus_err_attr; /* 0x000608 */ - uint64_t cp_pcix_bus_err_data; /* 0x000610 */ - uint64_t cp_pcix_pio_split_addr; /* 0x000618 */ - uint64_t cp_pcix_pio_split_attr; /* 0x000620 */ - uint64_t cp_pcix_dma_req_err_attr; /* 0x000628 */ - uint64_t cp_pcix_dma_req_err_addr; /* 0x000630 */ - uint64_t cp_pcix_timeout; /* 0x000638 */ - - uint64_t _pad_000640[24]; /* 0x000{640,,,6F8} */ - - /* 0x000700-0x000737 -- Debug Registers */ - uint64_t cp_ct_debug_ctl; /* 0x000700 */ - uint64_t cp_br_debug_ctl; /* 0x000708 */ - uint64_t cp_mux3_debug_ctl; /* 0x000710 */ - uint64_t cp_mux4_debug_ctl; /* 0x000718 */ - uint64_t cp_mux5_debug_ctl; /* 0x000720 */ - uint64_t cp_mux6_debug_ctl; /* 0x000728 */ - uint64_t cp_mux7_debug_ctl; /* 0x000730 */ - - uint64_t _pad_000738[89]; /* 0x000{738,,,9F8} */ - - /* 0x000A00-0x000BFF -- PCI/X Read&Write Buffer */ - struct { - uint64_t cp_buf_addr; /* 0x000{A00,,,AF0} */ - uint64_t cp_buf_attr; /* 0X000{A08,,,AF8} */ - } cp_pcix_read_buf_64[16]; - - struct { - uint64_t cp_buf_addr; /* 0x000{B00,,,BE0} */ - uint64_t cp_buf_attr; /* 0x000{B08,,,BE8} */ - uint64_t cp_buf_valid; /* 0x000{B10,,,BF0} */ - uint64_t __pad1; /* 0x000{B18,,,BF8} */ - } cp_pcix_write_buf_64[8]; - - /* End of Local Registers -- Start of Address Map space */ - - char _pad_000c00[0x010000 - 0x000c00]; - - /* 0x010000-0x011FF8 -- Internal ATE RAM (Auto Parity Generation) */ - uint64_t cp_int_ate_ram[1024]; /* 0x010000-0x011FF8 */ - - char _pad_012000[0x14000 - 0x012000]; - - /* 0x014000-0x015FF8 -- Internal ATE RAM (Manual Parity Generation) */ - uint64_t cp_int_ate_ram_mp[1024]; /* 0x014000-0x015FF8 */ - - char _pad_016000[0x18000 - 0x016000]; - - /* 0x18000-0x197F8 -- TIOCP Write Request Ram */ - uint64_t cp_wr_req_lower[256]; /* 0x18000 - 0x187F8 */ - uint64_t cp_wr_req_upper[256]; /* 0x18800 - 0x18FF8 */ - uint64_t cp_wr_req_parity[256]; /* 0x19000 - 0x197F8 */ - - char _pad_019800[0x1C000 - 0x019800]; - - /* 0x1C000-0x1EFF8 -- TIOCP Read Response Ram */ - uint64_t cp_rd_resp_lower[512]; /* 0x1C000 - 0x1CFF8 */ - uint64_t cp_rd_resp_upper[512]; /* 0x1D000 - 0x1DFF8 */ - uint64_t cp_rd_resp_parity[512]; /* 0x1E000 - 0x1EFF8 */ - - char _pad_01F000[0x20000 - 0x01F000]; - - /* 0x020000-0x021FFF -- Host Device (CP) Configuration Space (not used) */ - char _pad_020000[0x021000 - 0x20000]; - - /* 0x021000-0x027FFF -- PCI Device Configuration Spaces */ - union { - uint8_t c[0x1000 / 1]; /* 0x02{0000,,,7FFF} */ - uint16_t s[0x1000 / 2]; /* 0x02{0000,,,7FFF} */ - uint32_t l[0x1000 / 4]; /* 0x02{0000,,,7FFF} */ - uint64_t d[0x1000 / 8]; /* 0x02{0000,,,7FFF} */ - union { - uint8_t c[0x100 / 1]; - uint16_t s[0x100 / 2]; - uint32_t l[0x100 / 4]; - uint64_t d[0x100 / 8]; - } f[8]; - } cp_type0_cfg_dev[7]; /* 0x02{1000,,,7FFF} */ - - /* 0x028000-0x028FFF -- PCI Type 1 Configuration Space */ - union { - uint8_t c[0x1000 / 1]; /* 0x028000-0x029000 */ - uint16_t s[0x1000 / 2]; /* 0x028000-0x029000 */ - uint32_t l[0x1000 / 4]; /* 0x028000-0x029000 */ - uint64_t d[0x1000 / 8]; /* 0x028000-0x029000 */ - union { - uint8_t c[0x100 / 1]; - uint16_t s[0x100 / 2]; - uint32_t l[0x100 / 4]; - uint64_t d[0x100 / 8]; - } f[8]; - } cp_type1_cfg; /* 0x028000-0x029000 */ - - char _pad_029000[0x030000-0x029000]; - - /* 0x030000-0x030007 -- PCI Interrupt Acknowledge Cycle */ - union { - uint8_t c[8 / 1]; - uint16_t s[8 / 2]; - uint32_t l[8 / 4]; - uint64_t d[8 / 8]; - } cp_pci_iack; /* 0x030000-0x030007 */ - - char _pad_030007[0x040000-0x030008]; - - /* 0x040000-0x040007 -- PCIX Special Cycle */ - union { - uint8_t c[8 / 1]; - uint16_t s[8 / 2]; - uint32_t l[8 / 4]; - uint64_t d[8 / 8]; - } cp_pcix_cycle; /* 0x040000-0x040007 */ - - char _pad_040007[0x200000-0x040008]; - - /* 0x200000-0x7FFFFF -- PCI/GIO Device Spaces */ - union { - uint8_t c[0x100000 / 1]; - uint16_t s[0x100000 / 2]; - uint32_t l[0x100000 / 4]; - uint64_t d[0x100000 / 8]; - } cp_devio_raw[6]; /* 0x200000-0x7FFFFF */ - - #define cp_devio(n) cp_devio_raw[((n)<2)?(n*2):(n+2)] - - char _pad_800000[0xA00000-0x800000]; - - /* 0xA00000-0xBFFFFF -- PCI/GIO Device Spaces w/flush */ - union { - uint8_t c[0x100000 / 1]; - uint16_t s[0x100000 / 2]; - uint32_t l[0x100000 / 4]; - uint64_t d[0x100000 / 8]; - } cp_devio_raw_flush[6]; /* 0xA00000-0xBFFFFF */ - - #define cp_devio_flush(n) cp_devio_raw_flush[((n)<2)?(n*2):(n+2)] - -}; - -#endif /* _ASM_IA64_SN_PCI_TIOCP_H */ diff --git a/arch/ia64/sn/include/xtalk/hubdev.h b/arch/ia64/sn/include/xtalk/hubdev.h index 868e7ec..580a1c0 100644 --- a/arch/ia64/sn/include/xtalk/hubdev.h +++ b/arch/ia64/sn/include/xtalk/hubdev.h @@ -8,6 +8,8 @@ #ifndef _ASM_IA64_SN_XTALK_HUBDEV_H #define _ASM_IA64_SN_XTALK_HUBDEV_H +#include "xtalk/xwidgetdev.h" + #define HUB_WIDGET_ID_MAX 0xf #define DEV_PER_WIDGET (2*2*8) #define IIO_ITTE_WIDGET_BITS 4 /* size of widget field */ diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 783eb43..a67f39e 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -9,21 +9,28 @@ #include <linux/bootmem.h> #include <linux/nodemask.h> #include <asm/sn/types.h> -#include <asm/sn/sn_sal.h> #include <asm/sn/addrs.h> -#include <asm/sn/pcibus_provider_defs.h> -#include <asm/sn/pcidev.h> -#include "pci/pcibr_provider.h" -#include "xtalk/xwidgetdev.h" #include <asm/sn/geo.h> -#include "xtalk/hubdev.h" #include <asm/sn/io.h> +#include <asm/sn/pcibr_provider.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> #include <asm/sn/simulator.h> +#include <asm/sn/sn_sal.h> #include <asm/sn/tioca_provider.h> +#include "xtalk/hubdev.h" +#include "xtalk/xwidgetdev.h" -char master_baseio_wid; nasid_t master_nasid = INVALID_NASID; /* Partition Master */ +static struct list_head sn_sysdata_list; + +/* sysdata list struct */ +struct sysdata_el { + struct list_head entry; + void *sysdata; +}; + struct slab_info { struct hubdev_info hubdev; }; @@ -138,23 +145,6 @@ sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev, } /* - * sn_alloc_pci_sysdata() - This routine allocates a pci controller - * which is expected as the pci_dev and pci_bus sysdata by the Linux - * PCI infrastructure. - */ -static inline struct pci_controller *sn_alloc_pci_sysdata(void) -{ - struct pci_controller *pci_sysdata; - - pci_sysdata = kmalloc(sizeof(*pci_sysdata), GFP_KERNEL); - if (!pci_sysdata) - BUG(); - - memset(pci_sysdata, 0, sizeof(*pci_sysdata)); - return pci_sysdata; -} - -/* * sn_fixup_ionodes() - This routine initializes the HUB data strcuture for * each node in the system. */ @@ -221,22 +211,34 @@ static void sn_fixup_ionodes(void) } +void sn_pci_unfixup_slot(struct pci_dev *dev) +{ + struct pci_dev *host_pci_dev = SN_PCIDEV_INFO(dev)->host_pci_dev; + + sn_irq_unfixup(dev); + pci_dev_put(host_pci_dev); + pci_dev_put(dev); +} + /* * sn_pci_fixup_slot() - This routine sets up a slot's resources * consistent with the Linux PCI abstraction layer. Resources acquired * from our PCI provider include PIO maps to BAR space and interrupt * objects. */ -static void sn_pci_fixup_slot(struct pci_dev *dev) +void sn_pci_fixup_slot(struct pci_dev *dev) { int idx; int segment = 0; - uint64_t size; - struct sn_irq_info *sn_irq_info; - struct pci_dev *host_pci_dev; int status = 0; struct pcibus_bussoft *bs; + struct pci_bus *host_pci_bus; + struct pci_dev *host_pci_dev; + struct sn_irq_info *sn_irq_info; + unsigned long size; + unsigned int bus_no, devfn; + pci_dev_get(dev); /* for the sysdata pointer */ dev->sysdata = kmalloc(sizeof(struct pcidev_info), GFP_KERNEL); if (SN_PCIDEV_INFO(dev) <= 0) BUG(); /* Cannot afford to run out of memory */ @@ -253,7 +255,7 @@ static void sn_pci_fixup_slot(struct pci_dev *dev) (u64) __pa(SN_PCIDEV_INFO(dev)), (u64) __pa(sn_irq_info)); if (status) - BUG(); /* Cannot get platform pci device information information */ + BUG(); /* Cannot get platform pci device information */ /* Copy over PIO Mapped Addresses */ for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) { @@ -275,15 +277,21 @@ static void sn_pci_fixup_slot(struct pci_dev *dev) dev->resource[idx].parent = &iomem_resource; } - /* set up host bus linkages */ - bs = SN_PCIBUS_BUSSOFT(dev->bus); - host_pci_dev = - pci_find_slot(SN_PCIDEV_INFO(dev)->pdi_slot_host_handle >> 32, - SN_PCIDEV_INFO(dev)-> - pdi_slot_host_handle & 0xffffffff); + /* + * Using the PROMs values for the PCI host bus, get the Linux + * PCI host_pci_dev struct and set up host bus linkages + */ + + bus_no = SN_PCIDEV_INFO(dev)->pdi_slot_host_handle >> 32; + devfn = SN_PCIDEV_INFO(dev)->pdi_slot_host_handle & 0xffffffff; + host_pci_bus = pci_find_bus(pci_domain_nr(dev->bus), bus_no); + host_pci_dev = pci_get_slot(host_pci_bus, devfn); + + SN_PCIDEV_INFO(dev)->host_pci_dev = host_pci_dev; SN_PCIDEV_INFO(dev)->pdi_host_pcidev_info = - SN_PCIDEV_INFO(host_pci_dev); + SN_PCIDEV_INFO(host_pci_dev); SN_PCIDEV_INFO(dev)->pdi_linux_pcidev = dev; + bs = SN_PCIBUS_BUSSOFT(dev->bus); SN_PCIDEV_INFO(dev)->pdi_pcibus_info = bs; if (bs && bs->bs_asic_type < PCIIO_ASIC_MAX_TYPES) { @@ -297,6 +305,9 @@ static void sn_pci_fixup_slot(struct pci_dev *dev) SN_PCIDEV_INFO(dev)->pdi_sn_irq_info = sn_irq_info; dev->irq = SN_PCIDEV_INFO(dev)->pdi_sn_irq_info->irq_irq; sn_irq_fixup(dev, sn_irq_info); + } else { + SN_PCIDEV_INFO(dev)->pdi_sn_irq_info = NULL; + kfree(sn_irq_info); } } @@ -304,55 +315,57 @@ static void sn_pci_fixup_slot(struct pci_dev *dev) * sn_pci_controller_fixup() - This routine sets up a bus's resources * consistent with the Linux PCI abstraction layer. */ -static void sn_pci_controller_fixup(int segment, int busnum) +void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) { int status = 0; int nasid, cnode; - struct pci_bus *bus; struct pci_controller *controller; struct pcibus_bussoft *prom_bussoft_ptr; struct hubdev_info *hubdev_info; void *provider_soft; struct sn_pcibus_provider *provider; - status = - sal_get_pcibus_info((u64) segment, (u64) busnum, - (u64) ia64_tpa(&prom_bussoft_ptr)); - if (status > 0) { - return; /* bus # does not exist */ - } - + status = sal_get_pcibus_info((u64) segment, (u64) busnum, + (u64) ia64_tpa(&prom_bussoft_ptr)); + if (status > 0) + return; /*bus # does not exist */ prom_bussoft_ptr = __va(prom_bussoft_ptr); - controller = sn_alloc_pci_sysdata(); - /* controller non-zero is BUG'd in sn_alloc_pci_sysdata */ - bus = pci_scan_bus(busnum, &pci_root_ops, controller); + controller = kcalloc(1,sizeof(struct pci_controller), GFP_KERNEL); + if (!controller) + BUG(); + if (bus == NULL) { - return; /* error, or bus already scanned */ + bus = pci_scan_bus(busnum, &pci_root_ops, controller); + if (bus == NULL) + return; /* error, or bus already scanned */ + bus->sysdata = NULL; } + if (bus->sysdata) + goto error_return; /* sysdata already alloc'd */ + /* * Per-provider fixup. Copies the contents from prom to local * area and links SN_PCIBUS_BUSSOFT(). */ - if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES) { + if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES) return; /* unsupported asic type */ - } + + if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB) + goto error_return; /* no further fixup necessary */ provider = sn_pci_provider[prom_bussoft_ptr->bs_asic_type]; - if (provider == NULL) { + if (provider == NULL) return; /* no provider registerd for this asic */ - } provider_soft = NULL; - if (provider->bus_fixup) { + if (provider->bus_fixup) provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr); - } - if (provider_soft == NULL) { + if (provider_soft == NULL) return; /* fixup failed or not applicable */ - } /* * Generic bus fixup goes here. Don't reference prom_bussoft_ptr @@ -361,12 +374,47 @@ static void sn_pci_controller_fixup(int segment, int busnum) bus->sysdata = controller; PCI_CONTROLLER(bus)->platform_data = provider_soft; - nasid = NASID_GET(SN_PCIBUS_BUSSOFT(bus)->bs_base); cnode = nasid_to_cnodeid(nasid); hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); SN_PCIBUS_BUSSOFT(bus)->bs_xwidget_info = &(hubdev_info->hdi_xwidget_info[SN_PCIBUS_BUSSOFT(bus)->bs_xid]); + + return; + +error_return: + + kfree(controller); + return; +} + +void sn_bus_store_sysdata(struct pci_dev *dev) +{ + struct sysdata_el *element; + + element = kcalloc(1, sizeof(struct sysdata_el), GFP_KERNEL); + if (!element) { + dev_dbg(dev, "%s: out of memory!\n", __FUNCTION__); + return; + } + element->sysdata = dev->sysdata; + list_add(&element->entry, &sn_sysdata_list); +} + +void sn_bus_free_sysdata(void) +{ + struct sysdata_el *element; + struct list_head *list; + +sn_sysdata_free_start: + list_for_each(list, &sn_sysdata_list) { + element = list_entry(list, struct sysdata_el, entry); + list_del(&element->entry); + kfree(element->sysdata); + kfree(element); + goto sn_sysdata_free_start; + } + return; } /* @@ -403,20 +451,17 @@ static int __init sn_pci_init(void) */ ia64_max_iommu_merge_mask = ~PAGE_MASK; sn_fixup_ionodes(); - sn_irq = kmalloc(sizeof(struct sn_irq_info *) * NR_IRQS, GFP_KERNEL); - if (sn_irq <= 0) - BUG(); /* Canno afford to run out of memory. */ - memset(sn_irq, 0, sizeof(struct sn_irq_info *) * NR_IRQS); - + sn_irq_lh_init(); + INIT_LIST_HEAD(&sn_sysdata_list); sn_init_cpei_timer(); #ifdef CONFIG_PROC_FS register_sn_procfs(); #endif - for (i = 0; i < PCI_BUSES_TO_SCAN; i++) { - sn_pci_controller_fixup(0, i); - } + /* busses are not known yet ... */ + for (i = 0; i < PCI_BUSES_TO_SCAN; i++) + sn_pci_controller_fixup(0, i, NULL); /* * Generic Linux PCI Layer has created the pci_bus and pci_dev @@ -425,9 +470,8 @@ static int __init sn_pci_init(void) */ while ((pci_dev = - pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pci_dev)) != NULL) { + pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pci_dev)) != NULL) sn_pci_fixup_slot(pci_dev); - } sn_ioif_inited = 1; /* sn I/O infrastructure now initialized */ @@ -469,3 +513,8 @@ cnodeid_get_geoid(cnodeid_t cnode) } subsys_initcall(sn_pci_init); +EXPORT_SYMBOL(sn_pci_fixup_slot); +EXPORT_SYMBOL(sn_pci_unfixup_slot); +EXPORT_SYMBOL(sn_pci_controller_fixup); +EXPORT_SYMBOL(sn_bus_store_sysdata); +EXPORT_SYMBOL(sn_bus_free_sysdata); diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 0f4e8138..84d276a 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -9,13 +9,13 @@ */ #include <linux/irq.h> -#include <asm/sn/intr.h> +#include <linux/spinlock.h> #include <asm/sn/addrs.h> #include <asm/sn/arch.h> -#include "xtalk/xwidgetdev.h" +#include <asm/sn/intr.h> +#include <asm/sn/pcibr_provider.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/pcidev.h> -#include "pci/pcibr_provider.h" #include <asm/sn/shub_mmr.h> #include <asm/sn/sn_sal.h> @@ -25,7 +25,8 @@ static void unregister_intr_pda(struct sn_irq_info *sn_irq_info); extern int sn_force_interrupt_flag; extern int sn_ioif_inited; -struct sn_irq_info **sn_irq; +static struct list_head **sn_irq_lh; +static spinlock_t sn_irq_info_lock = SPIN_LOCK_UNLOCKED; /* non-IRQ lock */ static inline uint64_t sn_intr_alloc(nasid_t local_nasid, int local_widget, u64 sn_irq_info, @@ -101,7 +102,7 @@ static void sn_end_irq(unsigned int irq) nasid = get_nasid(); event_occurred = HUB_L((uint64_t *) GLOBAL_MMR_ADDR (nasid, SH_EVENT_OCCURRED)); - /* If the UART bit is set here, we may have received an + /* If the UART bit is set here, we may have received an * interrupt from the UART that the driver missed. To * make sure, we IPI ourselves to force us to look again. */ @@ -115,82 +116,84 @@ static void sn_end_irq(unsigned int irq) force_interrupt(irq); } +static void sn_irq_info_free(struct rcu_head *head); + static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask) { - struct sn_irq_info *sn_irq_info = sn_irq[irq]; - struct sn_irq_info *tmp_sn_irq_info; + struct sn_irq_info *sn_irq_info, *sn_irq_info_safe; int cpuid, cpuphys; - nasid_t t_nasid; /* nasid to target */ - int t_slice; /* slice to target */ - - /* allocate a temp sn_irq_info struct to get new target info */ - tmp_sn_irq_info = kmalloc(sizeof(*tmp_sn_irq_info), GFP_KERNEL); - if (!tmp_sn_irq_info) - return; cpuid = first_cpu(mask); cpuphys = cpu_physical_id(cpuid); - t_nasid = cpuid_to_nasid(cpuid); - t_slice = cpuid_to_slice(cpuid); - while (sn_irq_info) { - int status; - int local_widget; - uint64_t bridge = (uint64_t) sn_irq_info->irq_bridge; - nasid_t local_nasid = NASID_GET(bridge); + list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe, + sn_irq_lh[irq], list) { + uint64_t bridge; + int local_widget, status; + nasid_t local_nasid; + struct sn_irq_info *new_irq_info; + + new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC); + if (new_irq_info == NULL) + break; + memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info)); + + bridge = (uint64_t) new_irq_info->irq_bridge; + if (!bridge) { + kfree(new_irq_info); + break; /* irq is not a device interrupt */ + } - if (!bridge) - break; /* irq is not a device interrupt */ + local_nasid = NASID_GET(bridge); if (local_nasid & 1) local_widget = TIO_SWIN_WIDGETNUM(bridge); else local_widget = SWIN_WIDGETNUM(bridge); - /* Free the old PROM sn_irq_info structure */ - sn_intr_free(local_nasid, local_widget, sn_irq_info); + /* Free the old PROM new_irq_info structure */ + sn_intr_free(local_nasid, local_widget, new_irq_info); + /* Update kernels new_irq_info with new target info */ + unregister_intr_pda(new_irq_info); - /* allocate a new PROM sn_irq_info struct */ + /* allocate a new PROM new_irq_info struct */ status = sn_intr_alloc(local_nasid, local_widget, - __pa(tmp_sn_irq_info), irq, t_nasid, - t_slice); - - if (status == 0) { - /* Update kernels sn_irq_info with new target info */ - unregister_intr_pda(sn_irq_info); - sn_irq_info->irq_cpuid = cpuid; - sn_irq_info->irq_nasid = t_nasid; - sn_irq_info->irq_slice = t_slice; - sn_irq_info->irq_xtalkaddr = - tmp_sn_irq_info->irq_xtalkaddr; - sn_irq_info->irq_cookie = tmp_sn_irq_info->irq_cookie; - register_intr_pda(sn_irq_info); - - if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type)) { - pcibr_change_devices_irq(sn_irq_info); - } + __pa(new_irq_info), irq, + cpuid_to_nasid(cpuid), + cpuid_to_slice(cpuid)); + + /* SAL call failed */ + if (status) { + kfree(new_irq_info); + break; + } + + new_irq_info->irq_cpuid = cpuid; + register_intr_pda(new_irq_info); + + if (IS_PCI_BRIDGE_ASIC(new_irq_info->irq_bridge_type)) + pcibr_change_devices_irq(new_irq_info); - sn_irq_info = sn_irq_info->irq_next; + spin_lock(&sn_irq_info_lock); + list_replace_rcu(&sn_irq_info->list, &new_irq_info->list); + spin_unlock(&sn_irq_info_lock); + call_rcu(&sn_irq_info->rcu, sn_irq_info_free); #ifdef CONFIG_SMP - set_irq_affinity_info((irq & 0xff), cpuphys, 0); + set_irq_affinity_info((irq & 0xff), cpuphys, 0); #endif - } else { - break; /* snp_affinity failed the intr_alloc */ - } } - kfree(tmp_sn_irq_info); } struct hw_interrupt_type irq_type_sn = { - "SN hub", - sn_startup_irq, - sn_shutdown_irq, - sn_enable_irq, - sn_disable_irq, - sn_ack_irq, - sn_end_irq, - sn_set_affinity_irq + .typename = "SN hub", + .startup = sn_startup_irq, + .shutdown = sn_shutdown_irq, + .enable = sn_enable_irq, + .disable = sn_disable_irq, + .ack = sn_ack_irq, + .end = sn_end_irq, + .set_affinity = sn_set_affinity_irq }; unsigned int sn_local_vector_to_irq(u8 vector) @@ -231,19 +234,18 @@ static void unregister_intr_pda(struct sn_irq_info *sn_irq_info) struct sn_irq_info *tmp_irq_info; int i, foundmatch; + rcu_read_lock(); if (pdacpu(cpu)->sn_last_irq == irq) { foundmatch = 0; - for (i = pdacpu(cpu)->sn_last_irq - 1; i; i--) { - tmp_irq_info = sn_irq[i]; - while (tmp_irq_info) { + for (i = pdacpu(cpu)->sn_last_irq - 1; + i && !foundmatch; i--) { + list_for_each_entry_rcu(tmp_irq_info, + sn_irq_lh[i], + list) { if (tmp_irq_info->irq_cpuid == cpu) { - foundmatch++; + foundmatch = 1; break; } - tmp_irq_info = tmp_irq_info->irq_next; - } - if (foundmatch) { - break; } } pdacpu(cpu)->sn_last_irq = i; @@ -251,60 +253,27 @@ static void unregister_intr_pda(struct sn_irq_info *sn_irq_info) if (pdacpu(cpu)->sn_first_irq == irq) { foundmatch = 0; - for (i = pdacpu(cpu)->sn_first_irq + 1; i < NR_IRQS; i++) { - tmp_irq_info = sn_irq[i]; - while (tmp_irq_info) { + for (i = pdacpu(cpu)->sn_first_irq + 1; + i < NR_IRQS && !foundmatch; i++) { + list_for_each_entry_rcu(tmp_irq_info, + sn_irq_lh[i], + list) { if (tmp_irq_info->irq_cpuid == cpu) { - foundmatch++; + foundmatch = 1; break; } - tmp_irq_info = tmp_irq_info->irq_next; - } - if (foundmatch) { - break; } } pdacpu(cpu)->sn_first_irq = ((i == NR_IRQS) ? 0 : i); } + rcu_read_unlock(); } -struct sn_irq_info *sn_irq_alloc(nasid_t local_nasid, int local_widget, int irq, - nasid_t nasid, int slice) +static void sn_irq_info_free(struct rcu_head *head) { struct sn_irq_info *sn_irq_info; - int status; - - sn_irq_info = kmalloc(sizeof(*sn_irq_info), GFP_KERNEL); - if (sn_irq_info == NULL) - return NULL; - - memset(sn_irq_info, 0x0, sizeof(*sn_irq_info)); - - status = - sn_intr_alloc(local_nasid, local_widget, __pa(sn_irq_info), irq, - nasid, slice); - - if (status) { - kfree(sn_irq_info); - return NULL; - } else { - return sn_irq_info; - } -} - -void sn_irq_free(struct sn_irq_info *sn_irq_info) -{ - uint64_t bridge = (uint64_t) sn_irq_info->irq_bridge; - nasid_t local_nasid = NASID_GET(bridge); - int local_widget; - - if (local_nasid & 1) /* tio check */ - local_widget = TIO_SWIN_WIDGETNUM(bridge); - else - local_widget = SWIN_WIDGETNUM(bridge); - - sn_intr_free(local_nasid, local_widget, sn_irq_info); + sn_irq_info = container_of(head, struct sn_irq_info, rcu); kfree(sn_irq_info); } @@ -314,30 +283,54 @@ void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info) int slice = sn_irq_info->irq_slice; int cpu = nasid_slice_to_cpuid(nasid, slice); + pci_dev_get(pci_dev); sn_irq_info->irq_cpuid = cpu; sn_irq_info->irq_pciioinfo = SN_PCIDEV_INFO(pci_dev); /* link it into the sn_irq[irq] list */ - sn_irq_info->irq_next = sn_irq[sn_irq_info->irq_irq]; - sn_irq[sn_irq_info->irq_irq] = sn_irq_info; + spin_lock(&sn_irq_info_lock); + list_add_rcu(&sn_irq_info->list, sn_irq_lh[sn_irq_info->irq_irq]); + spin_unlock(&sn_irq_info_lock); (void)register_intr_pda(sn_irq_info); } +void sn_irq_unfixup(struct pci_dev *pci_dev) +{ + struct sn_irq_info *sn_irq_info; + + /* Only cleanup IRQ stuff if this device has a host bus context */ + if (!SN_PCIDEV_BUSSOFT(pci_dev)) + return; + + sn_irq_info = SN_PCIDEV_INFO(pci_dev)->pdi_sn_irq_info; + if (!sn_irq_info || !sn_irq_info->irq_irq) { + kfree(sn_irq_info); + return; + } + + unregister_intr_pda(sn_irq_info); + spin_lock(&sn_irq_info_lock); + list_del_rcu(&sn_irq_info->list); + spin_unlock(&sn_irq_info_lock); + call_rcu(&sn_irq_info->rcu, sn_irq_info_free); + pci_dev_put(pci_dev); +} + static void force_interrupt(int irq) { struct sn_irq_info *sn_irq_info; if (!sn_ioif_inited) return; - sn_irq_info = sn_irq[irq]; - while (sn_irq_info) { + + rcu_read_lock(); + list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[irq], list) { if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type) && - (sn_irq_info->irq_bridge != NULL)) { + (sn_irq_info->irq_bridge != NULL)) pcibr_force_interrupt(sn_irq_info); - } - sn_irq_info = sn_irq_info->irq_next; } + rcu_read_unlock(); } /* @@ -402,19 +395,41 @@ static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info) void sn_lb_int_war_check(void) { + struct sn_irq_info *sn_irq_info; int i; if (!sn_ioif_inited || pda->sn_first_irq == 0) return; + + rcu_read_lock(); for (i = pda->sn_first_irq; i <= pda->sn_last_irq; i++) { - struct sn_irq_info *sn_irq_info = sn_irq[i]; - while (sn_irq_info) { - /* Only call for PCI bridges that are fully initialized. */ + list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[i], list) { + /* + * Only call for PCI bridges that are fully + * initialized. + */ if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type) && - (sn_irq_info->irq_bridge != NULL)) { + (sn_irq_info->irq_bridge != NULL)) sn_check_intr(i, sn_irq_info); - } - sn_irq_info = sn_irq_info->irq_next; } } + rcu_read_unlock(); +} + +void sn_irq_lh_init(void) +{ + int i; + + sn_irq_lh = kmalloc(sizeof(struct list_head *) * NR_IRQS, GFP_KERNEL); + if (!sn_irq_lh) + panic("SN PCI INIT: Failed to allocate memory for PCI init\n"); + + for (i = 0; i < NR_IRQS; i++) { + sn_irq_lh[i] = kmalloc(sizeof(struct list_head), GFP_KERNEL); + if (!sn_irq_lh[i]) + panic("SN PCI INIT: Failed IRQ memory allocation\n"); + + INIT_LIST_HEAD(sn_irq_lh[i]); + } + } diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 22e10d2..7c7fe44 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -270,7 +270,7 @@ void __init sn_setup(char **cmdline_p) { long status, ticks_per_sec, drift; int pxm; - int major = sn_sal_rev_major(), minor = sn_sal_rev_minor(); + u32 version = sn_sal_rev(); extern void sn_cpu_init(void); ia64_sn_plat_set_error_handling_features(); @@ -308,22 +308,21 @@ void __init sn_setup(char **cmdline_p) * support here so we don't have to listen to failed keyboard probe * messages. */ - if ((major < 2 || (major == 2 && minor <= 9)) && - acpi_kbd_controller_present) { + if (version <= 0x0209 && acpi_kbd_controller_present) { printk(KERN_INFO "Disabling legacy keyboard support as prom " "is too old and doesn't provide FADT\n"); acpi_kbd_controller_present = 0; } - printk("SGI SAL version %x.%02x\n", major, minor); + printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF); /* * Confirm the SAL we're running on is recent enough... */ - if ((major < SN_SAL_MIN_MAJOR) || (major == SN_SAL_MIN_MAJOR && - minor < SN_SAL_MIN_MINOR)) { + if (version < SN_SAL_MIN_VERSION) { printk(KERN_ERR "This kernel needs SGI SAL version >= " - "%x.%02x\n", SN_SAL_MIN_MAJOR, SN_SAL_MIN_MINOR); + "%x.%02x\n", SN_SAL_MIN_VERSION >> 8, + SN_SAL_MIN_VERSION & 0x00FF); panic("PROM version too old\n"); } diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c index 8716f4d..c1cbcd1 100644 --- a/arch/ia64/sn/kernel/tiocx.c +++ b/arch/ia64/sn/kernel/tiocx.c @@ -14,6 +14,7 @@ #include <linux/proc_fs.h> #include <linux/device.h> #include <linux/delay.h> +#include <asm/system.h> #include <asm/uaccess.h> #include <asm/sn/sn_sal.h> #include <asm/sn/addrs.h> @@ -481,6 +482,9 @@ static int __init tiocx_init(void) cnodeid_t cnodeid; int found_tiocx_device = 0; + if (!ia64_platform_is("sn2")) + return -ENODEV; + bus_register(&tiocx_bus_type); for (cnodeid = 0; cnodeid < MAX_COMPACT_NODES; cnodeid++) { diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index 5da9bdb..a2f7a88 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -11,9 +11,10 @@ #include <linux/module.h> #include <asm/dma.h> -#include <asm/sn/sn_sal.h> +#include <asm/sn/pcibr_provider.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/pcidev.h> +#include <asm/sn/sn_sal.h> #define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset) #define SG_ENT_PHYS_ADDRESS(SG) virt_to_phys(SG_ENT_VIRT_ADDRESS(SG)) diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c index 0e47bce8..d1647b8 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_ate.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c @@ -8,9 +8,9 @@ #include <linux/types.h> #include <asm/sn/sn_sal.h> +#include <asm/sn/pcibr_provider.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/pcidev.h> -#include "pci/pcibr_provider.h" int pcibr_invalidate_ate = 0; /* by default don't invalidate ATE on free */ diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c index 64af2b2..b058dc2 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -8,18 +8,17 @@ #include <linux/types.h> #include <linux/pci.h> -#include <asm/sn/sn_sal.h> +#include <asm/sn/addrs.h> #include <asm/sn/geo.h> -#include "xtalk/xwidgetdev.h" -#include "xtalk/hubdev.h" +#include <asm/sn/pcibr_provider.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/pcidev.h> -#include "pci/tiocp.h" -#include "pci/pic.h" -#include "pci/pcibr_provider.h" -#include "pci/tiocp.h" +#include <asm/sn/pic.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/tiocp.h> #include "tio.h" -#include <asm/sn/addrs.h> +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" extern int sn_ioif_inited; diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index 3893999..9813da5 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -6,18 +6,51 @@ * Copyright (C) 2001-2004 Silicon Graphics, Inc. All rights reserved. */ -#include <linux/types.h> #include <linux/interrupt.h> +#include <linux/types.h> #include <linux/pci.h> -#include <asm/sn/sn_sal.h> -#include "xtalk/xwidgetdev.h" +#include <asm/sn/addrs.h> #include <asm/sn/geo.h> -#include "xtalk/hubdev.h" +#include <asm/sn/pcibr_provider.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/pcidev.h> -#include "pci/pcibr_provider.h" -#include <asm/sn/addrs.h> +#include <asm/sn/sn_sal.h> +#include "xtalk/xwidgetdev.h" +#include "xtalk/hubdev.h" + +int +sal_pcibr_slot_enable(struct pcibus_info *soft, int device, void *resp) +{ + struct ia64_sal_retval ret_stuff; + uint64_t busnum; + + ret_stuff.status = 0; + ret_stuff.v0 = 0; + busnum = soft->pbi_buscommon.bs_persist_busnum; + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_SLOT_ENABLE, (u64) busnum, + (u64) device, (u64) resp, 0, 0, 0, 0); + + return (int)ret_stuff.v0; +} + +int +sal_pcibr_slot_disable(struct pcibus_info *soft, int device, int action, + void *resp) +{ + struct ia64_sal_retval ret_stuff; + uint64_t busnum; + + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + busnum = soft->pbi_buscommon.bs_persist_busnum; + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_SLOT_DISABLE, + (u64) busnum, (u64) device, (u64) action, + (u64) resp, 0, 0, 0); + + return (int)ret_stuff.v0; +} static int sal_pcibr_error_interrupt(struct pcibus_info *soft) { @@ -188,3 +221,6 @@ pcibr_init_provider(void) return 0; } + +EXPORT_SYMBOL_GPL(sal_pcibr_slot_enable); +EXPORT_SYMBOL_GPL(sal_pcibr_slot_disable); diff --git a/arch/ia64/sn/pci/pcibr/pcibr_reg.c b/arch/ia64/sn/pci/pcibr/pcibr_reg.c index 865c11c..21426d0 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_reg.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_reg.c @@ -6,13 +6,13 @@ * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved. */ -#include <linux/types.h> #include <linux/interrupt.h> +#include <linux/types.h> +#include <asm/sn/pcibr_provider.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/pcidev.h> -#include "pci/tiocp.h" -#include "pci/pic.h" -#include "pci/pcibr_provider.h" +#include <asm/sn/pic.h> +#include <asm/sn/tiocp.h> union br_ptr { struct tiocp tio; diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c index 05aa8c2f..51cc4e6 100644 --- a/arch/ia64/sn/pci/tioca_provider.c +++ b/arch/ia64/sn/pci/tioca_provider.c @@ -589,8 +589,7 @@ tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft) /* sanity check prom rev */ - if (sn_sal_rev_major() < 4 || - (sn_sal_rev_major() == 4 && sn_sal_rev_minor() < 6)) { + if (sn_sal_rev() < 0x0406) { printk (KERN_ERR "%s: SGI prom rev 4.06 or greater required " "for tioca support\n", __FUNCTION__); diff --git a/arch/m32r/kernel/setup_m32700ut.c b/arch/m32r/kernel/setup_m32700ut.c index b014e2c..a146b24 100644 --- a/arch/m32r/kernel/setup_m32700ut.c +++ b/arch/m32r/kernel/setup_m32700ut.c @@ -3,8 +3,8 @@ * * Setup routines for Renesas M32700UT Board * - * Copyright (c) 2002 Hiroyuki Kondo, Hirokazu Takata, - * Hitoshi Yamamoto, Takeo Takahashi + * Copyright (c) 2002-2005 Hiroyuki Kondo, Hirokazu Takata, + * Hitoshi Yamamoto, Takeo Takahashi * * This file is subject to the terms and conditions of the GNU General * Public License. See the file "COPYING" in the main directory of this @@ -435,7 +435,7 @@ void __init init_IRQ(void) icu_data[M32R_IRQ_INT2].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD01; enable_m32700ut_irq(M32R_IRQ_INT2); -//#if defined(CONFIG_VIDEO_M32R_AR) +#if defined(CONFIG_VIDEO_M32R_AR) /* * INT3# is used for AR */ @@ -445,9 +445,11 @@ void __init init_IRQ(void) irq_desc[M32R_IRQ_INT3].depth = 1; icu_data[M32R_IRQ_INT3].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD10; disable_m32700ut_irq(M32R_IRQ_INT3); -//#endif /* CONFIG_VIDEO_M32R_AR */ +#endif /* CONFIG_VIDEO_M32R_AR */ } +#if defined(CONFIG_SMC91X) + #define LAN_IOSTART 0x300 #define LAN_IOEND 0x320 static struct resource smc91x_resources[] = { @@ -469,10 +471,55 @@ static struct platform_device smc91x_device = { .num_resources = ARRAY_SIZE(smc91x_resources), .resource = smc91x_resources, }; +#endif + +#if defined(CONFIG_FB_S1D13XXX) + +#include <video/s1d13xxxfb.h> +#include <asm/s1d13806.h> + +static struct s1d13xxxfb_pdata s1d13xxxfb_data = { + .initregs = s1d13xxxfb_initregs, + .initregssize = ARRAY_SIZE(s1d13xxxfb_initregs), + .platform_init_video = NULL, +#ifdef CONFIG_PM + .platform_suspend_video = NULL, + .platform_resume_video = NULL, +#endif +}; + +static struct resource s1d13xxxfb_resources[] = { + [0] = { + .start = 0x10600000UL, + .end = 0x1073FFFFUL, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = 0x10400000UL, + .end = 0x104001FFUL, + .flags = IORESOURCE_MEM, + } +}; + +static struct platform_device s1d13xxxfb_device = { + .name = S1D_DEVICENAME, + .id = 0, + .dev = { + .platform_data = &s1d13xxxfb_data, + }, + .num_resources = ARRAY_SIZE(s1d13xxxfb_resources), + .resource = s1d13xxxfb_resources, +}; +#endif static int __init platform_init(void) { +#if defined(CONFIG_SMC91X) platform_device_register(&smc91x_device); +#endif +#if defined(CONFIG_FB_S1D13XXX) + platform_device_register(&s1d13xxxfb_device); +#endif return 0; } arch_initcall(platform_init); diff --git a/arch/m32r/kernel/setup_mappi.c b/arch/m32r/kernel/setup_mappi.c index aaf8e56..4e70980 100644 --- a/arch/m32r/kernel/setup_mappi.c +++ b/arch/m32r/kernel/setup_mappi.c @@ -3,14 +3,15 @@ * * Setup routines for Renesas MAPPI Board * - * Copyright (c) 2001, 2002 Hiroyuki Kondo, Hirokazu Takata, - * Hitoshi Yamamoto + * Copyright (c) 2001-2005 Hiroyuki Kondo, Hirokazu Takata, + * Hitoshi Yamamoto */ #include <linux/config.h> #include <linux/irq.h> #include <linux/kernel.h> #include <linux/init.h> +#include <linux/device.h> #include <asm/system.h> #include <asm/m32r.h> @@ -158,3 +159,49 @@ void __init init_IRQ(void) disable_mappi_irq(M32R_IRQ_INT2); #endif /* CONFIG_M32RPCC */ } + +#if defined(CONFIG_FB_S1D13XXX) + +#include <video/s1d13xxxfb.h> +#include <asm/s1d13806.h> + +static struct s1d13xxxfb_pdata s1d13xxxfb_data = { + .initregs = s1d13xxxfb_initregs, + .initregssize = ARRAY_SIZE(s1d13xxxfb_initregs), + .platform_init_video = NULL, +#ifdef CONFIG_PM + .platform_suspend_video = NULL, + .platform_resume_video = NULL, +#endif +}; + +static struct resource s1d13xxxfb_resources[] = { + [0] = { + .start = 0x10200000UL, + .end = 0x1033FFFFUL, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = 0x10000000UL, + .end = 0x100001FFUL, + .flags = IORESOURCE_MEM, + } +}; + +static struct platform_device s1d13xxxfb_device = { + .name = S1D_DEVICENAME, + .id = 0, + .dev = { + .platform_data = &s1d13xxxfb_data, + }, + .num_resources = ARRAY_SIZE(s1d13xxxfb_resources), + .resource = s1d13xxxfb_resources, +}; + +static int __init platform_init(void) +{ + platform_device_register(&s1d13xxxfb_device); + return 0; +} +arch_initcall(platform_init); +#endif diff --git a/arch/m32r/kernel/setup_mappi2.c b/arch/m32r/kernel/setup_mappi2.c index 38d5e9a..a1d8015 100644 --- a/arch/m32r/kernel/setup_mappi2.c +++ b/arch/m32r/kernel/setup_mappi2.c @@ -3,8 +3,8 @@ * * Setup routines for Renesas MAPPI-II(M3A-ZA36) Board * - * Copyright (c) 2001, 2002 Hiroyuki Kondo, Hirokazu Takata, - * Hitoshi Yamamoto, Mamoru Sakugawa + * Copyright (c) 2001-2005 Hiroyuki Kondo, Hirokazu Takata, + * Hitoshi Yamamoto, Mamoru Sakugawa */ #include <linux/config.h> diff --git a/arch/m32r/kernel/setup_mappi3.c b/arch/m32r/kernel/setup_mappi3.c index 3d60a85..a76412e 100644 --- a/arch/m32r/kernel/setup_mappi3.c +++ b/arch/m32r/kernel/setup_mappi3.c @@ -3,8 +3,8 @@ * * Setup routines for Renesas MAPPI-III(M3A-2170) Board * - * Copyright (c) 2001-2005 Hiroyuki Kondo, Hirokazu Takata, - * Hitoshi Yamamoto, Mamoru Sakugawa + * Copyright (c) 2001-2005 Hiroyuki Kondo, Hirokazu Takata, + * Hitoshi Yamamoto, Mamoru Sakugawa */ #include <linux/config.h> @@ -178,6 +178,8 @@ void __init init_IRQ(void) #endif /* CONFIG_M32R_CFC */ } +#if defined(CONFIG_SMC91X) + #define LAN_IOSTART 0x300 #define LAN_IOEND 0x320 static struct resource smc91x_resources[] = { @@ -200,9 +202,55 @@ static struct platform_device smc91x_device = { .resource = smc91x_resources, }; +#endif + +#if defined(CONFIG_FB_S1D13XXX) + +#include <video/s1d13xxxfb.h> +#include <asm/s1d13806.h> + +static struct s1d13xxxfb_pdata s1d13xxxfb_data = { + .initregs = s1d13xxxfb_initregs, + .initregssize = ARRAY_SIZE(s1d13xxxfb_initregs), + .platform_init_video = NULL, +#ifdef CONFIG_PM + .platform_suspend_video = NULL, + .platform_resume_video = NULL, +#endif +}; + +static struct resource s1d13xxxfb_resources[] = { + [0] = { + .start = 0x1d600000UL, + .end = 0x1d73FFFFUL, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = 0x1d400000UL, + .end = 0x1d4001FFUL, + .flags = IORESOURCE_MEM, + } +}; + +static struct platform_device s1d13xxxfb_device = { + .name = S1D_DEVICENAME, + .id = 0, + .dev = { + .platform_data = &s1d13xxxfb_data, + }, + .num_resources = ARRAY_SIZE(s1d13xxxfb_resources), + .resource = s1d13xxxfb_resources, +}; +#endif + static int __init platform_init(void) { +#if defined(CONFIG_SMC91X) platform_device_register(&smc91x_device); +#endif +#if defined(CONFIG_FB_S1D13XXX) + platform_device_register(&s1d13xxxfb_device); +#endif return 0; } arch_initcall(platform_init); diff --git a/arch/m32r/kernel/setup_oaks32r.c b/arch/m32r/kernel/setup_oaks32r.c index d656640..45add5b 100644 --- a/arch/m32r/kernel/setup_oaks32r.c +++ b/arch/m32r/kernel/setup_oaks32r.c @@ -3,8 +3,8 @@ * * Setup routines for OAKS32R Board * - * Copyright (c) 2002-2004 Hiroyuki Kondo, Hirokazu Takata, - * Hitoshi Yamamoto, Mamoru Sakugawa + * Copyright (c) 2002-2005 Hiroyuki Kondo, Hirokazu Takata, + * Hitoshi Yamamoto, Mamoru Sakugawa */ #include <linux/config.h> @@ -139,5 +139,4 @@ void __init init_IRQ(void) icu_data[M32R_IRQ_SIO1_S].icucr = 0; disable_oaks32r_irq(M32R_IRQ_SIO1_S); #endif /* CONFIG_SERIAL_M32R_SIO */ - } diff --git a/arch/m32r/kernel/setup_opsput.c b/arch/m32r/kernel/setup_opsput.c index 86f4cf2..f0301f5 100644 --- a/arch/m32r/kernel/setup_opsput.c +++ b/arch/m32r/kernel/setup_opsput.c @@ -3,7 +3,7 @@ * * Setup routines for Renesas OPSPUT Board * - * Copyright (c) 2002-2004 + * Copyright (c) 2002-2005 * Hiroyuki Kondo, Hirokazu Takata, * Hitoshi Yamamoto, Takeo Takahashi, Mamoru Sakugawa * @@ -439,7 +439,7 @@ void __init init_IRQ(void) icu_data[M32R_IRQ_INT2].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD01; enable_opsput_irq(M32R_IRQ_INT2); -//#if defined(CONFIG_VIDEO_M32R_AR) +#if defined(CONFIG_VIDEO_M32R_AR) /* * INT3# is used for AR */ @@ -449,9 +449,11 @@ void __init init_IRQ(void) irq_desc[M32R_IRQ_INT3].depth = 1; icu_data[M32R_IRQ_INT3].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD10; disable_opsput_irq(M32R_IRQ_INT3); -//#endif /* CONFIG_VIDEO_M32R_AR */ +#endif /* CONFIG_VIDEO_M32R_AR */ } +#if defined(CONFIG_SMC91X) + #define LAN_IOSTART 0x300 #define LAN_IOEND 0x320 static struct resource smc91x_resources[] = { @@ -473,10 +475,55 @@ static struct platform_device smc91x_device = { .num_resources = ARRAY_SIZE(smc91x_resources), .resource = smc91x_resources, }; +#endif + +#if defined(CONFIG_FB_S1D13XXX) + +#include <video/s1d13xxxfb.h> +#include <asm/s1d13806.h> + +static struct s1d13xxxfb_pdata s1d13xxxfb_data = { + .initregs = s1d13xxxfb_initregs, + .initregssize = ARRAY_SIZE(s1d13xxxfb_initregs), + .platform_init_video = NULL, +#ifdef CONFIG_PM + .platform_suspend_video = NULL, + .platform_resume_video = NULL, +#endif +}; + +static struct resource s1d13xxxfb_resources[] = { + [0] = { + .start = 0x10600000UL, + .end = 0x1073FFFFUL, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = 0x10400000UL, + .end = 0x104001FFUL, + .flags = IORESOURCE_MEM, + } +}; + +static struct platform_device s1d13xxxfb_device = { + .name = S1D_DEVICENAME, + .id = 0, + .dev = { + .platform_data = &s1d13xxxfb_data, + }, + .num_resources = ARRAY_SIZE(s1d13xxxfb_resources), + .resource = s1d13xxxfb_resources, +}; +#endif static int __init platform_init(void) { +#if defined(CONFIG_SMC91X) platform_device_register(&smc91x_device); +#endif +#if defined(CONFIG_FB_S1D13XXX) + platform_device_register(&s1d13xxxfb_device); +#endif return 0; } arch_initcall(platform_init); diff --git a/arch/ppc/platforms/pmac_cpufreq.c b/arch/ppc/platforms/pmac_cpufreq.c index 5fdd4f6..c060524 100644 --- a/arch/ppc/platforms/pmac_cpufreq.c +++ b/arch/ppc/platforms/pmac_cpufreq.c @@ -452,7 +452,7 @@ static u32 __pmac read_gpio(struct device_node *np) return offset; } -static int __pmac pmac_cpufreq_suspend(struct cpufreq_policy *policy, u32 state) +static int __pmac pmac_cpufreq_suspend(struct cpufreq_policy *policy, pm_message_t pmsg) { /* Ok, this could be made a bit smarter, but let's be robust for now. We * always force a speed change to high speed before sleep, to make sure diff --git a/arch/ppc64/kernel/cputable.c b/arch/ppc64/kernel/cputable.c index 1d162c7..8d4c46f 100644 --- a/arch/ppc64/kernel/cputable.c +++ b/arch/ppc64/kernel/cputable.c @@ -49,160 +49,219 @@ extern void __setup_cpu_be(unsigned long offset, struct cpu_spec* spec); #endif struct cpu_spec cpu_specs[] = { - { /* Power3 */ - 0xffff0000, 0x00400000, "POWER3 (630)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_IABR | CPU_FTR_PMC8, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power3, - COMMON_PPC64_FW - }, - { /* Power3+ */ - 0xffff0000, 0x00410000, "POWER3 (630+)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_IABR | CPU_FTR_PMC8, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power3, - COMMON_PPC64_FW - }, - { /* Northstar */ - 0xffff0000, 0x00330000, "RS64-II (northstar)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power3, - COMMON_PPC64_FW - }, - { /* Pulsar */ - 0xffff0000, 0x00340000, "RS64-III (pulsar)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power3, - COMMON_PPC64_FW - }, - { /* I-star */ - 0xffff0000, 0x00360000, "RS64-III (icestar)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power3, - COMMON_PPC64_FW - }, - { /* S-star */ - 0xffff0000, 0x00370000, "RS64-IV (sstar)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power3, - COMMON_PPC64_FW - }, - { /* Power4 */ - 0xffff0000, 0x00350000, "POWER4 (gp)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power4, - COMMON_PPC64_FW - }, - { /* Power4+ */ - 0xffff0000, 0x00380000, "POWER4+ (gq)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power4, - COMMON_PPC64_FW - }, - { /* PPC970 */ - 0xffff0000, 0x00390000, "PPC970", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | - CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64 | PPC_FEATURE_HAS_ALTIVEC_COMP, - 128, 128, - __setup_cpu_ppc970, - COMMON_PPC64_FW - }, - { /* PPC970FX */ - 0xffff0000, 0x003c0000, "PPC970FX", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | - CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64 | PPC_FEATURE_HAS_ALTIVEC_COMP, - 128, 128, - __setup_cpu_ppc970, - COMMON_PPC64_FW - }, - { /* Power5 */ - 0xffff0000, 0x003a0000, "POWER5 (gr)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT | - CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | - CPU_FTR_MMCRA_SIHV, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power4, - COMMON_PPC64_FW - }, - { /* Power5 */ - 0xffff0000, 0x003b0000, "POWER5 (gs)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT | - CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | - CPU_FTR_MMCRA_SIHV, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power4, - COMMON_PPC64_FW - }, - { /* BE DD1.x */ - 0xffff0000, 0x00700000, "Broadband Engine", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | - CPU_FTR_SMT, - COMMON_USER_PPC64 | PPC_FEATURE_HAS_ALTIVEC_COMP, - 128, 128, - __setup_cpu_be, - COMMON_PPC64_FW - }, - { /* default match */ - 0x00000000, 0x00000000, "POWER4 (compatible)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power4, - COMMON_PPC64_FW - } + { /* Power3 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00400000, + .cpu_name = "POWER3 (630)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | + CPU_FTR_PMC8, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, + }, + { /* Power3+ */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00410000, + .cpu_name = "POWER3 (630+)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | + CPU_FTR_PMC8, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, + }, + { /* Northstar */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00330000, + .cpu_name = "RS64-II (northstar)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | + CPU_FTR_PMC8 | CPU_FTR_MMCRA | CPU_FTR_CTRL, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, + }, + { /* Pulsar */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00340000, + .cpu_name = "RS64-III (pulsar)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | + CPU_FTR_PMC8 | CPU_FTR_MMCRA | CPU_FTR_CTRL, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, + }, + { /* I-star */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00360000, + .cpu_name = "RS64-III (icestar)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | + CPU_FTR_PMC8 | CPU_FTR_MMCRA | CPU_FTR_CTRL, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, + }, + { /* S-star */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00370000, + .cpu_name = "RS64-IV (sstar)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | + CPU_FTR_PMC8 | CPU_FTR_MMCRA | CPU_FTR_CTRL, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, + }, + { /* Power4 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00350000, + .cpu_name = "POWER4 (gp)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power4, + .firmware_features = COMMON_PPC64_FW, + }, + { /* Power4+ */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00380000, + .cpu_name = "POWER4+ (gq)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power4, + .firmware_features = COMMON_PPC64_FW, + }, + { /* PPC970 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00390000, + .cpu_name = "PPC970", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | + CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA, + .cpu_user_features = COMMON_USER_PPC64 | + PPC_FEATURE_HAS_ALTIVEC_COMP, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_ppc970, + .firmware_features = COMMON_PPC64_FW, + }, + { /* PPC970FX */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003c0000, + .cpu_name = "PPC970FX", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | + CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA, + .cpu_user_features = COMMON_USER_PPC64 | + PPC_FEATURE_HAS_ALTIVEC_COMP, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_ppc970, + .firmware_features = COMMON_PPC64_FW, + }, + { /* Power5 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003a0000, + .cpu_name = "POWER5 (gr)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT | + CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | + CPU_FTR_MMCRA_SIHV, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power4, + .firmware_features = COMMON_PPC64_FW, + }, + { /* Power5 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003b0000, + .cpu_name = "POWER5 (gs)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT | + CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | + CPU_FTR_MMCRA_SIHV, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power4, + .firmware_features = COMMON_PPC64_FW, + }, + { /* BE DD1.x */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00700000, + .cpu_name = "Broadband Engine", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | + CPU_FTR_SMT, + .cpu_user_features = COMMON_USER_PPC64 | + PPC_FEATURE_HAS_ALTIVEC_COMP, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_be, + .firmware_features = COMMON_PPC64_FW, + }, + { /* default match */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "POWER4 (compatible)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power4, + .firmware_features = COMMON_PPC64_FW, + } }; firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = { - {FW_FEATURE_PFT, "hcall-pft"}, - {FW_FEATURE_TCE, "hcall-tce"}, - {FW_FEATURE_SPRG0, "hcall-sprg0"}, - {FW_FEATURE_DABR, "hcall-dabr"}, - {FW_FEATURE_COPY, "hcall-copy"}, - {FW_FEATURE_ASR, "hcall-asr"}, - {FW_FEATURE_DEBUG, "hcall-debug"}, - {FW_FEATURE_PERF, "hcall-perf"}, - {FW_FEATURE_DUMP, "hcall-dump"}, - {FW_FEATURE_INTERRUPT, "hcall-interrupt"}, - {FW_FEATURE_MIGRATE, "hcall-migrate"}, - {FW_FEATURE_PERFMON, "hcall-perfmon"}, - {FW_FEATURE_CRQ, "hcall-crq"}, - {FW_FEATURE_VIO, "hcall-vio"}, - {FW_FEATURE_RDMA, "hcall-rdma"}, - {FW_FEATURE_LLAN, "hcall-lLAN"}, - {FW_FEATURE_BULK, "hcall-bulk"}, - {FW_FEATURE_XDABR, "hcall-xdabr"}, - {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, - {FW_FEATURE_SPLPAR, "hcall-splpar"}, + {FW_FEATURE_PFT, "hcall-pft"}, + {FW_FEATURE_TCE, "hcall-tce"}, + {FW_FEATURE_SPRG0, "hcall-sprg0"}, + {FW_FEATURE_DABR, "hcall-dabr"}, + {FW_FEATURE_COPY, "hcall-copy"}, + {FW_FEATURE_ASR, "hcall-asr"}, + {FW_FEATURE_DEBUG, "hcall-debug"}, + {FW_FEATURE_PERF, "hcall-perf"}, + {FW_FEATURE_DUMP, "hcall-dump"}, + {FW_FEATURE_INTERRUPT, "hcall-interrupt"}, + {FW_FEATURE_MIGRATE, "hcall-migrate"}, + {FW_FEATURE_PERFMON, "hcall-perfmon"}, + {FW_FEATURE_CRQ, "hcall-crq"}, + {FW_FEATURE_VIO, "hcall-vio"}, + {FW_FEATURE_RDMA, "hcall-rdma"}, + {FW_FEATURE_LLAN, "hcall-lLAN"}, + {FW_FEATURE_BULK, "hcall-bulk"}, + {FW_FEATURE_XDABR, "hcall-xdabr"}, + {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, + {FW_FEATURE_SPLPAR, "hcall-splpar"}, }; diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index 675c270..93ebcac0 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -308,6 +308,7 @@ exception_marker: label##_pSeries: \ HMT_MEDIUM; \ mtspr SPRG1,r13; /* save r13 */ \ + RUNLATCH_ON(r13); \ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common) #define STD_EXCEPTION_ISERIES(n, label, area) \ @@ -315,6 +316,7 @@ label##_pSeries: \ label##_iSeries: \ HMT_MEDIUM; \ mtspr SPRG1,r13; /* save r13 */ \ + RUNLATCH_ON(r13); \ EXCEPTION_PROLOG_ISERIES_1(area); \ EXCEPTION_PROLOG_ISERIES_2; \ b label##_common @@ -324,6 +326,7 @@ label##_iSeries: \ label##_iSeries: \ HMT_MEDIUM; \ mtspr SPRG1,r13; /* save r13 */ \ + RUNLATCH_ON(r13); \ EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN); \ lbz r10,PACAPROCENABLED(r13); \ cmpwi 0,r10,0; \ @@ -393,6 +396,7 @@ __start_interrupts: _machine_check_pSeries: HMT_MEDIUM mtspr SPRG1,r13 /* save r13 */ + RUNLATCH_ON(r13) EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) . = 0x300 @@ -419,6 +423,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) data_access_slb_pSeries: HMT_MEDIUM mtspr SPRG1,r13 + RUNLATCH_ON(r13) mfspr r13,SPRG3 /* get paca address into r13 */ std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ std r10,PACA_EXSLB+EX_R10(r13) @@ -439,6 +444,7 @@ data_access_slb_pSeries: instruction_access_slb_pSeries: HMT_MEDIUM mtspr SPRG1,r13 + RUNLATCH_ON(r13) mfspr r13,SPRG3 /* get paca address into r13 */ std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ std r10,PACA_EXSLB+EX_R10(r13) @@ -464,6 +470,7 @@ instruction_access_slb_pSeries: .globl system_call_pSeries system_call_pSeries: HMT_MEDIUM + RUNLATCH_ON(r9) mr r9,r13 mfmsr r10 mfspr r13,SPRG3 @@ -707,11 +714,13 @@ fwnmi_data_area: system_reset_fwnmi: HMT_MEDIUM mtspr SPRG1,r13 /* save r13 */ + RUNLATCH_ON(r13) EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) .globl machine_check_fwnmi machine_check_fwnmi: HMT_MEDIUM mtspr SPRG1,r13 /* save r13 */ + RUNLATCH_ON(r13) EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) /* @@ -848,6 +857,7 @@ unrecov_fer: .align 7 .globl data_access_common data_access_common: + RUNLATCH_ON(r10) /* It wont fit in the 0x300 handler */ mfspr r10,DAR std r10,PACA_EXGEN+EX_DAR(r13) mfspr r10,DSISR diff --git a/arch/ppc64/kernel/hvconsole.c b/arch/ppc64/kernel/hvconsole.c index c72fb8f..138e128 100644 --- a/arch/ppc64/kernel/hvconsole.c +++ b/arch/ppc64/kernel/hvconsole.c @@ -27,7 +27,6 @@ #include <linux/module.h> #include <asm/hvcall.h> #include <asm/hvconsole.h> -#include <asm/prom.h> /** * hvc_get_chars - retrieve characters from firmware for denoted vterm adatper @@ -42,29 +41,14 @@ int hvc_get_chars(uint32_t vtermno, char *buf, int count) unsigned long got; if (plpar_hcall(H_GET_TERM_CHAR, vtermno, 0, 0, 0, &got, - (unsigned long *)buf, (unsigned long *)buf+1) == H_Success) { - /* - * Work around a HV bug where it gives us a null - * after every \r. -- paulus - */ - if (got > 0) { - int i; - for (i = 1; i < got; ++i) { - if (buf[i] == 0 && buf[i-1] == '\r') { - --got; - if (i < got) - memmove(&buf[i], &buf[i+1], - got - i); - } - } - } + (unsigned long *)buf, (unsigned long *)buf+1) == H_Success) return got; - } return 0; } EXPORT_SYMBOL(hvc_get_chars); + /** * hvc_put_chars: send characters to firmware for denoted vterm adapter * @vtermno: The vtermno or unit_address of the adapter from which the data @@ -88,34 +72,3 @@ int hvc_put_chars(uint32_t vtermno, const char *buf, int count) } EXPORT_SYMBOL(hvc_put_chars); - -/* - * We hope/assume that the first vty found corresponds to the first console - * device. - */ -int hvc_find_vtys(void) -{ - struct device_node *vty; - int num_found = 0; - - for (vty = of_find_node_by_name(NULL, "vty"); vty != NULL; - vty = of_find_node_by_name(vty, "vty")) { - uint32_t *vtermno; - - /* We have statically defined space for only a certain number of - * console adapters. */ - if (num_found >= MAX_NR_HVC_CONSOLES) - break; - - vtermno = (uint32_t *)get_property(vty, "reg", NULL); - if (!vtermno) - continue; - - if (device_is_compatible(vty, "hvterm1")) { - hvc_instantiate(*vtermno, num_found); - ++num_found; - } - } - - return num_found; -} diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c index b3f770f..077c82f 100644 --- a/arch/ppc64/kernel/iSeries_setup.c +++ b/arch/ppc64/kernel/iSeries_setup.c @@ -834,6 +834,92 @@ static int __init iSeries_src_init(void) late_initcall(iSeries_src_init); +static inline void process_iSeries_events(void) +{ + asm volatile ("li 0,0x5555; sc" : : : "r0", "r3"); +} + +static void yield_shared_processor(void) +{ + unsigned long tb; + + HvCall_setEnabledInterrupts(HvCall_MaskIPI | + HvCall_MaskLpEvent | + HvCall_MaskLpProd | + HvCall_MaskTimeout); + + tb = get_tb(); + /* Compute future tb value when yield should expire */ + HvCall_yieldProcessor(HvCall_YieldTimed, tb+tb_ticks_per_jiffy); + + /* + * The decrementer stops during the yield. Force a fake decrementer + * here and let the timer_interrupt code sort out the actual time. + */ + get_paca()->lppaca.int_dword.fields.decr_int = 1; + process_iSeries_events(); +} + +static int iseries_shared_idle(void) +{ + while (1) { + while (!need_resched() && !hvlpevent_is_pending()) { + local_irq_disable(); + ppc64_runlatch_off(); + + /* Recheck with irqs off */ + if (!need_resched() && !hvlpevent_is_pending()) + yield_shared_processor(); + + HMT_medium(); + local_irq_enable(); + } + + ppc64_runlatch_on(); + + if (hvlpevent_is_pending()) + process_iSeries_events(); + + schedule(); + } + + return 0; +} + +static int iseries_dedicated_idle(void) +{ + long oldval; + + while (1) { + oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); + + if (!oldval) { + set_thread_flag(TIF_POLLING_NRFLAG); + + while (!need_resched()) { + ppc64_runlatch_off(); + HMT_low(); + + if (hvlpevent_is_pending()) { + HMT_medium(); + ppc64_runlatch_on(); + process_iSeries_events(); + } + } + + HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + } else { + set_need_resched(); + } + + ppc64_runlatch_on(); + schedule(); + } + + return 0; +} + #ifndef CONFIG_PCI void __init iSeries_init_IRQ(void) { } #endif @@ -859,5 +945,13 @@ void __init iSeries_early_setup(void) ppc_md.get_rtc_time = iSeries_get_rtc_time; ppc_md.calibrate_decr = iSeries_calibrate_decr; ppc_md.progress = iSeries_progress; + + if (get_paca()->lppaca.shared_proc) { + ppc_md.idle_loop = iseries_shared_idle; + printk(KERN_INFO "Using shared processor idle loop\n"); + } else { + ppc_md.idle_loop = iseries_dedicated_idle; + printk(KERN_INFO "Using dedicated idle loop\n"); + } } diff --git a/arch/ppc64/kernel/idle.c b/arch/ppc64/kernel/idle.c index 08952c7..954395d 100644 --- a/arch/ppc64/kernel/idle.c +++ b/arch/ppc64/kernel/idle.c @@ -20,109 +20,18 @@ #include <linux/kernel.h> #include <linux/smp.h> #include <linux/cpu.h> -#include <linux/module.h> #include <linux/sysctl.h> -#include <linux/smp.h> #include <asm/system.h> #include <asm/processor.h> -#include <asm/mmu.h> #include <asm/cputable.h> #include <asm/time.h> -#include <asm/iSeries/HvCall.h> -#include <asm/iSeries/ItLpQueue.h> -#include <asm/plpar_wrappers.h> #include <asm/systemcfg.h> +#include <asm/machdep.h> extern void power4_idle(void); -static int (*idle_loop)(void); - -#ifdef CONFIG_PPC_ISERIES -static unsigned long maxYieldTime = 0; -static unsigned long minYieldTime = 0xffffffffffffffffUL; - -static inline void process_iSeries_events(void) -{ - asm volatile ("li 0,0x5555; sc" : : : "r0", "r3"); -} - -static void yield_shared_processor(void) -{ - unsigned long tb; - unsigned long yieldTime; - - HvCall_setEnabledInterrupts(HvCall_MaskIPI | - HvCall_MaskLpEvent | - HvCall_MaskLpProd | - HvCall_MaskTimeout); - - tb = get_tb(); - /* Compute future tb value when yield should expire */ - HvCall_yieldProcessor(HvCall_YieldTimed, tb+tb_ticks_per_jiffy); - - yieldTime = get_tb() - tb; - if (yieldTime > maxYieldTime) - maxYieldTime = yieldTime; - - if (yieldTime < minYieldTime) - minYieldTime = yieldTime; - - /* - * The decrementer stops during the yield. Force a fake decrementer - * here and let the timer_interrupt code sort out the actual time. - */ - get_paca()->lppaca.int_dword.fields.decr_int = 1; - process_iSeries_events(); -} - -static int iSeries_idle(void) -{ - struct paca_struct *lpaca; - long oldval; - - /* ensure iSeries run light will be out when idle */ - ppc64_runlatch_off(); - - lpaca = get_paca(); - - while (1) { - if (lpaca->lppaca.shared_proc) { - if (hvlpevent_is_pending()) - process_iSeries_events(); - if (!need_resched()) - yield_shared_processor(); - } else { - oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); - - if (!oldval) { - set_thread_flag(TIF_POLLING_NRFLAG); - - while (!need_resched()) { - HMT_medium(); - if (hvlpevent_is_pending()) - process_iSeries_events(); - HMT_low(); - } - - HMT_medium(); - clear_thread_flag(TIF_POLLING_NRFLAG); - } else { - set_need_resched(); - } - } - - ppc64_runlatch_on(); - schedule(); - ppc64_runlatch_off(); - } - - return 0; -} - -#else - -static int default_idle(void) +int default_idle(void) { long oldval; unsigned int cpu = smp_processor_id(); @@ -134,7 +43,8 @@ static int default_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (!need_resched() && !cpu_is_offline(cpu)) { - barrier(); + ppc64_runlatch_off(); + /* * Go into low thread priority and possibly * low power mode. @@ -149,6 +59,7 @@ static int default_idle(void) set_need_resched(); } + ppc64_runlatch_on(); schedule(); if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); @@ -157,127 +68,19 @@ static int default_idle(void) return 0; } -#ifdef CONFIG_PPC_PSERIES - -DECLARE_PER_CPU(unsigned long, smt_snooze_delay); - -int dedicated_idle(void) +int native_idle(void) { - long oldval; - struct paca_struct *lpaca = get_paca(), *ppaca; - unsigned long start_snooze; - unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay); - unsigned int cpu = smp_processor_id(); - - ppaca = &paca[cpu ^ 1]; - while (1) { - /* - * Indicate to the HV that we are idle. Now would be - * a good time to find other work to dispatch. - */ - lpaca->lppaca.idle = 1; - - oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); - if (!oldval) { - set_thread_flag(TIF_POLLING_NRFLAG); - start_snooze = __get_tb() + - *smt_snooze_delay * tb_ticks_per_usec; - while (!need_resched() && !cpu_is_offline(cpu)) { - /* - * Go into low thread priority and possibly - * low power mode. - */ - HMT_low(); - HMT_very_low(); - - if (*smt_snooze_delay == 0 || - __get_tb() < start_snooze) - continue; - - HMT_medium(); - - if (!(ppaca->lppaca.idle)) { - local_irq_disable(); - - /* - * We are about to sleep the thread - * and so wont be polling any - * more. - */ - clear_thread_flag(TIF_POLLING_NRFLAG); - - /* - * SMT dynamic mode. Cede will result - * in this thread going dormant, if the - * partner thread is still doing work. - * Thread wakes up if partner goes idle, - * an interrupt is presented, or a prod - * occurs. Returning from the cede - * enables external interrupts. - */ - if (!need_resched()) - cede_processor(); - else - local_irq_enable(); - } else { - /* - * Give the HV an opportunity at the - * processor, since we are not doing - * any work. - */ - poll_pending(); - } - } - - clear_thread_flag(TIF_POLLING_NRFLAG); - } else { - set_need_resched(); - } - - HMT_medium(); - lpaca->lppaca.idle = 0; - schedule(); - if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) - cpu_die(); - } - return 0; -} - -static int shared_idle(void) -{ - struct paca_struct *lpaca = get_paca(); - unsigned int cpu = smp_processor_id(); - - while (1) { - /* - * Indicate to the HV that we are idle. Now would be - * a good time to find other work to dispatch. - */ - lpaca->lppaca.idle = 1; + ppc64_runlatch_off(); - while (!need_resched() && !cpu_is_offline(cpu)) { - local_irq_disable(); + if (!need_resched()) + power4_idle(); - /* - * Yield the processor to the hypervisor. We return if - * an external interrupt occurs (which are driven prior - * to returning here) or if a prod occurs from another - * processor. When returning here, external interrupts - * are enabled. - * - * Check need_resched() again with interrupts disabled - * to avoid a race. - */ - if (!need_resched()) - cede_processor(); - else - local_irq_enable(); + if (need_resched()) { + ppc64_runlatch_on(); + schedule(); } - HMT_medium(); - lpaca->lppaca.idle = 0; - schedule(); if (cpu_is_offline(smp_processor_id()) && system_state == SYSTEM_RUNNING) cpu_die(); @@ -286,29 +89,10 @@ static int shared_idle(void) return 0; } -#endif /* CONFIG_PPC_PSERIES */ - -static int native_idle(void) -{ - while(1) { - /* check CPU type here */ - if (!need_resched()) - power4_idle(); - if (need_resched()) - schedule(); - - if (cpu_is_offline(raw_smp_processor_id()) && - system_state == SYSTEM_RUNNING) - cpu_die(); - } - return 0; -} - -#endif /* CONFIG_PPC_ISERIES */ - void cpu_idle(void) { - idle_loop(); + BUG_ON(NULL == ppc_md.idle_loop); + ppc_md.idle_loop(); } int powersave_nap; @@ -342,42 +126,3 @@ register_powersave_nap_sysctl(void) } __initcall(register_powersave_nap_sysctl); #endif - -int idle_setup(void) -{ - /* - * Move that junk to each platform specific file, eventually define - * a pSeries_idle for shared processor stuff - */ -#ifdef CONFIG_PPC_ISERIES - idle_loop = iSeries_idle; - return 1; -#else - idle_loop = default_idle; -#endif -#ifdef CONFIG_PPC_PSERIES - if (systemcfg->platform & PLATFORM_PSERIES) { - if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { - if (get_paca()->lppaca.shared_proc) { - printk(KERN_INFO "Using shared processor idle loop\n"); - idle_loop = shared_idle; - } else { - printk(KERN_INFO "Using dedicated idle loop\n"); - idle_loop = dedicated_idle; - } - } else { - printk(KERN_INFO "Using default idle loop\n"); - idle_loop = default_idle; - } - } -#endif /* CONFIG_PPC_PSERIES */ -#ifndef CONFIG_PPC_ISERIES - if (systemcfg->platform == PLATFORM_POWERMAC || - systemcfg->platform == PLATFORM_MAPLE) { - printk(KERN_INFO "Using native/NAP idle loop\n"); - idle_loop = native_idle; - } -#endif /* CONFIG_PPC_ISERIES */ - - return 1; -} diff --git a/arch/ppc64/kernel/maple_setup.c b/arch/ppc64/kernel/maple_setup.c index da8900b..bb55b5a 100644 --- a/arch/ppc64/kernel/maple_setup.c +++ b/arch/ppc64/kernel/maple_setup.c @@ -177,6 +177,8 @@ void __init maple_setup_arch(void) #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; #endif + + printk(KERN_INFO "Using native/NAP idle loop\n"); } /* @@ -297,4 +299,5 @@ struct machdep_calls __initdata maple_md = { .get_rtc_time = maple_get_rtc_time, .calibrate_decr = generic_calibrate_decr, .progress = maple_progress, + .idle_loop = native_idle, }; diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S index f3dea0c..59f4f99 100644 --- a/arch/ppc64/kernel/misc.S +++ b/arch/ppc64/kernel/misc.S @@ -1124,9 +1124,11 @@ _GLOBAL(sys_call_table32) .llong .compat_sys_mq_getsetattr .llong .compat_sys_kexec_load .llong .sys32_add_key - .llong .sys32_request_key + .llong .sys32_request_key /* 270 */ .llong .compat_sys_keyctl .llong .compat_sys_waitid + .llong .sys32_ioprio_set + .llong .sys32_ioprio_get .balign 8 _GLOBAL(sys_call_table) @@ -1403,3 +1405,5 @@ _GLOBAL(sys_call_table) .llong .sys_request_key /* 270 */ .llong .sys_keyctl .llong .sys_waitid + .llong .sys_ioprio_set + .llong .sys_ioprio_get diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c index 44d9af7..5bec956 100644 --- a/arch/ppc64/kernel/pSeries_setup.c +++ b/arch/ppc64/kernel/pSeries_setup.c @@ -19,6 +19,7 @@ #undef DEBUG #include <linux/config.h> +#include <linux/cpu.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -82,6 +83,9 @@ int fwnmi_active; /* TRUE if an FWNMI handler is present */ extern void pSeries_system_reset_exception(struct pt_regs *regs); extern int pSeries_machine_check_exception(struct pt_regs *regs); +static int pseries_shared_idle(void); +static int pseries_dedicated_idle(void); + static volatile void __iomem * chrp_int_ack_special; struct mpic *pSeries_mpic; @@ -229,6 +233,20 @@ static void __init pSeries_setup_arch(void) if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) vpa_init(boot_cpuid); + + /* Choose an idle loop */ + if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { + if (get_paca()->lppaca.shared_proc) { + printk(KERN_INFO "Using shared processor idle loop\n"); + ppc_md.idle_loop = pseries_shared_idle; + } else { + printk(KERN_INFO "Using dedicated idle loop\n"); + ppc_md.idle_loop = pseries_dedicated_idle; + } + } else { + printk(KERN_INFO "Using default idle loop\n"); + ppc_md.idle_loop = default_idle; + } } static int __init pSeries_init_panel(void) @@ -418,6 +436,144 @@ static int __init pSeries_probe(int platform) return 1; } +DECLARE_PER_CPU(unsigned long, smt_snooze_delay); + +static inline void dedicated_idle_sleep(unsigned int cpu) +{ + struct paca_struct *ppaca = &paca[cpu ^ 1]; + + /* Only sleep if the other thread is not idle */ + if (!(ppaca->lppaca.idle)) { + local_irq_disable(); + + /* + * We are about to sleep the thread and so wont be polling any + * more. + */ + clear_thread_flag(TIF_POLLING_NRFLAG); + + /* + * SMT dynamic mode. Cede will result in this thread going + * dormant, if the partner thread is still doing work. Thread + * wakes up if partner goes idle, an interrupt is presented, or + * a prod occurs. Returning from the cede enables external + * interrupts. + */ + if (!need_resched()) + cede_processor(); + else + local_irq_enable(); + } else { + /* + * Give the HV an opportunity at the processor, since we are + * not doing any work. + */ + poll_pending(); + } +} + +static int pseries_dedicated_idle(void) +{ + long oldval; + struct paca_struct *lpaca = get_paca(); + unsigned int cpu = smp_processor_id(); + unsigned long start_snooze; + unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay); + + while (1) { + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + lpaca->lppaca.idle = 1; + + oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); + if (!oldval) { + set_thread_flag(TIF_POLLING_NRFLAG); + + start_snooze = __get_tb() + + *smt_snooze_delay * tb_ticks_per_usec; + + while (!need_resched() && !cpu_is_offline(cpu)) { + ppc64_runlatch_off(); + + /* + * Go into low thread priority and possibly + * low power mode. + */ + HMT_low(); + HMT_very_low(); + + if (*smt_snooze_delay != 0 && + __get_tb() > start_snooze) { + HMT_medium(); + dedicated_idle_sleep(cpu); + } + + } + + HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + } else { + set_need_resched(); + } + + lpaca->lppaca.idle = 0; + ppc64_runlatch_on(); + + schedule(); + + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) + cpu_die(); + } +} + +static int pseries_shared_idle(void) +{ + struct paca_struct *lpaca = get_paca(); + unsigned int cpu = smp_processor_id(); + + while (1) { + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + lpaca->lppaca.idle = 1; + + while (!need_resched() && !cpu_is_offline(cpu)) { + local_irq_disable(); + ppc64_runlatch_off(); + + /* + * Yield the processor to the hypervisor. We return if + * an external interrupt occurs (which are driven prior + * to returning here) or if a prod occurs from another + * processor. When returning here, external interrupts + * are enabled. + * + * Check need_resched() again with interrupts disabled + * to avoid a race. + */ + if (!need_resched()) + cede_processor(); + else + local_irq_enable(); + + HMT_medium(); + } + + lpaca->lppaca.idle = 0; + ppc64_runlatch_on(); + + schedule(); + + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) + cpu_die(); + } + + return 0; +} + struct machdep_calls __initdata pSeries_md = { .probe = pSeries_probe, .setup_arch = pSeries_setup_arch, diff --git a/arch/ppc64/kernel/pmac_setup.c b/arch/ppc64/kernel/pmac_setup.c index 6cf03d3..3013cdb 100644 --- a/arch/ppc64/kernel/pmac_setup.c +++ b/arch/ppc64/kernel/pmac_setup.c @@ -186,6 +186,8 @@ void __init pmac_setup_arch(void) #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; #endif + + printk(KERN_INFO "Using native/NAP idle loop\n"); } #ifdef CONFIG_SCSI @@ -507,5 +509,6 @@ struct machdep_calls __initdata pmac_md = { .calibrate_decr = pmac_calibrate_decr, .feature_call = pmac_do_feature_call, .progress = pmac_progress, - .check_legacy_ioport = pmac_check_legacy_ioport + .check_legacy_ioport = pmac_check_legacy_ioport, + .idle_loop = native_idle, }; diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c index d5e4866..d1b33f0 100644 --- a/arch/ppc64/kernel/setup.c +++ b/arch/ppc64/kernel/setup.c @@ -96,7 +96,6 @@ extern void udbg_init_maple_realmode(void); extern unsigned long klimit; extern void mm_init_ppc64(void); -extern int idle_setup(void); extern void stab_initialize(unsigned long stab); extern void htab_initialize(void); extern void early_init_devtree(void *flat_dt); @@ -1081,8 +1080,11 @@ void __init setup_arch(char **cmdline_p) ppc_md.setup_arch(); - /* Select the correct idle loop for the platform. */ - idle_setup(); + /* Use the default idle loop if the platform hasn't provided one. */ + if (NULL == ppc_md.idle_loop) { + ppc_md.idle_loop = default_idle; + printk(KERN_INFO "Using default idle loop\n"); + } paging_init(); ppc64_boot_msg(0x15, "Setup Done"); diff --git a/arch/ppc64/kernel/sys_ppc32.c b/arch/ppc64/kernel/sys_ppc32.c index 118436e..2066190 100644 --- a/arch/ppc64/kernel/sys_ppc32.c +++ b/arch/ppc64/kernel/sys_ppc32.c @@ -30,47 +30,26 @@ #include <linux/sem.h> #include <linux/msg.h> #include <linux/shm.h> -#include <linux/slab.h> -#include <linux/uio.h> -#include <linux/aio.h> -#include <linux/nfs_fs.h> -#include <linux/module.h> -#include <linux/sunrpc/svc.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/cache.h> -#include <linux/nfsd/xdr.h> -#include <linux/nfsd/syscall.h> #include <linux/poll.h> #include <linux/personality.h> #include <linux/stat.h> -#include <linux/filter.h> -#include <linux/highmem.h> -#include <linux/highuid.h> #include <linux/mman.h> -#include <linux/ipv6.h> #include <linux/in.h> -#include <linux/icmpv6.h> #include <linux/syscalls.h> #include <linux/unistd.h> #include <linux/sysctl.h> #include <linux/binfmts.h> -#include <linux/dnotify.h> #include <linux/security.h> #include <linux/compat.h> #include <linux/ptrace.h> -#include <linux/aio_abi.h> #include <linux/elf.h> -#include <net/scm.h> -#include <net/sock.h> - #include <asm/ptrace.h> #include <asm/types.h> #include <asm/ipc.h> #include <asm/uaccess.h> #include <asm/unistd.h> #include <asm/semaphore.h> -#include <asm/ppcdebug.h> #include <asm/time.h> #include <asm/mmu_context.h> #include <asm/systemcfg.h> @@ -350,8 +329,6 @@ asmlinkage long sys32_adjtimex(struct timex32 __user *utp) return ret; } - -/* These are here just in case some old sparc32 binary calls it. */ asmlinkage long sys32_pause(void) { current->state = TASK_INTERRUPTIBLE; @@ -360,8 +337,6 @@ asmlinkage long sys32_pause(void) return -ERESTARTNOHAND; } - - static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i) { long usec; @@ -847,16 +822,6 @@ asmlinkage long sys32_getpgid(u32 pid) } -/* Note: it is necessary to treat which and who as unsigned ints, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. - */ -asmlinkage long sys32_getpriority(u32 which, u32 who) -{ - return sys_getpriority((int)which, (int)who); -} - /* Note: it is necessary to treat pid as an unsigned int, * with the corresponding cast to a signed int to insure that the @@ -1048,6 +1013,11 @@ asmlinkage long sys32_setpgid(u32 pid, u32 pgid) return sys_setpgid((int)pid, (int)pgid); } +long sys32_getpriority(u32 which, u32 who) +{ + /* sign extend which and who */ + return sys_getpriority((int)which, (int)who); +} long sys32_setpriority(u32 which, u32 who, u32 niceval) { @@ -1055,6 +1025,18 @@ long sys32_setpriority(u32 which, u32 who, u32 niceval) return sys_setpriority((int)which, (int)who, (int)niceval); } +long sys32_ioprio_get(u32 which, u32 who) +{ + /* sign extend which and who */ + return sys_ioprio_get((int)which, (int)who); +} + +long sys32_ioprio_set(u32 which, u32 who, u32 ioprio) +{ + /* sign extend which, who and ioprio */ + return sys_ioprio_set((int)which, (int)who, (int)ioprio); +} + /* Note: it is necessary to treat newmask as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) @@ -1273,8 +1255,6 @@ long ppc32_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, (u64)len_high << 32 | len_low, advice); } -extern asmlinkage long sys_timer_create(clockid_t, sigevent_t __user *, timer_t __user *); - long ppc32_timer_create(clockid_t clock, struct compat_sigevent __user *ev32, timer_t __user *timer_id) diff --git a/arch/ppc64/kernel/sysfs.c b/arch/ppc64/kernel/sysfs.c index 2f704a2..02b8ac4 100644 --- a/arch/ppc64/kernel/sysfs.c +++ b/arch/ppc64/kernel/sysfs.c @@ -112,7 +112,6 @@ void ppc64_enable_pmcs(void) unsigned long hid0; #ifdef CONFIG_PPC_PSERIES unsigned long set, reset; - int ret; #endif /* CONFIG_PPC_PSERIES */ /* Only need to enable them once */ @@ -145,11 +144,7 @@ void ppc64_enable_pmcs(void) case PLATFORM_PSERIES_LPAR: set = 1UL << 63; reset = 0; - ret = plpar_hcall_norets(H_PERFMON, set, reset); - if (ret) - printk(KERN_ERR "H_PERFMON call on cpu %u " - "returned %d\n", - smp_processor_id(), ret); + plpar_hcall_norets(H_PERFMON, set, reset); break; #endif /* CONFIG_PPC_PSERIES */ @@ -161,13 +156,6 @@ void ppc64_enable_pmcs(void) /* instruct hypervisor to maintain PMCs */ if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) get_paca()->lppaca.pmcregs_in_use = 1; - - /* - * On SMT machines we have to set the run latch in the ctrl register - * in order to make PMC6 spin. - */ - if (cpu_has_feature(CPU_FTR_SMT)) - ppc64_runlatch_on(); #endif /* CONFIG_PPC_PSERIES */ } diff --git a/arch/ppc64/kernel/vdso32/vdso32.lds.S b/arch/ppc64/kernel/vdso32/vdso32.lds.S index 11290c9..6f87a91 100644 --- a/arch/ppc64/kernel/vdso32/vdso32.lds.S +++ b/arch/ppc64/kernel/vdso32/vdso32.lds.S @@ -40,9 +40,9 @@ SECTIONS .gcc_except_table : { *(.gcc_except_table) } .fixup : { *(.fixup) } - .got ALIGN(4) : { *(.got.plt) *(.got) } - .dynamic : { *(.dynamic) } :text :dynamic + .got : { *(.got) } + .plt : { *(.plt) } _end = .; __end = .; diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index d78bc13..4b13292 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -43,6 +43,8 @@ config SPARC64_PAGE_SIZE_4MB endchoice +source kernel/Kconfig.hz + source "init/Kconfig" config SYSVIPC_COMPAT diff --git a/arch/sparc64/kernel/dtlb_backend.S b/arch/sparc64/kernel/dtlb_backend.S index b73a3c8..5385228 100644 --- a/arch/sparc64/kernel/dtlb_backend.S +++ b/arch/sparc64/kernel/dtlb_backend.S @@ -16,7 +16,7 @@ #elif PAGE_SHIFT == 19 #define SZ_BITS _PAGE_SZ512K #elif PAGE_SHIFT == 22 -#define SZ_BITS _PAGE_SZ4M +#define SZ_BITS _PAGE_SZ4MB #endif #define VALID_SZ_BITS (_PAGE_VALID | SZ_BITS) diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 9469e77..6682c78 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -128,7 +128,6 @@ config HOSTFS config HPPFS tristate "HoneyPot ProcFS (EXPERIMENTAL)" - depends on BROKEN help hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc entries to be overridden, removed, or fabricated from the host. @@ -141,8 +140,9 @@ config HPPFS You only need this if you are setting up a UML honeypot. Otherwise, it is safe to say 'N' here. - If you are actively using it, please ask for it to be fixed. In this - moment, it does not work on 2.6 (it works somehow on 2.4). + If you are actively using it, please report any problems, since it's + getting fixed. In this moment, it is experimental on 2.6 (it works on + 2.4). config MCONSOLE bool "Management console" diff --git a/arch/um/Kconfig_i386 b/arch/um/Kconfig_i386 index e41f374..27c18a8 100644 --- a/arch/um/Kconfig_i386 +++ b/arch/um/Kconfig_i386 @@ -19,6 +19,18 @@ config 3_LEVEL_PGTABLES memory. All the memory that can't be mapped directly will be treated as high memory. +config STUB_CODE + hex + default 0xbfffe000 + +config STUB_DATA + hex + default 0xbffff000 + +config STUB_START + hex + default STUB_CODE + config ARCH_HAS_SC_SIGNALS bool default y diff --git a/arch/um/Kconfig_x86_64 b/arch/um/Kconfig_x86_64 index f162f50..735a047 100644 --- a/arch/um/Kconfig_x86_64 +++ b/arch/um/Kconfig_x86_64 @@ -14,6 +14,18 @@ config 3_LEVEL_PGTABLES bool default y +config STUB_CODE + hex + default 0x7fbfffe000 + +config STUB_DATA + hex + default 0x7fbffff000 + +config STUB_START + hex + default STUB_CODE + config ARCH_HAS_SC_SIGNALS bool default n diff --git a/arch/um/Makefile-i386 b/arch/um/Makefile-i386 index 29e182d..3010590 100644 --- a/arch/um/Makefile-i386 +++ b/arch/um/Makefile-i386 @@ -8,7 +8,7 @@ ifeq ($(CONFIG_MODE_SKAS),y) endif endif -CFLAGS += -U__$(SUBARCH)__ -U$(SUBARCH) +CFLAGS += -U__$(SUBARCH)__ -U$(SUBARCH) $(STUB_CFLAGS) ARCH_USER_CFLAGS := ifneq ($(CONFIG_GPROF),y) diff --git a/arch/um/Makefile-x86_64 b/arch/um/Makefile-x86_64 index 3214456..d80bd00 100644 --- a/arch/um/Makefile-x86_64 +++ b/arch/um/Makefile-x86_64 @@ -4,7 +4,7 @@ SUBARCH_LIBS := arch/um/sys-x86_64/ START := 0x60000000 -CFLAGS += -U__$(SUBARCH)__ -fno-builtin +CFLAGS += -U__$(SUBARCH)__ -fno-builtin $(STUB_CFLAGS) ARCH_USER_CFLAGS := -D__x86_64__ ELF_ARCH := i386:x86-64 diff --git a/arch/um/defconfig b/arch/um/defconfig index 4067c3a..80d30d1 100644 --- a/arch/um/defconfig +++ b/arch/um/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc3-skas3-v9-pre2 -# Sun Apr 24 19:46:10 2005 +# Linux kernel version: 2.6.12-rc6-mm1 +# Tue Jun 14 18:22:21 2005 # CONFIG_GENERIC_HARDIRQS=y CONFIG_UML=y @@ -13,23 +13,32 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y # # UML-specific options # -CONFIG_MODE_TT=y +# CONFIG_MODE_TT is not set +# CONFIG_STATIC_LINK is not set CONFIG_MODE_SKAS=y CONFIG_UML_X86=y # CONFIG_64BIT is not set CONFIG_TOP_ADDR=0xc0000000 # CONFIG_3_LEVEL_PGTABLES is not set +CONFIG_STUB_CODE=0xbfffe000 +CONFIG_STUB_DATA=0xbffff000 +CONFIG_STUB_START=0xbfffe000 CONFIG_ARCH_HAS_SC_SIGNALS=y CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA=y -CONFIG_LD_SCRIPT_STATIC=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_LD_SCRIPT_DYN=y CONFIG_NET=y CONFIG_BINFMT_ELF=y CONFIG_BINFMT_MISC=m -CONFIG_HOSTFS=y +# CONFIG_HOSTFS is not set CONFIG_MCONSOLE=y # CONFIG_MAGIC_SYSRQ is not set # CONFIG_HOST_2G_2G is not set -# CONFIG_SMP is not set CONFIG_NEST_LEVEL=0 CONFIG_KERNEL_HALF_GIGS=1 # CONFIG_HIGHMEM is not set @@ -63,6 +72,8 @@ CONFIG_IKCONFIG_PROC=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set CONFIG_KALLSYMS_EXTRA_PASS=y +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -81,6 +92,7 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_FORCE_UNLOAD is not set CONFIG_OBSOLETE_MODPARM=y +# CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_KMOD=y @@ -115,6 +127,7 @@ CONFIG_UML_SOUND=m CONFIG_SOUND=m CONFIG_HOSTAUDIO=m CONFIG_UML_RANDOM=y +# CONFIG_MMAPPER is not set # # Block devices @@ -176,6 +189,17 @@ CONFIG_INET=y # CONFIG_INET_TUNNEL is not set CONFIG_IP_TCPDIAG=y # CONFIG_IP_TCPDIAG_IPV6 is not set + +# +# TCP congestion control +# +CONFIG_TCP_CONG_BIC=y +CONFIG_TCP_CONG_WESTWOOD=y +CONFIG_TCP_CONG_HTCP=y +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_SCALABLE is not set # CONFIG_IPV6 is not set # CONFIG_NETFILTER is not set @@ -206,11 +230,15 @@ CONFIG_IP_TCPDIAG=y # Network testing # # CONFIG_NET_PKTGEN is not set +# CONFIG_KGDBOE is not set # CONFIG_NETPOLL is not set +# CONFIG_NETPOLL_RX is not set +# CONFIG_NETPOLL_TRAP is not set # CONFIG_NET_POLL_CONTROLLER is not set # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set +# CONFIG_IEEE80211 is not set CONFIG_DUMMY=m # CONFIG_BONDING is not set # CONFIG_EQUALIZER is not set @@ -227,6 +255,7 @@ CONFIG_PPP=m # CONFIG_PPP_SYNC_TTY is not set # CONFIG_PPP_DEFLATE is not set # CONFIG_PPP_BSDCOMP is not set +# CONFIG_PPP_MPPE is not set # CONFIG_PPPOE is not set CONFIG_SLIP=m # CONFIG_SLIP_COMPRESSED is not set @@ -240,10 +269,12 @@ CONFIG_SLIP=m # CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set +# CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y # CONFIG_EXT3_FS_XATTR is not set CONFIG_JBD=y # CONFIG_JBD_DEBUG is not set +# CONFIG_REISER4_FS is not set CONFIG_REISERFS_FS=y # CONFIG_REISERFS_CHECK is not set # CONFIG_REISERFS_PROC_INFO is not set @@ -256,6 +287,7 @@ CONFIG_REISERFS_FS=y # CONFIG_XFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set +CONFIG_INOTIFY=y CONFIG_QUOTA=y # CONFIG_QFMT_V1 is not set # CONFIG_QFMT_V2 is not set @@ -265,6 +297,12 @@ CONFIG_AUTOFS_FS=m CONFIG_AUTOFS4_FS=m # +# Caches +# +# CONFIG_FSCACHE is not set +# CONFIG_FUSE_FS is not set + +# # CD-ROM/DVD Filesystems # CONFIG_ISO9660_FS=m @@ -291,6 +329,8 @@ CONFIG_TMPFS=y # CONFIG_TMPFS_XATTR is not set # CONFIG_HUGETLB_PAGE is not set CONFIG_RAMFS=y +# CONFIG_CONFIGFS_FS is not set +# CONFIG_RELAYFS_FS is not set # # Miscellaneous filesystems @@ -319,6 +359,7 @@ CONFIG_RAMFS=y # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set # # Partition Types @@ -404,14 +445,15 @@ CONFIG_CRC32=m # CONFIG_PRINTK_TIME is not set CONFIG_DEBUG_KERNEL=y CONFIG_LOG_BUF_SHIFT=14 +CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHEDSTATS is not set -# CONFIG_DEBUG_SLAB is not set +CONFIG_DEBUG_SLAB=y # CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_KOBJECT is not set CONFIG_DEBUG_INFO=y # CONFIG_DEBUG_FS is not set CONFIG_FRAME_POINTER=y -CONFIG_PT_PROXY=y +# CONFIG_GPROF is not set # CONFIG_GCOV is not set # CONFIG_SYSCALL_DEBUG is not set diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 2bb4c4f5..e0fdffa 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -663,11 +663,15 @@ struct tty_driver *line_register_devfs(struct lines *set, return driver; } +static spinlock_t winch_handler_lock; +LIST_HEAD(winch_handlers); + void lines_init(struct line *lines, int nlines) { struct line *line; int i; + spin_lock_init(&winch_handler_lock); for(i = 0; i < nlines; i++){ line = &lines[i]; INIT_LIST_HEAD(&line->chan_list); @@ -724,31 +728,30 @@ irqreturn_t winch_interrupt(int irq, void *data, struct pt_regs *unused) return IRQ_HANDLED; } -DECLARE_MUTEX(winch_handler_sem); -LIST_HEAD(winch_handlers); - void register_winch_irq(int fd, int tty_fd, int pid, struct tty_struct *tty) { struct winch *winch; - down(&winch_handler_sem); winch = kmalloc(sizeof(*winch), GFP_KERNEL); if (winch == NULL) { printk("register_winch_irq - kmalloc failed\n"); - goto out; + return; } + *winch = ((struct winch) { .list = LIST_HEAD_INIT(winch->list), .fd = fd, .tty_fd = tty_fd, .pid = pid, .tty = tty }); + + spin_lock(&winch_handler_lock); list_add(&winch->list, &winch_handlers); + spin_unlock(&winch_handler_lock); + if(um_request_irq(WINCH_IRQ, fd, IRQ_READ, winch_interrupt, SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM, "winch", winch) < 0) printk("register_winch_irq - failed to register IRQ\n"); - out: - up(&winch_handler_sem); } static void unregister_winch(struct tty_struct *tty) @@ -756,7 +759,7 @@ static void unregister_winch(struct tty_struct *tty) struct list_head *ele; struct winch *winch, *found = NULL; - down(&winch_handler_sem); + spin_lock(&winch_handler_lock); list_for_each(ele, &winch_handlers){ winch = list_entry(ele, struct winch, list); if(winch->tty == tty){ @@ -764,20 +767,25 @@ static void unregister_winch(struct tty_struct *tty) break; } } - if(found == NULL) - goto out; + goto err; + + list_del(&winch->list); + spin_unlock(&winch_handler_lock); if(winch->pid != -1) os_kill_process(winch->pid, 1); free_irq(WINCH_IRQ, winch); - list_del(&winch->list); kfree(winch); - out: - up(&winch_handler_sem); + + return; +err: + spin_unlock(&winch_handler_lock); } +/* XXX: No lock as it's an exitcall... is this valid? Depending on cleanup + * order... are we sure that nothing else is done on the list? */ static void winch_cleanup(void) { struct list_head *ele; @@ -786,6 +794,9 @@ static void winch_cleanup(void) list_for_each(ele, &winch_handlers){ winch = list_entry(ele, struct winch, list); if(winch->fd != -1){ + /* Why is this different from the above free_irq(), + * which deactivates SIGIO? This searches the FD + * somewhere else and removes it from the list... */ deactivate_fd(winch->fd, WINCH_IRQ); os_close_file(winch->fd); } diff --git a/arch/um/include/mem.h b/arch/um/include/mem.h index 10c46c3..99d3ad4 100644 --- a/arch/um/include/mem.h +++ b/arch/um/include/mem.h @@ -13,6 +13,7 @@ extern int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w); extern int is_remapped(void *virt); extern int physmem_remove_mapping(void *virt); extern void physmem_forget_descriptor(int fd); +extern unsigned long to_phys(void *virt); #endif diff --git a/arch/um/include/registers.h b/arch/um/include/registers.h index 8744abb..0a35e6d 100644 --- a/arch/um/include/registers.h +++ b/arch/um/include/registers.h @@ -14,6 +14,7 @@ extern int restore_fp_registers(int pid, unsigned long *fp_regs); extern void save_registers(int pid, union uml_pt_regs *regs); extern void restore_registers(int pid, union uml_pt_regs *regs); extern void init_registers(int pid); +extern void get_safe_registers(unsigned long * regs); #endif diff --git a/arch/um/include/sysdep-i386/ptrace_user.h b/arch/um/include/sysdep-i386/ptrace_user.h index eca8066..899aa4b 100644 --- a/arch/um/include/sysdep-i386/ptrace_user.h +++ b/arch/um/include/sysdep-i386/ptrace_user.h @@ -20,11 +20,24 @@ #define PT_SYSCALL_ARG3_OFFSET PT_OFFSET(EDX) #define PT_SYSCALL_ARG4_OFFSET PT_OFFSET(ESI) #define PT_SYSCALL_ARG5_OFFSET PT_OFFSET(EDI) +#define PT_SYSCALL_ARG6_OFFSET PT_OFFSET(EBP) #define PT_SYSCALL_RET_OFFSET PT_OFFSET(EAX) +#define REGS_SYSCALL_NR EAX /* This is used before a system call */ +#define REGS_SYSCALL_ARG1 EBX +#define REGS_SYSCALL_ARG2 ECX +#define REGS_SYSCALL_ARG3 EDX +#define REGS_SYSCALL_ARG4 ESI +#define REGS_SYSCALL_ARG5 EDI +#define REGS_SYSCALL_ARG6 EBP + +#define REGS_IP_INDEX EIP +#define REGS_SP_INDEX UESP + #define PT_IP_OFFSET PT_OFFSET(EIP) #define PT_IP(regs) ((regs)[EIP]) +#define PT_SP_OFFSET PT_OFFSET(UESP) #define PT_SP(regs) ((regs)[UESP]) #ifndef FRAME_SIZE diff --git a/arch/um/include/sysdep-i386/stub.h b/arch/um/include/sysdep-i386/stub.h new file mode 100644 index 0000000..d3699fe --- /dev/null +++ b/arch/um/include/sysdep-i386/stub.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_STUB_H +#define __SYSDEP_STUB_H + +#include <asm/ptrace.h> +#include <asm/unistd.h> + +extern void stub_segv_handler(int sig); +extern void stub_clone_handler(void); + +#define STUB_SYSCALL_RET EAX +#define STUB_MMAP_NR __NR_mmap2 +#define MMAP_OFFSET(o) ((o) >> PAGE_SHIFT) + +static inline long stub_syscall2(long syscall, long arg1, long arg2) +{ + long ret; + + __asm__("movl %0, %%ecx; " : : "g" (arg2) : "%ecx"); + __asm__("movl %0, %%ebx; " : : "g" (arg1) : "%ebx"); + __asm__("movl %0, %%eax; " : : "g" (syscall) : "%eax"); + __asm__("int $0x80;" : : : "%eax"); + __asm__ __volatile__("movl %%eax, %0; " : "=g" (ret) :); + return(ret); +} + +static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3) +{ + __asm__("movl %0, %%edx; " : : "g" (arg3) : "%edx"); + return(stub_syscall2(syscall, arg1, arg2)); +} + +static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3, + long arg4) +{ + __asm__("movl %0, %%esi; " : : "g" (arg4) : "%esi"); + return(stub_syscall3(syscall, arg1, arg2, arg3)); +} + +static inline long stub_syscall6(long syscall, long arg1, long arg2, long arg3, + long arg4, long arg5, long arg6) +{ + long ret; + __asm__("movl %0, %%eax; " : : "g" (syscall) : "%eax"); + __asm__("movl %0, %%ebx; " : : "g" (arg1) : "%ebx"); + __asm__("movl %0, %%ecx; " : : "g" (arg2) : "%ecx"); + __asm__("movl %0, %%edx; " : : "g" (arg3) : "%edx"); + __asm__("movl %0, %%esi; " : : "g" (arg4) : "%esi"); + __asm__("movl %0, %%edi; " : : "g" (arg5) : "%edi"); + __asm__ __volatile__("pushl %%ebp ; movl %1, %%ebp; " + "int $0x80; popl %%ebp ; " + "movl %%eax, %0; " : "=g" (ret) : "g" (arg6) : "%eax"); + return(ret); +} + +static inline void trap_myself(void) +{ + __asm("int3"); +} + +#endif diff --git a/arch/um/include/sysdep-x86_64/ptrace_user.h b/arch/um/include/sysdep-x86_64/ptrace_user.h index 3172997..128faf0 100644 --- a/arch/um/include/sysdep-x86_64/ptrace_user.h +++ b/arch/um/include/sysdep-x86_64/ptrace_user.h @@ -55,6 +55,20 @@ #define PTRACE_OLDSETOPTIONS 21 #endif +/* These are before the system call, so the the system call number is RAX + * rather than ORIG_RAX, and arg4 is R10 rather than RCX + */ +#define REGS_SYSCALL_NR PT_INDEX(RAX) +#define REGS_SYSCALL_ARG1 PT_INDEX(RDI) +#define REGS_SYSCALL_ARG2 PT_INDEX(RSI) +#define REGS_SYSCALL_ARG3 PT_INDEX(RDX) +#define REGS_SYSCALL_ARG4 PT_INDEX(R10) +#define REGS_SYSCALL_ARG5 PT_INDEX(R8) +#define REGS_SYSCALL_ARG6 PT_INDEX(R9) + +#define REGS_IP_INDEX PT_INDEX(RIP) +#define REGS_SP_INDEX PT_INDEX(RSP) + #endif /* diff --git a/arch/um/include/sysdep-x86_64/stub.h b/arch/um/include/sysdep-x86_64/stub.h new file mode 100644 index 0000000..f599058 --- /dev/null +++ b/arch/um/include/sysdep-x86_64/stub.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_STUB_H +#define __SYSDEP_STUB_H + +#include <asm/ptrace.h> +#include <asm/unistd.h> +#include <sysdep/ptrace_user.h> + +extern void stub_segv_handler(int sig); +extern void stub_clone_handler(void); + +#define STUB_SYSCALL_RET PT_INDEX(RAX) +#define STUB_MMAP_NR __NR_mmap +#define MMAP_OFFSET(o) (o) + +static inline long stub_syscall2(long syscall, long arg1, long arg2) +{ + long ret; + + __asm__("movq %0, %%rsi; " : : "g" (arg2) : "%rsi"); + __asm__("movq %0, %%rdi; " : : "g" (arg1) : "%rdi"); + __asm__("movq %0, %%rax; " : : "g" (syscall) : "%rax"); + __asm__("syscall;" : : : "%rax", "%r11", "%rcx"); + __asm__ __volatile__("movq %%rax, %0; " : "=g" (ret) :); + return(ret); +} + +static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3) +{ + __asm__("movq %0, %%rdx; " : : "g" (arg3) : "%rdx"); + return(stub_syscall2(syscall, arg1, arg2)); +} + +static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3, + long arg4) +{ + __asm__("movq %0, %%r10; " : : "g" (arg4) : "%r10"); + return(stub_syscall3(syscall, arg1, arg2, arg3)); +} + +static inline long stub_syscall6(long syscall, long arg1, long arg2, long arg3, + long arg4, long arg5, long arg6) +{ + __asm__("movq %0, %%r9; " : : "g" (arg6) : "%r9"); + __asm__("movq %0, %%r8; " : : "g" (arg5) : "%r8"); + return(stub_syscall4(syscall, arg1, arg2, arg3, arg4)); +} + +static inline void trap_myself(void) +{ + __asm("int3"); +} + +#endif diff --git a/arch/um/include/time_user.h b/arch/um/include/time_user.h index f64ef77..17d7ef2 100644 --- a/arch/um/include/time_user.h +++ b/arch/um/include/time_user.h @@ -10,6 +10,7 @@ extern void timer(void); extern void switch_timers(int to_real); extern void idle_sleep(int secs); extern void enable_timer(void); +extern void prepare_timer(void * ptr); extern void disable_timer(void); extern unsigned long time_lock(void); extern void time_unlock(unsigned long); diff --git a/arch/um/include/tlb.h b/arch/um/include/tlb.h index da10972..c6f9628 100644 --- a/arch/um/include/tlb.h +++ b/arch/um/include/tlb.h @@ -37,31 +37,25 @@ struct host_vm_op { extern void mprotect_kernel_vm(int w); extern void force_flush_all(void); extern void fix_range_common(struct mm_struct *mm, unsigned long start_addr, - unsigned long end_addr, int force, int data, - void (*do_ops)(int, struct host_vm_op *, int)); + unsigned long end_addr, int force, + void (*do_ops)(union mm_context *, + struct host_vm_op *, int)); extern int flush_tlb_kernel_range_common(unsigned long start, unsigned long end); extern int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, int r, int w, int x, struct host_vm_op *ops, int index, - int last_filled, int data, - void (*do_ops)(int, struct host_vm_op *, int)); + int last_filled, union mm_context *mmu, + void (*do_ops)(union mm_context *, struct host_vm_op *, + int)); extern int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *ops, int index, int last_filled, - int data, void (*do_ops)(int, struct host_vm_op *, int)); + union mm_context *mmu, + void (*do_ops)(union mm_context *, struct host_vm_op *, + int)); extern int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x, struct host_vm_op *ops, int index, - int last_filled, int data, - void (*do_ops)(int, struct host_vm_op *, int)); + int last_filled, union mm_context *mmu, + void (*do_ops)(union mm_context *, struct host_vm_op *, + int)); #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index 715b083..3942a5f 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -67,6 +67,12 @@ SECTIONS *(.stub .text.* .gnu.linkonce.t.*) /* .gnu.warning sections are handled specially by elf32.em. */ *(.gnu.warning) + + . = ALIGN(4096); + __syscall_stub_start = .; + *(.__syscall_stub*) + __syscall_stub_end = .; + . = ALIGN(4096); } =0x90909090 .fini : { KEEP (*(.fini)) diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index 420e6d5..a24e3b7 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -353,6 +353,8 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len, #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) +extern int __syscall_stub_start, __binary_start; + void setup_physmem(unsigned long start, unsigned long reserve_end, unsigned long len, unsigned long highmem) { @@ -371,6 +373,12 @@ void setup_physmem(unsigned long start, unsigned long reserve_end, exit(1); } + /* Special kludge - This page will be mapped in to userspace processes + * from physmem_fd, so it needs to be written out there. + */ + os_seek_file(physmem_fd, __pa(&__syscall_stub_start)); + os_write_file(physmem_fd, &__syscall_stub_start, PAGE_SIZE); + bootmap_size = init_bootmem(pfn, pfn + delta); free_bootmem(__pa(reserve_end) + bootmap_size, len - bootmap_size - reserve); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 1b5ef3e..c45a60e 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -32,6 +32,7 @@ #include "uml-config.h" #include "choose-mode.h" #include "mode.h" +#include "tempfile.h" #ifdef UML_CONFIG_MODE_SKAS #include "skas.h" #include "skas_ptrace.h" @@ -358,11 +359,16 @@ void forward_pending_sigio(int target) kill(target, SIGIO); } +int ptrace_faultinfo = 0; +int proc_mm = 1; + +extern void *__syscall_stub_start, __syscall_stub_end; + #ifdef UML_CONFIG_MODE_SKAS -static inline int check_skas3_ptrace_support(void) +static inline void check_skas3_ptrace_support(void) { struct ptrace_faultinfo fi; - int pid, n, ret = 1; + int pid, n; printf("Checking for the skas3 patch in the host..."); pid = start_ptraced_child(); @@ -374,33 +380,31 @@ static inline int check_skas3_ptrace_support(void) else { perror("not found"); } - ret = 0; - } else { + } + else { + ptrace_faultinfo = 1; printf("found\n"); } init_registers(pid); stop_ptraced_child(pid, 1, 1); - - return(ret); } int can_do_skas(void) { - int ret = 1; - printf("Checking for /proc/mm..."); if (os_access("/proc/mm", OS_ACC_W_OK) < 0) { + proc_mm = 0; printf("not found\n"); - ret = 0; goto out; - } else { + } + else { printf("found\n"); } - ret = check_skas3_ptrace_support(); out: - return ret; + check_skas3_ptrace_support(); + return 1; } #else int can_do_skas(void) diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile index ff69c4b..d296d55 100644 --- a/arch/um/kernel/skas/Makefile +++ b/arch/um/kernel/skas/Makefile @@ -3,11 +3,14 @@ # Licensed under the GPL # -obj-y := exec_kern.o mem.o mem_user.o mmu.o process.o process_kern.o \ +obj-y := clone.o exec_kern.o mem.o mem_user.o mmu.o process.o process_kern.o \ syscall_kern.o syscall_user.o tlb.o trap_user.o uaccess.o \ subdir- := util -USER_OBJS := process.o +USER_OBJS := process.o clone.o include arch/um/scripts/Makefile.rules + +# clone.o is in the stub, so it can't be built with profiling +$(obj)/clone.o : c_flags = -Wp,-MD,$(depfile) $(call unprofile,$(USER_CFLAGS)) diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c new file mode 100644 index 0000000..4dc55f1 --- /dev/null +++ b/arch/um/kernel/skas/clone.c @@ -0,0 +1,44 @@ +#include <sched.h> +#include <signal.h> +#include <sys/mman.h> +#include <sys/time.h> +#include <asm/unistd.h> +#include <asm/page.h> +#include "ptrace_user.h" +#include "skas.h" +#include "stub-data.h" +#include "uml-config.h" +#include "sysdep/stub.h" + +/* This is in a separate file because it needs to be compiled with any + * extraneous gcc flags (-pg, -fprofile-arcs, -ftest-coverage) disabled + */ +void __attribute__ ((__section__ (".__syscall_stub"))) +stub_clone_handler(void) +{ + long err; + struct stub_data *from = (struct stub_data *) UML_CONFIG_STUB_DATA; + + err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD, + UML_CONFIG_STUB_DATA + PAGE_SIZE / 2 - + sizeof(void *)); + if(err != 0) + goto out; + + err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); + if(err) + goto out; + + err = stub_syscall3(__NR_setitimer, ITIMER_VIRTUAL, + (long) &from->timer, 0); + if(err) + goto out; + + err = stub_syscall6(STUB_MMAP_NR, UML_CONFIG_STUB_DATA, PAGE_SIZE, + PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, + from->fd, from->offset); + out: + /* save current result. Parent: pid; child: retcode of mmap */ + from->err = err; + trap_myself(); +} diff --git a/arch/um/kernel/skas/exec_kern.c b/arch/um/kernel/skas/exec_kern.c index c6b4d5d..77ed7bb 100644 --- a/arch/um/kernel/skas/exec_kern.c +++ b/arch/um/kernel/skas/exec_kern.c @@ -18,7 +18,7 @@ void flush_thread_skas(void) { force_flush_all(); - switch_mm_skas(current->mm->context.skas.mm_fd); + switch_mm_skas(¤t->mm->context.skas.id); } void start_thread_skas(struct pt_regs *regs, unsigned long eip, diff --git a/arch/um/kernel/skas/include/mm_id.h b/arch/um/kernel/skas/include/mm_id.h new file mode 100644 index 0000000..48dd098 --- /dev/null +++ b/arch/um/kernel/skas/include/mm_id.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2005 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __MM_ID_H +#define __MM_ID_H + +struct mm_id { + union { + int mm_fd; + int pid; + } u; + unsigned long stack; +}; + +#endif diff --git a/arch/um/kernel/skas/include/mmu-skas.h b/arch/um/kernel/skas/include/mmu-skas.h index 4cd60d7..278b72f 100644 --- a/arch/um/kernel/skas/include/mmu-skas.h +++ b/arch/um/kernel/skas/include/mmu-skas.h @@ -6,10 +6,15 @@ #ifndef __SKAS_MMU_H #define __SKAS_MMU_H +#include "mm_id.h" + struct mmu_context_skas { - int mm_fd; + struct mm_id id; + unsigned long last_page_table; }; +extern void switch_mm_skas(struct mm_id * mm_idp); + #endif /* diff --git a/arch/um/kernel/skas/include/skas.h b/arch/um/kernel/skas/include/skas.h index 96b51db..d983ea8 100644 --- a/arch/um/kernel/skas/include/skas.h +++ b/arch/um/kernel/skas/include/skas.h @@ -6,9 +6,11 @@ #ifndef __SKAS_H #define __SKAS_H +#include "mm_id.h" #include "sysdep/ptrace.h" extern int userspace_pid[]; +extern int proc_mm, ptrace_faultinfo; extern void switch_threads(void *me, void *next); extern void thread_wait(void *sw, void *fb); @@ -22,16 +24,18 @@ extern void new_thread_proc(void *stack, void (*handler)(int sig)); extern void remove_sigstack(void); extern void new_thread_handler(int sig); extern void handle_syscall(union uml_pt_regs *regs); -extern void map(int fd, unsigned long virt, unsigned long len, int r, int w, - int x, int phys_fd, unsigned long long offset); -extern int unmap(int fd, void *addr, unsigned long len); -extern int protect(int fd, unsigned long addr, unsigned long len, - int r, int w, int x); +extern int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, + int r, int w, int x, int phys_fd, unsigned long long offset); +extern int unmap(struct mm_id * mm_idp, void *addr, unsigned long len); +extern int protect(struct mm_id * mm_idp, unsigned long addr, + unsigned long len, int r, int w, int x); extern void user_signal(int sig, union uml_pt_regs *regs, int pid); extern int new_mm(int from); -extern void start_userspace(int cpu); +extern int start_userspace(unsigned long stub_stack); +extern int copy_context_skas0(unsigned long stack, int pid); extern void get_skas_faultinfo(int pid, struct faultinfo * fi); extern long execute_syscall_skas(void *r); +extern unsigned long current_stub_stack(void); #endif diff --git a/arch/um/kernel/skas/include/stub-data.h b/arch/um/kernel/skas/include/stub-data.h new file mode 100644 index 0000000..f6ed92c --- /dev/null +++ b/arch/um/kernel/skas/include/stub-data.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2005 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __STUB_DATA_H +#define __STUB_DATA_H + +#include <sys/time.h> + +struct stub_data { + long offset; + int fd; + struct itimerval timer; + long err; +}; + +#endif diff --git a/arch/um/kernel/skas/mem.c b/arch/um/kernel/skas/mem.c index 438db2f..147466d 100644 --- a/arch/um/kernel/skas/mem.c +++ b/arch/um/kernel/skas/mem.c @@ -5,7 +5,9 @@ #include "linux/config.h" #include "linux/mm.h" +#include "asm/pgtable.h" #include "mem_user.h" +#include "skas.h" unsigned long set_task_sizes_skas(int arg, unsigned long *host_size_out, unsigned long *task_size_out) @@ -18,7 +20,9 @@ unsigned long set_task_sizes_skas(int arg, unsigned long *host_size_out, *task_size_out = CONFIG_HOST_TASK_SIZE; #else *host_size_out = top; - *task_size_out = top; + if (proc_mm && ptrace_faultinfo) + *task_size_out = top; + else *task_size_out = CONFIG_STUB_START & PGDIR_MASK; #endif return(((unsigned long) set_task_sizes_skas) & ~0xffffff); } diff --git a/arch/um/kernel/skas/mem_user.c b/arch/um/kernel/skas/mem_user.c index 1310bf1..b0980ff 100644 --- a/arch/um/kernel/skas/mem_user.c +++ b/arch/um/kernel/skas/mem_user.c @@ -3,100 +3,171 @@ * Licensed under the GPL */ +#include <signal.h> #include <errno.h> #include <sys/mman.h> +#include <sys/wait.h> +#include <asm/page.h> +#include <asm/unistd.h> #include "mem_user.h" #include "mem.h" +#include "mm_id.h" #include "user.h" #include "os.h" #include "proc_mm.h" - -void map(int fd, unsigned long virt, unsigned long len, int r, int w, - int x, int phys_fd, unsigned long long offset) +#include "ptrace_user.h" +#include "user_util.h" +#include "kern_util.h" +#include "task.h" +#include "registers.h" +#include "uml-config.h" +#include "sysdep/ptrace.h" +#include "sysdep/stub.h" +#include "skas.h" + +extern unsigned long syscall_stub, __syscall_stub_start; + +extern void wait_stub_done(int pid, int sig, char * fname); + +static long run_syscall_stub(struct mm_id * mm_idp, int syscall, + unsigned long *args) { - struct proc_mm_op map; - int prot, n; - - prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | - (x ? PROT_EXEC : 0); - - map = ((struct proc_mm_op) { .op = MM_MMAP, - .u = - { .mmap = - { .addr = virt, - .len = len, - .prot = prot, - .flags = MAP_SHARED | - MAP_FIXED, - .fd = phys_fd, - .offset = offset - } } } ); - n = os_write_file(fd, &map, sizeof(map)); - if(n != sizeof(map)) - printk("map : /proc/mm map failed, err = %d\n", -n); + int n, pid = mm_idp->u.pid; + unsigned long regs[MAX_REG_NR]; + + get_safe_registers(regs); + regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE + + ((unsigned long) &syscall_stub - + (unsigned long) &__syscall_stub_start); + /* XXX Don't have a define for starting a syscall */ + regs[REGS_SYSCALL_NR] = syscall; + regs[REGS_SYSCALL_ARG1] = args[0]; + regs[REGS_SYSCALL_ARG2] = args[1]; + regs[REGS_SYSCALL_ARG3] = args[2]; + regs[REGS_SYSCALL_ARG4] = args[3]; + regs[REGS_SYSCALL_ARG5] = args[4]; + regs[REGS_SYSCALL_ARG6] = args[5]; + n = ptrace_setregs(pid, regs); + if(n < 0){ + printk("run_syscall_stub : PTRACE_SETREGS failed, " + "errno = %d\n", n); + return(n); + } + + wait_stub_done(pid, 0, "run_syscall_stub"); + + return(*((unsigned long *) mm_idp->stack)); } -int unmap(int fd, void *addr, unsigned long len) +int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, + int r, int w, int x, int phys_fd, unsigned long long offset) { - struct proc_mm_op unmap; - int n; - - unmap = ((struct proc_mm_op) { .op = MM_MUNMAP, - .u = - { .munmap = - { .addr = (unsigned long) addr, - .len = len } } } ); - n = os_write_file(fd, &unmap, sizeof(unmap)); - if(n != sizeof(unmap)) { - if(n < 0) - return(n); - else if(n > 0) - return(-EIO); - } - - return(0); + int prot, n; + + prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + (x ? PROT_EXEC : 0); + + if(proc_mm){ + struct proc_mm_op map; + int fd = mm_idp->u.mm_fd; + map = ((struct proc_mm_op) { .op = MM_MMAP, + .u = + { .mmap = + { .addr = virt, + .len = len, + .prot = prot, + .flags = MAP_SHARED | + MAP_FIXED, + .fd = phys_fd, + .offset= offset + } } } ); + n = os_write_file(fd, &map, sizeof(map)); + if(n != sizeof(map)) + printk("map : /proc/mm map failed, err = %d\n", -n); + } + else { + long res; + unsigned long args[] = { virt, len, prot, + MAP_SHARED | MAP_FIXED, phys_fd, + MMAP_OFFSET(offset) }; + + res = run_syscall_stub(mm_idp, STUB_MMAP_NR, args); + if((void *) res == MAP_FAILED) + printk("mmap stub failed, errno = %d\n", res); + } + + return 0; } -int protect(int fd, unsigned long addr, unsigned long len, int r, int w, - int x, int must_succeed) +int unmap(struct mm_id *mm_idp, void *addr, unsigned long len) { - struct proc_mm_op protect; - int prot, n; - - prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | - (x ? PROT_EXEC : 0); - - protect = ((struct proc_mm_op) { .op = MM_MPROTECT, - .u = - { .mprotect = - { .addr = (unsigned long) addr, - .len = len, - .prot = prot } } } ); - - n = os_write_file(fd, &protect, sizeof(protect)); - if(n != sizeof(protect)) { - if(n == 0) return(0); - - if(must_succeed) - panic("protect failed, err = %d", -n); - - return(-EIO); - } + int n; + + if(proc_mm){ + struct proc_mm_op unmap; + int fd = mm_idp->u.mm_fd; + unmap = ((struct proc_mm_op) { .op = MM_MUNMAP, + .u = + { .munmap = + { .addr = + (unsigned long) addr, + .len = len } } } ); + n = os_write_file(fd, &unmap, sizeof(unmap)); + if(n != sizeof(unmap)) { + if(n < 0) + return(n); + else if(n > 0) + return(-EIO); + } + } + else { + int res; + unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0, + 0 }; + + res = run_syscall_stub(mm_idp, __NR_munmap, args); + if(res < 0) + printk("munmap stub failed, errno = %d\n", res); + } + + return(0); +} - return(0); +int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len, + int r, int w, int x) +{ + struct proc_mm_op protect; + int prot, n; + + prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + (x ? PROT_EXEC : 0); + + if(proc_mm){ + int fd = mm_idp->u.mm_fd; + protect = ((struct proc_mm_op) { .op = MM_MPROTECT, + .u = + { .mprotect = + { .addr = + (unsigned long) addr, + .len = len, + .prot = prot } } } ); + + n = os_write_file(fd, &protect, sizeof(protect)); + if(n != sizeof(protect)) + panic("protect failed, err = %d", -n); + } + else { + int res; + unsigned long args[] = { addr, len, prot, 0, 0, 0 }; + + res = run_syscall_stub(mm_idp, __NR_mprotect, args); + if(res < 0) + panic("mprotect stub failed, errno = %d\n", res); + } + + return(0); } void before_mem_skas(unsigned long unused) { } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 6cb9a6d..d232daa 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -3,46 +3,143 @@ * Licensed under the GPL */ +#include "linux/config.h" #include "linux/sched.h" #include "linux/list.h" #include "linux/spinlock.h" #include "linux/slab.h" +#include "linux/errno.h" +#include "linux/mm.h" #include "asm/current.h" #include "asm/segment.h" #include "asm/mmu.h" +#include "asm/pgalloc.h" +#include "asm/pgtable.h" #include "os.h" #include "skas.h" +extern int __syscall_stub_start; + +static int init_stub_pte(struct mm_struct *mm, unsigned long proc, + unsigned long kernel) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + spin_lock(&mm->page_table_lock); + pgd = pgd_offset(mm, proc); + pud = pud_alloc(mm, pgd, proc); + if (!pud) + goto out; + + pmd = pmd_alloc(mm, pud, proc); + if (!pmd) + goto out_pmd; + + pte = pte_alloc_map(mm, pmd, proc); + if (!pte) + goto out_pte; + + /* There's an interaction between the skas0 stub pages, stack + * randomization, and the BUG at the end of exit_mmap. exit_mmap + * checks that the number of page tables freed is the same as had + * been allocated. If the stack is on the last page table page, + * then the stack pte page will be freed, and if not, it won't. To + * avoid having to know where the stack is, or if the process mapped + * something at the top of its address space for some other reason, + * we set TASK_SIZE to end at the start of the last page table. + * This keeps exit_mmap off the last page, but introduces a leak + * of that page. So, we hang onto it here and free it in + * destroy_context_skas. + */ + + mm->context.skas.last_page_table = pmd_page_kernel(*pmd); + + *pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT)); + *pte = pte_mkexec(*pte); + *pte = pte_wrprotect(*pte); + spin_unlock(&mm->page_table_lock); + return(0); + + out_pmd: + pud_free(pud); + out_pte: + pmd_free(pmd); + out: + spin_unlock(&mm->page_table_lock); + return(-ENOMEM); +} + int init_new_context_skas(struct task_struct *task, struct mm_struct *mm) { - int from; + struct mm_struct *cur_mm = current->mm; + struct mm_id *cur_mm_id = &cur_mm->context.skas.id; + struct mm_id *mm_id = &mm->context.skas.id; + unsigned long stack; + int from, ret; - if((current->mm != NULL) && (current->mm != &init_mm)) - from = current->mm->context.skas.mm_fd; - else from = -1; + if(proc_mm){ + if((cur_mm != NULL) && (cur_mm != &init_mm)) + from = cur_mm->context.skas.id.u.mm_fd; + else from = -1; - mm->context.skas.mm_fd = new_mm(from); - if(mm->context.skas.mm_fd < 0){ - printk("init_new_context_skas - new_mm failed, errno = %d\n", - mm->context.skas.mm_fd); - return(mm->context.skas.mm_fd); + ret = new_mm(from); + if(ret < 0){ + printk("init_new_context_skas - new_mm failed, " + "errno = %d\n", ret); + return ret; + } + mm_id->u.mm_fd = ret; } + else { + /* This zeros the entry that pgd_alloc didn't, needed since + * we are about to reinitialize it, and want mm.nr_ptes to + * be accurate. + */ + mm->pgd[USER_PTRS_PER_PGD] = __pgd(0); - return(0); + ret = init_stub_pte(mm, CONFIG_STUB_CODE, + (unsigned long) &__syscall_stub_start); + if(ret) + goto out; + + ret = -ENOMEM; + stack = get_zeroed_page(GFP_KERNEL); + if(stack == 0) + goto out; + mm_id->stack = stack; + + ret = init_stub_pte(mm, CONFIG_STUB_DATA, stack); + if(ret) + goto out_free; + + mm->nr_ptes--; + + if((cur_mm != NULL) && (cur_mm != &init_mm)) + mm_id->u.pid = copy_context_skas0(stack, + cur_mm_id->u.pid); + else mm_id->u.pid = start_userspace(stack); + } + + return 0; + + out_free: + free_page(mm_id->stack); + out: + return ret; } void destroy_context_skas(struct mm_struct *mm) { - os_close_file(mm->context.skas.mm_fd); -} + struct mmu_context_skas *mmu = &mm->context.skas; -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ + if(proc_mm) + os_close_file(mmu->id.u.mm_fd); + else { + os_kill_ptraced_process(mmu->id.u.pid, 1); + free_page(mmu->id.stack); + free_page(mmu->last_page_table); + } +} diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 773cd2b..ba671da 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2002- 2004 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -13,7 +13,9 @@ #include <sys/wait.h> #include <sys/mman.h> #include <sys/user.h> +#include <sys/time.h> #include <asm/unistd.h> +#include <asm/types.h> #include "user.h" #include "ptrace_user.h" #include "time_user.h" @@ -21,13 +23,18 @@ #include "user_util.h" #include "kern_util.h" #include "skas.h" +#include "stub-data.h" +#include "mm_id.h" #include "sysdep/sigcontext.h" +#include "sysdep/stub.h" #include "os.h" #include "proc_mm.h" #include "skas_ptrace.h" #include "chan_user.h" #include "signal_user.h" #include "registers.h" +#include "mem.h" +#include "uml-config.h" #include "process.h" int is_skas_winch(int pid, int fd, void *data) @@ -39,20 +46,55 @@ int is_skas_winch(int pid, int fd, void *data) return(1); } -void get_skas_faultinfo(int pid, struct faultinfo * fi) +void wait_stub_done(int pid, int sig, char * fname) { - int err; - - err = ptrace(PTRACE_FAULTINFO, pid, 0, fi); - if(err) - panic("get_skas_faultinfo - PTRACE_FAULTINFO failed, " - "errno = %d\n", errno); + int n, status, err; + + do { + if ( sig != -1 ) { + err = ptrace(PTRACE_CONT, pid, 0, sig); + if(err) + panic("%s : continue failed, errno = %d\n", + fname, errno); + } + sig = 0; + + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + } while((n >= 0) && WIFSTOPPED(status) && + (WSTOPSIG(status) == SIGVTALRM)); + + if((n < 0) || !WIFSTOPPED(status) || + (WSTOPSIG(status) != SIGUSR1 && WSTOPSIG(status != SIGTRAP))){ + panic("%s : failed to wait for SIGUSR1/SIGTRAP, " + "pid = %d, n = %d, errno = %d, status = 0x%x\n", + fname, pid, n, errno, status); + } +} - /* Special handling for i386, which has different structs */ - if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo)) - memset((char *)fi + sizeof(struct ptrace_faultinfo), 0, - sizeof(struct faultinfo) - - sizeof(struct ptrace_faultinfo)); +void get_skas_faultinfo(int pid, struct faultinfo * fi) +{ + int err; + + if(ptrace_faultinfo){ + err = ptrace(PTRACE_FAULTINFO, pid, 0, fi); + if(err) + panic("get_skas_faultinfo - PTRACE_FAULTINFO failed, " + "errno = %d\n", errno); + + /* Special handling for i386, which has different structs */ + if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo)) + memset((char *)fi + sizeof(struct ptrace_faultinfo), 0, + sizeof(struct faultinfo) - + sizeof(struct ptrace_faultinfo)); + } + else { + wait_stub_done(pid, SIGSEGV, "get_skas_faultinfo"); + + /* faultinfo is prepared by the stub-segv-handler at start of + * the stub stack page. We just have to copy it. + */ + memcpy(fi, (void *)current_stub_stack(), sizeof(*fi)); + } } static void handle_segv(int pid, union uml_pt_regs * regs) @@ -91,11 +133,56 @@ static void handle_trap(int pid, union uml_pt_regs *regs, int local_using_sysemu handle_syscall(regs); } -static int userspace_tramp(void *arg) +extern int __syscall_stub_start; + +static int userspace_tramp(void *stack) { - init_new_thread_signals(0); - enable_timer(); + void *addr; + ptrace(PTRACE_TRACEME, 0, 0, 0); + + init_new_thread_signals(1); + enable_timer(); + + if(!proc_mm){ + /* This has a pte, but it can't be mapped in with the usual + * tlb_flush mechanism because this is part of that mechanism + */ + int fd; + __u64 offset; + + fd = phys_mapping(to_phys(&__syscall_stub_start), &offset); + addr = mmap64((void *) UML_CONFIG_STUB_CODE, page_size(), + PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); + if(addr == MAP_FAILED){ + printk("mapping mmap stub failed, errno = %d\n", + errno); + exit(1); + } + + if(stack != NULL){ + fd = phys_mapping(to_phys(stack), &offset); + addr = mmap((void *) UML_CONFIG_STUB_DATA, page_size(), + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_SHARED, fd, offset); + if(addr == MAP_FAILED){ + printk("mapping segfault stack failed, " + "errno = %d\n", errno); + exit(1); + } + } + } + if(!ptrace_faultinfo && (stack != NULL)){ + unsigned long v = UML_CONFIG_STUB_CODE + + (unsigned long) stub_segv_handler - + (unsigned long) &__syscall_stub_start; + + set_sigstack((void *) UML_CONFIG_STUB_DATA, page_size()); + set_handler(SIGSEGV, (void *) v, SA_ONSTACK, + SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, + SIGUSR1, -1); + } + os_stop_process(os_getpid()); return(0); } @@ -105,11 +192,11 @@ static int userspace_tramp(void *arg) #define NR_CPUS 1 int userspace_pid[NR_CPUS]; -void start_userspace(int cpu) +int start_userspace(unsigned long stub_stack) { void *stack; unsigned long sp; - int pid, status, n; + int pid, status, n, flags; stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); @@ -117,8 +204,9 @@ void start_userspace(int cpu) panic("start_userspace : mmap failed, errno = %d", errno); sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *); - pid = clone(userspace_tramp, (void *) sp, - CLONE_FILES | CLONE_VM | SIGCHLD, NULL); + flags = CLONE_FILES | SIGCHLD; + if(proc_mm) flags |= CLONE_VM; + pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack); if(pid < 0) panic("start_userspace : clone failed, errno = %d", errno); @@ -140,7 +228,7 @@ void start_userspace(int cpu) if(munmap(stack, PAGE_SIZE) < 0) panic("start_userspace : munmap failed, errno = %d\n", errno); - userspace_pid[cpu] = pid; + return(pid); } void userspace(union uml_pt_regs *regs) @@ -174,7 +262,9 @@ void userspace(union uml_pt_regs *regs) if(WIFSTOPPED(status)){ switch(WSTOPSIG(status)){ case SIGSEGV: - handle_segv(pid, regs); + if(PTRACE_FULL_FAULTINFO || !ptrace_faultinfo) + user_signal(SIGSEGV, regs, pid); + else handle_segv(pid, regs); break; case SIGTRAP + 0x80: handle_trap(pid, regs, local_using_sysemu); @@ -194,6 +284,7 @@ void userspace(union uml_pt_regs *regs) printk("userspace - child stopped with signal " "%d\n", WSTOPSIG(status)); } + pid = userspace_pid[0]; interrupt_end(); /* Avoid -ERESTARTSYS handling in host */ @@ -207,6 +298,67 @@ void userspace(union uml_pt_regs *regs) #define INIT_JMP_HALT 3 #define INIT_JMP_REBOOT 4 + +int copy_context_skas0(unsigned long new_stack, int pid) +{ + int err; + unsigned long regs[MAX_REG_NR]; + unsigned long current_stack = current_stub_stack(); + struct stub_data *data = (struct stub_data *) current_stack; + struct stub_data *child_data = (struct stub_data *) new_stack; + __u64 new_offset; + int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset); + + /* prepare offset and fd of child's stack as argument for parent's + * and child's mmap2 calls + */ + *data = ((struct stub_data) { .offset = MMAP_OFFSET(new_offset), + .fd = new_fd, + .timer = ((struct itimerval) + { { 0, 1000000 / hz() }, + { 0, 1000000 / hz() }})}); + get_safe_registers(regs); + + /* Set parent's instruction pointer to start of clone-stub */ + regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE + + (unsigned long) stub_clone_handler - + (unsigned long) &__syscall_stub_start; + regs[REGS_SP_INDEX] = UML_CONFIG_STUB_DATA + PAGE_SIZE - + sizeof(void *); + err = ptrace_setregs(pid, regs); + if(err < 0) + panic("copy_context_skas0 : PTRACE_SETREGS failed, " + "pid = %d, errno = %d\n", pid, errno); + + /* set a well known return code for detection of child write failure */ + child_data->err = 12345678; + + /* Wait, until parent has finished its work: read child's pid from + * parent's stack, and check, if bad result. + */ + wait_stub_done(pid, 0, "copy_context_skas0"); + + pid = data->err; + if(pid < 0) + panic("copy_context_skas0 - stub-parent reports error %d\n", + pid); + + /* Wait, until child has finished too: read child's result from + * child's stack and check it. + */ + wait_stub_done(pid, -1, "copy_context_skas0"); + if (child_data->err != UML_CONFIG_STUB_DATA) + panic("copy_context_skas0 - stub-child reports error %d\n", + child_data->err); + + if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL, + (void *)PTRACE_O_TRACESYSGOOD) < 0) + panic("copy_context_skas0 : PTRACE_SETOPTIONS failed, " + "errno = %d\n", errno); + + return pid; +} + void new_thread(void *stack, void **switch_buf_ptr, void **fork_buf_ptr, void (*handler)(int)) { @@ -334,21 +486,19 @@ void reboot_skas(void) siglongjmp(initial_jmpbuf, INIT_JMP_REBOOT); } -void switch_mm_skas(int mm_fd) +void switch_mm_skas(struct mm_id *mm_idp) { int err; #warning need cpu pid in switch_mm_skas - err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, mm_fd); - if(err) - panic("switch_mm_skas - PTRACE_SWITCH_MM failed, errno = %d\n", - errno); -} - -void kill_off_processes_skas(void) -{ -#warning need to loop over userspace_pids in kill_off_processes_skas - os_kill_ptraced_process(userspace_pid[0], 1); + if(proc_mm){ + err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, + mm_idp->u.mm_fd); + if(err) + panic("switch_mm_skas - PTRACE_SWITCH_MM failed, " + "errno = %d\n", errno); + } + else userspace_pid[0] = mm_idp->u.pid; } /* diff --git a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c index 0a7b8aa..cbabab1 100644 --- a/arch/um/kernel/skas/process_kern.c +++ b/arch/um/kernel/skas/process_kern.c @@ -175,9 +175,12 @@ static int start_kernel_proc(void *unused) return(0); } +extern int userspace_pid[]; + int start_uml_skas(void) { - start_userspace(0); + if(proc_mm) + userspace_pid[0] = start_userspace(0); init_new_thread_signals(1); @@ -199,3 +202,31 @@ int thread_pid_skas(struct task_struct *task) #warning Need to look up userspace_pid by cpu return(userspace_pid[0]); } + +void kill_off_processes_skas(void) +{ + if(proc_mm) +#warning need to loop over userspace_pids in kill_off_processes_skas + os_kill_ptraced_process(userspace_pid[0], 1); + else { + struct task_struct *p; + int pid, me; + + me = os_getpid(); + for_each_process(p){ + if(p->mm == NULL) + continue; + + pid = p->mm->context.skas.id.u.pid; + os_kill_ptraced_process(pid, 1); + } + } +} + +unsigned long current_stub_stack(void) +{ + if(current->mm == NULL) + return(0); + + return(current->mm->context.skas.id.stack); +} diff --git a/arch/um/kernel/skas/tlb.c b/arch/um/kernel/skas/tlb.c index b8c5e717..6230999 100644 --- a/arch/um/kernel/skas/tlb.c +++ b/arch/um/kernel/skas/tlb.c @@ -6,6 +6,7 @@ #include "linux/stddef.h" #include "linux/sched.h" +#include "linux/config.h" #include "linux/mm.h" #include "asm/page.h" #include "asm/pgtable.h" @@ -17,7 +18,7 @@ #include "os.h" #include "tlb.h" -static void do_ops(int fd, struct host_vm_op *ops, int last) +static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last) { struct host_vm_op *op; int i; @@ -26,18 +27,18 @@ static void do_ops(int fd, struct host_vm_op *ops, int last) op = &ops[i]; switch(op->type){ case MMAP: - map(fd, op->u.mmap.addr, op->u.mmap.len, + map(&mmu->skas.id, op->u.mmap.addr, op->u.mmap.len, op->u.mmap.r, op->u.mmap.w, op->u.mmap.x, op->u.mmap.fd, op->u.mmap.offset); break; case MUNMAP: - unmap(fd, (void *) op->u.munmap.addr, + unmap(&mmu->skas.id, (void *) op->u.munmap.addr, op->u.munmap.len); break; case MPROTECT: - protect(fd, op->u.mprotect.addr, op->u.mprotect.len, - op->u.mprotect.r, op->u.mprotect.w, - op->u.mprotect.x); + protect(&mmu->skas.id, op->u.mprotect.addr, + op->u.mprotect.len, op->u.mprotect.r, + op->u.mprotect.w, op->u.mprotect.x); break; default: printk("Unknown op type %d in do_ops\n", op->type); @@ -46,12 +47,15 @@ static void do_ops(int fd, struct host_vm_op *ops, int last) } } +extern int proc_mm; + static void fix_range(struct mm_struct *mm, unsigned long start_addr, unsigned long end_addr, int force) { - int fd = mm->context.skas.mm_fd; + if(!proc_mm && (end_addr > CONFIG_STUB_START)) + end_addr = CONFIG_STUB_START; - fix_range_common(mm, start_addr, end_addr, force, fd, do_ops); + fix_range_common(mm, start_addr, end_addr, force, do_ops); } void __flush_tlb_one_skas(unsigned long addr) @@ -69,17 +73,20 @@ void flush_tlb_range_skas(struct vm_area_struct *vma, unsigned long start, void flush_tlb_mm_skas(struct mm_struct *mm) { + unsigned long end; + /* Don't bother flushing if this address space is about to be * destroyed. */ if(atomic_read(&mm->mm_users) == 0) return; - fix_range(mm, 0, host_task_size, 0); - flush_tlb_kernel_range_common(start_vm, end_vm); + end = proc_mm ? task_size : CONFIG_STUB_START; + fix_range(mm, 0, end, 0); } void force_flush_all_skas(void) { - fix_range(current->mm, 0, host_task_size, 1); + unsigned long end = proc_mm ? task_size : CONFIG_STUB_START; + fix_range(current->mm, 0, end, 1); } diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index f829b30..c40b611 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -48,6 +48,13 @@ void enable_timer(void) set_interval(ITIMER_VIRTUAL); } +void prepare_timer(void * ptr) +{ + int usec = 1000000/hz(); + *(struct itimerval *)ptr = ((struct itimerval) { { 0, usec }, + { 0, usec }}); +} + void disable_timer(void) { struct itimerval disable = ((struct itimerval) { { 0, 0 }, { 0, 0 }}); diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index eda477e..83ec8d47 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -18,13 +18,15 @@ #define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1)) void fix_range_common(struct mm_struct *mm, unsigned long start_addr, - unsigned long end_addr, int force, int data, - void (*do_ops)(int, struct host_vm_op *, int)) + unsigned long end_addr, int force, + void (*do_ops)(union mm_context *, struct host_vm_op *, + int)) { pgd_t *npgd; pud_t *npud; pmd_t *npmd; pte_t *npte; + union mm_context *mmu = &mm->context; unsigned long addr, end; int r, w, x; struct host_vm_op ops[16]; @@ -40,7 +42,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, end = end_addr; if(force || pgd_newpage(*npgd)){ op_index = add_munmap(addr, end - addr, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); pgd_mkuptodate(*npgd); } @@ -55,7 +57,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, end = end_addr; if(force || pud_newpage(*npud)){ op_index = add_munmap(addr, end - addr, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); pud_mkuptodate(*npud); } @@ -70,7 +72,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, end = end_addr; if(force || pmd_newpage(*npmd)){ op_index = add_munmap(addr, end - addr, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); pmd_mkuptodate(*npmd); } @@ -93,21 +95,21 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, op_index = add_mmap(addr, pte_val(*npte) & PAGE_MASK, PAGE_SIZE, r, w, x, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); else op_index = add_munmap(addr, PAGE_SIZE, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); } else if(pte_newprot(*npte)) op_index = add_mprotect(addr, PAGE_SIZE, r, w, x, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); *npte = pte_mkuptodate(*npte); addr += PAGE_SIZE; } - (*do_ops)(data, ops, op_index); + (*do_ops)(mmu, ops, op_index); } int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) @@ -195,51 +197,6 @@ int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) return(updated); } -void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) -{ - address &= PAGE_MASK; - flush_tlb_range(vma, address, address + PAGE_SIZE); -} - -void flush_tlb_all(void) -{ - flush_tlb_mm(current->mm); -} - -void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - CHOOSE_MODE_PROC(flush_tlb_kernel_range_tt, - flush_tlb_kernel_range_common, start, end); -} - -void flush_tlb_kernel_vm(void) -{ - CHOOSE_MODE(flush_tlb_kernel_vm_tt(), - flush_tlb_kernel_range_common(start_vm, end_vm)); -} - -void __flush_tlb_one(unsigned long addr) -{ - CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr); -} - -void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, vma, start, - end); -} - -void flush_tlb_mm(struct mm_struct *mm) -{ - CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm); -} - -void force_flush_all(void) -{ - CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas()); -} - pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address) { return(pgd_offset(mm, address)); @@ -270,9 +227,9 @@ pte_t *addr_pte(struct task_struct *task, unsigned long addr) } int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, - int r, int w, int x, struct host_vm_op *ops, int index, - int last_filled, int data, - void (*do_ops)(int, struct host_vm_op *, int)) + int r, int w, int x, struct host_vm_op *ops, int index, + int last_filled, union mm_context *mmu, + void (*do_ops)(union mm_context *, struct host_vm_op *, int)) { __u64 offset; struct host_vm_op *last; @@ -292,7 +249,7 @@ int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, } if(index == last_filled){ - (*do_ops)(data, ops, last_filled); + (*do_ops)(mmu, ops, last_filled); index = -1; } @@ -310,8 +267,8 @@ int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, } int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *ops, - int index, int last_filled, int data, - void (*do_ops)(int, struct host_vm_op *, int)) + int index, int last_filled, union mm_context *mmu, + void (*do_ops)(union mm_context *, struct host_vm_op *, int)) { struct host_vm_op *last; @@ -325,7 +282,7 @@ int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *ops, } if(index == last_filled){ - (*do_ops)(data, ops, last_filled); + (*do_ops)(mmu, ops, last_filled); index = -1; } @@ -337,8 +294,9 @@ int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *ops, } int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x, - struct host_vm_op *ops, int index, int last_filled, int data, - void (*do_ops)(int, struct host_vm_op *, int)) + struct host_vm_op *ops, int index, int last_filled, + union mm_context *mmu, + void (*do_ops)(union mm_context *, struct host_vm_op *, int)) { struct host_vm_op *last; @@ -354,7 +312,7 @@ int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x, } if(index == last_filled){ - (*do_ops)(data, ops, last_filled); + (*do_ops)(mmu, ops, last_filled); index = -1; } @@ -367,3 +325,49 @@ int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x, .x = x } } }); return(index); } + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) +{ + address &= PAGE_MASK; + flush_tlb_range(vma, address, address + PAGE_SIZE); +} + +void flush_tlb_all(void) +{ + flush_tlb_mm(current->mm); +} + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + CHOOSE_MODE_PROC(flush_tlb_kernel_range_tt, + flush_tlb_kernel_range_common, start, end); +} + +void flush_tlb_kernel_vm(void) +{ + CHOOSE_MODE(flush_tlb_kernel_vm_tt(), + flush_tlb_kernel_range_common(start_vm, end_vm)); +} + +void __flush_tlb_one(unsigned long addr) +{ + CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr); +} + +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, vma, start, + end); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm); +} + +void force_flush_all(void) +{ + CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas()); +} + diff --git a/arch/um/kernel/tt/tlb.c b/arch/um/kernel/tt/tlb.c index 203216a..2eefb43 100644 --- a/arch/um/kernel/tt/tlb.c +++ b/arch/um/kernel/tt/tlb.c @@ -17,7 +17,7 @@ #include "os.h" #include "tlb.h" -static void do_ops(int unused, struct host_vm_op *ops, int last) +static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last) { struct host_vm_op *op; int i; @@ -55,7 +55,7 @@ static void fix_range(struct mm_struct *mm, unsigned long start_addr, panic("fix_range fixing wrong address space, current = 0x%p", current); - fix_range_common(mm, start_addr, end_addr, force, 0, do_ops); + fix_range_common(mm, start_addr, end_addr, force, do_ops); } atomic_t vmchange_seq = ATOMIC_INIT(1); diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 61dfd4f..163476a 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -30,6 +30,7 @@ SECTIONS _einittext = .; } . = ALIGN(4096); + .text : { *(.text) @@ -39,6 +40,12 @@ SECTIONS /* .gnu.warning sections are handled specially by elf32.em. */ *(.gnu.warning) *(.gnu.linkonce.t*) + + . = ALIGN(4096); + __syscall_stub_start = .; + *(.__syscall_stub*) + __syscall_stub_end = .; + . = ALIGN(4096); } #include "asm/common.lds.S" diff --git a/arch/um/os-Linux/sys-i386/registers.c b/arch/um/os-Linux/sys-i386/registers.c index 9a0ad09..3125d32 100644 --- a/arch/um/os-Linux/sys-i386/registers.c +++ b/arch/um/os-Linux/sys-i386/registers.c @@ -121,6 +121,11 @@ void init_registers(int pid) err); } +void get_safe_registers(unsigned long *regs) +{ + memcpy(regs, exec_regs, HOST_FRAME_SIZE * sizeof(unsigned long)); +} + /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically diff --git a/arch/um/os-Linux/sys-x86_64/registers.c b/arch/um/os-Linux/sys-x86_64/registers.c index 6286c97..44438d1 100644 --- a/arch/um/os-Linux/sys-x86_64/registers.c +++ b/arch/um/os-Linux/sys-x86_64/registers.c @@ -69,6 +69,11 @@ void init_registers(int pid) err); } +void get_safe_registers(unsigned long *regs) +{ + memcpy(regs, exec_regs, HOST_FRAME_SIZE * sizeof(unsigned long)); +} + /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules index 7459d09..17f305b 100644 --- a/arch/um/scripts/Makefile.rules +++ b/arch/um/scripts/Makefile.rules @@ -16,6 +16,11 @@ define unprofile endef +# The stubs and unmap.o can't try to call mcount or update basic block data +define unprofile + $(patsubst -pg,,$(patsubst -fprofile-arcs -ftest-coverage,,$(1))) +endef + quiet_cmd_make_link = SYMLINK $@ cmd_make_link = ln -sf $(srctree)/arch/$(SUBARCH)/$($(notdir $@)-dir)/$(notdir $@) $@ diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile index 095bcdb..77c3c4d 100644 --- a/arch/um/sys-i386/Makefile +++ b/arch/um/sys-i386/Makefile @@ -1,6 +1,6 @@ obj-y = bitops.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \ - ptrace_user.o semaphore.o signal.o sigcontext.o syscalls.o sysrq.o \ - sys_call_table.o + ptrace_user.o semaphore.o signal.o sigcontext.o stub.o stub_segv.o \ + syscalls.o sysrq.o sys_call_table.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_MODULES) += module.o @@ -16,6 +16,14 @@ semaphore.c-dir = kernel highmem.c-dir = mm module.c-dir = kernel +STUB_CFLAGS = -Wp,-MD,$(depfile) $(call unprofile,$(USER_CFLAGS)) + +# _cflags works with kernel files, not with userspace ones, but c_flags does, +# why ask why? +$(obj)/stub_segv.o : c_flags = $(STUB_CFLAGS) + +$(obj)/stub.o : a_flags = $(STUB_CFLAGS) + subdir- := util include arch/um/scripts/Makefile.unmap diff --git a/arch/um/sys-i386/stub.S b/arch/um/sys-i386/stub.S new file mode 100644 index 0000000..2f2c70a --- /dev/null +++ b/arch/um/sys-i386/stub.S @@ -0,0 +1,8 @@ +#include "uml-config.h" + + .globl syscall_stub +.section .__syscall_stub, "x" +syscall_stub: + int $0x80 + mov %eax, UML_CONFIG_STUB_DATA + int3 diff --git a/arch/um/sys-i386/stub_segv.c b/arch/um/sys-i386/stub_segv.c new file mode 100644 index 0000000..b251442 --- /dev/null +++ b/arch/um/sys-i386/stub_segv.c @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include <signal.h> +#include <asm/sigcontext.h> +#include <asm/unistd.h> +#include "uml-config.h" +#include "sysdep/sigcontext.h" +#include "sysdep/faultinfo.h" + +void __attribute__ ((__section__ (".__syscall_stub"))) +stub_segv_handler(int sig) +{ + struct sigcontext *sc = (struct sigcontext *) (&sig + 1); + + GET_FAULTINFO_FROM_SC(*((struct faultinfo *) UML_CONFIG_STUB_DATA), + sc); + + __asm__("movl %0, %%eax ; int $0x80": : "g" (__NR_getpid)); + __asm__("movl %%eax, %%ebx ; movl %0, %%eax ; movl %1, %%ecx ;" + "int $0x80": : "g" (__NR_kill), "g" (SIGUSR1)); + /* Pop the frame pointer and return address since we need to leave + * the stack in its original form when we do the sigreturn here, by + * hand. + */ + __asm__("popl %%eax ; popl %%eax ; popl %%eax ; movl %0, %%eax ; " + "int $0x80" : : "g" (__NR_sigreturn)); +} diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile index 2bc6f68..7488206 100644 --- a/arch/um/sys-x86_64/Makefile +++ b/arch/um/sys-x86_64/Makefile @@ -6,8 +6,8 @@ #XXX: why into lib-y? lib-y = bitops.o bugs.o csum-partial.o delay.o fault.o mem.o memcpy.o \ - ptrace.o ptrace_user.o semaphore.o sigcontext.o signal.o \ - syscalls.o sysrq.o thunk.o syscall_table.o + ptrace.o ptrace_user.o semaphore.o sigcontext.o signal.o stub.o \ + stub_segv.o syscalls.o syscall_table.o sysrq.o thunk.o obj-y := ksyms.o obj-$(CONFIG_MODULES) += module.o um_module.o @@ -28,6 +28,14 @@ semaphore.c-dir = kernel thunk.S-dir = lib module.c-dir = kernel +STUB_CFLAGS = -Wp,-MD,$(depfile) $(call unprofile,$(USER_CFLAGS)) + +# _cflags works with kernel files, not with userspace ones, but c_flags does, +# why ask why? +$(obj)/stub_segv.o : c_flags = $(STUB_CFLAGS) + +$(obj)/stub.o : a_flags = $(STUB_CFLAGS) + subdir- := util include arch/um/scripts/Makefile.unmap diff --git a/arch/um/sys-x86_64/stub.S b/arch/um/sys-x86_64/stub.S new file mode 100644 index 0000000..31c1492 --- /dev/null +++ b/arch/um/sys-x86_64/stub.S @@ -0,0 +1,15 @@ +#include "uml-config.h" + + .globl syscall_stub +.section .__syscall_stub, "x" +syscall_stub: + syscall + /* We don't have 64-bit constants, so this constructs the address + * we need. + */ + movq $(UML_CONFIG_STUB_DATA >> 32), %rbx + salq $32, %rbx + movq $(UML_CONFIG_STUB_DATA & 0xffffffff), %rcx + or %rcx, %rbx + movq %rax, (%rbx) + int3 diff --git a/arch/um/sys-x86_64/stub_segv.c b/arch/um/sys-x86_64/stub_segv.c new file mode 100644 index 0000000..161d1fe --- /dev/null +++ b/arch/um/sys-x86_64/stub_segv.c @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include <signal.h> +#include <linux/compiler.h> +#include <asm/unistd.h> +#include "uml-config.h" +#include "sysdep/sigcontext.h" +#include "sysdep/faultinfo.h" + +void __attribute__ ((__section__ (".__syscall_stub"))) +stub_segv_handler(int sig) +{ + struct ucontext *uc; + + __asm__("movq %%rdx, %0" : "=g" (uc) :); + GET_FAULTINFO_FROM_SC(*((struct faultinfo *) UML_CONFIG_STUB_DATA), + &uc->uc_mcontext); + + __asm__("movq %0, %%rax ; syscall": : "g" (__NR_getpid)); + __asm__("movq %%rax, %%rdi ; movq %0, %%rax ; movq %1, %%rsi ;" + "syscall": : "g" (__NR_kill), "g" (SIGUSR1)); + /* Two popqs to restore the stack to the state just before entering + * the handler, one pops the return address, the other pops the frame + * pointer. + */ + __asm__("popq %%rax ; popq %%rax ; movq %0, %%rax ; syscall" : : "g" + (__NR_rt_sigreturn)); +} diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index b02d921..5fd0322 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -1076,6 +1076,10 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_MCE mcheck_init(c); #endif + if (c == &boot_cpu_data) + mtrr_bp_init(); + else + mtrr_ap_init(); #ifdef CONFIG_NUMA if (c != &boot_cpu_data) numa_add_cpu(c - cpu_data); diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c index 6c0f402e..0612640 100644 --- a/arch/x86_64/kernel/suspend.c +++ b/arch/x86_64/kernel/suspend.c @@ -119,6 +119,7 @@ void __restore_processor_state(struct saved_context *ctxt) fix_processor_context(); do_fpu_end(); + mtrr_ap_init(); } void restore_processor_state(void) diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 73389f5..61c1275 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -56,6 +56,10 @@ SECTIONS .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { *(.data.cacheline_aligned) } + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { + *(.data.read_mostly) + } #define VSYSCALL_ADDR (-10*1024*1024) #define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.cacheline_aligned) + SIZEOF(.data.cacheline_aligned) + 4095) & ~(4095)) diff --git a/arch/xtensa/kernel/syscalls.c b/arch/xtensa/kernel/syscalls.c index abc8ed6..3540d8b 100644 --- a/arch/xtensa/kernel/syscalls.c +++ b/arch/xtensa/kernel/syscalls.c @@ -46,8 +46,6 @@ extern void do_syscall_trace(void); typedef int (*syscall_t)(void *a0,...); -extern int (*do_syscalls)(struct pt_regs *regs, syscall_t fun, - int narg); extern syscall_t sys_call_table[]; extern unsigned char sys_narg_table[]; @@ -72,10 +70,8 @@ int sys_pipe(int __user *userfds) /* * Common code for old and new mmaps. */ - -static inline long do_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) +long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, + unsigned long flags, unsigned long fd, unsigned long pgoff) { int error = -EBADF; struct file * file = NULL; @@ -97,29 +93,6 @@ out: return error; } -unsigned long old_mmap(unsigned long addr, size_t len, int prot, - int flags, int fd, off_t offset) -{ - return do_mmap2(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); -} - -long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, - unsigned long flags, unsigned long fd, unsigned long pgoff) -{ - return do_mmap2(addr, len, prot, flags, fd, pgoff); -} - -int sys_fork(struct pt_regs *regs) -{ - return do_fork(SIGCHLD, regs->areg[1], regs, 0, NULL, NULL); -} - -int sys_vfork(struct pt_regs *regs) -{ - return do_fork(CLONE_VFORK|CLONE_VM|SIGCHLD, regs->areg[1], - regs, 0, NULL, NULL); -} - int sys_clone(struct pt_regs *regs) { unsigned long clone_flags; @@ -162,30 +135,6 @@ int sys_uname(struct old_utsname * name) return -EFAULT; } -int sys_olduname(struct oldold_utsname * name) -{ - int error; - - if (!name) - return -EFAULT; - if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) - return -EFAULT; - - error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); - error -= __put_user(0,name->sysname+__OLD_UTS_LEN); - error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); - error -= __put_user(0,name->nodename+__OLD_UTS_LEN); - error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); - error -= __put_user(0,name->release+__OLD_UTS_LEN); - error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); - error -= __put_user(0,name->version+__OLD_UTS_LEN); - error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); - error -= __put_user(0,name->machine+__OLD_UTS_LEN); - - return error ? -EFAULT : 0; -} - - /* * Build the string table for the builtin "poor man's strace". */ @@ -319,100 +268,3 @@ void system_call (struct pt_regs *regs) regs->areg[2] = res; do_syscall_trace(); } - -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. - */ - -int sys_ipc (uint call, int first, int second, - int third, void __user *ptr, long fifth) -{ - int version, ret; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - ret = -ENOSYS; - - switch (call) { - case SEMOP: - ret = sys_semtimedop (first, (struct sembuf __user *)ptr, - second, NULL); - break; - - case SEMTIMEDOP: - ret = sys_semtimedop (first, (struct sembuf __user *)ptr, - second, (const struct timespec *) fifth); - break; - - case SEMGET: - ret = sys_semget (first, second, third); - break; - - case SEMCTL: { - union semun fourth; - - if (ptr && !get_user(fourth.__pad, (void *__user *) ptr)) - ret = sys_semctl (first, second, third, fourth); - break; - } - - case MSGSND: - ret = sys_msgsnd (first, (struct msgbuf __user*) ptr, - second, third); - break; - - case MSGRCV: - switch (version) { - case 0: { - struct ipc_kludge tmp; - - if (ptr && !copy_from_user(&tmp, - (struct ipc_kludge *) ptr, - sizeof (tmp))) - ret = sys_msgrcv (first, tmp.msgp, second, - tmp.msgtyp, third); - break; - } - - default: - ret = sys_msgrcv (first, (struct msgbuf __user *) ptr, - second, 0, third); - break; - } - break; - - case MSGGET: - ret = sys_msgget ((key_t) first, second); - break; - - case MSGCTL: - ret = sys_msgctl (first, second, (struct msqid_ds __user*) ptr); - break; - - case SHMAT: { - ulong raddr; - ret = do_shmat (first, (char __user *) ptr, second, &raddr); - - if (!ret) - ret = put_user (raddr, (ulong __user *) third); - - break; - } - - case SHMDT: - ret = sys_shmdt ((char __user *)ptr); - break; - - case SHMGET: - ret = sys_shmget (first, second, third); - break; - - case SHMCTL: - ret = sys_shmctl (first, second, (struct shmid_ds __user*) ptr); - break; - } - return ret; -} - diff --git a/arch/xtensa/kernel/syscalls.h b/arch/xtensa/kernel/syscalls.h index 5b3f75f..0758069 100644 --- a/arch/xtensa/kernel/syscalls.h +++ b/arch/xtensa/kernel/syscalls.h @@ -25,20 +25,19 @@ */ SYSCALL(0, 0) /* 00 */ - SYSCALL(sys_exit, 1) -SYSCALL(sys_fork, 0) +SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_read, 3) SYSCALL(sys_write, 3) SYSCALL(sys_open, 3) /* 05 */ SYSCALL(sys_close, 1) -SYSCALL(sys_waitpid, 3) +SYSCALL(sys_ni_syscall, 3) SYSCALL(sys_creat, 2) SYSCALL(sys_link, 2) SYSCALL(sys_unlink, 1) /* 10 */ SYSCALL(sys_execve, 0) SYSCALL(sys_chdir, 1) -SYSCALL(sys_time, 1) +SYSCALL(sys_ni_syscall, 1) SYSCALL(sys_mknod, 3) SYSCALL(sys_chmod, 2) /* 15 */ SYSCALL(sys_lchown, 3) @@ -47,19 +46,19 @@ SYSCALL(sys_stat, 2) SYSCALL(sys_lseek, 3) SYSCALL(sys_getpid, 0) /* 20 */ SYSCALL(sys_mount, 5) -SYSCALL(sys_oldumount, 1) +SYSCALL(sys_ni_syscall, 1) SYSCALL(sys_setuid, 1) SYSCALL(sys_getuid, 0) -SYSCALL(sys_stime, 1) /* 25 */ +SYSCALL(sys_ni_syscall, 1) /* 25 */ SYSCALL(sys_ptrace, 4) -SYSCALL(sys_alarm, 1) +SYSCALL(sys_ni_syscall, 1) SYSCALL(sys_fstat, 2) -SYSCALL(sys_pause, 0) +SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_utime, 2) /* 30 */ SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_access, 2) -SYSCALL(sys_nice, 1) +SYSCALL(sys_ni_syscall, 1) SYSCALL(sys_ni_syscall, 0) /* 35 */ SYSCALL(sys_sync, 0) SYSCALL(sys_kill, 2) @@ -73,7 +72,7 @@ SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_brk, 1) /* 45 */ SYSCALL(sys_setgid, 1) SYSCALL(sys_getgid, 0) -SYSCALL(sys_ni_syscall, 0) /* was signal(2) */ +SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_geteuid, 0) SYSCALL(sys_getegid, 0) /* 50 */ SYSCALL(sys_acct, 1) @@ -84,21 +83,21 @@ SYSCALL(sys_fcntl, 3) /* 55 */ SYSCALL(sys_ni_syscall, 2) SYSCALL(sys_setpgid, 2) SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_olduname, 1) +SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_umask, 1) /* 60 */ SYSCALL(sys_chroot, 1) SYSCALL(sys_ustat, 2) SYSCALL(sys_dup2, 2) SYSCALL(sys_getppid, 0) -SYSCALL(sys_getpgrp, 0) /* 65 */ +SYSCALL(sys_ni_syscall, 0) /* 65 */ SYSCALL(sys_setsid, 0) SYSCALL(sys_sigaction, 3) -SYSCALL(sys_sgetmask, 0) -SYSCALL(sys_ssetmask, 1) +SYSCALL(sys_ni_syscall, 0) +SYSCALL(sys_ni_syscall, 1) SYSCALL(sys_setreuid, 2) /* 70 */ SYSCALL(sys_setregid, 2) SYSCALL(sys_sigsuspend, 0) -SYSCALL(sys_sigpending, 1) +SYSCALL(sys_ni_syscall, 1) SYSCALL(sys_sethostname, 2) SYSCALL(sys_setrlimit, 2) /* 75 */ SYSCALL(sys_getrlimit, 2) @@ -107,15 +106,15 @@ SYSCALL(sys_gettimeofday, 2) SYSCALL(sys_settimeofday, 2) SYSCALL(sys_getgroups, 2) /* 80 */ SYSCALL(sys_setgroups, 2) -SYSCALL(sys_ni_syscall, 0) /* old_select */ +SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_symlink, 2) SYSCALL(sys_lstat, 2) SYSCALL(sys_readlink, 3) /* 85 */ SYSCALL(sys_uselib, 1) SYSCALL(sys_swapon, 2) SYSCALL(sys_reboot, 3) -SYSCALL(old_readdir, 3) -SYSCALL(old_mmap, 6) /* 90 */ +SYSCALL(sys_ni_syscall, 3) +SYSCALL(sys_ni_syscall, 6) /* 90 */ SYSCALL(sys_munmap, 2) SYSCALL(sys_truncate, 2) SYSCALL(sys_ftruncate, 2) @@ -127,7 +126,7 @@ SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_statfs, 2) SYSCALL(sys_fstatfs, 2) /* 100 */ SYSCALL(sys_ni_syscall, 3) -SYSCALL(sys_socketcall, 2) +SYSCALL(sys_ni_syscall, 2) SYSCALL(sys_syslog, 3) SYSCALL(sys_setitimer, 3) SYSCALL(sys_getitimer, 2) /* 105 */ @@ -137,32 +136,32 @@ SYSCALL(sys_newfstat, 2) SYSCALL(sys_uname, 1) SYSCALL(sys_ni_syscall, 0) /* 110 */ SYSCALL(sys_vhangup, 0) -SYSCALL(sys_ni_syscall, 0) /* was sys_idle() */ +SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_wait4, 4) SYSCALL(sys_swapoff, 1) /* 115 */ SYSCALL(sys_sysinfo, 1) -SYSCALL(sys_ipc, 5) /* 6 really, but glibc uses only 5) */ +SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_fsync, 1) SYSCALL(sys_sigreturn, 0) SYSCALL(sys_clone, 0) /* 120 */ SYSCALL(sys_setdomainname, 2) SYSCALL(sys_newuname, 1) -SYSCALL(sys_ni_syscall, 0) /* sys_modify_ldt */ +SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_adjtimex, 1) SYSCALL(sys_mprotect, 3) /* 125 */ -SYSCALL(sys_sigprocmask, 3) -SYSCALL(sys_ni_syscall, 2) /* old sys_create_module */ +SYSCALL(sys_ni_syscall, 3) +SYSCALL(sys_ni_syscall, 2) SYSCALL(sys_init_module, 2) SYSCALL(sys_delete_module, 1) -SYSCALL(sys_ni_syscall, 1) /* old sys_get_kernel_sysm */ /* 130 */ +SYSCALL(sys_ni_syscall, 1) /* 130 */ SYSCALL(sys_quotactl, 0) SYSCALL(sys_getpgid, 1) SYSCALL(sys_fchdir, 1) SYSCALL(sys_bdflush, 2) SYSCALL(sys_sysfs, 3) /* 135 */ SYSCALL(sys_personality, 1) -SYSCALL(sys_ni_syscall, 0) /* for afs_syscall */ +SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_setfsuid, 1) SYSCALL(sys_setfsgid, 1) SYSCALL(sys_llseek, 5) /* 140 */ @@ -212,7 +211,7 @@ SYSCALL(sys_socket, 3) SYSCALL(sys_socketpair, 4) SYSCALL(sys_setresuid, 3) /* 185 */ SYSCALL(sys_getresuid, 3) -SYSCALL(sys_ni_syscall, 5) /* old sys_query_module */ +SYSCALL(sys_ni_syscall, 5) SYSCALL(sys_poll, 3) SYSCALL(sys_nfsservctl, 3) SYSCALL(sys_setresgid, 3) /* 190 */ @@ -235,7 +234,7 @@ SYSCALL(sys_sigaltstack, 0) SYSCALL(sys_sendfile, 4) SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_mmap2, 6) /* 210 */ +SYSCALL(sys_mmap, 6) /* 210 */ SYSCALL(sys_truncate64, 2) SYSCALL(sys_ftruncate64, 2) SYSCALL(sys_stat64, 2) @@ -245,4 +244,4 @@ SYSCALL(sys_pivot_root, 2) SYSCALL(sys_mincore, 3) SYSCALL(sys_madvise, 3) SYSCALL(sys_getdents64, 3) -SYSCALL(sys_vfork, 0) /* 220 */ +SYSCALL(sys_ni_syscall, 0) /* 220 */ |