diff options
92 files changed, 3160 insertions, 15867 deletions
diff --git a/sys/amd64/acpica/OsdEnvironment.c b/sys/amd64/acpica/OsdEnvironment.c index a3d3661..cd7fd64 100644 --- a/sys/amd64/acpica/OsdEnvironment.c +++ b/sys/amd64/acpica/OsdEnvironment.c @@ -36,8 +36,6 @@ #include "acpi.h" -#include <machine/pc/bios.h> - u_long i386_acpi_root; SYSCTL_ULONG(_machdep, OID_AUTO, acpi_root, CTLFLAG_RD, &i386_acpi_root, 0, diff --git a/sys/amd64/acpica/acpi_machdep.c b/sys/amd64/acpica/acpi_machdep.c index 6d48588..66c71f4 100644 --- a/sys/amd64/acpica/acpi_machdep.c +++ b/sys/amd64/acpica/acpi_machdep.c @@ -28,315 +28,20 @@ #include <sys/param.h> #include <sys/bus.h> -#include <sys/conf.h> -#include <sys/fcntl.h> -#include <sys/uio.h> #include "acpi.h" - #include <dev/acpica/acpivar.h> -#include <dev/acpica/acpiio.h> - -static device_t acpi_dev; - -/* - * APM driver emulation - */ - -#if __FreeBSD_version < 500000 -#include <sys/select.h> -#else -#include <sys/selinfo.h> -#endif - -#include <machine/apm_bios.h> -#include <machine/pc/bios.h> - -#include <i386/bios/apm.h> - -static struct apm_softc apm_softc; - -static d_open_t apmopen; -static d_close_t apmclose; -static d_write_t apmwrite; -static d_ioctl_t apmioctl; -static d_poll_t apmpoll; - -#define CDEV_MAJOR 39 -static struct cdevsw apm_cdevsw = { - .d_open = apmopen, - .d_close = apmclose, - .d_write = apmwrite, - .d_ioctl = apmioctl, - .d_poll = apmpoll, - .d_name = "apm", - .d_maj = CDEV_MAJOR, -}; - -static int -acpi_capm_convert_battstate(struct acpi_battinfo *battp) -{ - int state; - - state = 0xff; /* XXX unknown */ - - if (battp->state & ACPI_BATT_STAT_DISCHARG) { - if (battp->cap >= 50) { - state = 0; /* high */ - } else { - state = 1; /* low */ - } - } - if (battp->state & ACPI_BATT_STAT_CRITICAL) { - state = 2; /* critical */ - } - if (battp->state & ACPI_BATT_STAT_CHARGING) { - state = 3; /* charging */ - } - return (state); -} - -static int -acpi_capm_convert_battflags(struct acpi_battinfo *battp) -{ - int flags; - - flags = 0; - - if (battp->cap >= 50) { - 
flags |= APM_BATT_HIGH; - } else { - if (battp->state & ACPI_BATT_STAT_CRITICAL) { - flags |= APM_BATT_CRITICAL; - } else { - flags |= APM_BATT_LOW; - } - } - if (battp->state & ACPI_BATT_STAT_CHARGING) { - flags |= APM_BATT_CHARGING; - } - if (battp->state == ACPI_BATT_STAT_NOT_PRESENT) { - flags = APM_BATT_NOT_PRESENT; - } - - return (flags); -} - -static int -acpi_capm_get_info(apm_info_t aip) -{ - int acline; - struct acpi_battinfo batt; - - aip->ai_infoversion = 1; - aip->ai_major = 1; - aip->ai_minor = 2; - aip->ai_status = apm_softc.active; - aip->ai_capabilities= 0xff00; /* XXX unknown */ - - if (acpi_acad_get_acline(&acline)) { - aip->ai_acline = 0xff; /* unknown */ - } else { - aip->ai_acline = acline; /* on/off */ - } - - if (acpi_battery_get_battinfo(-1, &batt)) { - aip->ai_batt_stat = 0xff; /* unknown */ - aip->ai_batt_life = 0xff; /* unknown */ - aip->ai_batt_time = -1; /* unknown */ - aip->ai_batteries = 0; - } else { - aip->ai_batt_stat = acpi_capm_convert_battstate(&batt); - aip->ai_batt_life = batt.cap; - aip->ai_batt_time = (batt.min == -1) ? -1 : batt.min * 60; - aip->ai_batteries = acpi_battery_get_units(); - } - - return (0); -} - -static int -acpi_capm_get_pwstatus(apm_pwstatus_t app) -{ - int batt_unit; - int acline; - struct acpi_battinfo batt; - - if (app->ap_device != PMDV_ALLDEV && - (app->ap_device < PMDV_BATT0 || app->ap_device > PMDV_BATT_ALL)) { - return (1); - } - - if (app->ap_device == PMDV_ALLDEV) { - batt_unit = -1; /* all units */ - } else { - batt_unit = app->ap_device - PMDV_BATT0; - } - - if (acpi_battery_get_battinfo(batt_unit, &batt)) { - return (1); - } - - app->ap_batt_stat = acpi_capm_convert_battstate(&batt); - app->ap_batt_flag = acpi_capm_convert_battflags(&batt); - app->ap_batt_life = batt.cap; - app->ap_batt_time = (batt.min == -1) ? 
-1 : batt.min * 60; - - if (acpi_acad_get_acline(&acline)) { - app->ap_acline = 0xff; /* unknown */ - } else { - app->ap_acline = acline; /* on/off */ - } - - return (0); -} - -static int -apmopen(dev_t dev, int flag, int fmt, d_thread_t *td) -{ - return (0); -} - -static int -apmclose(dev_t dev, int flag, int fmt, d_thread_t *td) -{ - return (0); -} - -static int -apmioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, d_thread_t *td) -{ - int error = 0; - struct acpi_softc *acpi_sc; - struct apm_info info; - apm_info_old_t aiop; - - if ((acpi_sc = device_get_softc(acpi_dev)) == NULL) { - return (ENXIO); - } - - switch (cmd) { - case APMIO_SUSPEND: - if (!(flag & FWRITE)) - return (EPERM); - if (apm_softc.active) - acpi_SetSleepState(acpi_sc, acpi_sc->acpi_suspend_sx); - else - error = EINVAL; - break; - - case APMIO_STANDBY: - if (!(flag & FWRITE)) - return (EPERM); - if (apm_softc.active) - acpi_SetSleepState(acpi_sc, acpi_sc->acpi_standby_sx); - else - error = EINVAL; - break; - - case APMIO_GETINFO_OLD: - if (acpi_capm_get_info(&info)) - error = ENXIO; - aiop = (apm_info_old_t)addr; - aiop->ai_major = info.ai_major; - aiop->ai_minor = info.ai_minor; - aiop->ai_acline = info.ai_acline; - aiop->ai_batt_stat = info.ai_batt_stat; - aiop->ai_batt_life = info.ai_batt_life; - aiop->ai_status = info.ai_status; - break; - - case APMIO_GETINFO: - if (acpi_capm_get_info((apm_info_t)addr)) - error = ENXIO; - - break; - - case APMIO_GETPWSTATUS: - if (acpi_capm_get_pwstatus((apm_pwstatus_t)addr)) - error = ENXIO; - break; - - case APMIO_ENABLE: - if (!(flag & FWRITE)) - return (EPERM); - apm_softc.active = 1; - break; - - case APMIO_DISABLE: - if (!(flag & FWRITE)) - return (EPERM); - apm_softc.active = 0; - break; - - case APMIO_HALTCPU: - break; - - case APMIO_NOTHALTCPU: - break; - - case APMIO_DISPLAY: - if (!(flag & FWRITE)) - return (EPERM); - break; - - case APMIO_BIOS: - if (!(flag & FWRITE)) - return (EPERM); - bzero(addr, sizeof(struct apm_bios_arg)); - break; - 
- default: - error = EINVAL; - break; - } - - return (error); -} - -static int -apmwrite(dev_t dev, struct uio *uio, int ioflag) -{ - - return (uio->uio_resid); -} - -static int -apmpoll(dev_t dev, int events, d_thread_t *td) -{ - return (0); -} - -static void -acpi_capm_init(struct acpi_softc *sc) -{ - - make_dev(&apm_cdevsw, 0, 0, 5, 0664, "apm"); -} int acpi_machdep_init(device_t dev) { struct acpi_softc *sc; - acpi_dev = dev; - if ((sc = device_get_softc(acpi_dev)) == NULL) { + if ((sc = device_get_softc(dev)) == NULL) { return (ENXIO); } - /* - * XXX: Prevent the PnP BIOS code from interfering with - * our own scan of ISA devices. - */ - PnPBIOStable = NULL; - - acpi_capm_init(sc); - acpi_install_wakeup_handler(sc); -#ifdef SMP - acpi_SetIntrModel(ACPI_INTR_APIC); -#endif return (0); } - diff --git a/sys/amd64/acpica/acpi_wakeup.c b/sys/amd64/acpica/acpi_wakeup.c index 9f02e93..3a17e6c 100644 --- a/sys/amd64/acpica/acpi_wakeup.c +++ b/sys/amd64/acpica/acpi_wakeup.c @@ -26,329 +26,20 @@ * * $FreeBSD$ */ - #include <sys/param.h> -#include <sys/systm.h> -#include <sys/kernel.h> #include <sys/bus.h> -#include <sys/lock.h> -#include <sys/proc.h> -#include <sys/sysctl.h> - -#include <vm/vm.h> -#include <vm/vm_param.h> -#include <vm/pmap.h> -#include <vm/vm_object.h> -#include <vm/vm_page.h> -#include <vm/vm_map.h> -#include <vm/vm_kern.h> -#include <vm/vm_extern.h> - -#include <machine/bus.h> -#include <machine/cpufunc.h> -#include <machine/segments.h> - -#include <i386/isa/intr_machdep.h> #include "acpi.h" - -#include <dev/acpica/acpica_support.h> - #include <dev/acpica/acpivar.h> -#include "acpi_wakecode.h" - -extern void initializecpu(void); - -static struct region_descriptor r_idt, r_gdt, *p_gdt; -static u_int16_t r_ldt; - -static u_int32_t r_eax, r_ebx, r_ecx, r_edx, r_ebp, r_esi, r_edi, - r_efl, r_cr0, r_cr2, r_cr3, r_cr4, ret_addr; - -static u_int16_t r_cs, r_ds, r_es, r_fs, r_gs, r_ss, r_tr; -static u_int32_t r_esp = 0; - -static void acpi_printcpu(void); 
-static void acpi_realmodeinst(void *arg, bus_dma_segment_t *segs, - int nsegs, int error); -static void acpi_alloc_wakeup_handler(void); - -/* XXX shut gcc up */ -extern int acpi_savecpu(void); -extern int acpi_restorecpu(void); - -#ifdef __GNUC__ -__asm__(" \n\ - .text \n\ - .p2align 2, 0x90 \n\ - .type acpi_restorecpu, @function\n\ -acpi_restorecpu: \n\ - .align 4 \n\ - movl r_eax,%eax \n\ - movl r_ebx,%ebx \n\ - movl r_ecx,%ecx \n\ - movl r_edx,%edx \n\ - movl r_ebp,%ebp \n\ - movl r_esi,%esi \n\ - movl r_edi,%edi \n\ - movl r_esp,%esp \n\ - \n\ - pushl r_efl \n\ - popfl \n\ - \n\ - movl ret_addr,%eax \n\ - movl %eax,(%esp) \n\ - xorl %eax,%eax \n\ - ret \n\ - \n\ - .text \n\ - .p2align 2, 0x90 \n\ - .type acpi_savecpu, @function \n\ -acpi_savecpu: \n\ - movw %cs,r_cs \n\ - movw %ds,r_ds \n\ - movw %es,r_es \n\ - movw %fs,r_fs \n\ - movw %gs,r_gs \n\ - movw %ss,r_ss \n\ - \n\ - movl %eax,r_eax \n\ - movl %ebx,r_ebx \n\ - movl %ecx,r_ecx \n\ - movl %edx,r_edx \n\ - movl %ebp,r_ebp \n\ - movl %esi,r_esi \n\ - movl %edi,r_edi \n\ - \n\ - movl %cr0,%eax \n\ - movl %eax,r_cr0 \n\ - movl %cr2,%eax \n\ - movl %eax,r_cr2 \n\ - movl %cr3,%eax \n\ - movl %eax,r_cr3 \n\ - movl %cr4,%eax \n\ - movl %eax,r_cr4 \n\ - \n\ - pushfl \n\ - popl r_efl \n\ - \n\ - movl %esp,r_esp \n\ - \n\ - sgdt r_gdt \n\ - sidt r_idt \n\ - sldt r_ldt \n\ - str r_tr \n\ - \n\ - movl (%esp),%eax \n\ - movl %eax,ret_addr \n\ - movl $1,%eax \n\ - ret \n\ -"); -#endif /* __GNUC__ */ - -static void -acpi_printcpu(void) -{ - - printf("======== acpi_printcpu() debug dump ========\n"); - printf("gdt[%04x:%08x] idt[%04x:%08x] ldt[%04x] tr[%04x] efl[%08x]\n", - r_gdt.rd_limit, r_gdt.rd_base, r_idt.rd_limit, r_idt.rd_base, - r_ldt, r_tr, r_efl); - printf("eax[%08x] ebx[%08x] ecx[%08x] edx[%08x]\n", - r_eax, r_ebx, r_ecx, r_edx); - printf("esi[%08x] edi[%08x] ebp[%08x] esp[%08x]\n", - r_esi, r_edi, r_ebp, r_esp); - printf("cr0[%08x] cr2[%08x] cr3[%08x] cr4[%08x]\n", - r_cr0, r_cr2, r_cr3, r_cr4); - 
printf("cs[%04x] ds[%04x] es[%04x] fs[%04x] gs[%04x] ss[%04x]\n", - r_cs, r_ds, r_es, r_fs, r_gs, r_ss); -} - -#define WAKECODE_FIXUP(offset, type, val) do { \ - void **addr; \ - addr = (void **)(sc->acpi_wakeaddr + offset); \ - (type *)*addr = val; \ -} while (0) - -#define WAKECODE_BCOPY(offset, type, val) do { \ - void **addr; \ - addr = (void **)(sc->acpi_wakeaddr + offset); \ - bcopy(&(val), addr, sizeof(type)); \ -} while (0) - int acpi_sleep_machdep(struct acpi_softc *sc, int state) { - ACPI_STATUS status; - vm_paddr_t oldphys; - struct pmap *pm; - vm_page_t page; - static vm_page_t opage = NULL; - int ret = 0; - int pteobj_allocated = 0; - u_long ef; - struct proc *p; - - if (sc->acpi_wakeaddr == 0) { - return (0); - } - - AcpiSetFirmwareWakingVector(sc->acpi_wakephys); - - ef = read_eflags(); - disable_intr(); - - /* Create Identity Mapping */ - if ((p = curproc) == NULL) - p = &proc0; - pm = vmspace_pmap(p->p_vmspace); - if (pm->pm_pteobj == NULL) { - pm->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, PTDPTDI + 1); - pteobj_allocated = 1; - } - - oldphys = pmap_extract(pm, sc->acpi_wakephys); - if (oldphys) { - opage = PHYS_TO_VM_PAGE(oldphys); - } - page = PHYS_TO_VM_PAGE(sc->acpi_wakephys); - pmap_enter(pm, sc->acpi_wakephys, page, - VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE, 1); - - ret_addr = 0; - if (acpi_savecpu()) { - /* Execute Sleep */ - p_gdt = (struct region_descriptor *)(sc->acpi_wakeaddr + physical_gdt); - p_gdt->rd_limit = r_gdt.rd_limit; - p_gdt->rd_base = vtophys(r_gdt.rd_base); - - WAKECODE_FIXUP(physical_esp, u_int32_t, vtophys(r_esp)); - WAKECODE_FIXUP(previous_cr0, u_int32_t, r_cr0); - WAKECODE_FIXUP(previous_cr2, u_int32_t, r_cr2); - WAKECODE_FIXUP(previous_cr3, u_int32_t, r_cr3); - WAKECODE_FIXUP(previous_cr4, u_int32_t, r_cr4); - - WAKECODE_FIXUP(previous_tr, u_int16_t, r_tr); - WAKECODE_BCOPY(previous_gdt, struct region_descriptor, r_gdt); - WAKECODE_FIXUP(previous_ldt, u_int16_t, r_ldt); - WAKECODE_BCOPY(previous_idt, struct 
region_descriptor, r_idt); - WAKECODE_FIXUP(where_to_recover, void, acpi_restorecpu); - - WAKECODE_FIXUP(previous_ds, u_int16_t, r_ds); - WAKECODE_FIXUP(previous_es, u_int16_t, r_es); - WAKECODE_FIXUP(previous_fs, u_int16_t, r_fs); - WAKECODE_FIXUP(previous_gs, u_int16_t, r_gs); - WAKECODE_FIXUP(previous_ss, u_int16_t, r_ss); - - if (acpi_get_verbose(sc)) { - acpi_printcpu(); - } - - wbinvd(); - - if (state == ACPI_STATE_S4 && sc->acpi_s4bios) { - status = AcpiEnterSleepStateS4Bios(); - } else { - status = AcpiEnterSleepState(state); - } - - if (status != AE_OK) { - device_printf(sc->acpi_dev, - "AcpiEnterSleepState failed - %s\n", - AcpiFormatException(status)); - ret = -1; - goto out; - } - - for (;;) ; - } else { - /* Execute Wakeup */ -#if 0 - initializecpu(); -#endif - icu_reinit(); - - if (acpi_get_verbose(sc)) { - acpi_savecpu(); - acpi_printcpu(); - } - } - -out: - vm_page_lock_queues(); - pmap_remove(pm, sc->acpi_wakephys, sc->acpi_wakephys + PAGE_SIZE); - vm_page_unlock_queues(); - if (opage) { - pmap_enter(pm, sc->acpi_wakephys, page, - VM_PROT_READ | VM_PROT_WRITE, 0); - } - - if (pteobj_allocated) { - vm_object_deallocate(pm->pm_pteobj); - pm->pm_pteobj = NULL; - } - - write_eflags(ef); - - return (ret); -} - -static bus_dma_tag_t acpi_waketag; -static bus_dmamap_t acpi_wakemap; -static vm_offset_t acpi_wakeaddr = 0; - -static void -acpi_alloc_wakeup_handler(void) -{ - - if (!cold) - return; - - if (bus_dma_tag_create(/* parent */ NULL, /* alignment */ 2, 0, - /* lowaddr below 1MB */ 0x9ffff, - /* highaddr */ BUS_SPACE_MAXADDR, NULL, NULL, - PAGE_SIZE, 1, PAGE_SIZE, 0, &acpi_waketag) != 0) { - printf("acpi_alloc_wakeup_handler: unable to create wake tag\n"); - return; - } - - if (bus_dmamem_alloc(acpi_waketag, (void **)&acpi_wakeaddr, - BUS_DMA_NOWAIT, &acpi_wakemap)) { - printf("acpi_alloc_wakeup_handler: unable to allocate wake memory\n"); - return; - } -} - -SYSINIT(acpiwakeup, SI_SUB_KMEM, SI_ORDER_ANY, acpi_alloc_wakeup_handler, 0) - -static void 
-acpi_realmodeinst(void *arg, bus_dma_segment_t *segs, int nsegs, int error) -{ - struct acpi_softc *sc = arg; - u_int32_t *addr; - - addr = (u_int32_t *)&wakecode[wakeup_sw32 + 2]; - *addr = segs[0].ds_addr + wakeup_32; - bcopy(wakecode, (void *)sc->acpi_wakeaddr, sizeof(wakecode)); - sc->acpi_wakephys = segs[0].ds_addr; + return (0); } void acpi_install_wakeup_handler(struct acpi_softc *sc) { - - if (acpi_wakeaddr == 0) { - return; - } - - sc->acpi_waketag = acpi_waketag; - sc->acpi_wakeaddr = acpi_wakeaddr; - sc->acpi_wakemap = acpi_wakemap; - - bus_dmamap_load(sc->acpi_waketag, sc->acpi_wakemap, - (void *)sc->acpi_wakeaddr, PAGE_SIZE, - acpi_realmodeinst, sc, 0); } - diff --git a/sys/amd64/amd64/autoconf.c b/sys/amd64/amd64/autoconf.c index f97fbc0..a19672a 100644 --- a/sys/amd64/amd64/autoconf.c +++ b/sys/amd64/amd64/autoconf.c @@ -73,13 +73,8 @@ #include <nfsclient/nfs.h> #include <nfsclient/nfsdiskless.h> -#include <machine/bootinfo.h> #include <machine/md_var.h> -#ifdef APIC_IO -#include <machine/smp.h> -#else -#include <i386/isa/icu.h> -#endif /* APIC_IO */ +#include <amd64/isa/icu.h> #ifdef DEV_ISA #include <isa/isavar.h> @@ -127,13 +122,8 @@ configure(dummy) * * This is all rather inconvenient. */ -#ifdef APIC_IO - bsp_apic_configure(); - enable_intr(); -#else enable_intr(); INTREN(IRQ_SLAVE); -#endif /* APIC_IO */ /* nexus0 is the top of the i386 device tree */ device_add_child(root_bus, "nexus", 0); @@ -163,50 +153,6 @@ configure_final(dummy) { cninit_finish(); - - if (bootverbose) { - -#ifdef APIC_IO - imen_dump(); -#endif /* APIC_IO */ - -#ifdef PC98 - { - int i; - /* - * Print out the BIOS's idea of the disk geometries. - */ - printf("BIOS Geometries:\n"); - for (i = 0; i < N_BIOS_GEOM; i++) { - unsigned long bios_geom; - int max_cylinder, max_head, max_sector; - - bios_geom = bootinfo.bi_bios_geom[i]; - - /* - * XXX the bootstrap punts a 1200K floppy geometry - * when the get-disk-geometry interrupt fails. Skip - * drives that have this geometry. 
- */ - if (bios_geom == 0x4f010f) - continue; - - printf(" %x:%08lx ", i, bios_geom); - max_cylinder = bios_geom >> 16; - max_head = (bios_geom >> 8) & 0xff; - max_sector = bios_geom & 0xff; - printf( - "0..%d=%d cylinders, 0..%d=%d heads, 1..%d=%d sectors\n", - max_cylinder, max_cylinder + 1, - max_head, max_head + 1, - max_sector, max_sector); - } - printf(" %d accounted for\n", bootinfo.bi_n_bios_used); - } -#endif - - printf("Device configuration finished.\n"); - } cold = 0; } diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index a19baa7..8cca838 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -36,9 +36,6 @@ * $FreeBSD$ */ -#include "opt_npx.h" -#include "opt_swtch.h" - #include <machine/asmacros.h> #include "assym.s" @@ -57,37 +54,26 @@ * about its state. This is only a slight optimization and is probably * not worth it anymore. Note that we need to clear the pm_active bits so * we do need the old proc if it still exists. - * 0(%esp) = ret - * 4(%esp) = oldtd - * 8(%esp) = newtd + * %rdi = oldtd + * %rsi = newtd */ ENTRY(cpu_throw) - movl PCPU(CPUID), %esi - movl 4(%esp),%ecx /* Old thread */ - testl %ecx,%ecx /* no thread? */ + xorq %rax, %rax + movl PCPU(CPUID), %eax + testq %rdi,%rdi /* no thread? 
*/ jz 1f /* release bit from old pm_active */ - movl TD_PROC(%ecx), %eax /* thread->td_proc */ - movl P_VMSPACE(%eax), %ebx /* proc->p_vmspace */ -#ifdef SMP - lock -#endif - btrl %esi, VM_PMAP+PM_ACTIVE(%ebx) /* clear old */ + movq TD_PROC(%rdi), %rdx /* oldtd->td_proc */ + movq P_VMSPACE(%rdx), %rdx /* proc->p_vmspace */ + btrq %rax, VM_PMAP+PM_ACTIVE(%rdx) /* clear old */ 1: - movl 8(%esp),%ecx /* New thread */ - movl TD_PCB(%ecx),%edx -#ifdef SWTCH_OPTIM_STATS - incl tlb_flush_count -#endif - movl PCB_CR3(%edx),%eax - movl %eax,%cr3 /* new address space */ + movq TD_PCB(%rsi),%rdx /* newtd->td_proc */ + movq PCB_CR3(%rdx),%rdx + movq %rdx,%cr3 /* new address space */ /* set bit in new pm_active */ - movl TD_PROC(%ecx),%eax - movl P_VMSPACE(%eax), %ebx -#ifdef SMP - lock -#endif - btsl %esi, VM_PMAP+PM_ACTIVE(%ebx) /* set new */ + movq TD_PROC(%rsi),%rdx + movq P_VMSPACE(%rdx), %rdx + btsq %rax, VM_PMAP+PM_ACTIVE(%rdx) /* set new */ jmp sw1 /* @@ -95,278 +81,184 @@ ENTRY(cpu_throw) * * Save the current thread state, then select the next thread to run * and load its state. - * 0(%esp) = ret - * 4(%esp) = oldtd - * 8(%esp) = newtd + * %rdi = oldtd + * %rsi = newtd */ ENTRY(cpu_switch) /* Switch to new thread. First, save context. */ - movl 4(%esp),%ecx - #ifdef INVARIANTS - testl %ecx,%ecx /* no thread? */ + testq %rdi,%rdi /* no thread? 
*/ jz badsw2 /* no, panic */ #endif - movl TD_PCB(%ecx),%edx + movq TD_PCB(%rdi),%rdx - movl (%esp),%eax /* Hardware registers */ - movl %eax,PCB_EIP(%edx) - movl %ebx,PCB_EBX(%edx) - movl %esp,PCB_ESP(%edx) - movl %ebp,PCB_EBP(%edx) - movl %esi,PCB_ESI(%edx) - movl %edi,PCB_EDI(%edx) - movl %gs,PCB_GS(%edx) - pushfl /* PSL */ - popl PCB_PSL(%edx) + movq (%rsp),%rax /* Hardware registers */ + movq %rax,PCB_RIP(%rdx) + movq %rbx,PCB_RBX(%rdx) + movq %rsp,PCB_RSP(%rdx) + movq %rbp,PCB_RBP(%rdx) + movq %r12,PCB_R12(%rdx) + movq %r13,PCB_R13(%rdx) + movq %r14,PCB_R14(%rdx) + movq %r15,PCB_R15(%rdx) + pushfq /* PSL */ + popq PCB_RFLAGS(%rdx) - /* Test if debug registers should be saved. */ - testl $PCB_DBREGS,PCB_FLAGS(%edx) - jz 1f /* no, skip over */ - movl %dr7,%eax /* yes, do the save */ - movl %eax,PCB_DR7(%edx) - andl $0x0000fc00, %eax /* disable all watchpoints */ - movl %eax,%dr7 - movl %dr6,%eax - movl %eax,PCB_DR6(%edx) - movl %dr3,%eax - movl %eax,PCB_DR3(%edx) - movl %dr2,%eax - movl %eax,PCB_DR2(%edx) - movl %dr1,%eax - movl %eax,PCB_DR1(%edx) - movl %dr0,%eax - movl %eax,PCB_DR0(%edx) -1: - -#ifdef DEV_NPX /* have we used fp, and need a save? */ - cmpl %ecx,PCPU(FPCURTHREAD) + cmpq %rdi,PCPU(FPCURTHREAD) jne 1f - addl $PCB_SAVEFPU,%edx /* h/w bugs make saving complicated */ - pushl %edx + pushq %rdi + pushq %rsi + addq $PCB_SAVEFPU,%rdx /* h/w bugs make saving complicated */ + movq %rdx, %rdi call npxsave /* do it in a big C function */ - popl %eax + popq %rsi + popq %rdi 1: -#endif /* Save is done. Now fire up new thread. Leave old vmspace. */ - movl %ecx,%edi - movl 8(%esp),%ecx /* New thread */ #ifdef INVARIANTS - testl %ecx,%ecx /* no thread? */ + testq %rsi,%rsi /* no thread? 
*/ jz badsw3 /* no, panic */ #endif - movl TD_PCB(%ecx),%edx - movl PCPU(CPUID), %esi + movq TD_PCB(%rsi),%rdx + xorq %rax, %rax + movl PCPU(CPUID), %eax /* switch address space */ - movl PCB_CR3(%edx),%eax -#ifdef LAZY_SWITCH - cmpl $0,lazy_flush_enable - je 1f - cmpl %eax,IdlePTD /* Kernel address space? */ -#ifdef SWTCH_OPTIM_STATS - je 3f -#else - je sw1 -#endif -1: -#endif - movl %cr3,%ebx /* The same address space? */ - cmpl %ebx,%eax -#ifdef SWTCH_OPTIM_STATS - je 2f /* Yes, skip all that cruft */ -#else - je sw1 -#endif -#ifdef SWTCH_OPTIM_STATS - incl tlb_flush_count -#endif - movl %eax,%cr3 /* new address space */ + movq PCB_CR3(%rdx),%rdx + movq %rdx,%cr3 /* new address space */ /* Release bit from old pmap->pm_active */ - movl TD_PROC(%edi), %eax /* oldproc */ - movl P_VMSPACE(%eax), %ebx -#ifdef SMP - lock -#endif - btrl %esi, VM_PMAP+PM_ACTIVE(%ebx) /* clear old */ + movq TD_PROC(%rdi), %rdx /* oldproc */ + movq P_VMSPACE(%rdx), %rdx + btrq %rax, VM_PMAP+PM_ACTIVE(%rdx) /* clear old */ /* Set bit in new pmap->pm_active */ - movl TD_PROC(%ecx),%eax /* newproc */ - movl P_VMSPACE(%eax), %ebx -#ifdef SMP - lock -#endif - btsl %esi, VM_PMAP+PM_ACTIVE(%ebx) /* set new */ - -#ifdef LAZY_SWITCH -#ifdef SWTCH_OPTIM_STATS - jmp sw1 - -2: /* same address space */ - incl swtch_optim_stats - jmp sw1 - -3: /* kernel address space */ - incl lazy_flush_count -#endif -#endif + movq TD_PROC(%rsi),%rdx /* newproc */ + movq P_VMSPACE(%rdx), %rdx + btsq %rax, VM_PMAP+PM_ACTIVE(%rdx) /* set new */ sw1: /* * At this point, we've switched address spaces and are ready * to load up the rest of the next context. */ - cmpl $0, PCB_EXT(%edx) /* has pcb extension? */ - je 1f /* If not, use the default */ - btsl %esi, private_tss /* mark use of private tss */ - movl PCB_EXT(%edx), %edi /* new tss descriptor */ - jmp 2f /* Load it up */ + movq TD_PCB(%rsi),%rdx -1: /* - * Use the common default TSS instead of our own. 
- * Set our stack pointer into the TSS, it's set to just - * below the PCB. In C, common_tss.tss_esp0 = &pcb - 16; - */ - leal -16(%edx), %ebx /* leave space for vm86 */ - movl %ebx, PCPU(COMMON_TSS) + TSS_ESP0 - - /* - * Test this CPU's bit in the bitmap to see if this - * CPU was using a private TSS. - */ - btrl %esi, private_tss /* Already using the common? */ - jae 3f /* if so, skip reloading */ - PCPU_ADDR(COMMON_TSSD, %edi) -2: - /* Move correct tss descriptor into GDT slot, then reload tr. */ - movl PCPU(TSS_GDT), %ebx /* entry in GDT */ - movl 0(%edi), %eax - movl %eax, 0(%ebx) - movl 4(%edi), %eax - movl %eax, 4(%ebx) - movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */ - ltr %si -3: + /* Update the TSS_RSP0 pointer for the next interrupt */ + leaq -16(%rdx), %rbx + movq %rbx, common_tss + COMMON_TSS_RSP0 /* Restore context. */ - movl PCB_EBX(%edx),%ebx - movl PCB_ESP(%edx),%esp - movl PCB_EBP(%edx),%ebp - movl PCB_ESI(%edx),%esi - movl PCB_EDI(%edx),%edi - movl PCB_EIP(%edx),%eax - movl %eax,(%esp) - pushl PCB_PSL(%edx) - popfl - - movl %edx, PCPU(CURPCB) - movl %ecx, PCPU(CURTHREAD) /* into next thread */ + movq PCB_RBX(%rdx),%rbx + movq PCB_RSP(%rdx),%rsp + movq PCB_RBP(%rdx),%rbp + movq PCB_R12(%rdx),%r12 + movq PCB_R13(%rdx),%r13 + movq PCB_R14(%rdx),%r14 + movq PCB_R15(%rdx),%r15 + movq PCB_RIP(%rdx),%rax + movq %rax,(%rsp) + pushq PCB_RFLAGS(%rdx) + popfq + + movq %rdx, PCPU(CURPCB) + movq %rsi, PCPU(CURTHREAD) /* into next thread */ - /* - * Determine the LDT to use and load it if is the default one and - * that is not the current one. - */ - movl TD_PROC(%ecx),%eax - cmpl $0,P_MD+MD_LDT(%eax) - jnz 1f - movl _default_ldt,%eax - cmpl PCPU(CURRENTLDT),%eax - je 2f - lldt _default_ldt - movl %eax,PCPU(CURRENTLDT) - jmp 2f -1: - /* Load the LDT when it is not the default one. */ - pushl %edx /* Preserve pointer to pcb. */ - addl $P_MD,%eax /* Pointer to mdproc is arg. 
*/ - pushl %eax - call set_user_ldt - addl $4,%esp - popl %edx -2: - - /* This must be done after loading the user LDT. */ - .globl cpu_switch_load_gs -cpu_switch_load_gs: - movl PCB_GS(%edx),%gs - - /* Test if debug registers should be restored. */ - testl $PCB_DBREGS,PCB_FLAGS(%edx) - jz 1f - - /* - * Restore debug registers. The special code for dr7 is to - * preserve the current values of its reserved bits. - */ - movl PCB_DR6(%edx),%eax - movl %eax,%dr6 - movl PCB_DR3(%edx),%eax - movl %eax,%dr3 - movl PCB_DR2(%edx),%eax - movl %eax,%dr2 - movl PCB_DR1(%edx),%eax - movl %eax,%dr1 - movl PCB_DR0(%edx),%eax - movl %eax,%dr0 - movl %dr7,%eax - andl $0x0000fc00,%eax - movl PCB_DR7(%edx),%ecx - andl $~0x0000fc00,%ecx - orl %ecx,%eax - movl %eax,%dr7 -1: ret #ifdef INVARIANTS badsw1: - pushal - pushl $sw0_1 + pushq %rax + pushq %rcx + pushq %rdx + pushq %rbx + pushq %rbp + pushq %rsi + pushq %rdi + pushq %r8 + pushq %r9 + pushq %r10 + pushq %r11 + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq $sw0_1 call panic sw0_1: .asciz "cpu_throw: no newthread supplied" badsw2: - pushal - pushl $sw0_2 + pushq %rax + pushq %rcx + pushq %rdx + pushq %rbx + pushq %rbp + pushq %rsi + pushq %rdi + pushq %r8 + pushq %r9 + pushq %r10 + pushq %r11 + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq $sw0_2 call panic sw0_2: .asciz "cpu_switch: no curthread supplied" badsw3: - pushal - pushl $sw0_3 + pushq %rax + pushq %rcx + pushq %rdx + pushq %rbx + pushq %rbp + pushq %rsi + pushq %rdi + pushq %r8 + pushq %r9 + pushq %r10 + pushq %r11 + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq $sw0_3 call panic sw0_3: .asciz "cpu_switch: no newthread supplied" #endif +noswitch: .asciz "cpu_switch: called!" +nothrow: .asciz "cpu_throw: called!" /* * savectx(pcb) * Update pcb, saving current processor state. */ ENTRY(savectx) /* Fetch PCB. */ - movl 4(%esp),%ecx + movq %rdi,%rcx - /* Save caller's return address. Child won't execute this routine. 
*/ - movl (%esp),%eax - movl %eax,PCB_EIP(%ecx) + /* Save caller's return address. */ + movq (%rsp),%rax + movq %rax,PCB_RIP(%rcx) - movl %cr3,%eax - movl %eax,PCB_CR3(%ecx) + movq %cr3,%rax + movq %rax,PCB_CR3(%rcx) - movl %ebx,PCB_EBX(%ecx) - movl %esp,PCB_ESP(%ecx) - movl %ebp,PCB_EBP(%ecx) - movl %esi,PCB_ESI(%ecx) - movl %edi,PCB_EDI(%ecx) - movl %gs,PCB_GS(%ecx) - pushfl - popl PCB_PSL(%ecx) + movq %rbx,PCB_RBX(%rcx) + movq %rsp,PCB_RSP(%rcx) + movq %rbp,PCB_RBP(%rcx) + movq %r12,PCB_R12(%rcx) + movq %r13,PCB_R13(%rcx) + movq %r14,PCB_R14(%rcx) + movq %r15,PCB_R15(%rcx) + pushfq + popq PCB_RFLAGS(%rcx) -#ifdef DEV_NPX /* * If fpcurthread == NULL, then the npx h/w state is irrelevant and the * state had better already be in the pcb. This is true for forks @@ -379,30 +271,25 @@ ENTRY(savectx) * have to handle h/w bugs for reloading. We used to lose the * parent's npx state for forks by forgetting to reload. */ - pushfl + pushfq cli - movl PCPU(FPCURTHREAD),%eax - testl %eax,%eax + movq PCPU(FPCURTHREAD),%rax + testq %rax,%rax je 1f - pushl %ecx - movl TD_PCB(%eax),%eax - leal PCB_SAVEFPU(%eax),%eax - pushl %eax - pushl %eax + pushq %rcx + pushq %rax + movq TD_PCB(%rax),%rdi + leaq PCB_SAVEFPU(%rdi),%rdi call npxsave - addl $4,%esp - popl %eax - popl %ecx + popq %rax + popq %rcx - pushl $PCB_SAVEFPU_SIZE - leal PCB_SAVEFPU(%ecx),%ecx - pushl %ecx - pushl %eax + movq $PCB_SAVEFPU_SIZE,%rdx /* arg 3 */ + leaq PCB_SAVEFPU(%rcx),%rsi /* arg 2 */ + movq %rax,%rdi /* arg 1 */ call bcopy - addl $12,%esp 1: - popfl -#endif /* DEV_NPX */ + popfq ret diff --git a/sys/amd64/amd64/critical.c b/sys/amd64/amd64/critical.c index 1a74b95..c8a1a88 100644 --- a/sys/amd64/amd64/critical.c +++ b/sys/amd64/amd64/critical.c @@ -18,145 +18,25 @@ #include <machine/clock.h> #include <machine/critical.h> -#ifdef SMP -#include <machine/privatespace.h> -#include <machine/smp.h> -#else /* - * XXX this mess to get sched_ithd() and call_fast_unpend() - */ -#include <sys/bus.h> -#include 
<machine/apic.h> -#include <machine/frame.h> -#include <i386/isa/icu.h> -#include <i386/isa/intr_machdep.h> -#endif - -void i386_unpend(void); /* NOTE: not static, called from assembly */ - -/* - * cpu_unpend() - called from critical_exit() inline after quick - * interrupt-pending check. + * cpu_critical_fork_exit() - cleanup after fork */ void -cpu_unpend(void) +cpu_critical_fork_exit(void) { - register_t eflags; struct thread *td; td = curthread; - eflags = intr_disable(); - if (PCPU_GET(int_pending)) { - ++td->td_intr_nesting_level; - i386_unpend(); - --td->td_intr_nesting_level; - } - intr_restore(eflags); -} - -/* - * cpu_critical_fork_exit() - cleanup after fork - * - * For i386 we do not have to do anything, td_critnest is - * handled by the fork trampoline code. - */ -void -cpu_critical_fork_exit(void) -{ + td->td_critnest = 1; + td->td_md.md_savecrit = read_rflags() | PSL_I; } /* * cpu_thread_link() - thread linkup, initialize machine-dependant fields - * - * There are currently no machine-dependant fields that require - * initialization. */ void cpu_thread_link(struct thread *td) { -} - -/* - * Called from cpu_unpend or called from the assembly vector code - * to process any interrupts which may have occured while we were in - * a critical section. 
- * - * - interrupts must be disabled - * - td_critnest must be 0 - * - td_intr_nesting_level must be incremented by the caller - * - * NOT STATIC (called from assembly) - */ -void -i386_unpend(void) -{ - struct clockframe frame; - - frame.cf_cs = SEL_KPL; - frame.cf_eip = (register_t)i386_unpend; - frame.cf_eflags = PSL_KERNEL; - KASSERT(curthread->td_critnest == 0, ("unpend critnest != 0")); - KASSERT((read_eflags() & PSL_I) == 0, ("unpend interrupts enabled1")); - curthread->td_critnest = 1; - for (;;) { - u_int32_t mask; - int irq; - - /* - * Fast interrupts have priority - */ - if ((mask = PCPU_GET(fpending)) != 0) { - irq = bsfl(mask); - PCPU_SET(fpending, mask & ~(1 << irq)); - call_fast_unpend(irq); - KASSERT((read_eflags() & PSL_I) == 0, - ("unpend interrupts enabled2 %d", irq)); - continue; - } - - /* - * Threaded interrupts come next - */ - if ((mask = PCPU_GET(ipending)) != 0) { - irq = bsfl(mask); - PCPU_SET(ipending, mask & ~(1 << irq)); - sched_ithd((void *)irq); - KASSERT((read_eflags() & PSL_I) == 0, - ("unpend interrupts enabled3 %d", irq)); - continue; - } - /* - * Software interrupts and delayed IPIs are last - * - * XXX give the bits #defined names. see also - * isa/xxx_vector.s - */ - if ((mask = PCPU_GET(spending)) != 0) { - irq = bsfl(mask); - PCPU_SET(spending, mask & ~(1 << irq)); - switch(irq) { - case 0: /* bit 0 - hardclock */ - hardclock_process(&frame); - break; - case 1: /* bit 1 - statclock */ - if (profprocs != 0) - profclock(&frame); - if (pscnt == psdiv) - statclock(&frame); - break; - } - KASSERT((read_eflags() & PSL_I) == 0, - ("unpend interrupts enabled4 %d", irq)); - continue; - } - break; - } - /* - * Interrupts are still disabled, we can safely clear int_pending - * and td_critnest. 
- */ - KASSERT((read_eflags() & PSL_I) == 0, ("unpend interrupts enabled5")); - PCPU_SET(int_pending, 0); - curthread->td_critnest = 0; + td->td_md.md_savecrit = 0; } diff --git a/sys/amd64/amd64/dump_machdep.c b/sys/amd64/amd64/dump_machdep.c index 3f3acc7..91f53ec 100644 --- a/sys/amd64/amd64/dump_machdep.c +++ b/sys/amd64/amd64/dump_machdep.c @@ -63,7 +63,7 @@ dumpsys(struct dumperinfo *di) /* Fill in the kernel dump header */ strcpy(kdh.magic, KERNELDUMPMAGIC); - strcpy(kdh.architecture, "i386"); + strcpy(kdh.architecture, "amd64"); kdh.version = htod32(KERNELDUMPVERSION); kdh.architectureversion = htod32(KERNELDUMP_I386_VERSION); kdh.dumplength = htod64(Maxmem * (off_t)PAGE_SIZE); diff --git a/sys/amd64/amd64/elf_machdep.c b/sys/amd64/amd64/elf_machdep.c index 4cf9f21..d7a9fb9 100644 --- a/sys/amd64/amd64/elf_machdep.c +++ b/sys/amd64/amd64/elf_machdep.c @@ -44,7 +44,7 @@ #include <machine/elf.h> #include <machine/md_var.h> -struct sysentvec elf32_freebsd_sysvec = { +struct sysentvec elf64_freebsd_sysvec = { SYS_MAXSYSCALL, sysent, 0, @@ -58,7 +58,7 @@ struct sysentvec elf32_freebsd_sysvec = { sigcode, &szsigcode, NULL, - "FreeBSD ELF32", + "FreeBSD ELF64", __elfN(coredump), NULL, MINSIGSTKSZ, @@ -72,17 +72,17 @@ struct sysentvec elf32_freebsd_sysvec = { exec_setregs }; -static Elf32_Brandinfo freebsd_brand_info = { +static Elf64_Brandinfo freebsd_brand_info = { ELFOSABI_FREEBSD, - EM_386, + EM_X86_64, "FreeBSD", "", "/usr/libexec/ld-elf.so.1", - &elf32_freebsd_sysvec + &elf64_freebsd_sysvec }; -SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_ANY, - (sysinit_cfunc_t) elf32_insert_brand_entry, +SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_ANY, + (sysinit_cfunc_t) elf64_insert_brand_entry, &freebsd_brand_info); /* Process one elf relocation with addend. 
*/ @@ -117,7 +117,7 @@ elf_reloc_internal(linker_file_t lf, const void *data, int type, int local) } if (local) { - if (rtype == R_386_RELATIVE) { /* A + B */ + if (rtype == R_X86_64_RELATIVE) { /* A + B */ addr = relocbase + addend; if (*where != addr) *where = addr; @@ -127,10 +127,10 @@ elf_reloc_internal(linker_file_t lf, const void *data, int type, int local) switch (rtype) { - case R_386_NONE: /* none */ + case R_X86_64_NONE: /* none */ break; - case R_386_32: /* S + A */ + case R_X86_64_64: /* S + A */ addr = elf_lookup(lf, symidx, 1); if (addr == 0) return -1; @@ -139,16 +139,17 @@ elf_reloc_internal(linker_file_t lf, const void *data, int type, int local) *where = addr; break; - case R_386_PC32: /* S + A - P */ + case R_X86_64_PC32: /* S + A - P */ addr = elf_lookup(lf, symidx, 1); if (addr == 0) return -1; addr += addend - (Elf_Addr)where; + /* XXX needs to be 32 bit *where, not 64 bit */ if (*where != addr) *where = addr; break; - case R_386_COPY: /* none */ + case R_X86_64_COPY: /* none */ /* * There shouldn't be copy relocations in kernel * objects. 
@@ -157,7 +158,7 @@ elf_reloc_internal(linker_file_t lf, const void *data, int type, int local) return -1; break; - case R_386_GLOB_DAT: /* S */ + case R_X86_64_GLOB_DAT: /* S */ addr = elf_lookup(lf, symidx, 1); if (addr == 0) return -1; @@ -165,7 +166,7 @@ elf_reloc_internal(linker_file_t lf, const void *data, int type, int local) *where = addr; break; - case R_386_RELATIVE: + case R_X86_64_RELATIVE: /* B + A */ break; default: diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S index ce07659..8ff6020 100644 --- a/sys/amd64/amd64/exception.S +++ b/sys/amd64/amd64/exception.S @@ -34,15 +34,10 @@ * $FreeBSD$ */ -#include "opt_npx.h" - #include <machine/asmacros.h> #include <sys/mutex.h> #include <machine/psl.h> #include <machine/trap.h> -#ifdef SMP -#include <machine/smptests.h> /** various SMP options */ -#endif #include "assym.s" @@ -79,29 +74,29 @@ */ #define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(X,name); \ .type __CONCAT(X,name),@function; __CONCAT(X,name): -#define TRAP(a) pushl $(a) ; jmp alltraps +#define TRAP(a) pushq $(a) ; jmp alltraps MCOUNT_LABEL(user) MCOUNT_LABEL(btrap) IDTVEC(div) - pushl $0; TRAP(T_DIVIDE) + pushq $0; TRAP(T_DIVIDE) IDTVEC(dbg) - pushl $0; TRAP(T_TRCTRAP) + pushq $0; TRAP(T_TRCTRAP) IDTVEC(nmi) - pushl $0; TRAP(T_NMI) + pushq $0; TRAP(T_NMI) IDTVEC(bpt) - pushl $0; TRAP(T_BPTFLT) + pushq $0; TRAP(T_BPTFLT) IDTVEC(ofl) - pushl $0; TRAP(T_OFLOW) + pushq $0; TRAP(T_OFLOW) IDTVEC(bnd) - pushl $0; TRAP(T_BOUND) + pushq $0; TRAP(T_BOUND) IDTVEC(ill) - pushl $0; TRAP(T_PRIVINFLT) + pushq $0; TRAP(T_PRIVINFLT) IDTVEC(dna) - pushl $0; TRAP(T_DNA) + pushq $0; TRAP(T_DNA) IDTVEC(fpusegm) - pushl $0; TRAP(T_FPOPFLT) + pushq $0; TRAP(T_FPOPFLT) IDTVEC(tss) TRAP(T_TSSFLT) IDTVEC(missing) @@ -113,16 +108,15 @@ IDTVEC(prot) IDTVEC(page) TRAP(T_PAGEFLT) IDTVEC(mchk) - pushl $0; TRAP(T_MCHK) + pushq $0; TRAP(T_MCHK) IDTVEC(rsvd) - pushl $0; TRAP(T_RESERVED) + pushq $0; TRAP(T_RESERVED) IDTVEC(fpu) - pushl $0; TRAP(T_ARITHTRAP) + 
pushq $0; TRAP(T_ARITHTRAP) IDTVEC(align) TRAP(T_ALIGNFLT) - IDTVEC(xmm) - pushl $0; TRAP(T_XMMFLT) + pushq $0; TRAP(T_XMMFLT) /* * alltraps entry point. Interrupts are enabled if this was a trap @@ -135,31 +129,36 @@ IDTVEC(xmm) .globl alltraps .type alltraps,@function alltraps: - pushal - pushl %ds - pushl %es - pushl %fs + subq $TF_TRAPNO,%rsp /* tf_err and tf_trapno already pushed */ + movq %rdi,TF_RDI(%rsp) + movq %rsi,TF_RSI(%rsp) + movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) + movq %r8,TF_R8(%rsp) + movq %r9,TF_R9(%rsp) + movq %rax,TF_RAX(%rsp) + movq %rbx,TF_RBX(%rsp) + movq %rbp,TF_RBP(%rsp) + movq %r10,TF_R10(%rsp) + movq %r11,TF_R11(%rsp) + movq %r12,TF_R12(%rsp) + movq %r13,TF_R13(%rsp) + movq %r14,TF_R14(%rsp) + movq %r15,TF_R15(%rsp) alltraps_with_regs_pushed: - mov $KDSEL,%ax - mov %ax,%ds - mov %ax,%es - mov $KPSEL,%ax - mov %ax,%fs - FAKE_MCOUNT(13*4(%esp)) + FAKE_MCOUNT(13*4(%rsp)) calltrap: FAKE_MCOUNT(btrap) /* init "from" btrap -> calltrap */ call trap - - /* - * Return via doreti to handle ASTs. - */ MEXITCOUNT - jmp doreti + jmp doreti /* Handle any pending ASTs */ /* - * SYSCALL CALL GATE (old entry point for a.out binaries) + * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80) * - * The intersegment call has been set up to specify one dummy parameter. + * Even though the name says 'int0x80', this is actually a TGT (trap gate) + * rather then an IGT (interrupt gate). Thus interrupts are enabled on + * entry just as they are for a normal syscall. * * This leaves a place to put eflags so that the call frame can be * converted to a trap frame. Note that the eflags is (semi-)bogusly @@ -169,79 +168,122 @@ calltrap: * and clobber the saved cs/eip. 
*/ SUPERALIGN_TEXT -IDTVEC(lcall_syscall) - pushfl /* save eflags */ - popl 8(%esp) /* shuffle into tf_eflags */ - pushl $7 /* sizeof "lcall 7,0" */ - subl $4,%esp /* skip over tf_trapno */ - pushal - pushl %ds - pushl %es - pushl %fs - mov $KDSEL,%ax /* switch to kernel segments */ - mov %ax,%ds - mov %ax,%es - mov $KPSEL,%ax - mov %ax,%fs - FAKE_MCOUNT(13*4(%esp)) +IDTVEC(int0x80_syscall) + pushq $2 /* sizeof "int 0x80" */ + subq $TF_ERR,%rsp /* skip over tf_trapno */ + movq %rdi,TF_RDI(%rsp) + movq %rsi,TF_RSI(%rsp) + movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) + movq %r8,TF_R8(%rsp) + movq %r9,TF_R9(%rsp) + movq %rax,TF_RAX(%rsp) + movq %rbx,TF_RBX(%rsp) + movq %rbp,TF_RBP(%rsp) + movq %r10,TF_R10(%rsp) + movq %r11,TF_R11(%rsp) + movq %r12,TF_R12(%rsp) + movq %r13,TF_R13(%rsp) + movq %r14,TF_R14(%rsp) + movq %r15,TF_R15(%rsp) + FAKE_MCOUNT(13*4(%rsp)) call syscall MEXITCOUNT jmp doreti /* - * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80) - * - * Even though the name says 'int0x80', this is actually a TGT (trap gate) - * rather then an IGT (interrupt gate). Thus interrupts are enabled on - * entry just as they are for a normal syscall. + * Fast syscall entry point. We enter here with just our new %cs/%ss set, + * and the new privilige level. We are still running on the old user stack + * pointer. We have to juggle a few things around to find our stack etc. + * swapgs gives us access to our PCPU space only. + * XXX The PCPU stuff is stubbed out right now... */ - SUPERALIGN_TEXT -IDTVEC(int0x80_syscall) - pushl $2 /* sizeof "int 0x80" */ - subl $4,%esp /* skip over tf_trapno */ - pushal - pushl %ds - pushl %es - pushl %fs - mov $KDSEL,%ax /* switch to kernel segments */ - mov %ax,%ds - mov %ax,%es - mov $KPSEL,%ax - mov %ax,%fs - FAKE_MCOUNT(13*4(%esp)) +IDTVEC(fast_syscall) + #swapgs + movq %rsp,PCPU(SCRATCH_RSP) + movq common_tss+COMMON_TSS_RSP0,%rsp + sti + /* Now emulate a trapframe. Ugh. 
*/ + subq $TF_SIZE,%rsp + movq $KUDSEL,TF_SS(%rsp) + /* defer TF_RSP till we have a spare register */ + movq %r11,TF_RFLAGS(%rsp) + movq $KUCSEL,TF_CS(%rsp) + movq %rcx,TF_RIP(%rsp) /* %rcx original value is in %r10 */ + movq $2,TF_ERR(%rsp) + movq %rdi,TF_RDI(%rsp) /* arg 1 */ + movq %rsi,TF_RSI(%rsp) /* arg 2 */ + movq %rdx,TF_RDX(%rsp) /* arg 3 */ + movq %r10,TF_RCX(%rsp) /* arg 4 */ + movq %r8,TF_R8(%rsp) /* arg 5 */ + movq %r9,TF_R9(%rsp) /* arg 6 */ + movq %rax,TF_RAX(%rsp) /* syscall number */ + movq %rbx,TF_RBX(%rsp) /* C preserved */ + movq %rbp,TF_RBP(%rsp) /* C preserved */ + movq %r12,TF_R12(%rsp) /* C preserved */ + movq %r13,TF_R13(%rsp) /* C preserved */ + movq %r14,TF_R14(%rsp) /* C preserved */ + movq %r15,TF_R15(%rsp) /* C preserved */ + movq PCPU(SCRATCH_RSP),%r12 /* %r12 already saved */ + movq %r12,TF_RSP(%rsp) /* user stack pointer */ call syscall - MEXITCOUNT + movq PCPU(CURPCB),%rax + testq $PCB_FULLCTX,PCB_FLAGS(%rax) + jne 3f + /* simplified from doreti */ +1: /* Check for and handle AST's on return to userland */ + cli + movq PCPU(CURTHREAD),%rax + testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax) + je 2f + sti + movq %rsp, %rdi + call ast + jmp 1b +2: /* restore preserved registers */ + movq TF_RDI(%rsp),%rdi /* bonus; preserve arg 1 */ + movq TF_RSI(%rsp),%rsi /* bonus: preserve arg 2 */ + movq TF_RDX(%rsp),%rdx /* return value 2 */ + movq TF_RAX(%rsp),%rax /* return value 1 */ + movq TF_RBX(%rsp),%rbx /* C preserved */ + movq TF_RBP(%rsp),%rbp /* C preserved */ + movq TF_R12(%rsp),%r12 /* C preserved */ + movq TF_R13(%rsp),%r13 /* C preserved */ + movq TF_R14(%rsp),%r14 /* C preserved */ + movq TF_R15(%rsp),%r15 /* C preserved */ + movq TF_RFLAGS(%rsp),%r11 /* original %rflags */ + movq TF_RIP(%rsp),%rcx /* original %rip */ + movq TF_RSP(%rsp),%r9 /* user stack pointer */ + movq %r9,%rsp /* original %rsp */ + #swapgs + sysretq +3: /* Requested full context restore, use doreti for that */ + andq $~PCB_FULLCTX,PCB_FLAGS(%rax) jmp 
doreti +/* + * Here for CYA insurance, in case a "syscall" instruction gets + * issued from 32 bit compatability mode. MSR_CSTAR has to point + * to *something* if EFER_SCE is enabled. + */ +IDTVEC(fast_syscall32) + sysret + ENTRY(fork_trampoline) - pushl %esp /* trapframe pointer */ - pushl %ebx /* arg1 */ - pushl %esi /* function */ - movl PCPU(CURTHREAD),%ebx /* setup critnest */ - movl $1,TD_CRITNEST(%ebx) - sti /* enable interrupts */ + movq %r12, %rdi /* function */ + movq %rbx, %rsi /* arg1 */ + movq %rsp, %rdx /* trapframe pointer */ call fork_exit - addl $12,%esp - /* cut from syscall */ - - /* - * Return via doreti to handle ASTs. - */ MEXITCOUNT - jmp doreti + jmp doreti /* Handle any ASTs */ /* - * Include vm86 call routines, which want to call doreti. - */ -#include "i386/i386/vm86bios.s" - -/* * Include what was once config+isa-dependent code. * XXX it should be in a stand-alone file. It's still icu-dependent and * belongs in i386/isa. */ -#include "i386/isa/vector.s" +#include "amd64/isa/vector.s" .data ALIGN_DATA @@ -256,20 +298,10 @@ ENTRY(fork_trampoline) .type doreti,@function doreti: FAKE_MCOUNT(bintr) /* init "from" bintr -> doreti */ -doreti_next: /* - * Check if ASTs can be handled now. PSL_VM must be checked first - * since segment registers only have an RPL in non-VM86 mode. + * Check if ASTs can be handled now. */ - testl $PSL_VM,TF_EFLAGS(%esp) /* are we in vm86 mode? */ - jz doreti_notvm86 - movl PCPU(CURPCB),%ecx - testl $PCB_VM86CALL,PCB_FLAGS(%ecx) /* are we in a vm86 call? */ - jz doreti_ast /* can handle ASTS now if not */ - jmp doreti_exit - -doreti_notvm86: - testb $SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */ + testb $SEL_RPL_MASK,TF_CS(%rsp) /* are we returning to user mode? */ jz doreti_exit /* can't handle ASTs now if not */ doreti_ast: @@ -279,13 +311,12 @@ doreti_ast: * since we will be informed of any new ASTs by an IPI. 
*/ cli - movl PCPU(CURTHREAD),%eax - testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%eax) + movq PCPU(CURTHREAD),%rax + testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax) je doreti_exit sti - pushl %esp /* pass a pointer to the trapframe */ + movq %rsp, %rdi /* pass a pointer to the trapframe */ call ast - add $4,%esp jmp doreti_ast /* @@ -298,20 +329,25 @@ doreti_ast: doreti_exit: MEXITCOUNT - .globl doreti_popl_fs -doreti_popl_fs: - popl %fs - .globl doreti_popl_es -doreti_popl_es: - popl %es - .globl doreti_popl_ds -doreti_popl_ds: - popl %ds - popal - addl $8,%esp + movq TF_RDI(%rsp),%rdi + movq TF_RSI(%rsp),%rsi + movq TF_RDX(%rsp),%rdx + movq TF_RCX(%rsp),%rcx + movq TF_R8(%rsp),%r8 + movq TF_R9(%rsp),%r9 + movq TF_RAX(%rsp),%rax + movq TF_RBX(%rsp),%rbx + movq TF_RBP(%rsp),%rbp + movq TF_R10(%rsp),%r10 + movq TF_R11(%rsp),%r11 + movq TF_R12(%rsp),%r12 + movq TF_R13(%rsp),%r13 + movq TF_R14(%rsp),%r14 + movq TF_R15(%rsp),%r15 + addq $TF_RIP,%rsp /* skip over tf_err, tf_trapno */ .globl doreti_iret doreti_iret: - iret + iretq /* * doreti_iret_fault and friends. 
Alternative return code for @@ -323,23 +359,24 @@ doreti_iret: ALIGN_TEXT .globl doreti_iret_fault doreti_iret_fault: - subl $8,%esp - pushal - pushl %ds - .globl doreti_popl_ds_fault -doreti_popl_ds_fault: - pushl %es - .globl doreti_popl_es_fault -doreti_popl_es_fault: - pushl %fs - .globl doreti_popl_fs_fault -doreti_popl_fs_fault: - movl $0,TF_ERR(%esp) /* XXX should be the error code */ - movl $T_PROTFLT,TF_TRAPNO(%esp) + subq $TF_RIP,%rsp /* space including tf_err, tf_trapno */ + movq %rdi,TF_RDI(%rsp) + movq %rsi,TF_RSI(%rsp) + movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) + movq %r8,TF_R8(%rsp) + movq %r9,TF_R9(%rsp) + movq %rax,TF_RAX(%rsp) + movq %rbx,TF_RBX(%rsp) + movq %rbp,TF_RBP(%rsp) + movq %r10,TF_R10(%rsp) + movq %r11,TF_R11(%rsp) + movq %r12,TF_R12(%rsp) + movq %r13,TF_R13(%rsp) + movq %r14,TF_R14(%rsp) + movq %r15,TF_R15(%rsp) + movq $T_PROTFLT,TF_TRAPNO(%rsp) + movq $0,TF_ERR(%rsp) /* XXX should be the error code */ jmp alltraps_with_regs_pushed -#ifdef APIC_IO -#include "i386/isa/apic_ipl.s" -#else -#include "i386/isa/icu_ipl.s" -#endif /* APIC_IO */ +#include "amd64/isa/icu_ipl.s" diff --git a/sys/amd64/amd64/exception.s b/sys/amd64/amd64/exception.s index ce07659..8ff6020 100644 --- a/sys/amd64/amd64/exception.s +++ b/sys/amd64/amd64/exception.s @@ -34,15 +34,10 @@ * $FreeBSD$ */ -#include "opt_npx.h" - #include <machine/asmacros.h> #include <sys/mutex.h> #include <machine/psl.h> #include <machine/trap.h> -#ifdef SMP -#include <machine/smptests.h> /** various SMP options */ -#endif #include "assym.s" @@ -79,29 +74,29 @@ */ #define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(X,name); \ .type __CONCAT(X,name),@function; __CONCAT(X,name): -#define TRAP(a) pushl $(a) ; jmp alltraps +#define TRAP(a) pushq $(a) ; jmp alltraps MCOUNT_LABEL(user) MCOUNT_LABEL(btrap) IDTVEC(div) - pushl $0; TRAP(T_DIVIDE) + pushq $0; TRAP(T_DIVIDE) IDTVEC(dbg) - pushl $0; TRAP(T_TRCTRAP) + pushq $0; TRAP(T_TRCTRAP) IDTVEC(nmi) - pushl $0; TRAP(T_NMI) + pushq $0; 
TRAP(T_NMI) IDTVEC(bpt) - pushl $0; TRAP(T_BPTFLT) + pushq $0; TRAP(T_BPTFLT) IDTVEC(ofl) - pushl $0; TRAP(T_OFLOW) + pushq $0; TRAP(T_OFLOW) IDTVEC(bnd) - pushl $0; TRAP(T_BOUND) + pushq $0; TRAP(T_BOUND) IDTVEC(ill) - pushl $0; TRAP(T_PRIVINFLT) + pushq $0; TRAP(T_PRIVINFLT) IDTVEC(dna) - pushl $0; TRAP(T_DNA) + pushq $0; TRAP(T_DNA) IDTVEC(fpusegm) - pushl $0; TRAP(T_FPOPFLT) + pushq $0; TRAP(T_FPOPFLT) IDTVEC(tss) TRAP(T_TSSFLT) IDTVEC(missing) @@ -113,16 +108,15 @@ IDTVEC(prot) IDTVEC(page) TRAP(T_PAGEFLT) IDTVEC(mchk) - pushl $0; TRAP(T_MCHK) + pushq $0; TRAP(T_MCHK) IDTVEC(rsvd) - pushl $0; TRAP(T_RESERVED) + pushq $0; TRAP(T_RESERVED) IDTVEC(fpu) - pushl $0; TRAP(T_ARITHTRAP) + pushq $0; TRAP(T_ARITHTRAP) IDTVEC(align) TRAP(T_ALIGNFLT) - IDTVEC(xmm) - pushl $0; TRAP(T_XMMFLT) + pushq $0; TRAP(T_XMMFLT) /* * alltraps entry point. Interrupts are enabled if this was a trap @@ -135,31 +129,36 @@ IDTVEC(xmm) .globl alltraps .type alltraps,@function alltraps: - pushal - pushl %ds - pushl %es - pushl %fs + subq $TF_TRAPNO,%rsp /* tf_err and tf_trapno already pushed */ + movq %rdi,TF_RDI(%rsp) + movq %rsi,TF_RSI(%rsp) + movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) + movq %r8,TF_R8(%rsp) + movq %r9,TF_R9(%rsp) + movq %rax,TF_RAX(%rsp) + movq %rbx,TF_RBX(%rsp) + movq %rbp,TF_RBP(%rsp) + movq %r10,TF_R10(%rsp) + movq %r11,TF_R11(%rsp) + movq %r12,TF_R12(%rsp) + movq %r13,TF_R13(%rsp) + movq %r14,TF_R14(%rsp) + movq %r15,TF_R15(%rsp) alltraps_with_regs_pushed: - mov $KDSEL,%ax - mov %ax,%ds - mov %ax,%es - mov $KPSEL,%ax - mov %ax,%fs - FAKE_MCOUNT(13*4(%esp)) + FAKE_MCOUNT(13*4(%rsp)) calltrap: FAKE_MCOUNT(btrap) /* init "from" btrap -> calltrap */ call trap - - /* - * Return via doreti to handle ASTs. 
- */ MEXITCOUNT - jmp doreti + jmp doreti /* Handle any pending ASTs */ /* - * SYSCALL CALL GATE (old entry point for a.out binaries) + * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80) * - * The intersegment call has been set up to specify one dummy parameter. + * Even though the name says 'int0x80', this is actually a TGT (trap gate) + * rather then an IGT (interrupt gate). Thus interrupts are enabled on + * entry just as they are for a normal syscall. * * This leaves a place to put eflags so that the call frame can be * converted to a trap frame. Note that the eflags is (semi-)bogusly @@ -169,79 +168,122 @@ calltrap: * and clobber the saved cs/eip. */ SUPERALIGN_TEXT -IDTVEC(lcall_syscall) - pushfl /* save eflags */ - popl 8(%esp) /* shuffle into tf_eflags */ - pushl $7 /* sizeof "lcall 7,0" */ - subl $4,%esp /* skip over tf_trapno */ - pushal - pushl %ds - pushl %es - pushl %fs - mov $KDSEL,%ax /* switch to kernel segments */ - mov %ax,%ds - mov %ax,%es - mov $KPSEL,%ax - mov %ax,%fs - FAKE_MCOUNT(13*4(%esp)) +IDTVEC(int0x80_syscall) + pushq $2 /* sizeof "int 0x80" */ + subq $TF_ERR,%rsp /* skip over tf_trapno */ + movq %rdi,TF_RDI(%rsp) + movq %rsi,TF_RSI(%rsp) + movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) + movq %r8,TF_R8(%rsp) + movq %r9,TF_R9(%rsp) + movq %rax,TF_RAX(%rsp) + movq %rbx,TF_RBX(%rsp) + movq %rbp,TF_RBP(%rsp) + movq %r10,TF_R10(%rsp) + movq %r11,TF_R11(%rsp) + movq %r12,TF_R12(%rsp) + movq %r13,TF_R13(%rsp) + movq %r14,TF_R14(%rsp) + movq %r15,TF_R15(%rsp) + FAKE_MCOUNT(13*4(%rsp)) call syscall MEXITCOUNT jmp doreti /* - * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80) - * - * Even though the name says 'int0x80', this is actually a TGT (trap gate) - * rather then an IGT (interrupt gate). Thus interrupts are enabled on - * entry just as they are for a normal syscall. + * Fast syscall entry point. We enter here with just our new %cs/%ss set, + * and the new privilige level. 
We are still running on the old user stack + * pointer. We have to juggle a few things around to find our stack etc. + * swapgs gives us access to our PCPU space only. + * XXX The PCPU stuff is stubbed out right now... */ - SUPERALIGN_TEXT -IDTVEC(int0x80_syscall) - pushl $2 /* sizeof "int 0x80" */ - subl $4,%esp /* skip over tf_trapno */ - pushal - pushl %ds - pushl %es - pushl %fs - mov $KDSEL,%ax /* switch to kernel segments */ - mov %ax,%ds - mov %ax,%es - mov $KPSEL,%ax - mov %ax,%fs - FAKE_MCOUNT(13*4(%esp)) +IDTVEC(fast_syscall) + #swapgs + movq %rsp,PCPU(SCRATCH_RSP) + movq common_tss+COMMON_TSS_RSP0,%rsp + sti + /* Now emulate a trapframe. Ugh. */ + subq $TF_SIZE,%rsp + movq $KUDSEL,TF_SS(%rsp) + /* defer TF_RSP till we have a spare register */ + movq %r11,TF_RFLAGS(%rsp) + movq $KUCSEL,TF_CS(%rsp) + movq %rcx,TF_RIP(%rsp) /* %rcx original value is in %r10 */ + movq $2,TF_ERR(%rsp) + movq %rdi,TF_RDI(%rsp) /* arg 1 */ + movq %rsi,TF_RSI(%rsp) /* arg 2 */ + movq %rdx,TF_RDX(%rsp) /* arg 3 */ + movq %r10,TF_RCX(%rsp) /* arg 4 */ + movq %r8,TF_R8(%rsp) /* arg 5 */ + movq %r9,TF_R9(%rsp) /* arg 6 */ + movq %rax,TF_RAX(%rsp) /* syscall number */ + movq %rbx,TF_RBX(%rsp) /* C preserved */ + movq %rbp,TF_RBP(%rsp) /* C preserved */ + movq %r12,TF_R12(%rsp) /* C preserved */ + movq %r13,TF_R13(%rsp) /* C preserved */ + movq %r14,TF_R14(%rsp) /* C preserved */ + movq %r15,TF_R15(%rsp) /* C preserved */ + movq PCPU(SCRATCH_RSP),%r12 /* %r12 already saved */ + movq %r12,TF_RSP(%rsp) /* user stack pointer */ call syscall - MEXITCOUNT + movq PCPU(CURPCB),%rax + testq $PCB_FULLCTX,PCB_FLAGS(%rax) + jne 3f + /* simplified from doreti */ +1: /* Check for and handle AST's on return to userland */ + cli + movq PCPU(CURTHREAD),%rax + testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax) + je 2f + sti + movq %rsp, %rdi + call ast + jmp 1b +2: /* restore preserved registers */ + movq TF_RDI(%rsp),%rdi /* bonus; preserve arg 1 */ + movq TF_RSI(%rsp),%rsi /* bonus: preserve arg 
2 */ + movq TF_RDX(%rsp),%rdx /* return value 2 */ + movq TF_RAX(%rsp),%rax /* return value 1 */ + movq TF_RBX(%rsp),%rbx /* C preserved */ + movq TF_RBP(%rsp),%rbp /* C preserved */ + movq TF_R12(%rsp),%r12 /* C preserved */ + movq TF_R13(%rsp),%r13 /* C preserved */ + movq TF_R14(%rsp),%r14 /* C preserved */ + movq TF_R15(%rsp),%r15 /* C preserved */ + movq TF_RFLAGS(%rsp),%r11 /* original %rflags */ + movq TF_RIP(%rsp),%rcx /* original %rip */ + movq TF_RSP(%rsp),%r9 /* user stack pointer */ + movq %r9,%rsp /* original %rsp */ + #swapgs + sysretq +3: /* Requested full context restore, use doreti for that */ + andq $~PCB_FULLCTX,PCB_FLAGS(%rax) jmp doreti +/* + * Here for CYA insurance, in case a "syscall" instruction gets + * issued from 32 bit compatability mode. MSR_CSTAR has to point + * to *something* if EFER_SCE is enabled. + */ +IDTVEC(fast_syscall32) + sysret + ENTRY(fork_trampoline) - pushl %esp /* trapframe pointer */ - pushl %ebx /* arg1 */ - pushl %esi /* function */ - movl PCPU(CURTHREAD),%ebx /* setup critnest */ - movl $1,TD_CRITNEST(%ebx) - sti /* enable interrupts */ + movq %r12, %rdi /* function */ + movq %rbx, %rsi /* arg1 */ + movq %rsp, %rdx /* trapframe pointer */ call fork_exit - addl $12,%esp - /* cut from syscall */ - - /* - * Return via doreti to handle ASTs. - */ MEXITCOUNT - jmp doreti + jmp doreti /* Handle any ASTs */ /* - * Include vm86 call routines, which want to call doreti. - */ -#include "i386/i386/vm86bios.s" - -/* * Include what was once config+isa-dependent code. * XXX it should be in a stand-alone file. It's still icu-dependent and * belongs in i386/isa. */ -#include "i386/isa/vector.s" +#include "amd64/isa/vector.s" .data ALIGN_DATA @@ -256,20 +298,10 @@ ENTRY(fork_trampoline) .type doreti,@function doreti: FAKE_MCOUNT(bintr) /* init "from" bintr -> doreti */ -doreti_next: /* - * Check if ASTs can be handled now. PSL_VM must be checked first - * since segment registers only have an RPL in non-VM86 mode. 
+ * Check if ASTs can be handled now. */ - testl $PSL_VM,TF_EFLAGS(%esp) /* are we in vm86 mode? */ - jz doreti_notvm86 - movl PCPU(CURPCB),%ecx - testl $PCB_VM86CALL,PCB_FLAGS(%ecx) /* are we in a vm86 call? */ - jz doreti_ast /* can handle ASTS now if not */ - jmp doreti_exit - -doreti_notvm86: - testb $SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */ + testb $SEL_RPL_MASK,TF_CS(%rsp) /* are we returning to user mode? */ jz doreti_exit /* can't handle ASTs now if not */ doreti_ast: @@ -279,13 +311,12 @@ doreti_ast: * since we will be informed of any new ASTs by an IPI. */ cli - movl PCPU(CURTHREAD),%eax - testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%eax) + movq PCPU(CURTHREAD),%rax + testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax) je doreti_exit sti - pushl %esp /* pass a pointer to the trapframe */ + movq %rsp, %rdi /* pass a pointer to the trapframe */ call ast - add $4,%esp jmp doreti_ast /* @@ -298,20 +329,25 @@ doreti_ast: doreti_exit: MEXITCOUNT - .globl doreti_popl_fs -doreti_popl_fs: - popl %fs - .globl doreti_popl_es -doreti_popl_es: - popl %es - .globl doreti_popl_ds -doreti_popl_ds: - popl %ds - popal - addl $8,%esp + movq TF_RDI(%rsp),%rdi + movq TF_RSI(%rsp),%rsi + movq TF_RDX(%rsp),%rdx + movq TF_RCX(%rsp),%rcx + movq TF_R8(%rsp),%r8 + movq TF_R9(%rsp),%r9 + movq TF_RAX(%rsp),%rax + movq TF_RBX(%rsp),%rbx + movq TF_RBP(%rsp),%rbp + movq TF_R10(%rsp),%r10 + movq TF_R11(%rsp),%r11 + movq TF_R12(%rsp),%r12 + movq TF_R13(%rsp),%r13 + movq TF_R14(%rsp),%r14 + movq TF_R15(%rsp),%r15 + addq $TF_RIP,%rsp /* skip over tf_err, tf_trapno */ .globl doreti_iret doreti_iret: - iret + iretq /* * doreti_iret_fault and friends. 
Alternative return code for @@ -323,23 +359,24 @@ doreti_iret: ALIGN_TEXT .globl doreti_iret_fault doreti_iret_fault: - subl $8,%esp - pushal - pushl %ds - .globl doreti_popl_ds_fault -doreti_popl_ds_fault: - pushl %es - .globl doreti_popl_es_fault -doreti_popl_es_fault: - pushl %fs - .globl doreti_popl_fs_fault -doreti_popl_fs_fault: - movl $0,TF_ERR(%esp) /* XXX should be the error code */ - movl $T_PROTFLT,TF_TRAPNO(%esp) + subq $TF_RIP,%rsp /* space including tf_err, tf_trapno */ + movq %rdi,TF_RDI(%rsp) + movq %rsi,TF_RSI(%rsp) + movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) + movq %r8,TF_R8(%rsp) + movq %r9,TF_R9(%rsp) + movq %rax,TF_RAX(%rsp) + movq %rbx,TF_RBX(%rsp) + movq %rbp,TF_RBP(%rsp) + movq %r10,TF_R10(%rsp) + movq %r11,TF_R11(%rsp) + movq %r12,TF_R12(%rsp) + movq %r13,TF_R13(%rsp) + movq %r14,TF_R14(%rsp) + movq %r15,TF_R15(%rsp) + movq $T_PROTFLT,TF_TRAPNO(%rsp) + movq $0,TF_ERR(%rsp) /* XXX should be the error code */ jmp alltraps_with_regs_pushed -#ifdef APIC_IO -#include "i386/isa/apic_ipl.s" -#else -#include "i386/isa/icu_ipl.s" -#endif /* APIC_IO */ +#include "amd64/isa/icu_ipl.s" diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index 8dadb6b..2116701 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -35,11 +35,8 @@ * $FreeBSD$ */ -#include "opt_cpu.h" #include "opt_debug_npx.h" #include "opt_isa.h" -#include "opt_math_emulate.h" -#include "opt_npx.h" #include <sys/param.h> #include <sys/systm.h> @@ -60,66 +57,34 @@ #include <sys/signalvar.h> #include <sys/user.h> -#ifndef SMP -#include <machine/asmacros.h> -#endif #include <machine/cputypes.h> #include <machine/frame.h> #include <machine/md_var.h> #include <machine/pcb.h> #include <machine/psl.h> -#ifndef SMP -#include <machine/clock.h> -#endif #include <machine/resource.h> #include <machine/specialreg.h> #include <machine/segments.h> #include <machine/ucontext.h> -#ifndef SMP -#include <i386/isa/icu.h> -#ifdef PC98 -#include <pc98/pc98/pc98.h> -#else -#include 
<i386/isa/isa.h> -#endif -#endif -#include <i386/isa/intr_machdep.h> +#include <amd64/isa/intr_machdep.h> #ifdef DEV_ISA #include <isa/isavar.h> #endif -#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU) -#define CPU_ENABLE_SSE -#endif -#if defined(CPU_DISABLE_SSE) -#undef CPU_ENABLE_SSE -#endif - /* * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. */ -/* Configuration flags. */ -#define NPX_DISABLE_I586_OPTIMIZED_BCOPY (1 << 0) -#define NPX_DISABLE_I586_OPTIMIZED_BZERO (1 << 1) -#define NPX_DISABLE_I586_OPTIMIZED_COPYIO (1 << 2) -#define NPX_PREFER_EMULATOR (1 << 3) - #if defined(__GNUC__) && !defined(lint) #define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) #define fnclex() __asm("fnclex") #define fninit() __asm("fninit") -#define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr))) #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr))) -#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop") -#define frstor(addr) __asm("frstor %0" : : "m" (*(addr))) -#ifdef CPU_ENABLE_SSE #define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) -#endif #define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \ : : "n" (CR0_TS) : "ax") #define stop_emulating() __asm("clts") @@ -129,81 +94,31 @@ void fldcw(caddr_t addr); void fnclex(void); void fninit(void); -void fnsave(caddr_t addr); void fnstcw(caddr_t addr); void fnstsw(caddr_t addr); -void fp_divide_by_0(void); -void frstor(caddr_t addr); -#ifdef CPU_ENABLE_SSE void fxsave(caddr_t addr); void fxrstor(caddr_t addr); -#endif void start_emulating(void); void stop_emulating(void); #endif /* __GNUC__ */ -#ifdef CPU_ENABLE_SSE -#define GET_FPU_CW(thread) \ - (cpu_fxsr ? \ - (thread)->td_pcb->pcb_save.sv_xmm.sv_env.en_cw : \ - (thread)->td_pcb->pcb_save.sv_87.sv_env.en_cw) -#define GET_FPU_SW(thread) \ - (cpu_fxsr ? 
\ - (thread)->td_pcb->pcb_save.sv_xmm.sv_env.en_sw : \ - (thread)->td_pcb->pcb_save.sv_87.sv_env.en_sw) -#else /* CPU_ENABLE_SSE */ -#define GET_FPU_CW(thread) \ - (thread->td_pcb->pcb_save.sv_87.sv_env.en_cw) -#define GET_FPU_SW(thread) \ - (thread->td_pcb->pcb_save.sv_87.sv_env.en_sw) -#endif /* CPU_ENABLE_SSE */ +#define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save.sv_env.en_cw) +#define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save.sv_env.en_sw) typedef u_char bool_t; -static void fpusave(union savefpu *); -static void fpurstor(union savefpu *); static int npx_attach(device_t dev); static void npx_identify(driver_t *driver, device_t parent); -#ifndef SMP -static void npx_intr(void *); -#endif static int npx_probe(device_t dev); -#ifdef I586_CPU_XXX -static long timezero(const char *funcname, - void (*func)(void *buf, size_t len)); -#endif /* I586_CPU */ - -int hw_float; /* XXX currently just alias for npx_exists */ +int hw_float = 1; SYSCTL_INT(_hw,HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, &hw_float, 0, "Floatingpoint instructions executed in hardware"); -#ifndef SMP -static volatile u_int npx_intrs_while_probing; -static volatile u_int npx_traps_while_probing; -#endif - -static union savefpu npx_cleanstate; +static struct savefpu npx_cleanstate; static bool_t npx_cleanstate_ready; -static bool_t npx_ex16; -static bool_t npx_exists; -static bool_t npx_irq13; - -#ifndef SMP -alias_for_inthand_t probetrap; -__asm(" \n\ - .text \n\ - .p2align 2,0x90 \n\ - .type " __XSTRING(CNAME(probetrap)) ",@function \n\ -" __XSTRING(CNAME(probetrap)) ": \n\ - ss \n\ - incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\ - fnclex \n\ - iret \n\ -"); -#endif /* SMP */ /* * Identify routine. Create a connection point on our parent for probing. @@ -220,104 +135,23 @@ npx_identify(driver, parent) panic("npx_identify"); } -#ifndef SMP -/* - * Do minimal handling of npx interrupts to convert them to traps. 
- */ -static void -npx_intr(dummy) - void *dummy; -{ - struct thread *td; - -#ifndef SMP - npx_intrs_while_probing++; -#endif - - /* - * The BUSY# latch must be cleared in all cases so that the next - * unmasked npx exception causes an interrupt. - */ -#ifdef PC98 - outb(0xf8, 0); -#else - outb(0xf0, 0); -#endif - - /* - * fpcurthread is normally non-null here. In that case, schedule an - * AST to finish the exception handling in the correct context - * (this interrupt may occur after the thread has entered the - * kernel via a syscall or an interrupt). Otherwise, the npx - * state of the thread that caused this interrupt must have been - * pushed to the thread's pcb, and clearing of the busy latch - * above has finished the (essentially null) handling of this - * interrupt. Control will eventually return to the instruction - * that caused it and it will repeat. We will eventually (usually - * soon) win the race to handle the interrupt properly. - */ - td = PCPU_GET(fpcurthread); - if (td != NULL) { - td->td_pcb->pcb_flags |= PCB_NPXTRAP; - mtx_lock_spin(&sched_lock); - td->td_flags |= TDF_ASTPENDING; - mtx_unlock_spin(&sched_lock); - } -} -#endif /* !SMP */ - /* * Probe routine. Initialize cr0 to give correct behaviour for [f]wait - * whether the device exists or not (XXX should be elsewhere). Set flags - * to tell npxattach() what to do. Modify device struct if npx doesn't - * need to use interrupts. Return 0 if device exists. + * whether the device exists or not (XXX should be elsewhere). + * Modify device struct if npx doesn't need to use interrupts. + * Return 0 if device exists. 
*/ static int npx_probe(dev) device_t dev; { -#ifndef SMP - struct gate_descriptor save_idt_npxtrap; - struct resource *ioport_res, *irq_res; - void *irq_cookie; - int ioport_rid, irq_num, irq_rid; - u_short control; - u_short status; - - save_idt_npxtrap = idt[16]; - setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - ioport_rid = 0; - ioport_res = bus_alloc_resource(dev, SYS_RES_IOPORT, &ioport_rid, - IO_NPX, IO_NPX, IO_NPXSIZE, RF_ACTIVE); - if (ioport_res == NULL) - panic("npx: can't get ports"); -#ifdef PC98 - if (resource_int_value("npx", 0, "irq", &irq_num) != 0) - irq_num = 8; -#else - if (resource_int_value("npx", 0, "irq", &irq_num) != 0) - irq_num = 13; -#endif - irq_rid = 0; - irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &irq_rid, irq_num, - irq_num, 1, RF_ACTIVE); - if (irq_res == NULL) - panic("npx: can't get IRQ"); - if (bus_setup_intr(dev, irq_res, INTR_TYPE_MISC | INTR_FAST, npx_intr, - NULL, &irq_cookie) != 0) - panic("npx: can't create intr"); -#endif /* !SMP */ /* * Partially reset the coprocessor, if any. Some BIOS's don't reset * it after a warm boot. */ -#ifdef PC98 - outb(0xf8,0); -#else outb(0xf1, 0); /* full reset on some systems, NOP on others */ outb(0xf0, 0); /* clear BUSY# latch */ -#endif /* * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT * instructions. We must set the CR0_MP bit and use the CR0_TS @@ -338,116 +172,13 @@ npx_probe(dev) */ stop_emulating(); /* - * Finish resetting the coprocessor, if any. If there is an error - * pending, then we may get a bogus IRQ13, but npx_intr() will handle - * it OK. Bogus halts have never been observed, but we enabled - * IRQ13 and cleared the BUSY# latch early to handle them anyway. + * Finish resetting the coprocessor. */ fninit(); device_set_desc(dev, "math processor"); -#ifdef SMP - - /* - * Exception 16 MUST work for SMP. 
- */ - npx_ex16 = hw_float = npx_exists = 1; - return (0); - -#else /* !SMP */ - - /* - * Don't use fwait here because it might hang. - * Don't use fnop here because it usually hangs if there is no FPU. - */ - DELAY(1000); /* wait for any IRQ13 */ -#ifdef DIAGNOSTIC - if (npx_intrs_while_probing != 0) - printf("fninit caused %u bogus npx interrupt(s)\n", - npx_intrs_while_probing); - if (npx_traps_while_probing != 0) - printf("fninit caused %u bogus npx trap(s)\n", - npx_traps_while_probing); -#endif - /* - * Check for a status of mostly zero. - */ - status = 0x5a5a; - fnstsw(&status); - if ((status & 0xb8ff) == 0) { - /* - * Good, now check for a proper control word. - */ - control = 0x5a5a; - fnstcw(&control); - if ((control & 0x1f3f) == 0x033f) { - hw_float = npx_exists = 1; - /* - * We have an npx, now divide by 0 to see if exception - * 16 works. - */ - control &= ~(1 << 2); /* enable divide by 0 trap */ - fldcw(&control); -#ifdef FPU_ERROR_BROKEN - /* - * FPU error signal doesn't work on some CPU - * accelerator board. - */ - npx_ex16 = 1; - return (0); -#endif - npx_traps_while_probing = npx_intrs_while_probing = 0; - fp_divide_by_0(); - if (npx_traps_while_probing != 0) { - /* - * Good, exception 16 works. - */ - npx_ex16 = 1; - goto no_irq13; - } - if (npx_intrs_while_probing != 0) { - /* - * Bad, we are stuck with IRQ13. - */ - npx_irq13 = 1; - idt[16] = save_idt_npxtrap; - return (0); - } - /* - * Worse, even IRQ13 is broken. Use emulator. - */ - } - } - /* - * Probe failed, but we want to get to npxattach to initialize the - * emulator and say that it has been installed. XXX handle devices - * that aren't really devices better. - */ - /* FALLTHROUGH */ -no_irq13: - idt[16] = save_idt_npxtrap; - bus_teardown_intr(dev, irq_res, irq_cookie); - - /* - * XXX hack around brokenness of bus_teardown_intr(). If we left the - * irq active then we would get it instead of exception 16. 
- */ - { - register_t crit; - - crit = intr_disable(); - mtx_lock_spin(&icu_lock); - INTRDIS(1 << irq_num); - mtx_unlock_spin(&icu_lock); - intr_restore(crit); - } - - bus_release_resource(dev, SYS_RES_IRQ, irq_rid, irq_res); - bus_release_resource(dev, SYS_RES_IOPORT, ioport_rid, ioport_res); return (0); - -#endif /* SMP */ } /* @@ -457,67 +188,19 @@ static int npx_attach(dev) device_t dev; { - int flags; register_t s; - if (resource_int_value("npx", 0, "flags", &flags) != 0) - flags = 0; - - if (flags) - device_printf(dev, "flags 0x%x ", flags); - if (npx_irq13) { - device_printf(dev, "using IRQ 13 interface\n"); - } else { -#if defined(MATH_EMULATE) || defined(GPL_MATH_EMULATE) - if (npx_ex16) { - if (!(flags & NPX_PREFER_EMULATOR)) - device_printf(dev, "INT 16 interface\n"); - else { - device_printf(dev, "FPU exists, but flags request " - "emulator\n"); - hw_float = npx_exists = 0; - } - } else if (npx_exists) { - device_printf(dev, "error reporting broken; using 387 emulator\n"); - hw_float = npx_exists = 0; - } else - device_printf(dev, "387 emulator\n"); -#else - if (npx_ex16) { - device_printf(dev, "INT 16 interface\n"); - if (flags & NPX_PREFER_EMULATOR) { - device_printf(dev, "emulator requested, but none compiled " - "into kernel, using FPU\n"); - } - } else - device_printf(dev, "no 387 emulator in kernel and no FPU!\n"); -#endif - } + device_printf(dev, "INT 16 interface\n"); npxinit(__INITIAL_NPXCW__); if (npx_cleanstate_ready == 0) { s = intr_disable(); stop_emulating(); - fpusave(&npx_cleanstate); + fxsave(&npx_cleanstate); start_emulating(); npx_cleanstate_ready = 1; intr_restore(s); } -#ifdef I586_CPU_XXX - if (cpu_class == CPUCLASS_586 && npx_ex16 && npx_exists && - timezero("i586_bzero()", i586_bzero) < - timezero("bzero()", bzero) * 4 / 5) { - if (!(flags & NPX_DISABLE_I586_OPTIMIZED_BCOPY)) - bcopy_vector = i586_bcopy; - if (!(flags & NPX_DISABLE_I586_OPTIMIZED_BZERO)) - bzero_vector = i586_bzero; - if (!(flags & 
NPX_DISABLE_I586_OPTIMIZED_COPYIO)) { - copyin_vector = i586_copyin; - copyout_vector = i586_copyout; - } - } -#endif - return (0); /* XXX unused */ } @@ -528,11 +211,9 @@ void npxinit(control) u_short control; { - static union savefpu dummy; + static struct savefpu dummy; register_t savecrit; - if (!npx_exists) - return; /* * fninit has the same h/w bugs as fnsave. Use the detoxified * fnsave to throw away any junk in the fpu. npxsave() initializes @@ -541,11 +222,8 @@ npxinit(control) savecrit = intr_disable(); npxsave(&dummy); stop_emulating(); -#ifdef CPU_ENABLE_SSE /* XXX npxsave() doesn't actually initialize the fpu in the SSE case. */ - if (cpu_fxsr) - fninit(); -#endif + fninit(); fldcw(&control); start_emulating(); intr_restore(savecrit); @@ -558,6 +236,9 @@ void npxexit(td) struct thread *td; { +#ifdef NPX_DEBUG + u_int masked_exceptions; +#endif register_t savecrit; savecrit = intr_disable(); @@ -565,20 +246,16 @@ npxexit(td) npxsave(&PCPU_GET(curpcb)->pcb_save); intr_restore(savecrit); #ifdef NPX_DEBUG - if (npx_exists) { - u_int masked_exceptions; - - masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f; - /* - * Log exceptions that would have trapped with the old - * control word (overflow, divide by 0, and invalid operand). - */ - if (masked_exceptions & 0x0d) - log(LOG_ERR, - "pid %d (%s) exited with masked floating point exceptions 0x%02x\n", - td->td_proc->p_pid, td->td_proc->p_comm, - masked_exceptions); - } + masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f; + /* + * Log exceptions that would have trapped with the old + * control word (overflow, divide by 0, and invalid operand). 
+ */ + if (masked_exceptions & 0x0d) + log(LOG_ERR, +"pid %d (%s) exited with masked floating point exceptions 0x%02x\n", + td->td_proc->p_pid, td->td_proc->p_comm, + masked_exceptions); #endif } @@ -586,13 +263,7 @@ int npxformat() { - if (!npx_exists) - return (_MC_FPFMT_NODEV); -#ifdef CPU_ENABLE_SSE - if (cpu_fxsr) - return (_MC_FPFMT_XMM); -#endif - return (_MC_FPFMT_387); + return (_MC_FPFMT_XMM); } /* @@ -789,11 +460,6 @@ npxtrap() register_t savecrit; u_short control, status; - if (!npx_exists) { - printf("npxtrap: fpcurthread = %p, curthread = %p, npx_exists = %d\n", - PCPU_GET(fpcurthread), curthread, npx_exists); - panic("npxtrap from nowhere"); - } savecrit = intr_disable(); /* @@ -832,8 +498,6 @@ npxdna() register_t s; u_short control; - if (!npx_exists) - return (0); if (PCPU_GET(fpcurthread) == curthread) { printf("npxdna: fpcurthread == curthread %d times\n", ++err_count); @@ -867,20 +531,15 @@ npxdna() pcb->pcb_flags |= PCB_NPXINITDONE; } else { /* - * The following frstor may cause an IRQ13 when the state + * The following frstor may cause a trap when the state * being restored has a pending error. The error will * appear to have been triggered by the current (npx) user * instruction even when that instruction is a no-wait * instruction that should not trigger an error (e.g., - * fnclex). On at least one 486 system all of the no-wait * instructions are broken the same as frstor, so our - * treatment does not amplify the breakage. On at least - * one 386/Cyrix 387 system, fnclex works correctly while - * frstor and fnsave are broken, so our treatment breaks - * fnclex if it is the first FPU instruction after a context - * switch. + * treatment does not amplify the breakage. 
*/ - fpurstor(&pcb->pcb_save); + fxrstor(&pcb->pcb_save); } intr_restore(s); @@ -912,11 +571,11 @@ npxdna() */ void npxsave(addr) - union savefpu *addr; + struct savefpu *addr; { stop_emulating(); - fpusave(addr); + fxsave(addr); start_emulating(); PCPU_SET(fpcurthread, NULL); @@ -944,13 +603,10 @@ npxdrop() int npxgetregs(td, addr) struct thread *td; - union savefpu *addr; + struct savefpu *addr; { register_t s; - if (!npx_exists) - return (_MC_FPOWNED_NONE); - if ((td->td_pcb->pcb_flags & PCB_NPXINITDONE) == 0) { if (npx_cleanstate_ready) bcopy(&npx_cleanstate, addr, sizeof(npx_cleanstate)); @@ -960,16 +616,7 @@ npxgetregs(td, addr) } s = intr_disable(); if (td == PCPU_GET(fpcurthread)) { - fpusave(addr); -#ifdef CPU_ENABLE_SSE - if (!cpu_fxsr) -#endif - /* - * fnsave initializes the FPU and destroys whatever - * context it contains. Make sure the FPU owner - * starts with a clean state next time. - */ - npxdrop(); + fxsave(addr); intr_restore(s); return (_MC_FPOWNED_FPU); } else { @@ -985,16 +632,13 @@ npxgetregs(td, addr) void npxsetregs(td, addr) struct thread *td; - union savefpu *addr; + struct savefpu *addr; { register_t s; - if (!npx_exists) - return; - s = intr_disable(); if (td == PCPU_GET(fpcurthread)) { - fpurstor(addr); + fxrstor(addr); intr_restore(s); } else { intr_restore(s); @@ -1003,62 +647,6 @@ npxsetregs(td, addr) curthread->td_pcb->pcb_flags |= PCB_NPXINITDONE; } -static void -fpusave(addr) - union savefpu *addr; -{ - -#ifdef CPU_ENABLE_SSE - if (cpu_fxsr) - fxsave(addr); - else -#endif - fnsave(addr); -} - -static void -fpurstor(addr) - union savefpu *addr; -{ - -#ifdef CPU_ENABLE_SSE - if (cpu_fxsr) - fxrstor(addr); - else -#endif - frstor(addr); -} - -#ifdef I586_CPU_XXX -static long -timezero(funcname, func) - const char *funcname; - void (*func)(void *buf, size_t len); - -{ - void *buf; -#define BUFSIZE 1048576 - long usec; - struct timeval finish, start; - - buf = malloc(BUFSIZE, M_TEMP, M_NOWAIT); - if (buf == NULL) - return (BUFSIZE); 
- microtime(&start); - (*func)(buf, BUFSIZE); - microtime(&finish); - usec = 1000000 * (finish.tv_sec - start.tv_sec) + - finish.tv_usec - start.tv_usec; - if (usec <= 0) - usec = 1; - if (bootverbose) - printf("%s bandwidth = %u kBps\n", funcname, - (u_int32_t)(((BUFSIZE >> 10) * 1000000) / usec)); - free(buf, M_TEMP); - return (usec); -} -#endif /* I586_CPU */ - static device_method_t npx_methods[] = { /* Device interface */ DEVMETHOD(device_identify, npx_identify), @@ -1080,13 +668,13 @@ static driver_t npx_driver = { static devclass_t npx_devclass; -#ifdef DEV_ISA /* * We prefer to attach to the root nexus so that the usual case (exception 16) * doesn't describe the processor as being `on isa'. */ DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0); +#ifdef DEV_ISA /* * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. */ @@ -1132,7 +720,5 @@ static driver_t npxisa_driver = { static devclass_t npxisa_devclass; DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0); -#ifndef PC98 DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0); -#endif #endif /* DEV_ISA */ diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index 7c287d6..8e25e04 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -53,7 +53,6 @@ #include <sys/resourcevar.h> #include <sys/ucontext.h> #include <sys/user.h> -#include <machine/bootinfo.h> #include <machine/tss.h> #include <sys/vmmeter.h> #include <vm/vm.h> @@ -68,12 +67,8 @@ #include <nfs/rpcv2.h> #include <nfsclient/nfs.h> #include <nfsclient/nfsdiskless.h> -#ifdef SMP -#include <machine/apic.h> -#endif #include <machine/cpu.h> #include <machine/sigframe.h> -#include <machine/vm86.h> #include <machine/proc.h> ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace)); @@ -91,7 +86,6 @@ ASSYM(TD_SWITCHIN, offsetof(struct thread, td_switchin)); ASSYM(TD_MD, offsetof(struct thread, td_md)); ASSYM(P_MD, offsetof(struct proc, p_md)); -ASSYM(MD_LDT, offsetof(struct 
mdproc, md_ldt)); ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); @@ -99,7 +93,6 @@ ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); ASSYM(V_TRAP, offsetof(struct vmmeter, v_trap)); ASSYM(V_SYSCALL, offsetof(struct vmmeter, v_syscall)); ASSYM(V_INTR, offsetof(struct vmmeter, v_intr)); -/* ASSYM(UPAGES, UPAGES);*/ ASSYM(UAREA_PAGES, UAREA_PAGES); ASSYM(KSTACK_PAGES, KSTACK_PAGES); ASSYM(PAGE_SIZE, PAGE_SIZE); @@ -118,111 +111,72 @@ ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS); ASSYM(KERNBASE, KERNBASE); ASSYM(MCLBYTES, MCLBYTES); ASSYM(PCB_CR3, offsetof(struct pcb, pcb_cr3)); -ASSYM(PCB_EDI, offsetof(struct pcb, pcb_edi)); -ASSYM(PCB_ESI, offsetof(struct pcb, pcb_esi)); -ASSYM(PCB_EBP, offsetof(struct pcb, pcb_ebp)); -ASSYM(PCB_ESP, offsetof(struct pcb, pcb_esp)); -ASSYM(PCB_EBX, offsetof(struct pcb, pcb_ebx)); -ASSYM(PCB_EIP, offsetof(struct pcb, pcb_eip)); -ASSYM(TSS_ESP0, offsetof(struct i386tss, tss_esp0)); +ASSYM(PCB_R15, offsetof(struct pcb, pcb_r15)); +ASSYM(PCB_R14, offsetof(struct pcb, pcb_r14)); +ASSYM(PCB_R13, offsetof(struct pcb, pcb_r13)); +ASSYM(PCB_R12, offsetof(struct pcb, pcb_r12)); +ASSYM(PCB_RBP, offsetof(struct pcb, pcb_rbp)); +ASSYM(PCB_RSP, offsetof(struct pcb, pcb_rsp)); +ASSYM(PCB_RBX, offsetof(struct pcb, pcb_rbx)); +ASSYM(PCB_RIP, offsetof(struct pcb, pcb_rip)); +ASSYM(PCB_RFLAGS, offsetof(struct pcb, pcb_rflags)); -ASSYM(PCB_GS, offsetof(struct pcb, pcb_gs)); -ASSYM(PCB_DR0, offsetof(struct pcb, pcb_dr0)); -ASSYM(PCB_DR1, offsetof(struct pcb, pcb_dr1)); -ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2)); -ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3)); -ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6)); -ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7)); -ASSYM(PCB_PSL, offsetof(struct pcb, pcb_psl)); -ASSYM(PCB_DBREGS, PCB_DBREGS); -ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext)); - -ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare)); ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); +ASSYM(PCB_FULLCTX, PCB_FULLCTX); 
ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save)); -ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu)); +ASSYM(PCB_SAVEFPU_SIZE, sizeof(struct savefpu)); ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); ASSYM(PCB_SIZE, sizeof(struct pcb)); -ASSYM(PCB_VM86CALL, PCB_VM86CALL); +ASSYM(COMMON_TSS_RSP0, offsetof(struct amd64tss, tss_rsp0)); + +ASSYM(TF_R15, offsetof(struct trapframe, tf_r15)); +ASSYM(TF_R14, offsetof(struct trapframe, tf_r14)); +ASSYM(TF_R13, offsetof(struct trapframe, tf_r13)); +ASSYM(TF_R12, offsetof(struct trapframe, tf_r12)); +ASSYM(TF_R11, offsetof(struct trapframe, tf_r11)); +ASSYM(TF_R10, offsetof(struct trapframe, tf_r10)); +ASSYM(TF_R9, offsetof(struct trapframe, tf_r9)); +ASSYM(TF_R8, offsetof(struct trapframe, tf_r8)); +ASSYM(TF_RDI, offsetof(struct trapframe, tf_rdi)); +ASSYM(TF_RSI, offsetof(struct trapframe, tf_rsi)); +ASSYM(TF_RBP, offsetof(struct trapframe, tf_rbp)); +ASSYM(TF_RBX, offsetof(struct trapframe, tf_rbx)); +ASSYM(TF_RDX, offsetof(struct trapframe, tf_rdx)); +ASSYM(TF_RCX, offsetof(struct trapframe, tf_rcx)); +ASSYM(TF_RAX, offsetof(struct trapframe, tf_rax)); ASSYM(TF_TRAPNO, offsetof(struct trapframe, tf_trapno)); ASSYM(TF_ERR, offsetof(struct trapframe, tf_err)); +ASSYM(TF_RIP, offsetof(struct trapframe, tf_rip)); ASSYM(TF_CS, offsetof(struct trapframe, tf_cs)); -ASSYM(TF_EFLAGS, offsetof(struct trapframe, tf_eflags)); +ASSYM(TF_RFLAGS, offsetof(struct trapframe, tf_rflags)); +ASSYM(TF_RSP, offsetof(struct trapframe, tf_rsp)); +ASSYM(TF_SS, offsetof(struct trapframe, tf_ss)); +ASSYM(TF_SIZE, sizeof(struct trapframe)); + ASSYM(SIGF_HANDLER, offsetof(struct sigframe, sf_ahu.sf_handler)); -#ifdef COMPAT_43 -ASSYM(SIGF_SC, offsetof(struct osigframe, sf_siginfo.si_sc)); -#endif ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc)); -#ifdef COMPAT_FREEBSD4 -ASSYM(SIGF_UC4, offsetof(struct sigframe4, sf_uc)); -#endif -#ifdef COMPAT_43 -ASSYM(SC_PS, offsetof(struct osigcontext, sc_ps)); -ASSYM(SC_FS, offsetof(struct osigcontext, 
sc_fs)); -ASSYM(SC_GS, offsetof(struct osigcontext, sc_gs)); -ASSYM(SC_TRAPNO, offsetof(struct osigcontext, sc_trapno)); -#endif -#ifdef COMPAT_FREEBSD4 -ASSYM(UC4_EFLAGS, offsetof(struct ucontext4, uc_mcontext.mc_eflags)); -ASSYM(UC4_GS, offsetof(struct ucontext4, uc_mcontext.mc_gs)); -#endif -ASSYM(UC_EFLAGS, offsetof(ucontext_t, uc_mcontext.mc_eflags)); -ASSYM(UC_GS, offsetof(ucontext_t, uc_mcontext.mc_gs)); +ASSYM(UC_EFLAGS, offsetof(ucontext_t, uc_mcontext.mc_rflags)); ASSYM(ENOENT, ENOENT); ASSYM(EFAULT, EFAULT); ASSYM(ENAMETOOLONG, ENAMETOOLONG); ASSYM(MAXPATHLEN, MAXPATHLEN); -ASSYM(BOOTINFO_SIZE, sizeof(struct bootinfo)); -ASSYM(BI_VERSION, offsetof(struct bootinfo, bi_version)); -ASSYM(BI_KERNELNAME, offsetof(struct bootinfo, bi_kernelname)); -ASSYM(BI_NFS_DISKLESS, offsetof(struct bootinfo, bi_nfs_diskless)); -ASSYM(BI_ENDCOMMON, offsetof(struct bootinfo, bi_endcommon)); -ASSYM(NFSDISKLESS_SIZE, sizeof(struct nfs_diskless)); -ASSYM(BI_SIZE, offsetof(struct bootinfo, bi_size)); -ASSYM(BI_SYMTAB, offsetof(struct bootinfo, bi_symtab)); -ASSYM(BI_ESYMTAB, offsetof(struct bootinfo, bi_esymtab)); -ASSYM(BI_KERNEND, offsetof(struct bootinfo, bi_kernend)); ASSYM(PC_SIZEOF, sizeof(struct pcpu)); ASSYM(PC_PRVSPACE, offsetof(struct pcpu, pc_prvspace)); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); -ASSYM(PC_INT_PENDING, offsetof(struct pcpu, pc_int_pending)); -ASSYM(PC_IPENDING, offsetof(struct pcpu, pc_ipending)); -ASSYM(PC_FPENDING, offsetof(struct pcpu, pc_fpending)); -ASSYM(PC_SPENDING, offsetof(struct pcpu, pc_spending)); ASSYM(PC_FPCURTHREAD, offsetof(struct pcpu, pc_fpcurthread)); ASSYM(PC_IDLETHREAD, offsetof(struct pcpu, pc_idlethread)); ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); -ASSYM(PC_COMMON_TSS, offsetof(struct pcpu, pc_common_tss)); -ASSYM(PC_COMMON_TSSD, offsetof(struct pcpu, pc_common_tssd)); -ASSYM(PC_TSS_GDT, offsetof(struct pcpu, pc_tss_gdt)); -ASSYM(PC_CURRENTLDT, offsetof(struct pcpu, pc_currentldt)); ASSYM(PC_CPUID, 
offsetof(struct pcpu, pc_cpuid)); - -#ifdef SMP -ASSYM(LA_VER, offsetof(struct LAPIC, version)); -ASSYM(LA_TPR, offsetof(struct LAPIC, tpr)); -ASSYM(LA_EOI, offsetof(struct LAPIC, eoi)); -ASSYM(LA_SVR, offsetof(struct LAPIC, svr)); -ASSYM(LA_ICR_LO, offsetof(struct LAPIC, icr_lo)); -ASSYM(LA_ICR_HI, offsetof(struct LAPIC, icr_hi)); -#endif +ASSYM(PC_SCRATCH_RSP, offsetof(struct pcpu, pc_scratch_rsp)); ASSYM(KCSEL, GSEL(GCODE_SEL, SEL_KPL)); ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL)); -ASSYM(KPSEL, GSEL(GPRIV_SEL, SEL_KPL)); +ASSYM(KUCSEL, GSEL(GUCODE_SEL, SEL_UPL)); +ASSYM(KUDSEL, GSEL(GUDATA_SEL, SEL_UPL)); -ASSYM(BC32SEL, GSEL(GBIOSCODE32_SEL, SEL_KPL)); ASSYM(GPROC0_SEL, GPROC0_SEL); -ASSYM(VM86_FRAMESIZE, sizeof(struct vm86frame)); ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock)); ASSYM(MTX_RECURSECNT, offsetof(struct mtx, mtx_recurse)); - -#ifdef PC98 -#include <machine/bus.h> - -ASSYM(BUS_SPACE_HANDLE_BASE, offsetof(struct bus_space_handle, bsh_base)); -ASSYM(BUS_SPACE_HANDLE_IAT, offsetof(struct bus_space_handle, bsh_iat)); -#endif diff --git a/sys/amd64/amd64/identcpu.c b/sys/amd64/amd64/identcpu.c index 0e8d809..62ae33d 100644 --- a/sys/amd64/amd64/identcpu.c +++ b/sys/amd64/amd64/identcpu.c @@ -57,37 +57,22 @@ #include <machine/specialreg.h> #include <machine/md_var.h> -#include <i386/isa/icu.h> -#include <i386/isa/intr_machdep.h> - -#define IDENTBLUE_CYRIX486 0 -#define IDENTBLUE_IBMCPU 1 -#define IDENTBLUE_CYRIXM2 2 +#include <amd64/isa/icu.h> +#include <amd64/isa/intr_machdep.h> /* XXX - should be in header file: */ void printcpuinfo(void); -void finishidentcpu(void); +void identify_cpu(void); void earlysetcpuclass(void); -#if defined(I586_CPU) && defined(CPU_WT_ALLOC) -void enable_K5_wt_alloc(void); -void enable_K6_wt_alloc(void); -void enable_K6_2_wt_alloc(void); -#endif void panicifcpuunsupported(void); -static void identifycyrix(void); -#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) static void print_AMD_features(void); -#endif static 
void print_AMD_info(void); static void print_AMD_assoc(int i); -static void print_transmeta_info(void); -static void setup_tmx86_longrun(void); int cpu_class; u_int cpu_exthigh; /* Highest arg to extended CPUID */ -u_int cyrix_did; /* Device ID of Cyrix CPU */ -char machine[] = "i386"; +char machine[] = "amd64"; SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "Machine class"); @@ -99,67 +84,27 @@ static int hw_clockrate; SYSCTL_INT(_hw, OID_AUTO, clockrate, CTLFLAG_RD, &hw_clockrate, 0, "CPU instruction clock rate"); -#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) static char cpu_brand[48]; -#define MAX_BRAND_INDEX 8 - -static const char *cpu_brandtable[MAX_BRAND_INDEX + 1] = { - NULL, /* No brand */ - "Intel Celeron", - "Intel Pentium III", - "Intel Pentium III Xeon", - NULL, - NULL, - NULL, - NULL, - "Intel Pentium 4" -}; -#endif - -static struct cpu_nameclass i386_cpus[] = { - { "Intel 80286", CPUCLASS_286 }, /* CPU_286 */ - { "i386SX", CPUCLASS_386 }, /* CPU_386SX */ - { "i386DX", CPUCLASS_386 }, /* CPU_386 */ - { "i486SX", CPUCLASS_486 }, /* CPU_486SX */ - { "i486DX", CPUCLASS_486 }, /* CPU_486 */ - { "Pentium", CPUCLASS_586 }, /* CPU_586 */ - { "Cyrix 486", CPUCLASS_486 }, /* CPU_486DLC */ - { "Pentium Pro", CPUCLASS_686 }, /* CPU_686 */ - { "Cyrix 5x86", CPUCLASS_486 }, /* CPU_M1SC */ - { "Cyrix 6x86", CPUCLASS_486 }, /* CPU_M1 */ - { "Blue Lightning", CPUCLASS_486 }, /* CPU_BLUE */ - { "Cyrix 6x86MX", CPUCLASS_686 }, /* CPU_M2 */ - { "NexGen 586", CPUCLASS_386 }, /* CPU_NX586 (XXX) */ - { "Cyrix 486S/DX", CPUCLASS_486 }, /* CPU_CY486DX */ - { "Pentium II", CPUCLASS_686 }, /* CPU_PII */ - { "Pentium III", CPUCLASS_686 }, /* CPU_PIII */ - { "Pentium 4", CPUCLASS_686 }, /* CPU_P4 */ +static struct cpu_nameclass amd64_cpus[] = { + { "Clawhammer", CPUCLASS_K8 }, /* CPU_CLAWHAMMER */ + { "Sledgehammer", CPUCLASS_K8 }, /* CPU_SLEDGEHAMMER */ }; -#if defined(I586_CPU) && !defined(NO_F00F_HACK) -int has_f00f_bug = 0; /* Initialized 
so that it can be patched. */ -#endif - void printcpuinfo(void) { -#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) u_int regs[4], i; char *brand; -#endif - cpu_class = i386_cpus[cpu].cpu_class; + cpu_class = amd64_cpus[cpu].cpu_class; printf("CPU: "); - strncpy(cpu_model, i386_cpus[cpu].cpu_name, sizeof (cpu_model)); + strncpy(cpu_model, amd64_cpus[cpu].cpu_name, sizeof (cpu_model)); -#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) /* Check for extended CPUID information and a processor name. */ if (cpu_high > 0 && (strcmp(cpu_vendor, "GenuineIntel") == 0 || - strcmp(cpu_vendor, "AuthenticAMD") == 0 || - strcmp(cpu_vendor, "GenuineTMx86") == 0 || - strcmp(cpu_vendor, "TransmetaCPU") == 0)) { + strcmp(cpu_vendor, "AuthenticAMD") == 0)) { do_cpuid(0x80000000, regs); if (regs[0] >= 0x80000000) { cpu_exthigh = regs[0]; @@ -175,138 +120,8 @@ printcpuinfo(void) } if (strcmp(cpu_vendor, "GenuineIntel") == 0) { - if ((cpu_id & 0xf00) > 0x300) { - u_int brand_index; - - cpu_model[0] = '\0'; - - switch (cpu_id & 0x3000) { - case 0x1000: - strcpy(cpu_model, "Overdrive "); - break; - case 0x2000: - strcpy(cpu_model, "Dual "); - break; - } - - switch (cpu_id & 0xf00) { - case 0x400: - strcat(cpu_model, "i486 "); - /* Check the particular flavor of 486 */ - switch (cpu_id & 0xf0) { - case 0x00: - case 0x10: - strcat(cpu_model, "DX"); - break; - case 0x20: - strcat(cpu_model, "SX"); - break; - case 0x30: - strcat(cpu_model, "DX2"); - break; - case 0x40: - strcat(cpu_model, "SL"); - break; - case 0x50: - strcat(cpu_model, "SX2"); - break; - case 0x70: - strcat(cpu_model, - "DX2 Write-Back Enhanced"); - break; - case 0x80: - strcat(cpu_model, "DX4"); - break; - } - break; - case 0x500: - /* Check the particular flavor of 586 */ - strcat(cpu_model, "Pentium"); - switch (cpu_id & 0xf0) { - case 0x00: - strcat(cpu_model, " A-step"); - break; - case 0x10: - strcat(cpu_model, "/P5"); - break; - case 0x20: - strcat(cpu_model, "/P54C"); - break; - case 
0x30: - strcat(cpu_model, "/P54T Overdrive"); - break; - case 0x40: - strcat(cpu_model, "/P55C"); - break; - case 0x70: - strcat(cpu_model, "/P54C"); - break; - case 0x80: - strcat(cpu_model, "/P55C (quarter-micron)"); - break; - default: - /* nothing */ - break; - } -#if defined(I586_CPU) && !defined(NO_F00F_HACK) - /* - * XXX - If/when Intel fixes the bug, this - * should also check the version of the - * CPU, not just that it's a Pentium. - */ - has_f00f_bug = 1; -#endif - break; - case 0x600: - /* Check the particular flavor of 686 */ - switch (cpu_id & 0xf0) { - case 0x00: - strcat(cpu_model, "Pentium Pro A-step"); - break; - case 0x10: - strcat(cpu_model, "Pentium Pro"); - break; - case 0x30: - case 0x50: - case 0x60: - strcat(cpu_model, - "Pentium II/Pentium II Xeon/Celeron"); - cpu = CPU_PII; - break; - case 0x70: - case 0x80: - case 0xa0: - case 0xb0: - strcat(cpu_model, - "Pentium III/Pentium III Xeon/Celeron"); - cpu = CPU_PIII; - break; - default: - strcat(cpu_model, "Unknown 80686"); - break; - } - break; - case 0xf00: - strcat(cpu_model, "Pentium 4"); - cpu = CPU_P4; - break; - default: - strcat(cpu_model, "unknown"); - break; - } - - /* - * If we didn't get a brand name from the extended - * CPUID, try to look it up in the brand table. - */ - if (cpu_high > 0 && *cpu_brand == '\0') { - brand_index = cpu_procinfo & CPUID_BRAND_INDEX; - if (brand_index <= MAX_BRAND_INDEX && - cpu_brandtable[brand_index] != NULL) - strcpy(cpu_brand, - cpu_brandtable[brand_index]); - } - } + /* How the hell did you get here?? 
*/ + strcat(cpu_model, "Yamhill?"); } else if (strcmp(cpu_vendor, "AuthenticAMD") == 0) { /* * Values taken from AMD Processor Recognition @@ -315,231 +130,13 @@ printcpuinfo(void) */ strcpy(cpu_model, "AMD "); switch (cpu_id & 0xFF0) { - case 0x410: - strcat(cpu_model, "Standard Am486DX"); - break; - case 0x430: - strcat(cpu_model, "Enhanced Am486DX2 Write-Through"); - break; - case 0x470: - strcat(cpu_model, "Enhanced Am486DX2 Write-Back"); - break; - case 0x480: - strcat(cpu_model, "Enhanced Am486DX4/Am5x86 Write-Through"); - break; - case 0x490: - strcat(cpu_model, "Enhanced Am486DX4/Am5x86 Write-Back"); - break; - case 0x4E0: - strcat(cpu_model, "Am5x86 Write-Through"); - break; - case 0x4F0: - strcat(cpu_model, "Am5x86 Write-Back"); - break; - case 0x500: - strcat(cpu_model, "K5 model 0"); - tsc_is_broken = 1; - break; - case 0x510: - strcat(cpu_model, "K5 model 1"); - break; - case 0x520: - strcat(cpu_model, "K5 PR166 (model 2)"); - break; - case 0x530: - strcat(cpu_model, "K5 PR200 (model 3)"); - break; - case 0x560: - strcat(cpu_model, "K6"); - break; - case 0x570: - strcat(cpu_model, "K6 266 (model 1)"); - break; - case 0x580: - strcat(cpu_model, "K6-2"); - break; - case 0x590: - strcat(cpu_model, "K6-III"); + case 0xf00: + strcat(cpu_model, "Hammer"); break; default: strcat(cpu_model, "Unknown"); break; } -#if defined(I586_CPU) && defined(CPU_WT_ALLOC) - if ((cpu_id & 0xf00) == 0x500) { - if (((cpu_id & 0x0f0) > 0) - && ((cpu_id & 0x0f0) < 0x60) - && ((cpu_id & 0x00f) > 3)) - enable_K5_wt_alloc(); - else if (((cpu_id & 0x0f0) > 0x80) - || (((cpu_id & 0x0f0) == 0x80) - && (cpu_id & 0x00f) > 0x07)) - enable_K6_2_wt_alloc(); - else if ((cpu_id & 0x0f0) > 0x50) - enable_K6_wt_alloc(); - } -#endif - } else if (strcmp(cpu_vendor, "CyrixInstead") == 0) { - strcpy(cpu_model, "Cyrix "); - switch (cpu_id & 0xff0) { - case 0x440: - strcat(cpu_model, "MediaGX"); - break; - case 0x520: - strcat(cpu_model, "6x86"); - break; - case 0x540: - cpu_class = CPUCLASS_586; - 
strcat(cpu_model, "GXm"); - break; - case 0x600: - strcat(cpu_model, "6x86MX"); - break; - default: - /* - * Even though CPU supports the cpuid - * instruction, it can be disabled. - * Therefore, this routine supports all Cyrix - * CPUs. - */ - switch (cyrix_did & 0xf0) { - case 0x00: - switch (cyrix_did & 0x0f) { - case 0x00: - strcat(cpu_model, "486SLC"); - break; - case 0x01: - strcat(cpu_model, "486DLC"); - break; - case 0x02: - strcat(cpu_model, "486SLC2"); - break; - case 0x03: - strcat(cpu_model, "486DLC2"); - break; - case 0x04: - strcat(cpu_model, "486SRx"); - break; - case 0x05: - strcat(cpu_model, "486DRx"); - break; - case 0x06: - strcat(cpu_model, "486SRx2"); - break; - case 0x07: - strcat(cpu_model, "486DRx2"); - break; - case 0x08: - strcat(cpu_model, "486SRu"); - break; - case 0x09: - strcat(cpu_model, "486DRu"); - break; - case 0x0a: - strcat(cpu_model, "486SRu2"); - break; - case 0x0b: - strcat(cpu_model, "486DRu2"); - break; - default: - strcat(cpu_model, "Unknown"); - break; - } - break; - case 0x10: - switch (cyrix_did & 0x0f) { - case 0x00: - strcat(cpu_model, "486S"); - break; - case 0x01: - strcat(cpu_model, "486S2"); - break; - case 0x02: - strcat(cpu_model, "486Se"); - break; - case 0x03: - strcat(cpu_model, "486S2e"); - break; - case 0x0a: - strcat(cpu_model, "486DX"); - break; - case 0x0b: - strcat(cpu_model, "486DX2"); - break; - case 0x0f: - strcat(cpu_model, "486DX4"); - break; - default: - strcat(cpu_model, "Unknown"); - break; - } - break; - case 0x20: - if ((cyrix_did & 0x0f) < 8) - strcat(cpu_model, "6x86"); /* Where did you get it? 
*/ - else - strcat(cpu_model, "5x86"); - break; - case 0x30: - strcat(cpu_model, "6x86"); - break; - case 0x40: - if ((cyrix_did & 0xf000) == 0x3000) { - cpu_class = CPUCLASS_586; - strcat(cpu_model, "GXm"); - } else - strcat(cpu_model, "MediaGX"); - break; - case 0x50: - strcat(cpu_model, "6x86MX"); - break; - case 0xf0: - switch (cyrix_did & 0x0f) { - case 0x0d: - strcat(cpu_model, "Overdrive CPU"); - case 0x0e: - strcpy(cpu_model, "Texas Instruments 486SXL"); - break; - case 0x0f: - strcat(cpu_model, "486SLC/DLC"); - break; - default: - strcat(cpu_model, "Unknown"); - break; - } - break; - default: - strcat(cpu_model, "Unknown"); - break; - } - break; - } - } else if (strcmp(cpu_vendor, "RiseRiseRise") == 0) { - strcpy(cpu_model, "Rise "); - switch (cpu_id & 0xff0) { - case 0x500: - strcat(cpu_model, "mP6"); - break; - default: - strcat(cpu_model, "Unknown"); - } - } else if (strcmp(cpu_vendor, "CentaurHauls") == 0) { - switch (cpu_id & 0xff0) { - case 0x540: - strcpy(cpu_model, "IDT WinChip C6"); - tsc_is_broken = 1; - break; - case 0x580: - strcpy(cpu_model, "IDT WinChip 2"); - break; - case 0x670: - strcpy(cpu_model, "VIA C3 Samuel 2"); - break; - default: - strcpy(cpu_model, "VIA/IDT Unknown"); - } - } else if (strcmp(cpu_vendor, "IBM") == 0) { - strcpy(cpu_model, "Blue Lightning CPU"); } /* @@ -552,61 +149,27 @@ printcpuinfo(void) if (*brand != '\0') strcpy(cpu_model, brand); -#endif - printf("%s (", cpu_model); switch(cpu_class) { - case CPUCLASS_286: - printf("286"); - break; -#if defined(I386_CPU) - case CPUCLASS_386: - printf("386"); - break; -#endif -#if defined(I486_CPU) - case CPUCLASS_486: - printf("486"); - bzero_vector = i486_bzero; - break; -#endif -#if defined(I586_CPU) - case CPUCLASS_586: - hw_clockrate = (tsc_freq + 5000) / 1000000; - printf("%jd.%02d-MHz ", - (intmax_t)(tsc_freq + 4999) / 1000000, - (u_int)((tsc_freq + 4999) / 10000) % 100); - printf("586"); - break; -#endif -#if defined(I686_CPU) - case CPUCLASS_686: + case CPUCLASS_K8: 
hw_clockrate = (tsc_freq + 5000) / 1000000; printf("%jd.%02d-MHz ", (intmax_t)(tsc_freq + 4999) / 1000000, (u_int)((tsc_freq + 4999) / 10000) % 100); - printf("686"); + printf("K8"); break; -#endif default: printf("Unknown"); /* will panic below... */ } printf("-class CPU)\n"); -#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) if(*cpu_vendor) printf(" Origin = \"%s\"",cpu_vendor); if(cpu_id) printf(" Id = 0x%x", cpu_id); if (strcmp(cpu_vendor, "GenuineIntel") == 0 || - strcmp(cpu_vendor, "AuthenticAMD") == 0 || - strcmp(cpu_vendor, "RiseRiseRise") == 0 || - strcmp(cpu_vendor, "CentaurHauls") == 0 || - ((strcmp(cpu_vendor, "CyrixInstead") == 0) && - ((cpu_id & 0xf00) > 0x500))) { + strcmp(cpu_vendor, "AuthenticAMD") == 0) { printf(" Stepping = %u", cpu_id & 0xf); - if (strcmp(cpu_vendor, "CyrixInstead") == 0) - printf(" DIR=0x%04x", cyrix_did); if (cpu_high > 0) { /* * Here we should probably set up flags indicating @@ -665,72 +228,33 @@ printcpuinfo(void) cpu_exthigh >= 0x80000001) print_AMD_features(); } else if (strcmp(cpu_vendor, "CyrixInstead") == 0) { - printf(" DIR=0x%04x", cyrix_did); - printf(" Stepping=%u", (cyrix_did & 0xf000) >> 12); - printf(" Revision=%u", (cyrix_did & 0x0f00) >> 8); -#ifndef CYRIX_CACHE_REALLY_WORKS - if (cpu == CPU_M1 && (cyrix_did & 0xff00) < 0x1700) - printf("\n CPU cache: write-through mode"); -#endif } /* Avoid ugly blank lines: only print newline when we have to. */ if (*cpu_vendor || cpu_id) printf("\n"); -#endif - if (strcmp(cpu_vendor, "GenuineTMx86") == 0 || - strcmp(cpu_vendor, "TransmetaCPU") == 0) { - setup_tmx86_longrun(); - } - if (!bootverbose) return; if (strcmp(cpu_vendor, "AuthenticAMD") == 0) print_AMD_info(); - else if (strcmp(cpu_vendor, "GenuineTMx86") == 0 || - strcmp(cpu_vendor, "TransmetaCPU") == 0) - print_transmeta_info(); - -#ifdef I686_CPU - /* - * XXX - Do PPro CPUID level=2 stuff here? - * - * No, but maybe in a print_Intel_info() function called from here. 
- */ -#endif } void panicifcpuunsupported(void) { -#if !defined(lint) -#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU) && !defined(I686_CPU) -#error This kernel is not configured for one of the supported CPUs -#endif -#else /* lint */ -#endif /* lint */ -#if defined(I386_CPU) && (defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)) -#error I386_CPU is mutually exclusive with the other cpu types. +#ifndef HAMMER +#error "You need to specify a cpu type" #endif /* * Now that we have told the user what they have, * let them know if that machine type isn't configured. */ switch (cpu_class) { - case CPUCLASS_286: /* a 286 should not make it this far, anyway */ -#if !defined(I386_CPU) - case CPUCLASS_386: -#endif -#if !defined(I486_CPU) - case CPUCLASS_486: -#endif -#if !defined(I586_CPU) - case CPUCLASS_586: -#endif -#if !defined(I686_CPU) - case CPUCLASS_686: + case CPUCLASS_X86: +#ifndef HAMMER + case CPUCLASS_K8: #endif panic("CPU class not configured"); default: @@ -739,231 +263,28 @@ panicifcpuunsupported(void) } -static volatile u_int trap_by_rdmsr; - -/* - * Special exception 6 handler. - * The rdmsr instruction generates invalid opcodes fault on 486-class - * Cyrix CPU. Stacked eip register points the rdmsr instruction in the - * function identblue() when this handler is called. Stacked eip should - * be advanced. - */ -inthand_t bluetrap6; -#ifdef __GNUC__ -__asm -(" \ - .text; \ - .p2align 2,0x90; \ - .type " __XSTRING(CNAME(bluetrap6)) ",@function; \ -" __XSTRING(CNAME(bluetrap6)) ": \ - ss; \ - movl $0xa8c1d," __XSTRING(CNAME(trap_by_rdmsr)) "; \ - addl $2, (%esp); /* rdmsr is a 2-byte instruction */ \ - iret \ -"); -#endif - -/* - * Special exception 13 handler. - * Accessing non-existent MSR generates general protection fault. 
- */ -inthand_t bluetrap13; -#ifdef __GNUC__ -__asm -(" \ - .text; \ - .p2align 2,0x90; \ - .type " __XSTRING(CNAME(bluetrap13)) ",@function; \ -" __XSTRING(CNAME(bluetrap13)) ": \ - ss; \ - movl $0xa89c4," __XSTRING(CNAME(trap_by_rdmsr)) "; \ - popl %eax; /* discard errorcode. */ \ - addl $2, (%esp); /* rdmsr is a 2-bytes instruction. */ \ - iret; \ -"); -#endif - -/* - * Distinguish IBM Blue Lightning CPU from Cyrix CPUs that does not - * support cpuid instruction. This function should be called after - * loading interrupt descriptor table register. - * - * I don't like this method that handles fault, but I couldn't get - * information for any other methods. Does blue giant know? - */ -static int -identblue(void) -{ - - trap_by_rdmsr = 0; - - /* - * Cyrix 486-class CPU does not support rdmsr instruction. - * The rdmsr instruction generates invalid opcode fault, and exception - * will be trapped by bluetrap6() on Cyrix 486-class CPU. The - * bluetrap6() set the magic number to trap_by_rdmsr. - */ - setidt(6, bluetrap6, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - - /* - * Certain BIOS disables cpuid instruction of Cyrix 6x86MX CPU. - * In this case, rdmsr generates general protection fault, and - * exception will be trapped by bluetrap13(). - */ - setidt(13, bluetrap13, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - - rdmsr(0x1002); /* Cyrix CPU generates fault. 
*/ - - if (trap_by_rdmsr == 0xa8c1d) - return IDENTBLUE_CYRIX486; - else if (trap_by_rdmsr == 0xa89c4) - return IDENTBLUE_CYRIXM2; - return IDENTBLUE_IBMCPU; -} - - -/* - * identifycyrix() set lower 16 bits of cyrix_did as follows: - * - * F E D C B A 9 8 7 6 5 4 3 2 1 0 - * +-------+-------+---------------+ - * | SID | RID | Device ID | - * | (DIR 1) | (DIR 0) | - * +-------+-------+---------------+ - */ -static void -identifycyrix(void) -{ - u_int eflags; - int ccr2_test = 0, dir_test = 0; - u_char ccr2, ccr3; - - eflags = read_eflags(); - disable_intr(); - - ccr2 = read_cyrix_reg(CCR2); - write_cyrix_reg(CCR2, ccr2 ^ CCR2_LOCK_NW); - read_cyrix_reg(CCR2); - if (read_cyrix_reg(CCR2) != ccr2) - ccr2_test = 1; - write_cyrix_reg(CCR2, ccr2); - - ccr3 = read_cyrix_reg(CCR3); - write_cyrix_reg(CCR3, ccr3 ^ CCR3_MAPEN3); - read_cyrix_reg(CCR3); - if (read_cyrix_reg(CCR3) != ccr3) - dir_test = 1; /* CPU supports DIRs. */ - write_cyrix_reg(CCR3, ccr3); - - if (dir_test) { - /* Device ID registers are available. */ - cyrix_did = read_cyrix_reg(DIR1) << 8; - cyrix_did += read_cyrix_reg(DIR0); - } else if (ccr2_test) - cyrix_did = 0x0010; /* 486S A-step */ - else - cyrix_did = 0x00ff; /* Old 486SLC/DLC and TI486SXLC/SXL */ - - write_eflags(eflags); -} - /* * Final stage of CPU identification. -- Should I check TI? */ void -finishidentcpu(void) +identify_cpu(void) { - int isblue = 0; - u_char ccr3; - u_int regs[4]; + u_int regs[4]; - if (strcmp(cpu_vendor, "CyrixInstead") == 0) { - if (cpu == CPU_486) { - /* - * These conditions are equivalent to: - * - CPU does not support cpuid instruction. - * - Cyrix/IBM CPU is detected. - */ - isblue = identblue(); - if (isblue == IDENTBLUE_IBMCPU) { - strcpy(cpu_vendor, "IBM"); - cpu = CPU_BLUE; - return; - } - } - switch (cpu_id & 0xf00) { - case 0x600: - /* - * Cyrix's datasheet does not describe DIRs. - * Therefor, I assume it does not have them - * and use the result of the cpuid instruction. 
- * XXX they seem to have it for now at least. -Peter - */ - identifycyrix(); - cpu = CPU_M2; - break; - default: - identifycyrix(); - /* - * This routine contains a trick. - * Don't check (cpu_id & 0x00f0) == 0x50 to detect M2, now. - */ - switch (cyrix_did & 0x00f0) { - case 0x00: - case 0xf0: - cpu = CPU_486DLC; - break; - case 0x10: - cpu = CPU_CY486DX; - break; - case 0x20: - if ((cyrix_did & 0x000f) < 8) - cpu = CPU_M1; - else - cpu = CPU_M1SC; - break; - case 0x30: - cpu = CPU_M1; - break; - case 0x40: - /* MediaGX CPU */ - cpu = CPU_M1SC; - break; - default: - /* M2 and later CPUs are treated as M2. */ - cpu = CPU_M2; + do_cpuid(0, regs); + cpu_high = regs[0]; + ((u_int *)&cpu_vendor)[0] = regs[1]; + ((u_int *)&cpu_vendor)[1] = regs[3]; + ((u_int *)&cpu_vendor)[2] = regs[2]; + cpu_vendor[12] = '\0'; - /* - * enable cpuid instruction. - */ - ccr3 = read_cyrix_reg(CCR3); - write_cyrix_reg(CCR3, CCR3_MAPEN0); - write_cyrix_reg(CCR4, read_cyrix_reg(CCR4) | CCR4_CPUID); - write_cyrix_reg(CCR3, ccr3); + do_cpuid(1, regs); + cpu_id = regs[0]; + cpu_procinfo = regs[1]; + cpu_feature = regs[3]; - do_cpuid(0, regs); - cpu_high = regs[0]; /* eax */ - do_cpuid(1, regs); - cpu_id = regs[0]; /* eax */ - cpu_feature = regs[3]; /* edx */ - break; - } - } - } else if (cpu == CPU_486 && *cpu_vendor == '\0') { - /* - * There are BlueLightning CPUs that do not change - * undefined flags by dividing 5 by 2. In this case, - * the CPU identification routine in locore.s leaves - * cpu_vendor null string and puts CPU_486 into the - * cpu. 
- */ - isblue = identblue(); - if (isblue == IDENTBLUE_IBMCPU) { - strcpy(cpu_vendor, "IBM"); - cpu = CPU_BLUE; - return; - } - } + /* XXX */ + cpu = CPU_CLAWHAMMER; } static void @@ -978,7 +299,6 @@ print_AMD_assoc(int i) static void print_AMD_info(void) { - quad_t amd_whcr; if (cpu_exthigh >= 0x80000005) { u_int regs[4]; @@ -1004,38 +324,8 @@ print_AMD_info(void) print_AMD_assoc((regs[2] >> 12) & 0x0f); } } - if (((cpu_id & 0xf00) == 0x500) - && (((cpu_id & 0x0f0) > 0x80) - || (((cpu_id & 0x0f0) == 0x80) - && (cpu_id & 0x00f) > 0x07))) { - /* K6-2(new core [Stepping 8-F]), K6-III or later */ - amd_whcr = rdmsr(0xc0000082); - if (!(amd_whcr & (0x3ff << 22))) { - printf("Write Allocate Disable\n"); - } else { - printf("Write Allocate Enable Limit: %dM bytes\n", - (u_int32_t)((amd_whcr & (0x3ff << 22)) >> 22) * 4); - printf("Write Allocate 15-16M bytes: %s\n", - (amd_whcr & (1 << 16)) ? "Enable" : "Disable"); - } - } else if (((cpu_id & 0xf00) == 0x500) - && ((cpu_id & 0x0f0) > 0x50)) { - /* K6, K6-2(old core) */ - amd_whcr = rdmsr(0xc0000082); - if (!(amd_whcr & (0x7f << 1))) { - printf("Write Allocate Disable\n"); - } else { - printf("Write Allocate Enable Limit: %dM bytes\n", - (u_int32_t)((amd_whcr & (0x7f << 1)) >> 1) * 4); - printf("Write Allocate 15-16M bytes: %s\n", - (amd_whcr & 0x0001) ? "Enable" : "Disable"); - printf("Hardware Write Allocate Control: %s\n", - (amd_whcr & 0x0100) ? "Enable" : "Disable"); - } - } } -#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) static void print_AMD_features(void) { @@ -1082,297 +372,3 @@ print_AMD_features(void) "\0403DNow!" ); } -#endif - -/* - * Transmeta Crusoe LongRun Support by Tamotsu Hattori. 
- */ - -#define MSR_TMx86_LONGRUN 0x80868010 -#define MSR_TMx86_LONGRUN_FLAGS 0x80868011 - -#define LONGRUN_MODE_MASK(x) ((x) & 0x000000007f) -#define LONGRUN_MODE_RESERVED(x) ((x) & 0xffffff80) -#define LONGRUN_MODE_WRITE(x, y) (LONGRUN_MODE_RESERVED(x) | LONGRUN_MODE_MASK(y)) - -#define LONGRUN_MODE_MINFREQUENCY 0x00 -#define LONGRUN_MODE_ECONOMY 0x01 -#define LONGRUN_MODE_PERFORMANCE 0x02 -#define LONGRUN_MODE_MAXFREQUENCY 0x03 -#define LONGRUN_MODE_UNKNOWN 0x04 -#define LONGRUN_MODE_MAX 0x04 - -union msrinfo { - u_int64_t msr; - u_int32_t regs[2]; -}; - -static u_int32_t longrun_modes[LONGRUN_MODE_MAX][3] = { - /* MSR low, MSR high, flags bit0 */ - { 0, 0, 0}, /* LONGRUN_MODE_MINFREQUENCY */ - { 0, 100, 0}, /* LONGRUN_MODE_ECONOMY */ - { 0, 100, 1}, /* LONGRUN_MODE_PERFORMANCE */ - { 100, 100, 1}, /* LONGRUN_MODE_MAXFREQUENCY */ -}; - -static u_int -tmx86_get_longrun_mode(void) -{ - u_long eflags; - union msrinfo msrinfo; - u_int low, high, flags, mode; - - eflags = read_eflags(); - disable_intr(); - - msrinfo.msr = rdmsr(MSR_TMx86_LONGRUN); - low = LONGRUN_MODE_MASK(msrinfo.regs[0]); - high = LONGRUN_MODE_MASK(msrinfo.regs[1]); - flags = rdmsr(MSR_TMx86_LONGRUN_FLAGS) & 0x01; - - for (mode = 0; mode < LONGRUN_MODE_MAX; mode++) { - if (low == longrun_modes[mode][0] && - high == longrun_modes[mode][1] && - flags == longrun_modes[mode][2]) { - goto out; - } - } - mode = LONGRUN_MODE_UNKNOWN; -out: - write_eflags(eflags); - return (mode); -} - -static u_int -tmx86_get_longrun_status(u_int * frequency, u_int * voltage, u_int * percentage) -{ - u_long eflags; - u_int regs[4]; - - eflags = read_eflags(); - disable_intr(); - - do_cpuid(0x80860007, regs); - *frequency = regs[0]; - *voltage = regs[1]; - *percentage = regs[2]; - - write_eflags(eflags); - return (1); -} - -static u_int -tmx86_set_longrun_mode(u_int mode) -{ - u_long eflags; - union msrinfo msrinfo; - - if (mode >= LONGRUN_MODE_UNKNOWN) { - return (0); - } - - eflags = read_eflags(); - disable_intr(); - - 
/* Write LongRun mode values to Model Specific Register. */ - msrinfo.msr = rdmsr(MSR_TMx86_LONGRUN); - msrinfo.regs[0] = LONGRUN_MODE_WRITE(msrinfo.regs[0], - longrun_modes[mode][0]); - msrinfo.regs[1] = LONGRUN_MODE_WRITE(msrinfo.regs[1], - longrun_modes[mode][1]); - wrmsr(MSR_TMx86_LONGRUN, msrinfo.msr); - - /* Write LongRun mode flags to Model Specific Register. */ - msrinfo.msr = rdmsr(MSR_TMx86_LONGRUN_FLAGS); - msrinfo.regs[0] = (msrinfo.regs[0] & ~0x01) | longrun_modes[mode][2]; - wrmsr(MSR_TMx86_LONGRUN_FLAGS, msrinfo.msr); - - write_eflags(eflags); - return (1); -} - -static u_int crusoe_longrun; -static u_int crusoe_frequency; -static u_int crusoe_voltage; -static u_int crusoe_percentage; -static u_int crusoe_performance_longrun = LONGRUN_MODE_PERFORMANCE; -static u_int crusoe_economy_longrun = LONGRUN_MODE_ECONOMY; -static struct sysctl_ctx_list crusoe_sysctl_ctx; -static struct sysctl_oid *crusoe_sysctl_tree; - -static void -tmx86_longrun_power_profile(void *arg) -{ - int state; - u_int new; - - state = power_profile_get_state(); - if (state != POWER_PROFILE_PERFORMANCE && - state != POWER_PROFILE_ECONOMY) { - return; - } - - switch (state) { - case POWER_PROFILE_PERFORMANCE: - new =crusoe_performance_longrun; - break; - case POWER_PROFILE_ECONOMY: - new = crusoe_economy_longrun; - break; - default: - new = tmx86_get_longrun_mode(); - break; - } - - if (tmx86_get_longrun_mode() != new) { - tmx86_set_longrun_mode(new); - } -} - -static int -tmx86_longrun_sysctl(SYSCTL_HANDLER_ARGS) -{ - u_int mode; - int error; - - crusoe_longrun = tmx86_get_longrun_mode(); - mode = crusoe_longrun; - error = sysctl_handle_int(oidp, &mode, 0, req); - if (error || !req->newptr) { - return (error); - } - if (mode >= LONGRUN_MODE_UNKNOWN) { - error = EINVAL; - return (error); - } - if (crusoe_longrun != mode) { - crusoe_longrun = mode; - tmx86_set_longrun_mode(crusoe_longrun); - } - - return (error); -} - -static int -tmx86_status_sysctl(SYSCTL_HANDLER_ARGS) -{ - u_int val; 
- int error; - - tmx86_get_longrun_status(&crusoe_frequency, - &crusoe_voltage, &crusoe_percentage); - val = *(u_int *)oidp->oid_arg1; - error = sysctl_handle_int(oidp, &val, 0, req); - return (error); -} - -static int -tmx86_longrun_profile_sysctl(SYSCTL_HANDLER_ARGS) -{ - u_int32_t *argp; - u_int32_t arg; - int error; - - argp = (u_int32_t *)oidp->oid_arg1; - arg = *argp; - error = sysctl_handle_int(oidp, &arg, 0, req); - - /* error or no new value */ - if ((error != 0) || (req->newptr == NULL)) - return (error); - - /* range check */ - if (arg >= LONGRUN_MODE_UNKNOWN) - return (EINVAL); - - /* set new value and possibly switch */ - *argp = arg; - - tmx86_longrun_power_profile(NULL); - - return (0); - -} - -static void -setup_tmx86_longrun(void) -{ - static int done = 0; - - if (done) - return; - done++; - - sysctl_ctx_init(&crusoe_sysctl_ctx); - crusoe_sysctl_tree = SYSCTL_ADD_NODE(&crusoe_sysctl_ctx, - SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO, - "crusoe", CTLFLAG_RD, 0, - "Transmeta Crusoe LongRun support"); - SYSCTL_ADD_PROC(&crusoe_sysctl_ctx, SYSCTL_CHILDREN(crusoe_sysctl_tree), - OID_AUTO, "longrun", CTLTYPE_INT | CTLFLAG_RW, - &crusoe_longrun, 0, tmx86_longrun_sysctl, "I", - "LongRun mode [0-3]"); - SYSCTL_ADD_PROC(&crusoe_sysctl_ctx, SYSCTL_CHILDREN(crusoe_sysctl_tree), - OID_AUTO, "frequency", CTLTYPE_INT | CTLFLAG_RD, - &crusoe_frequency, 0, tmx86_status_sysctl, "I", - "Current frequency (MHz)"); - SYSCTL_ADD_PROC(&crusoe_sysctl_ctx, SYSCTL_CHILDREN(crusoe_sysctl_tree), - OID_AUTO, "voltage", CTLTYPE_INT | CTLFLAG_RD, - &crusoe_voltage, 0, tmx86_status_sysctl, "I", - "Current voltage (mV)"); - SYSCTL_ADD_PROC(&crusoe_sysctl_ctx, SYSCTL_CHILDREN(crusoe_sysctl_tree), - OID_AUTO, "percentage", CTLTYPE_INT | CTLFLAG_RD, - &crusoe_percentage, 0, tmx86_status_sysctl, "I", - "Processing performance (%)"); - SYSCTL_ADD_PROC(&crusoe_sysctl_ctx, SYSCTL_CHILDREN(crusoe_sysctl_tree), - OID_AUTO, "performance_longrun", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_RW, - 
&crusoe_performance_longrun, 0, tmx86_longrun_profile_sysctl, "I", ""); - SYSCTL_ADD_PROC(&crusoe_sysctl_ctx, SYSCTL_CHILDREN(crusoe_sysctl_tree), - OID_AUTO, "economy_longrun", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_RW, - &crusoe_economy_longrun, 0, tmx86_longrun_profile_sysctl, "I", ""); - - /* register performance profile change handler */ - EVENTHANDLER_REGISTER(power_profile_change, tmx86_longrun_power_profile, NULL, 0); - -} - -static void -print_transmeta_info() -{ - u_int regs[4], nreg = 0; - - do_cpuid(0x80860000, regs); - nreg = regs[0]; - if (nreg >= 0x80860001) { - do_cpuid(0x80860001, regs); - printf(" Processor revision %u.%u.%u.%u\n", - (regs[1] >> 24) & 0xff, - (regs[1] >> 16) & 0xff, - (regs[1] >> 8) & 0xff, - regs[1] & 0xff); - } - if (nreg >= 0x80860002) { - do_cpuid(0x80860002, regs); - printf(" Code Morphing Software revision %u.%u.%u-%u-%u\n", - (regs[1] >> 24) & 0xff, - (regs[1] >> 16) & 0xff, - (regs[1] >> 8) & 0xff, - regs[1] & 0xff, - regs[2]); - } - if (nreg >= 0x80860006) { - char info[65]; - do_cpuid(0x80860003, (u_int*) &info[0]); - do_cpuid(0x80860004, (u_int*) &info[16]); - do_cpuid(0x80860005, (u_int*) &info[32]); - do_cpuid(0x80860006, (u_int*) &info[48]); - info[64] = 0; - printf(" %s\n", info); - } - - crusoe_longrun = tmx86_get_longrun_mode(); - tmx86_get_longrun_status(&crusoe_frequency, - &crusoe_voltage, &crusoe_percentage); - printf(" LongRun mode: %d <%dMHz %dmV %d%%>\n", crusoe_longrun, - crusoe_frequency, crusoe_voltage, crusoe_percentage); -} diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c index 3331c7b..a93968e 100644 --- a/sys/amd64/amd64/initcpu.c +++ b/sys/amd64/amd64/initcpu.c @@ -40,492 +40,19 @@ #include <machine/md_var.h> #include <machine/specialreg.h> -#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU) -#define CPU_ENABLE_SSE -#endif -#if defined(CPU_DISABLE_SSE) -#undef CPU_ENABLE_SSE -#endif - void initializecpu(void); -#if defined(I586_CPU) && defined(CPU_WT_ALLOC) -void 
enable_K5_wt_alloc(void); -void enable_K6_wt_alloc(void); -void enable_K6_2_wt_alloc(void); -#endif - -#ifdef I486_CPU -static void init_5x86(void); -static void init_bluelightning(void); -static void init_486dlc(void); -static void init_cy486dx(void); -#ifdef CPU_I486_ON_386 -static void init_i486_on_386(void); -#endif -static void init_6x86(void); -#endif /* I486_CPU */ - -#ifdef I686_CPU -static void init_6x86MX(void); -static void init_ppro(void); -static void init_mendocino(void); -#endif static int hw_instruction_sse; SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD, &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU"); -/* Must *NOT* be BSS or locore will bzero these after setting them */ -int cpu = 0; /* Are we 386, 386sx, 486, etc? */ -u_int cpu_feature = 0; /* Feature flags */ -u_int cpu_high = 0; /* Highest arg to CPUID */ -u_int cpu_id = 0; /* Stepping ID */ -u_int cpu_procinfo = 0; /* HyperThreading Info / Brand Index / CLFUSH */ -char cpu_vendor[20] = ""; /* CPU Origin code */ - -#ifdef CPU_ENABLE_SSE +int cpu; /* Are we 386, 386sx, 486, etc? */ +u_int cpu_feature; /* Feature flags */ +u_int cpu_high; /* Highest arg to CPUID */ +u_int cpu_id; /* Stepping ID */ +u_int cpu_procinfo; /* HyperThreading Info / Brand Index / CLFUSH */ +char cpu_vendor[20]; /* CPU Origin code */ u_int cpu_fxsr; /* SSE enabled */ -#endif - -#ifdef I486_CPU -/* - * IBM Blue Lightning - */ -static void -init_bluelightning(void) -{ - u_long eflags; - -#if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE) - need_post_dma_flush = 1; -#endif - - eflags = read_eflags(); - disable_intr(); - - load_cr0(rcr0() | CR0_CD | CR0_NW); - invd(); - -#ifdef CPU_BLUELIGHTNING_FPU_OP_CACHE - wrmsr(0x1000, 0x9c92LL); /* FP operand can be cacheable on Cyrix FPU */ -#else - wrmsr(0x1000, 0x1c92LL); /* Intel FPU */ -#endif - /* Enables 13MB and 0-640KB cache. 
*/ - wrmsr(0x1001, (0xd0LL << 32) | 0x3ff); -#ifdef CPU_BLUELIGHTNING_3X - wrmsr(0x1002, 0x04000000LL); /* Enables triple-clock mode. */ -#else - wrmsr(0x1002, 0x03000000LL); /* Enables double-clock mode. */ -#endif - - /* Enable caching in CR0. */ - load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ - invd(); - write_eflags(eflags); -} - -/* - * Cyrix 486SLC/DLC/SR/DR series - */ -static void -init_486dlc(void) -{ - u_long eflags; - u_char ccr0; - - eflags = read_eflags(); - disable_intr(); - invd(); - - ccr0 = read_cyrix_reg(CCR0); -#ifndef CYRIX_CACHE_WORKS - ccr0 |= CCR0_NC1 | CCR0_BARB; - write_cyrix_reg(CCR0, ccr0); - invd(); -#else - ccr0 &= ~CCR0_NC0; -#ifndef CYRIX_CACHE_REALLY_WORKS - ccr0 |= CCR0_NC1 | CCR0_BARB; -#else - ccr0 |= CCR0_NC1; -#endif -#ifdef CPU_DIRECT_MAPPED_CACHE - ccr0 |= CCR0_CO; /* Direct mapped mode. */ -#endif - write_cyrix_reg(CCR0, ccr0); - - /* Clear non-cacheable region. */ - write_cyrix_reg(NCR1+2, NCR_SIZE_0K); - write_cyrix_reg(NCR2+2, NCR_SIZE_0K); - write_cyrix_reg(NCR3+2, NCR_SIZE_0K); - write_cyrix_reg(NCR4+2, NCR_SIZE_0K); - - write_cyrix_reg(0, 0); /* dummy write */ - - /* Enable caching in CR0. */ - load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ - invd(); -#endif /* !CYRIX_CACHE_WORKS */ - write_eflags(eflags); -} - - -/* - * Cyrix 486S/DX series - */ -static void -init_cy486dx(void) -{ - u_long eflags; - u_char ccr2; - - eflags = read_eflags(); - disable_intr(); - invd(); - - ccr2 = read_cyrix_reg(CCR2); -#ifdef CPU_SUSP_HLT - ccr2 |= CCR2_SUSP_HLT; -#endif - -#ifdef PC98 - /* Enables WB cache interface pin and Lock NW bit in CR0. */ - ccr2 |= CCR2_WB | CCR2_LOCK_NW; - /* Unlock NW bit in CR0. 
*/ - write_cyrix_reg(CCR2, ccr2 & ~CCR2_LOCK_NW); - load_cr0((rcr0() & ~CR0_CD) | CR0_NW); /* CD = 0, NW = 1 */ -#endif - - write_cyrix_reg(CCR2, ccr2); - write_eflags(eflags); -} - - -/* - * Cyrix 5x86 - */ -static void -init_5x86(void) -{ - u_long eflags; - u_char ccr2, ccr3, ccr4, pcr0; - - eflags = read_eflags(); - disable_intr(); - - load_cr0(rcr0() | CR0_CD | CR0_NW); - wbinvd(); - - (void)read_cyrix_reg(CCR3); /* dummy */ - - /* Initialize CCR2. */ - ccr2 = read_cyrix_reg(CCR2); - ccr2 |= CCR2_WB; -#ifdef CPU_SUSP_HLT - ccr2 |= CCR2_SUSP_HLT; -#else - ccr2 &= ~CCR2_SUSP_HLT; -#endif - ccr2 |= CCR2_WT1; - write_cyrix_reg(CCR2, ccr2); - - /* Initialize CCR4. */ - ccr3 = read_cyrix_reg(CCR3); - write_cyrix_reg(CCR3, CCR3_MAPEN0); - - ccr4 = read_cyrix_reg(CCR4); - ccr4 |= CCR4_DTE; - ccr4 |= CCR4_MEM; -#ifdef CPU_FASTER_5X86_FPU - ccr4 |= CCR4_FASTFPE; -#else - ccr4 &= ~CCR4_FASTFPE; -#endif - ccr4 &= ~CCR4_IOMASK; - /******************************************************************** - * WARNING: The "BIOS Writers Guide" mentions that I/O recovery time - * should be 0 for errata fix. - ********************************************************************/ -#ifdef CPU_IORT - ccr4 |= CPU_IORT & CCR4_IOMASK; -#endif - write_cyrix_reg(CCR4, ccr4); - - /* Initialize PCR0. */ - /**************************************************************** - * WARNING: RSTK_EN and LOOP_EN could make your system unstable. - * BTB_EN might make your system unstable. 
- ****************************************************************/ - pcr0 = read_cyrix_reg(PCR0); -#ifdef CPU_RSTK_EN - pcr0 |= PCR0_RSTK; -#else - pcr0 &= ~PCR0_RSTK; -#endif -#ifdef CPU_BTB_EN - pcr0 |= PCR0_BTB; -#else - pcr0 &= ~PCR0_BTB; -#endif -#ifdef CPU_LOOP_EN - pcr0 |= PCR0_LOOP; -#else - pcr0 &= ~PCR0_LOOP; -#endif - - /**************************************************************** - * WARNING: if you use a memory mapped I/O device, don't use - * DISABLE_5X86_LSSER option, which may reorder memory mapped - * I/O access. - * IF YOUR MOTHERBOARD HAS PCI BUS, DON'T DISABLE LSSER. - ****************************************************************/ -#ifdef CPU_DISABLE_5X86_LSSER - pcr0 &= ~PCR0_LSSER; -#else - pcr0 |= PCR0_LSSER; -#endif - write_cyrix_reg(PCR0, pcr0); - - /* Restore CCR3. */ - write_cyrix_reg(CCR3, ccr3); - - (void)read_cyrix_reg(0x80); /* dummy */ - - /* Unlock NW bit in CR0. */ - write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_LOCK_NW); - load_cr0((rcr0() & ~CR0_CD) | CR0_NW); /* CD = 0, NW = 1 */ - /* Lock NW bit in CR0. */ - write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW); - - write_eflags(eflags); -} - -#ifdef CPU_I486_ON_386 -/* - * There are i486 based upgrade products for i386 machines. - * In this case, BIOS doesn't enables CPU cache. - */ -static void -init_i486_on_386(void) -{ - u_long eflags; - -#if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE) - need_post_dma_flush = 1; -#endif - - eflags = read_eflags(); - disable_intr(); - - load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0, NW = 0 */ - - write_eflags(eflags); -} -#endif - -/* - * Cyrix 6x86 - * - * XXX - What should I do here? Please let me know. - */ -static void -init_6x86(void) -{ - u_long eflags; - u_char ccr3, ccr4; - - eflags = read_eflags(); - disable_intr(); - - load_cr0(rcr0() | CR0_CD | CR0_NW); - wbinvd(); - - /* Initialize CCR0. */ - write_cyrix_reg(CCR0, read_cyrix_reg(CCR0) | CCR0_NC1); - - /* Initialize CCR1. 
*/ -#ifdef CPU_CYRIX_NO_LOCK - write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) | CCR1_NO_LOCK); -#else - write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) & ~CCR1_NO_LOCK); -#endif - - /* Initialize CCR2. */ -#ifdef CPU_SUSP_HLT - write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_SUSP_HLT); -#else - write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_SUSP_HLT); -#endif - - ccr3 = read_cyrix_reg(CCR3); - write_cyrix_reg(CCR3, CCR3_MAPEN0); - - /* Initialize CCR4. */ - ccr4 = read_cyrix_reg(CCR4); - ccr4 |= CCR4_DTE; - ccr4 &= ~CCR4_IOMASK; -#ifdef CPU_IORT - write_cyrix_reg(CCR4, ccr4 | (CPU_IORT & CCR4_IOMASK)); -#else - write_cyrix_reg(CCR4, ccr4 | 7); -#endif - - /* Initialize CCR5. */ -#ifdef CPU_WT_ALLOC - write_cyrix_reg(CCR5, read_cyrix_reg(CCR5) | CCR5_WT_ALLOC); -#endif - - /* Restore CCR3. */ - write_cyrix_reg(CCR3, ccr3); - - /* Unlock NW bit in CR0. */ - write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_LOCK_NW); - - /* - * Earlier revision of the 6x86 CPU could crash the system if - * L1 cache is in write-back mode. - */ - if ((cyrix_did & 0xff00) > 0x1600) - load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ - else { - /* Revision 2.6 and lower. */ -#ifdef CYRIX_CACHE_REALLY_WORKS - load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ -#else - load_cr0((rcr0() & ~CR0_CD) | CR0_NW); /* CD = 0 and NW = 1 */ -#endif - } - - /* Lock NW bit in CR0. */ - write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW); - - write_eflags(eflags); -} -#endif /* I486_CPU */ - -#ifdef I686_CPU -/* - * Cyrix 6x86MX (code-named M2) - * - * XXX - What should I do here? Please let me know. - */ -static void -init_6x86MX(void) -{ - u_long eflags; - u_char ccr3, ccr4; - - eflags = read_eflags(); - disable_intr(); - - load_cr0(rcr0() | CR0_CD | CR0_NW); - wbinvd(); - - /* Initialize CCR0. */ - write_cyrix_reg(CCR0, read_cyrix_reg(CCR0) | CCR0_NC1); - - /* Initialize CCR1. 
*/ -#ifdef CPU_CYRIX_NO_LOCK - write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) | CCR1_NO_LOCK); -#else - write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) & ~CCR1_NO_LOCK); -#endif - - /* Initialize CCR2. */ -#ifdef CPU_SUSP_HLT - write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_SUSP_HLT); -#else - write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_SUSP_HLT); -#endif - - ccr3 = read_cyrix_reg(CCR3); - write_cyrix_reg(CCR3, CCR3_MAPEN0); - - /* Initialize CCR4. */ - ccr4 = read_cyrix_reg(CCR4); - ccr4 &= ~CCR4_IOMASK; -#ifdef CPU_IORT - write_cyrix_reg(CCR4, ccr4 | (CPU_IORT & CCR4_IOMASK)); -#else - write_cyrix_reg(CCR4, ccr4 | 7); -#endif - - /* Initialize CCR5. */ -#ifdef CPU_WT_ALLOC - write_cyrix_reg(CCR5, read_cyrix_reg(CCR5) | CCR5_WT_ALLOC); -#endif - - /* Restore CCR3. */ - write_cyrix_reg(CCR3, ccr3); - - /* Unlock NW bit in CR0. */ - write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_LOCK_NW); - - load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ - - /* Lock NW bit in CR0. */ - write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW); - - write_eflags(eflags); -} - -static void -init_ppro(void) -{ -#ifndef SMP - u_int64_t apicbase; - - /* - * Local APIC should be diabled in UP kernel. - */ - apicbase = rdmsr(0x1b); - apicbase &= ~0x800LL; - wrmsr(0x1b, apicbase); -#endif -} - -/* - * Initialize BBL_CR_CTL3 (Control register 3: used to configure the - * L2 cache). - */ -static void -init_mendocino(void) -{ -#ifdef CPU_PPRO2CELERON - u_long eflags; - u_int64_t bbl_cr_ctl3; - - eflags = read_eflags(); - disable_intr(); - - load_cr0(rcr0() | CR0_CD | CR0_NW); - wbinvd(); - - bbl_cr_ctl3 = rdmsr(0x11e); - - /* If the L2 cache is configured, do nothing. */ - if (!(bbl_cr_ctl3 & 1)) { - bbl_cr_ctl3 = 0x134052bLL; - - /* Set L2 Cache Latency (Default: 5). */ -#ifdef CPU_CELERON_L2_LATENCY -#if CPU_L2_LATENCY > 15 -#error invalid CPU_L2_LATENCY. 
-#endif - bbl_cr_ctl3 |= CPU_L2_LATENCY << 1; -#else - bbl_cr_ctl3 |= 5 << 1; -#endif - wrmsr(0x11e, bbl_cr_ctl3); - } - - load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); - write_eflags(eflags); -#endif /* CPU_PPRO2CELERON */ -} - -#endif /* I686_CPU */ /* * Initialize CR4 (Control register 4) to enable SSE instructions. @@ -533,12 +60,10 @@ init_mendocino(void) void enable_sse(void) { -#if defined(CPU_ENABLE_SSE) if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) { load_cr4(rcr4() | CR4_FXSR | CR4_XMM); cpu_fxsr = hw_instruction_sse = 1; } -#endif } void @@ -546,337 +71,8 @@ initializecpu(void) { switch (cpu) { -#ifdef I486_CPU - case CPU_BLUE: - init_bluelightning(); - break; - case CPU_486DLC: - init_486dlc(); - break; - case CPU_CY486DX: - init_cy486dx(); - break; - case CPU_M1SC: - init_5x86(); - break; -#ifdef CPU_I486_ON_386 - case CPU_486: - init_i486_on_386(); - break; -#endif - case CPU_M1: - init_6x86(); - break; -#endif /* I486_CPU */ -#ifdef I686_CPU - case CPU_M2: - init_6x86MX(); - break; - case CPU_686: - if (strcmp(cpu_vendor, "GenuineIntel") == 0) { - switch (cpu_id & 0xff0) { - case 0x610: - init_ppro(); - break; - case 0x660: - init_mendocino(); - break; - } - } else if (strcmp(cpu_vendor, "AuthenticAMD") == 0) { -#if defined(I686_CPU) && defined(CPU_ATHLON_SSE_HACK) - /* - * Sometimes the BIOS doesn't enable SSE instructions. - * According to AMD document 20734, the mobile - * Duron, the (mobile) Athlon 4 and the Athlon MP - * support SSE. These correspond to cpu_id 0x66X - * or 0x67X. - */ - if ((cpu_feature & CPUID_XMM) == 0 && - ((cpu_id & ~0xf) == 0x660 || - (cpu_id & ~0xf) == 0x670 || - (cpu_id & ~0xf) == 0x680)) { - u_int regs[4]; - wrmsr(0xC0010015, rdmsr(0xC0010015) & ~0x08000); - do_cpuid(1, regs); - cpu_feature = regs[3]; - } -#endif - } - break; -#endif default: break; } enable_sse(); - -#if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE) - /* - * OS should flush L1 cache by itself because no PC-98 supports - * non-Intel CPUs. 
Use wbinvd instruction before DMA transfer - * when need_pre_dma_flush = 1, use invd instruction after DMA - * transfer when need_post_dma_flush = 1. If your CPU upgrade - * product supports hardware cache control, you can add the - * CPU_UPGRADE_HW_CACHE option in your kernel configuration file. - * This option eliminates unneeded cache flush instruction(s). - */ - if (strcmp(cpu_vendor, "CyrixInstead") == 0) { - switch (cpu) { -#ifdef I486_CPU - case CPU_486DLC: - need_post_dma_flush = 1; - break; - case CPU_M1SC: - need_pre_dma_flush = 1; - break; - case CPU_CY486DX: - need_pre_dma_flush = 1; -#ifdef CPU_I486_ON_386 - need_post_dma_flush = 1; -#endif - break; -#endif - default: - break; - } - } else if (strcmp(cpu_vendor, "AuthenticAMD") == 0) { - switch (cpu_id & 0xFF0) { - case 0x470: /* Enhanced Am486DX2 WB */ - case 0x490: /* Enhanced Am486DX4 WB */ - case 0x4F0: /* Am5x86 WB */ - need_pre_dma_flush = 1; - break; - } - } else if (strcmp(cpu_vendor, "IBM") == 0) { - need_post_dma_flush = 1; - } else { -#ifdef CPU_I486_ON_386 - need_pre_dma_flush = 1; -#endif - } -#endif /* PC98 && !CPU_UPGRADE_HW_CACHE */ -} - -#if defined(I586_CPU) && defined(CPU_WT_ALLOC) -/* - * Enable write allocate feature of AMD processors. - * Following two functions require the Maxmem variable being set. - */ -void -enable_K5_wt_alloc(void) -{ - u_int64_t msr; - register_t savecrit; - - /* - * Write allocate is supported only on models 1, 2, and 3, with - * a stepping of 4 or greater. - */ - if (((cpu_id & 0xf0) > 0) && ((cpu_id & 0x0f) > 3)) { - savecrit = intr_disable(); - msr = rdmsr(0x83); /* HWCR */ - wrmsr(0x83, msr & !(0x10)); - - /* - * We have to tell the chip where the top of memory is, - * since video cards could have frame bufferes there, - * memory-mapped I/O could be there, etc. 
- */ - if(Maxmem > 0) - msr = Maxmem / 16; - else - msr = 0; - msr |= AMD_WT_ALLOC_TME | AMD_WT_ALLOC_FRE; -#ifdef PC98 - if (!(inb(0x43b) & 4)) { - wrmsr(0x86, 0x0ff00f0); - msr |= AMD_WT_ALLOC_PRE; - } -#else - /* - * There is no way to know wheter 15-16M hole exists or not. - * Therefore, we disable write allocate for this range. - */ - wrmsr(0x86, 0x0ff00f0); - msr |= AMD_WT_ALLOC_PRE; -#endif - wrmsr(0x85, msr); - - msr=rdmsr(0x83); - wrmsr(0x83, msr|0x10); /* enable write allocate */ - intr_restore(savecrit); - } -} - -void -enable_K6_wt_alloc(void) -{ - quad_t size; - u_int64_t whcr; - u_long eflags; - - eflags = read_eflags(); - disable_intr(); - wbinvd(); - -#ifdef CPU_DISABLE_CACHE - /* - * Certain K6-2 box becomes unstable when write allocation is - * enabled. - */ - /* - * The AMD-K6 processer provides the 64-bit Test Register 12(TR12), - * but only the Cache Inhibit(CI) (bit 3 of TR12) is suppported. - * All other bits in TR12 have no effect on the processer's operation. - * The I/O Trap Restart function (bit 9 of TR12) is always enabled - * on the AMD-K6. - */ - wrmsr(0x0000000e, (u_int64_t)0x0008); -#endif - /* Don't assume that memory size is aligned with 4M. */ - if (Maxmem > 0) - size = ((Maxmem >> 8) + 3) >> 2; - else - size = 0; - - /* Limit is 508M bytes. */ - if (size > 0x7f) - size = 0x7f; - whcr = (rdmsr(0xc0000082) & ~(0x7fLL << 1)) | (size << 1); - -#if defined(PC98) || defined(NO_MEMORY_HOLE) - if (whcr & (0x7fLL << 1)) { -#ifdef PC98 - /* - * If bit 2 of port 0x43b is 0, disable wrte allocate for the - * 15-16M range. - */ - if (!(inb(0x43b) & 4)) - whcr &= ~0x0001LL; - else -#endif - whcr |= 0x0001LL; - } -#else - /* - * There is no way to know wheter 15-16M hole exists or not. - * Therefore, we disable write allocate for this range. 
- */ - whcr &= ~0x0001LL; -#endif - wrmsr(0x0c0000082, whcr); - - write_eflags(eflags); -} - -void -enable_K6_2_wt_alloc(void) -{ - quad_t size; - u_int64_t whcr; - u_long eflags; - - eflags = read_eflags(); - disable_intr(); - wbinvd(); - -#ifdef CPU_DISABLE_CACHE - /* - * Certain K6-2 box becomes unstable when write allocation is - * enabled. - */ - /* - * The AMD-K6 processer provides the 64-bit Test Register 12(TR12), - * but only the Cache Inhibit(CI) (bit 3 of TR12) is suppported. - * All other bits in TR12 have no effect on the processer's operation. - * The I/O Trap Restart function (bit 9 of TR12) is always enabled - * on the AMD-K6. - */ - wrmsr(0x0000000e, (u_int64_t)0x0008); -#endif - /* Don't assume that memory size is aligned with 4M. */ - if (Maxmem > 0) - size = ((Maxmem >> 8) + 3) >> 2; - else - size = 0; - - /* Limit is 4092M bytes. */ - if (size > 0x3fff) - size = 0x3ff; - whcr = (rdmsr(0xc0000082) & ~(0x3ffLL << 22)) | (size << 22); - -#if defined(PC98) || defined(NO_MEMORY_HOLE) - if (whcr & (0x3ffLL << 22)) { -#ifdef PC98 - /* - * If bit 2 of port 0x43b is 0, disable wrte allocate for the - * 15-16M range. - */ - if (!(inb(0x43b) & 4)) - whcr &= ~(1LL << 16); - else -#endif - whcr |= 1LL << 16; - } -#else - /* - * There is no way to know wheter 15-16M hole exists or not. - * Therefore, we disable write allocate for this range. 
- */ - whcr &= ~(1LL << 16); -#endif - wrmsr(0x0c0000082, whcr); - - write_eflags(eflags); -} -#endif /* I585_CPU && CPU_WT_ALLOC */ - -#include "opt_ddb.h" -#ifdef DDB -#include <ddb/ddb.h> - -DB_SHOW_COMMAND(cyrixreg, cyrixreg) -{ - u_long eflags; - u_int cr0; - u_char ccr1, ccr2, ccr3; - u_char ccr0 = 0, ccr4 = 0, ccr5 = 0, pcr0 = 0; - - cr0 = rcr0(); - if (strcmp(cpu_vendor,"CyrixInstead") == 0) { - eflags = read_eflags(); - disable_intr(); - - - if ((cpu != CPU_M1SC) && (cpu != CPU_CY486DX)) { - ccr0 = read_cyrix_reg(CCR0); - } - ccr1 = read_cyrix_reg(CCR1); - ccr2 = read_cyrix_reg(CCR2); - ccr3 = read_cyrix_reg(CCR3); - if ((cpu == CPU_M1SC) || (cpu == CPU_M1) || (cpu == CPU_M2)) { - write_cyrix_reg(CCR3, CCR3_MAPEN0); - ccr4 = read_cyrix_reg(CCR4); - if ((cpu == CPU_M1) || (cpu == CPU_M2)) - ccr5 = read_cyrix_reg(CCR5); - else - pcr0 = read_cyrix_reg(PCR0); - write_cyrix_reg(CCR3, ccr3); /* Restore CCR3. */ - } - write_eflags(eflags); - - if ((cpu != CPU_M1SC) && (cpu != CPU_CY486DX)) - printf("CCR0=%x, ", (u_int)ccr0); - - printf("CCR1=%x, CCR2=%x, CCR3=%x", - (u_int)ccr1, (u_int)ccr2, (u_int)ccr3); - if ((cpu == CPU_M1SC) || (cpu == CPU_M1) || (cpu == CPU_M2)) { - printf(", CCR4=%x, ", (u_int)ccr4); - if (cpu == CPU_M1SC) - printf("PCR0=%x\n", pcr0); - else - printf("CCR5=%x\n", ccr5); - } - } - printf("CR0=%x\n", cr0); } -#endif /* DDB */ diff --git a/sys/amd64/amd64/legacy.c b/sys/amd64/amd64/legacy.c index 3effb94..7f0e89c 100644 --- a/sys/amd64/amd64/legacy.c +++ b/sys/amd64/amd64/legacy.c @@ -148,21 +148,9 @@ legacy_attach(device_t dev) bus_generic_attach(dev); /* - * If we didn't see EISA or ISA on a pci bridge, create some - * connection points now so they show up "on motherboard". + * If we didn't see ISA on a pci bridge, create some + * connection points now so it shows up "on motherboard". 
*/ - if (!devclass_get_device(devclass_find("eisa"), 0)) { - child = BUS_ADD_CHILD(dev, 0, "eisa", 0); - if (child == NULL) - panic("legacy_attach eisa"); - device_probe_and_attach(child); - } - if (!devclass_get_device(devclass_find("mca"), 0)) { - child = BUS_ADD_CHILD(dev, 0, "mca", 0); - if (child == 0) - panic("legacy_probe mca"); - device_probe_and_attach(child); - } if (!devclass_get_device(devclass_find("isa"), 0)) { child = BUS_ADD_CHILD(dev, 0, "isa", 0); if (child == NULL) diff --git a/sys/amd64/amd64/locore.S b/sys/amd64/amd64/locore.S index 8115e23..65b01ac 100644 --- a/sys/amd64/amd64/locore.S +++ b/sys/amd64/amd64/locore.S @@ -1,10 +1,7 @@ /*- - * Copyright (c) 1990 The Regents of the University of California. + * Copyright (c) 2003 Peter Wemm <peter@FreeBSD.org> * All rights reserved. * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -13,18 +10,11 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
* - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) @@ -33,25 +23,10 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 * $FreeBSD$ - * - * originally from: locore.s, by William F. Jolitz - * - * Substantially rewritten by David Greenman, Rod Grimes, - * Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp - * and many others. */ -#include "opt_bootp.h" -#include "opt_compat.h" -#include "opt_nfsroot.h" - -#include <sys/syscall.h> -#include <sys/reboot.h> - #include <machine/asmacros.h> -#include <machine/cputypes.h> #include <machine/psl.h> #include <machine/pmap.h> #include <machine/specialreg.h> @@ -59,14 +34,6 @@ #include "assym.s" /* - * XXX - * - * Note: This version greatly munged to avoid various assembler errors - * that may be fixed in newer versions of gas. Perhaps newer versions - * will have more pleasant appearance. - */ - -/* * PTmap is recursive pagemap at top of virtual address space. * Within PTmap, the page directory can be found (third indirection). */ @@ -75,837 +42,51 @@ .set PTD,PTmap + (PTDPTDI * PAGE_SIZE) .set PTDpde,PTD + (PTDPTDI * PDESIZE) -#ifdef SMP -/* - * Define layout of per-cpu address space. - * This is "constructed" in locore.s on the BSP and in mp_machdep.c - * for each AP. DO NOT REORDER THESE WITHOUT UPDATING THE REST! 
- */ - .globl SMP_prvspace, lapic - .set SMP_prvspace,(MPPTDI << PDRSHIFT) - .set lapic,SMP_prvspace + (NPTEPG-1) * PAGE_SIZE -#endif /* SMP */ - /* * Compiled KERNBASE location */ .globl kernbase .set kernbase,KERNBASE -/* - * Globals - */ - .data - ALIGN_DATA /* just to be sure */ - - .globl HIDENAME(tmpstk) - .space 0x2000 /* space for tmpstk - temporary stack */ -HIDENAME(tmpstk): - - .globl bootinfo -bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */ - - .globl KERNend -KERNend: .long 0 /* phys addr end of kernel (just after bss) */ -physfree: .long 0 /* phys addr of next free page */ - -#ifdef SMP - .globl cpu0prvpage -cpu0pp: .long 0 /* phys addr cpu0 private pg */ -cpu0prvpage: .long 0 /* relocated version */ - - .globl SMPpt -SMPptpa: .long 0 /* phys addr SMP page table */ -SMPpt: .long 0 /* relocated version */ -#endif /* SMP */ - - .globl IdlePTD -IdlePTD: .long 0 /* phys addr of kernel PTD */ - -#ifdef PAE - .globl IdlePDPT -IdlePDPT: .long 0 /* phys addr of kernel PDPT */ -#endif - -#ifdef SMP - .globl KPTphys -#endif -KPTphys: .long 0 /* phys addr of kernel page tables */ - - .globl proc0uarea, proc0kstack -proc0uarea: .long 0 /* address of proc 0 uarea space */ -proc0kstack: .long 0 /* address of proc 0 kstack space */ -p0upa: .long 0 /* phys addr of proc0's UAREA */ -p0kpa: .long 0 /* phys addr of proc0's STACK */ - -vm86phystk: .long 0 /* PA of vm86/bios stack */ - - .globl vm86paddr, vm86pa -vm86paddr: .long 0 /* address of vm86 region */ -vm86pa: .long 0 /* phys addr of vm86 region */ - -#ifdef PC98 - .globl pc98_system_parameter -pc98_system_parameter: - .space 0x240 -#endif - -/********************************************************************** - * - * Some handy macros - * - */ - -#define R(foo) ((foo)-KERNBASE) - -#define ALLOCPAGES(foo) \ - movl R(physfree), %esi ; \ - movl $((foo)*PAGE_SIZE), %eax ; \ - addl %esi, %eax ; \ - movl %eax, R(physfree) ; \ - movl %esi, %edi ; \ - movl $((foo)*PAGE_SIZE),%ecx ; \ - xorl %eax,%eax 
; \ - cld ; \ - rep ; \ - stosb - -/* - * fillkpt - * eax = page frame address - * ebx = index into page table - * ecx = how many pages to map - * base = base address of page dir/table - * prot = protection bits - */ -#define fillkpt(base, prot) \ - shll $PTESHIFT,%ebx ; \ - addl base,%ebx ; \ - orl $PG_V,%eax ; \ - orl prot,%eax ; \ -1: movl %eax,(%ebx) ; \ - addl $PAGE_SIZE,%eax ; /* increment physical address */ \ - addl $PTESIZE,%ebx ; /* next pte */ \ - loop 1b - -/* - * fillkptphys(prot) - * eax = physical address - * ecx = how many pages to map - * prot = protection bits - */ -#define fillkptphys(prot) \ - movl %eax, %ebx ; \ - shrl $PAGE_SHIFT, %ebx ; \ - fillkpt(R(KPTphys), prot) - .text /********************************************************************** * - * This is where the bootblocks start us, set the ball rolling... + * This is where the loader trampoline start us, set the ball rolling... + * + * We are called with the stack looking like this: + * 0(%rsp) = 32 bit return address (cannot be used) + * 4(%rsp) = 32 bit modulep + * 8(%rsp) = 32 bit kernend * + * We are already in long mode, on a 64 bit %cs and running at KERNBASE. */ NON_GPROF_ENTRY(btext) -#ifdef PC98 - /* save SYSTEM PARAMETER for resume (NS/T or other) */ - movl $0xa1400,%esi - movl $R(pc98_system_parameter),%edi - movl $0x0240,%ecx - cld - rep - movsb -#else /* IBM-PC */ -/* Tell the bios to warmboot next time */ + /* Tell the bios to warmboot next time */ movw $0x1234,0x472 -#endif /* PC98 */ - -/* Set up a real frame in case the double return in newboot is executed. */ - pushl %ebp - movl %esp, %ebp - -/* Don't trust what the BIOS gives for eflags. */ - pushl $PSL_KERNEL - popfl - -/* - * Don't trust what the BIOS gives for %fs and %gs. Trust the bootstrap - * to set %cs, %ds, %es and %ss. - */ - mov %ds, %ax - mov %ax, %fs - mov %ax, %gs - - call recover_bootinfo - -/* Get onto a stack that we can trust. 
*/ -/* - * XXX this step is delayed in case recover_bootinfo needs to return via - * the old stack, but it need not be, since recover_bootinfo actually - * returns via the old frame. - */ - movl $R(HIDENAME(tmpstk)),%esp - -#ifdef PC98 - /* pc98_machine_type & M_EPSON_PC98 */ - testb $0x02,R(pc98_system_parameter)+220 - jz 3f - /* epson_machine_id <= 0x0b */ - cmpb $0x0b,R(pc98_system_parameter)+224 - ja 3f - - /* count up memory */ - movl $0x100000,%eax /* next, talley remaining memory */ - movl $0xFFF-0x100,%ecx -1: movl 0(%eax),%ebx /* save location to check */ - movl $0xa55a5aa5,0(%eax) /* write test pattern */ - cmpl $0xa55a5aa5,0(%eax) /* does not check yet for rollover */ - jne 2f - movl %ebx,0(%eax) /* restore memory */ - addl $PAGE_SIZE,%eax - loop 1b -2: subl $0x100000,%eax - shrl $17,%eax - movb %al,R(pc98_system_parameter)+1 -3: - - movw R(pc98_system_parameter+0x86),%ax - movw %ax,R(cpu_id) -#endif - - call identify_cpu -/* clear bss */ -/* - * XXX this should be done a little earlier. - * - * XXX we don't check that there is memory for our bss and page tables - * before using it. - * - * XXX the boot program somewhat bogusly clears the bss. We still have - * to do it in case we were unzipped by kzipboot. Then the boot program - * only clears kzipboot's bss. - * - * XXX the gdt and idt are still somewhere in the boot program. We - * depend on the convention that the boot program is below 1MB and we - * are above 1MB to keep the gdt and idt away from the bss and page - * tables. - */ - movl $R(end),%ecx - movl $R(edata),%edi - subl %edi,%ecx - xorl %eax,%eax - cld - rep - stosb - - call create_pagetables - -/* - * If the CPU has support for VME, turn it on. 
- */ - testl $CPUID_VME, R(cpu_feature) - jz 1f - movl %cr4, %eax - orl $CR4_VME, %eax - movl %eax, %cr4 -1: - -/* Now enable paging */ -#ifdef PAE - movl R(IdlePDPT), %eax - movl %eax, %cr3 - movl %cr4, %eax - orl $CR4_PAE, %eax - movl %eax, %cr4 -#else - movl R(IdlePTD), %eax - movl %eax,%cr3 /* load ptd addr into mmu */ -#endif - movl %cr0,%eax /* get control word */ - orl $CR0_PE|CR0_PG,%eax /* enable paging */ - movl %eax,%cr0 /* and let's page NOW! */ + /* Don't trust what the loader gives for rflags. */ + pushq $PSL_KERNEL + popfq - pushl $begin /* jump to high virtualized address */ - ret + /* Find the metadata pointers before we lose them */ + movq %rsp, %rbp + xorq %rax, %rax + movl 4(%rbp),%eax /* modulep */ + movq %rax,modulep + movl 8(%rbp),%eax /* kernend */ + movq %rax,physfree -/* now running relocated at KERNBASE where the system is linked to run */ -begin: - /* set up bootstrap stack */ - movl proc0kstack,%eax /* location of in-kernel stack */ - /* bootstrap stack end location */ - leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp - - xorl %ebp,%ebp /* mark end of frames */ - -#ifdef PAE - movl IdlePDPT,%esi -#else - movl IdlePTD,%esi -#endif - movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) - - pushl physfree /* value of first for init386(first) */ - call init386 /* wire 386 chip for unix operation */ - - /* - * Clean up the stack in a way that db_numargs() understands, so - * that backtraces in ddb don't underrun the stack. Traps for - * inaccessible memory are more fatal than usual this early. - */ - addl $4,%esp + /* Get onto a stack that we can trust - there is no going back now. 
*/ + movq $bootstack,%rsp + xorq %rbp, %rbp + call hammer_time /* set up cpu for unix operation */ call mi_startup /* autoconfiguration, mountroot etc */ - /* NOTREACHED */ - addl $0,%esp /* for db_numargs() again */ - -/* - * Signal trampoline, copied to top of user stack - */ -NON_GPROF_ENTRY(sigcode) - calll *SIGF_HANDLER(%esp) - leal SIGF_UC(%esp),%eax /* get ucontext */ - pushl %eax - testl $PSL_VM,UC_EFLAGS(%eax) - jne 1f - movl UC_GS(%eax),%gs /* restore %gs */ -1: - movl $SYS_sigreturn,%eax - pushl %eax /* junk to fake return addr. */ - int $0x80 /* enter kernel with args */ - /* on stack */ -1: - jmp 1b - -#ifdef COMPAT_FREEBSD4 - ALIGN_TEXT -freebsd4_sigcode: - calll *SIGF_HANDLER(%esp) - leal SIGF_UC4(%esp),%eax /* get ucontext */ - pushl %eax - testl $PSL_VM,UC4_EFLAGS(%eax) - jne 1f - movl UC4_GS(%eax),%gs /* restore %gs */ -1: - movl $344,%eax /* 4.x SYS_sigreturn */ - pushl %eax /* junk to fake return addr. */ - int $0x80 /* enter kernel with args */ - /* on stack */ -1: - jmp 1b -#endif - -#ifdef COMPAT_43 - ALIGN_TEXT -osigcode: - call *SIGF_HANDLER(%esp) /* call signal handler */ - lea SIGF_SC(%esp),%eax /* get sigcontext */ - pushl %eax - testl $PSL_VM,SC_PS(%eax) - jne 9f - movl SC_GS(%eax),%gs /* restore %gs */ -9: - movl $103,%eax /* 3.x SYS_sigreturn */ - pushl %eax /* junk to fake return addr. */ - int $0x80 /* enter kernel with args */ -0: jmp 0b -#endif /* COMPAT_43 */ +0: hlt + jmp 0b - ALIGN_TEXT -esigcode: - - .data - .globl szsigcode -szsigcode: - .long esigcode-sigcode -#ifdef COMPAT_FREEBSD4 - .globl szfreebsd4_sigcode -szfreebsd4_sigcode: - .long esigcode-freebsd4_sigcode -#endif -#ifdef COMPAT_43 - .globl szosigcode -szosigcode: - .long esigcode-osigcode -#endif - .text - -/********************************************************************** - * - * Recover the bootinfo passed to us from the boot program - * - */ -recover_bootinfo: - /* - * This code is called in different ways depending on what loaded - * and started the kernel. 
This is used to detect how we get the - * arguments from the other code and what we do with them. - * - * Old disk boot blocks: - * (*btext)(howto, bootdev, cyloffset, esym); - * [return address == 0, and can NOT be returned to] - * [cyloffset was not supported by the FreeBSD boot code - * and always passed in as 0] - * [esym is also known as total in the boot code, and - * was never properly supported by the FreeBSD boot code] - * - * Old diskless netboot code: - * (*btext)(0,0,0,0,&nfsdiskless,0,0,0); - * [return address != 0, and can NOT be returned to] - * If we are being booted by this code it will NOT work, - * so we are just going to halt if we find this case. - * - * New uniform boot code: - * (*btext)(howto, bootdev, 0, 0, 0, &bootinfo) - * [return address != 0, and can be returned to] - * - * There may seem to be a lot of wasted arguments in here, but - * that is so the newer boot code can still load very old kernels - * and old boot code can load new kernels. - */ - - /* - * The old style disk boot blocks fake a frame on the stack and - * did an lret to get here. The frame on the stack has a return - * address of 0. - */ - cmpl $0,4(%ebp) - je olddiskboot - - /* - * We have some form of return address, so this is either the - * old diskless netboot code, or the new uniform code. That can - * be detected by looking at the 5th argument, if it is 0 - * we are being booted by the new uniform boot code. - */ - cmpl $0,24(%ebp) - je newboot - - /* - * Seems we have been loaded by the old diskless boot code, we - * don't stand a chance of running as the diskless structure - * changed considerably between the two, so just halt. - */ - hlt - - /* - * We have been loaded by the new uniform boot code. 
- * Let's check the bootinfo version, and if we do not understand - * it we return to the loader with a status of 1 to indicate this error - */ -newboot: - movl 28(%ebp),%ebx /* &bootinfo.version */ - movl BI_VERSION(%ebx),%eax - cmpl $1,%eax /* We only understand version 1 */ - je 1f - movl $1,%eax /* Return status */ - leave - /* - * XXX this returns to our caller's caller (as is required) since - * we didn't set up a frame and our caller did. - */ - ret - -1: - /* - * If we have a kernelname copy it in - */ - movl BI_KERNELNAME(%ebx),%esi - cmpl $0,%esi - je 2f /* No kernelname */ - movl $MAXPATHLEN,%ecx /* Brute force!!! */ - movl $R(kernelname),%edi - cmpb $'/',(%esi) /* Make sure it starts with a slash */ - je 1f - movb $'/',(%edi) - incl %edi - decl %ecx -1: - cld - rep - movsb - -2: - /* - * Determine the size of the boot loader's copy of the bootinfo - * struct. This is impossible to do properly because old versions - * of the struct don't contain a size field and there are 2 old - * versions with the same version number. - */ - movl $BI_ENDCOMMON,%ecx /* prepare for sizeless version */ - testl $RB_BOOTINFO,8(%ebp) /* bi_size (and bootinfo) valid? */ - je got_bi_size /* no, sizeless version */ - movl BI_SIZE(%ebx),%ecx -got_bi_size: - - /* - * Copy the common part of the bootinfo struct - */ - movl %ebx,%esi - movl $R(bootinfo),%edi - cmpl $BOOTINFO_SIZE,%ecx - jbe got_common_bi_size - movl $BOOTINFO_SIZE,%ecx -got_common_bi_size: - cld - rep - movsb - -#ifdef NFS_ROOT -#ifndef BOOTP_NFSV3 - /* - * If we have a nfs_diskless structure copy it in - */ - movl BI_NFS_DISKLESS(%ebx),%esi - cmpl $0,%esi - je olddiskboot - movl $R(nfs_diskless),%edi - movl $NFSDISKLESS_SIZE,%ecx - cld - rep - movsb - movl $R(nfs_diskless_valid),%edi - movl $1,(%edi) -#endif -#endif - - /* - * The old style disk boot. 
- * (*btext)(howto, bootdev, cyloffset, esym); - * Note that the newer boot code just falls into here to pick - * up howto and bootdev, cyloffset and esym are no longer used - */ -olddiskboot: - movl 8(%ebp),%eax - movl %eax,R(boothowto) - movl 12(%ebp),%eax - movl %eax,R(bootdev) - - ret - - -/********************************************************************** - * - * Identify the CPU and initialize anything special about it - * - */ -identify_cpu: - - /* Try to toggle alignment check flag; does not exist on 386. */ - pushfl - popl %eax - movl %eax,%ecx - orl $PSL_AC,%eax - pushl %eax - popfl - pushfl - popl %eax - xorl %ecx,%eax - andl $PSL_AC,%eax - pushl %ecx - popfl - - testl %eax,%eax - jnz try486 - - /* NexGen CPU does not have aligment check flag. */ - pushfl - movl $0x5555, %eax - xorl %edx, %edx - movl $2, %ecx - clc - divl %ecx - jz trynexgen - popfl - movl $CPU_386,R(cpu) - jmp 3f - -trynexgen: - popfl - movl $CPU_NX586,R(cpu) - movl $0x4778654e,R(cpu_vendor) # store vendor string - movl $0x72446e65,R(cpu_vendor+4) - movl $0x6e657669,R(cpu_vendor+8) - movl $0,R(cpu_vendor+12) - jmp 3f - -try486: /* Try to toggle identification flag; does not exist on early 486s. */ - pushfl - popl %eax - movl %eax,%ecx - xorl $PSL_ID,%eax - pushl %eax - popfl - pushfl - popl %eax - xorl %ecx,%eax - andl $PSL_ID,%eax - pushl %ecx - popfl - - testl %eax,%eax - jnz trycpuid - movl $CPU_486,R(cpu) - - /* - * Check Cyrix CPU - * Cyrix CPUs do not change the undefined flags following - * execution of the divide instruction which divides 5 by 2. - * - * Note: CPUID is enabled on M2, so it passes another way. - */ - pushfl - movl $0x5555, %eax - xorl %edx, %edx - movl $2, %ecx - clc - divl %ecx - jnc trycyrix - popfl - jmp 3f /* You may use Intel CPU. */ - -trycyrix: - popfl - /* - * IBM Bluelighting CPU also doesn't change the undefined flags. 
- * Because IBM doesn't disclose the information for Bluelighting - * CPU, we couldn't distinguish it from Cyrix's (including IBM - * brand of Cyrix CPUs). - */ - movl $0x69727943,R(cpu_vendor) # store vendor string - movl $0x736e4978,R(cpu_vendor+4) - movl $0x64616574,R(cpu_vendor+8) - jmp 3f - -trycpuid: /* Use the `cpuid' instruction. */ - xorl %eax,%eax - cpuid # cpuid 0 - movl %eax,R(cpu_high) # highest capability - movl %ebx,R(cpu_vendor) # store vendor string - movl %edx,R(cpu_vendor+4) - movl %ecx,R(cpu_vendor+8) - movb $0,R(cpu_vendor+12) - - movl $1,%eax - cpuid # cpuid 1 - movl %eax,R(cpu_id) # store cpu_id - movl %ebx,R(cpu_procinfo) # store cpu_procinfo - movl %edx,R(cpu_feature) # store cpu_feature - rorl $8,%eax # extract family type - andl $15,%eax - cmpl $5,%eax - jae 1f - - /* less than Pentium; must be 486 */ - movl $CPU_486,R(cpu) - jmp 3f -1: - /* a Pentium? */ - cmpl $5,%eax - jne 2f - movl $CPU_586,R(cpu) - jmp 3f -2: - /* Greater than Pentium...call it a Pentium Pro */ - movl $CPU_686,R(cpu) -3: - ret - - -/********************************************************************** - * - * Create the first page directory and its page tables. - * - */ - -create_pagetables: - -/* Find end of kernel image (rounded up to a page boundary). */ - movl $R(_end),%esi - -/* Include symbols, if any. */ - movl R(bootinfo+BI_ESYMTAB),%edi - testl %edi,%edi - je over_symalloc - movl %edi,%esi - movl $KERNBASE,%edi - addl %edi,R(bootinfo+BI_SYMTAB) - addl %edi,R(bootinfo+BI_ESYMTAB) -over_symalloc: - -/* If we are told where the end of the kernel space is, believe it. 
*/ - movl R(bootinfo+BI_KERNEND),%edi - testl %edi,%edi - je no_kernend - movl %edi,%esi -no_kernend: - - addl $PAGE_MASK,%esi - andl $~PAGE_MASK,%esi - movl %esi,R(KERNend) /* save end of kernel */ - movl %esi,R(physfree) /* next free page is at end of kernel */ - -/* Allocate Kernel Page Tables */ - ALLOCPAGES(NKPT) - movl %esi,R(KPTphys) - -/* Allocate Page Table Directory */ -#ifdef PAE - /* XXX only need 32 bytes (easier for now) */ - ALLOCPAGES(1) - movl %esi,R(IdlePDPT) -#endif - ALLOCPAGES(NPGPTD) - movl %esi,R(IdlePTD) - -/* Allocate UPAGES */ - ALLOCPAGES(UAREA_PAGES) - movl %esi,R(p0upa) - addl $KERNBASE, %esi - movl %esi, R(proc0uarea) - - ALLOCPAGES(KSTACK_PAGES) - movl %esi,R(p0kpa) - addl $KERNBASE, %esi - movl %esi, R(proc0kstack) - - ALLOCPAGES(1) /* vm86/bios stack */ - movl %esi,R(vm86phystk) - - ALLOCPAGES(3) /* pgtable + ext + IOPAGES */ - movl %esi,R(vm86pa) - addl $KERNBASE, %esi - movl %esi, R(vm86paddr) - -#ifdef SMP -/* Allocate cpu0's private data page */ - ALLOCPAGES(1) - movl %esi,R(cpu0pp) - addl $KERNBASE, %esi - movl %esi, R(cpu0prvpage) /* relocated to KVM space */ - -/* Allocate SMP page table page */ - ALLOCPAGES(1) - movl %esi,R(SMPptpa) - addl $KERNBASE, %esi - movl %esi, R(SMPpt) /* relocated to KVM space */ -#endif /* SMP */ - -/* Map read-only from zero to the end of the kernel text section */ - xorl %eax, %eax - xorl %edx,%edx - movl $R(etext),%ecx - addl $PAGE_MASK,%ecx - shrl $PAGE_SHIFT,%ecx - fillkptphys(%edx) - -/* Map read-write, data, bss and symbols */ - movl $R(etext),%eax - addl $PAGE_MASK, %eax - andl $~PAGE_MASK, %eax - movl $PG_RW,%edx - movl R(KERNend),%ecx - subl %eax,%ecx - shrl $PAGE_SHIFT,%ecx - fillkptphys(%edx) - -/* Map page directory. */ -#ifdef PAE - movl R(IdlePDPT), %eax - movl $1, %ecx - fillkptphys($PG_RW) -#endif - - movl R(IdlePTD), %eax - movl $NPGPTD, %ecx - fillkptphys($PG_RW) - -/* Map proc0's UPAGES in the physical way ... 
*/ - movl R(p0upa), %eax - movl $(UAREA_PAGES), %ecx - fillkptphys($PG_RW) - -/* Map proc0's KSTACK in the physical way ... */ - movl R(p0kpa), %eax - movl $(KSTACK_PAGES), %ecx - fillkptphys($PG_RW) - -/* Map ISA hole */ - movl $ISA_HOLE_START, %eax - movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx - fillkptphys($PG_RW) - -/* Map space for the vm86 region */ - movl R(vm86phystk), %eax - movl $4, %ecx - fillkptphys($PG_RW) - -/* Map page 0 into the vm86 page table */ - movl $0, %eax - movl $0, %ebx - movl $1, %ecx - fillkpt(R(vm86pa), $PG_RW|PG_U) - -/* ...likewise for the ISA hole */ - movl $ISA_HOLE_START, %eax - movl $ISA_HOLE_START>>PAGE_SHIFT, %ebx - movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx - fillkpt(R(vm86pa), $PG_RW|PG_U) - -#ifdef SMP -/* Map cpu0's private page into global kmem (4K @ cpu0prvpage) */ - movl R(cpu0pp), %eax - movl $1, %ecx - fillkptphys($PG_RW) - -/* Map SMP page table page into global kmem FWIW */ - movl R(SMPptpa), %eax - movl $1, %ecx - fillkptphys($PG_RW) - -/* Map the private page into the SMP page table */ - movl R(cpu0pp), %eax - movl $0, %ebx /* pte offset = 0 */ - movl $1, %ecx /* one private page coming right up */ - fillkpt(R(SMPptpa), $PG_RW) - -/* ... and put the page table table in the pde. */ - movl R(SMPptpa), %eax - movl $MPPTDI, %ebx - movl $1, %ecx - fillkpt(R(IdlePTD), $PG_RW) - -/* Fakeup VA for the local apic to allow early traps. 
*/ - ALLOCPAGES(1) - movl %esi, %eax - movl $(NPTEPG-1), %ebx /* pte offset = NTEPG-1 */ - movl $1, %ecx /* one private pt coming right up */ - fillkpt(R(SMPptpa), $PG_RW) -#endif /* SMP */ - -/* install a pde for temporary double map of bottom of VA */ - movl R(KPTphys), %eax - xorl %ebx, %ebx - movl $NKPT, %ecx - fillkpt(R(IdlePTD), $PG_RW) - -/* install pde's for pt's */ - movl R(KPTphys), %eax - movl $KPTDI, %ebx - movl $NKPT, %ecx - fillkpt(R(IdlePTD), $PG_RW) - -/* install a pde recursively mapping page directory as a page table */ - movl R(IdlePTD), %eax - movl $PTDPTDI, %ebx - movl $NPGPTD,%ecx - fillkpt(R(IdlePTD), $PG_RW) - -#ifdef PAE - movl R(IdlePTD), %eax - xorl %ebx, %ebx - movl $NPGPTD, %ecx - fillkpt(R(IdlePDPT), $0x0) -#endif - - ret + .bss + ALIGN_DATA /* just to be sure */ + .space 0x1000 /* space for bootstack - temporary stack */ +bootstack: diff --git a/sys/amd64/amd64/locore.s b/sys/amd64/amd64/locore.s index 8115e23..65b01ac 100644 --- a/sys/amd64/amd64/locore.s +++ b/sys/amd64/amd64/locore.s @@ -1,10 +1,7 @@ /*- - * Copyright (c) 1990 The Regents of the University of California. + * Copyright (c) 2003 Peter Wemm <peter@FreeBSD.org> * All rights reserved. * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -13,18 +10,11 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) @@ -33,25 +23,10 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 * $FreeBSD$ - * - * originally from: locore.s, by William F. Jolitz - * - * Substantially rewritten by David Greenman, Rod Grimes, - * Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp - * and many others. */ -#include "opt_bootp.h" -#include "opt_compat.h" -#include "opt_nfsroot.h" - -#include <sys/syscall.h> -#include <sys/reboot.h> - #include <machine/asmacros.h> -#include <machine/cputypes.h> #include <machine/psl.h> #include <machine/pmap.h> #include <machine/specialreg.h> @@ -59,14 +34,6 @@ #include "assym.s" /* - * XXX - * - * Note: This version greatly munged to avoid various assembler errors - * that may be fixed in newer versions of gas. Perhaps newer versions - * will have more pleasant appearance. - */ - -/* * PTmap is recursive pagemap at top of virtual address space. * Within PTmap, the page directory can be found (third indirection). 
*/ @@ -75,837 +42,51 @@ .set PTD,PTmap + (PTDPTDI * PAGE_SIZE) .set PTDpde,PTD + (PTDPTDI * PDESIZE) -#ifdef SMP -/* - * Define layout of per-cpu address space. - * This is "constructed" in locore.s on the BSP and in mp_machdep.c - * for each AP. DO NOT REORDER THESE WITHOUT UPDATING THE REST! - */ - .globl SMP_prvspace, lapic - .set SMP_prvspace,(MPPTDI << PDRSHIFT) - .set lapic,SMP_prvspace + (NPTEPG-1) * PAGE_SIZE -#endif /* SMP */ - /* * Compiled KERNBASE location */ .globl kernbase .set kernbase,KERNBASE -/* - * Globals - */ - .data - ALIGN_DATA /* just to be sure */ - - .globl HIDENAME(tmpstk) - .space 0x2000 /* space for tmpstk - temporary stack */ -HIDENAME(tmpstk): - - .globl bootinfo -bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */ - - .globl KERNend -KERNend: .long 0 /* phys addr end of kernel (just after bss) */ -physfree: .long 0 /* phys addr of next free page */ - -#ifdef SMP - .globl cpu0prvpage -cpu0pp: .long 0 /* phys addr cpu0 private pg */ -cpu0prvpage: .long 0 /* relocated version */ - - .globl SMPpt -SMPptpa: .long 0 /* phys addr SMP page table */ -SMPpt: .long 0 /* relocated version */ -#endif /* SMP */ - - .globl IdlePTD -IdlePTD: .long 0 /* phys addr of kernel PTD */ - -#ifdef PAE - .globl IdlePDPT -IdlePDPT: .long 0 /* phys addr of kernel PDPT */ -#endif - -#ifdef SMP - .globl KPTphys -#endif -KPTphys: .long 0 /* phys addr of kernel page tables */ - - .globl proc0uarea, proc0kstack -proc0uarea: .long 0 /* address of proc 0 uarea space */ -proc0kstack: .long 0 /* address of proc 0 kstack space */ -p0upa: .long 0 /* phys addr of proc0's UAREA */ -p0kpa: .long 0 /* phys addr of proc0's STACK */ - -vm86phystk: .long 0 /* PA of vm86/bios stack */ - - .globl vm86paddr, vm86pa -vm86paddr: .long 0 /* address of vm86 region */ -vm86pa: .long 0 /* phys addr of vm86 region */ - -#ifdef PC98 - .globl pc98_system_parameter -pc98_system_parameter: - .space 0x240 -#endif - 
-/********************************************************************** - * - * Some handy macros - * - */ - -#define R(foo) ((foo)-KERNBASE) - -#define ALLOCPAGES(foo) \ - movl R(physfree), %esi ; \ - movl $((foo)*PAGE_SIZE), %eax ; \ - addl %esi, %eax ; \ - movl %eax, R(physfree) ; \ - movl %esi, %edi ; \ - movl $((foo)*PAGE_SIZE),%ecx ; \ - xorl %eax,%eax ; \ - cld ; \ - rep ; \ - stosb - -/* - * fillkpt - * eax = page frame address - * ebx = index into page table - * ecx = how many pages to map - * base = base address of page dir/table - * prot = protection bits - */ -#define fillkpt(base, prot) \ - shll $PTESHIFT,%ebx ; \ - addl base,%ebx ; \ - orl $PG_V,%eax ; \ - orl prot,%eax ; \ -1: movl %eax,(%ebx) ; \ - addl $PAGE_SIZE,%eax ; /* increment physical address */ \ - addl $PTESIZE,%ebx ; /* next pte */ \ - loop 1b - -/* - * fillkptphys(prot) - * eax = physical address - * ecx = how many pages to map - * prot = protection bits - */ -#define fillkptphys(prot) \ - movl %eax, %ebx ; \ - shrl $PAGE_SHIFT, %ebx ; \ - fillkpt(R(KPTphys), prot) - .text /********************************************************************** * - * This is where the bootblocks start us, set the ball rolling... + * This is where the loader trampoline start us, set the ball rolling... + * + * We are called with the stack looking like this: + * 0(%rsp) = 32 bit return address (cannot be used) + * 4(%rsp) = 32 bit modulep + * 8(%rsp) = 32 bit kernend * + * We are already in long mode, on a 64 bit %cs and running at KERNBASE. */ NON_GPROF_ENTRY(btext) -#ifdef PC98 - /* save SYSTEM PARAMETER for resume (NS/T or other) */ - movl $0xa1400,%esi - movl $R(pc98_system_parameter),%edi - movl $0x0240,%ecx - cld - rep - movsb -#else /* IBM-PC */ -/* Tell the bios to warmboot next time */ + /* Tell the bios to warmboot next time */ movw $0x1234,0x472 -#endif /* PC98 */ - -/* Set up a real frame in case the double return in newboot is executed. 
*/ - pushl %ebp - movl %esp, %ebp - -/* Don't trust what the BIOS gives for eflags. */ - pushl $PSL_KERNEL - popfl - -/* - * Don't trust what the BIOS gives for %fs and %gs. Trust the bootstrap - * to set %cs, %ds, %es and %ss. - */ - mov %ds, %ax - mov %ax, %fs - mov %ax, %gs - - call recover_bootinfo - -/* Get onto a stack that we can trust. */ -/* - * XXX this step is delayed in case recover_bootinfo needs to return via - * the old stack, but it need not be, since recover_bootinfo actually - * returns via the old frame. - */ - movl $R(HIDENAME(tmpstk)),%esp - -#ifdef PC98 - /* pc98_machine_type & M_EPSON_PC98 */ - testb $0x02,R(pc98_system_parameter)+220 - jz 3f - /* epson_machine_id <= 0x0b */ - cmpb $0x0b,R(pc98_system_parameter)+224 - ja 3f - - /* count up memory */ - movl $0x100000,%eax /* next, talley remaining memory */ - movl $0xFFF-0x100,%ecx -1: movl 0(%eax),%ebx /* save location to check */ - movl $0xa55a5aa5,0(%eax) /* write test pattern */ - cmpl $0xa55a5aa5,0(%eax) /* does not check yet for rollover */ - jne 2f - movl %ebx,0(%eax) /* restore memory */ - addl $PAGE_SIZE,%eax - loop 1b -2: subl $0x100000,%eax - shrl $17,%eax - movb %al,R(pc98_system_parameter)+1 -3: - - movw R(pc98_system_parameter+0x86),%ax - movw %ax,R(cpu_id) -#endif - - call identify_cpu -/* clear bss */ -/* - * XXX this should be done a little earlier. - * - * XXX we don't check that there is memory for our bss and page tables - * before using it. - * - * XXX the boot program somewhat bogusly clears the bss. We still have - * to do it in case we were unzipped by kzipboot. Then the boot program - * only clears kzipboot's bss. - * - * XXX the gdt and idt are still somewhere in the boot program. We - * depend on the convention that the boot program is below 1MB and we - * are above 1MB to keep the gdt and idt away from the bss and page - * tables. 
- */ - movl $R(end),%ecx - movl $R(edata),%edi - subl %edi,%ecx - xorl %eax,%eax - cld - rep - stosb - - call create_pagetables - -/* - * If the CPU has support for VME, turn it on. - */ - testl $CPUID_VME, R(cpu_feature) - jz 1f - movl %cr4, %eax - orl $CR4_VME, %eax - movl %eax, %cr4 -1: - -/* Now enable paging */ -#ifdef PAE - movl R(IdlePDPT), %eax - movl %eax, %cr3 - movl %cr4, %eax - orl $CR4_PAE, %eax - movl %eax, %cr4 -#else - movl R(IdlePTD), %eax - movl %eax,%cr3 /* load ptd addr into mmu */ -#endif - movl %cr0,%eax /* get control word */ - orl $CR0_PE|CR0_PG,%eax /* enable paging */ - movl %eax,%cr0 /* and let's page NOW! */ + /* Don't trust what the loader gives for rflags. */ + pushq $PSL_KERNEL + popfq - pushl $begin /* jump to high virtualized address */ - ret + /* Find the metadata pointers before we lose them */ + movq %rsp, %rbp + xorq %rax, %rax + movl 4(%rbp),%eax /* modulep */ + movq %rax,modulep + movl 8(%rbp),%eax /* kernend */ + movq %rax,physfree -/* now running relocated at KERNBASE where the system is linked to run */ -begin: - /* set up bootstrap stack */ - movl proc0kstack,%eax /* location of in-kernel stack */ - /* bootstrap stack end location */ - leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp - - xorl %ebp,%ebp /* mark end of frames */ - -#ifdef PAE - movl IdlePDPT,%esi -#else - movl IdlePTD,%esi -#endif - movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) - - pushl physfree /* value of first for init386(first) */ - call init386 /* wire 386 chip for unix operation */ - - /* - * Clean up the stack in a way that db_numargs() understands, so - * that backtraces in ddb don't underrun the stack. Traps for - * inaccessible memory are more fatal than usual this early. - */ - addl $4,%esp + /* Get onto a stack that we can trust - there is no going back now. 
*/ + movq $bootstack,%rsp + xorq %rbp, %rbp + call hammer_time /* set up cpu for unix operation */ call mi_startup /* autoconfiguration, mountroot etc */ - /* NOTREACHED */ - addl $0,%esp /* for db_numargs() again */ - -/* - * Signal trampoline, copied to top of user stack - */ -NON_GPROF_ENTRY(sigcode) - calll *SIGF_HANDLER(%esp) - leal SIGF_UC(%esp),%eax /* get ucontext */ - pushl %eax - testl $PSL_VM,UC_EFLAGS(%eax) - jne 1f - movl UC_GS(%eax),%gs /* restore %gs */ -1: - movl $SYS_sigreturn,%eax - pushl %eax /* junk to fake return addr. */ - int $0x80 /* enter kernel with args */ - /* on stack */ -1: - jmp 1b - -#ifdef COMPAT_FREEBSD4 - ALIGN_TEXT -freebsd4_sigcode: - calll *SIGF_HANDLER(%esp) - leal SIGF_UC4(%esp),%eax /* get ucontext */ - pushl %eax - testl $PSL_VM,UC4_EFLAGS(%eax) - jne 1f - movl UC4_GS(%eax),%gs /* restore %gs */ -1: - movl $344,%eax /* 4.x SYS_sigreturn */ - pushl %eax /* junk to fake return addr. */ - int $0x80 /* enter kernel with args */ - /* on stack */ -1: - jmp 1b -#endif - -#ifdef COMPAT_43 - ALIGN_TEXT -osigcode: - call *SIGF_HANDLER(%esp) /* call signal handler */ - lea SIGF_SC(%esp),%eax /* get sigcontext */ - pushl %eax - testl $PSL_VM,SC_PS(%eax) - jne 9f - movl SC_GS(%eax),%gs /* restore %gs */ -9: - movl $103,%eax /* 3.x SYS_sigreturn */ - pushl %eax /* junk to fake return addr. */ - int $0x80 /* enter kernel with args */ -0: jmp 0b -#endif /* COMPAT_43 */ +0: hlt + jmp 0b - ALIGN_TEXT -esigcode: - - .data - .globl szsigcode -szsigcode: - .long esigcode-sigcode -#ifdef COMPAT_FREEBSD4 - .globl szfreebsd4_sigcode -szfreebsd4_sigcode: - .long esigcode-freebsd4_sigcode -#endif -#ifdef COMPAT_43 - .globl szosigcode -szosigcode: - .long esigcode-osigcode -#endif - .text - -/********************************************************************** - * - * Recover the bootinfo passed to us from the boot program - * - */ -recover_bootinfo: - /* - * This code is called in different ways depending on what loaded - * and started the kernel. 
This is used to detect how we get the - * arguments from the other code and what we do with them. - * - * Old disk boot blocks: - * (*btext)(howto, bootdev, cyloffset, esym); - * [return address == 0, and can NOT be returned to] - * [cyloffset was not supported by the FreeBSD boot code - * and always passed in as 0] - * [esym is also known as total in the boot code, and - * was never properly supported by the FreeBSD boot code] - * - * Old diskless netboot code: - * (*btext)(0,0,0,0,&nfsdiskless,0,0,0); - * [return address != 0, and can NOT be returned to] - * If we are being booted by this code it will NOT work, - * so we are just going to halt if we find this case. - * - * New uniform boot code: - * (*btext)(howto, bootdev, 0, 0, 0, &bootinfo) - * [return address != 0, and can be returned to] - * - * There may seem to be a lot of wasted arguments in here, but - * that is so the newer boot code can still load very old kernels - * and old boot code can load new kernels. - */ - - /* - * The old style disk boot blocks fake a frame on the stack and - * did an lret to get here. The frame on the stack has a return - * address of 0. - */ - cmpl $0,4(%ebp) - je olddiskboot - - /* - * We have some form of return address, so this is either the - * old diskless netboot code, or the new uniform code. That can - * be detected by looking at the 5th argument, if it is 0 - * we are being booted by the new uniform boot code. - */ - cmpl $0,24(%ebp) - je newboot - - /* - * Seems we have been loaded by the old diskless boot code, we - * don't stand a chance of running as the diskless structure - * changed considerably between the two, so just halt. - */ - hlt - - /* - * We have been loaded by the new uniform boot code. 
- * Let's check the bootinfo version, and if we do not understand - * it we return to the loader with a status of 1 to indicate this error - */ -newboot: - movl 28(%ebp),%ebx /* &bootinfo.version */ - movl BI_VERSION(%ebx),%eax - cmpl $1,%eax /* We only understand version 1 */ - je 1f - movl $1,%eax /* Return status */ - leave - /* - * XXX this returns to our caller's caller (as is required) since - * we didn't set up a frame and our caller did. - */ - ret - -1: - /* - * If we have a kernelname copy it in - */ - movl BI_KERNELNAME(%ebx),%esi - cmpl $0,%esi - je 2f /* No kernelname */ - movl $MAXPATHLEN,%ecx /* Brute force!!! */ - movl $R(kernelname),%edi - cmpb $'/',(%esi) /* Make sure it starts with a slash */ - je 1f - movb $'/',(%edi) - incl %edi - decl %ecx -1: - cld - rep - movsb - -2: - /* - * Determine the size of the boot loader's copy of the bootinfo - * struct. This is impossible to do properly because old versions - * of the struct don't contain a size field and there are 2 old - * versions with the same version number. - */ - movl $BI_ENDCOMMON,%ecx /* prepare for sizeless version */ - testl $RB_BOOTINFO,8(%ebp) /* bi_size (and bootinfo) valid? */ - je got_bi_size /* no, sizeless version */ - movl BI_SIZE(%ebx),%ecx -got_bi_size: - - /* - * Copy the common part of the bootinfo struct - */ - movl %ebx,%esi - movl $R(bootinfo),%edi - cmpl $BOOTINFO_SIZE,%ecx - jbe got_common_bi_size - movl $BOOTINFO_SIZE,%ecx -got_common_bi_size: - cld - rep - movsb - -#ifdef NFS_ROOT -#ifndef BOOTP_NFSV3 - /* - * If we have a nfs_diskless structure copy it in - */ - movl BI_NFS_DISKLESS(%ebx),%esi - cmpl $0,%esi - je olddiskboot - movl $R(nfs_diskless),%edi - movl $NFSDISKLESS_SIZE,%ecx - cld - rep - movsb - movl $R(nfs_diskless_valid),%edi - movl $1,(%edi) -#endif -#endif - - /* - * The old style disk boot. 
- * (*btext)(howto, bootdev, cyloffset, esym); - * Note that the newer boot code just falls into here to pick - * up howto and bootdev, cyloffset and esym are no longer used - */ -olddiskboot: - movl 8(%ebp),%eax - movl %eax,R(boothowto) - movl 12(%ebp),%eax - movl %eax,R(bootdev) - - ret - - -/********************************************************************** - * - * Identify the CPU and initialize anything special about it - * - */ -identify_cpu: - - /* Try to toggle alignment check flag; does not exist on 386. */ - pushfl - popl %eax - movl %eax,%ecx - orl $PSL_AC,%eax - pushl %eax - popfl - pushfl - popl %eax - xorl %ecx,%eax - andl $PSL_AC,%eax - pushl %ecx - popfl - - testl %eax,%eax - jnz try486 - - /* NexGen CPU does not have aligment check flag. */ - pushfl - movl $0x5555, %eax - xorl %edx, %edx - movl $2, %ecx - clc - divl %ecx - jz trynexgen - popfl - movl $CPU_386,R(cpu) - jmp 3f - -trynexgen: - popfl - movl $CPU_NX586,R(cpu) - movl $0x4778654e,R(cpu_vendor) # store vendor string - movl $0x72446e65,R(cpu_vendor+4) - movl $0x6e657669,R(cpu_vendor+8) - movl $0,R(cpu_vendor+12) - jmp 3f - -try486: /* Try to toggle identification flag; does not exist on early 486s. */ - pushfl - popl %eax - movl %eax,%ecx - xorl $PSL_ID,%eax - pushl %eax - popfl - pushfl - popl %eax - xorl %ecx,%eax - andl $PSL_ID,%eax - pushl %ecx - popfl - - testl %eax,%eax - jnz trycpuid - movl $CPU_486,R(cpu) - - /* - * Check Cyrix CPU - * Cyrix CPUs do not change the undefined flags following - * execution of the divide instruction which divides 5 by 2. - * - * Note: CPUID is enabled on M2, so it passes another way. - */ - pushfl - movl $0x5555, %eax - xorl %edx, %edx - movl $2, %ecx - clc - divl %ecx - jnc trycyrix - popfl - jmp 3f /* You may use Intel CPU. */ - -trycyrix: - popfl - /* - * IBM Bluelighting CPU also doesn't change the undefined flags. 
- * Because IBM doesn't disclose the information for Bluelighting - * CPU, we couldn't distinguish it from Cyrix's (including IBM - * brand of Cyrix CPUs). - */ - movl $0x69727943,R(cpu_vendor) # store vendor string - movl $0x736e4978,R(cpu_vendor+4) - movl $0x64616574,R(cpu_vendor+8) - jmp 3f - -trycpuid: /* Use the `cpuid' instruction. */ - xorl %eax,%eax - cpuid # cpuid 0 - movl %eax,R(cpu_high) # highest capability - movl %ebx,R(cpu_vendor) # store vendor string - movl %edx,R(cpu_vendor+4) - movl %ecx,R(cpu_vendor+8) - movb $0,R(cpu_vendor+12) - - movl $1,%eax - cpuid # cpuid 1 - movl %eax,R(cpu_id) # store cpu_id - movl %ebx,R(cpu_procinfo) # store cpu_procinfo - movl %edx,R(cpu_feature) # store cpu_feature - rorl $8,%eax # extract family type - andl $15,%eax - cmpl $5,%eax - jae 1f - - /* less than Pentium; must be 486 */ - movl $CPU_486,R(cpu) - jmp 3f -1: - /* a Pentium? */ - cmpl $5,%eax - jne 2f - movl $CPU_586,R(cpu) - jmp 3f -2: - /* Greater than Pentium...call it a Pentium Pro */ - movl $CPU_686,R(cpu) -3: - ret - - -/********************************************************************** - * - * Create the first page directory and its page tables. - * - */ - -create_pagetables: - -/* Find end of kernel image (rounded up to a page boundary). */ - movl $R(_end),%esi - -/* Include symbols, if any. */ - movl R(bootinfo+BI_ESYMTAB),%edi - testl %edi,%edi - je over_symalloc - movl %edi,%esi - movl $KERNBASE,%edi - addl %edi,R(bootinfo+BI_SYMTAB) - addl %edi,R(bootinfo+BI_ESYMTAB) -over_symalloc: - -/* If we are told where the end of the kernel space is, believe it. 
*/ - movl R(bootinfo+BI_KERNEND),%edi - testl %edi,%edi - je no_kernend - movl %edi,%esi -no_kernend: - - addl $PAGE_MASK,%esi - andl $~PAGE_MASK,%esi - movl %esi,R(KERNend) /* save end of kernel */ - movl %esi,R(physfree) /* next free page is at end of kernel */ - -/* Allocate Kernel Page Tables */ - ALLOCPAGES(NKPT) - movl %esi,R(KPTphys) - -/* Allocate Page Table Directory */ -#ifdef PAE - /* XXX only need 32 bytes (easier for now) */ - ALLOCPAGES(1) - movl %esi,R(IdlePDPT) -#endif - ALLOCPAGES(NPGPTD) - movl %esi,R(IdlePTD) - -/* Allocate UPAGES */ - ALLOCPAGES(UAREA_PAGES) - movl %esi,R(p0upa) - addl $KERNBASE, %esi - movl %esi, R(proc0uarea) - - ALLOCPAGES(KSTACK_PAGES) - movl %esi,R(p0kpa) - addl $KERNBASE, %esi - movl %esi, R(proc0kstack) - - ALLOCPAGES(1) /* vm86/bios stack */ - movl %esi,R(vm86phystk) - - ALLOCPAGES(3) /* pgtable + ext + IOPAGES */ - movl %esi,R(vm86pa) - addl $KERNBASE, %esi - movl %esi, R(vm86paddr) - -#ifdef SMP -/* Allocate cpu0's private data page */ - ALLOCPAGES(1) - movl %esi,R(cpu0pp) - addl $KERNBASE, %esi - movl %esi, R(cpu0prvpage) /* relocated to KVM space */ - -/* Allocate SMP page table page */ - ALLOCPAGES(1) - movl %esi,R(SMPptpa) - addl $KERNBASE, %esi - movl %esi, R(SMPpt) /* relocated to KVM space */ -#endif /* SMP */ - -/* Map read-only from zero to the end of the kernel text section */ - xorl %eax, %eax - xorl %edx,%edx - movl $R(etext),%ecx - addl $PAGE_MASK,%ecx - shrl $PAGE_SHIFT,%ecx - fillkptphys(%edx) - -/* Map read-write, data, bss and symbols */ - movl $R(etext),%eax - addl $PAGE_MASK, %eax - andl $~PAGE_MASK, %eax - movl $PG_RW,%edx - movl R(KERNend),%ecx - subl %eax,%ecx - shrl $PAGE_SHIFT,%ecx - fillkptphys(%edx) - -/* Map page directory. */ -#ifdef PAE - movl R(IdlePDPT), %eax - movl $1, %ecx - fillkptphys($PG_RW) -#endif - - movl R(IdlePTD), %eax - movl $NPGPTD, %ecx - fillkptphys($PG_RW) - -/* Map proc0's UPAGES in the physical way ... 
*/ - movl R(p0upa), %eax - movl $(UAREA_PAGES), %ecx - fillkptphys($PG_RW) - -/* Map proc0's KSTACK in the physical way ... */ - movl R(p0kpa), %eax - movl $(KSTACK_PAGES), %ecx - fillkptphys($PG_RW) - -/* Map ISA hole */ - movl $ISA_HOLE_START, %eax - movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx - fillkptphys($PG_RW) - -/* Map space for the vm86 region */ - movl R(vm86phystk), %eax - movl $4, %ecx - fillkptphys($PG_RW) - -/* Map page 0 into the vm86 page table */ - movl $0, %eax - movl $0, %ebx - movl $1, %ecx - fillkpt(R(vm86pa), $PG_RW|PG_U) - -/* ...likewise for the ISA hole */ - movl $ISA_HOLE_START, %eax - movl $ISA_HOLE_START>>PAGE_SHIFT, %ebx - movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx - fillkpt(R(vm86pa), $PG_RW|PG_U) - -#ifdef SMP -/* Map cpu0's private page into global kmem (4K @ cpu0prvpage) */ - movl R(cpu0pp), %eax - movl $1, %ecx - fillkptphys($PG_RW) - -/* Map SMP page table page into global kmem FWIW */ - movl R(SMPptpa), %eax - movl $1, %ecx - fillkptphys($PG_RW) - -/* Map the private page into the SMP page table */ - movl R(cpu0pp), %eax - movl $0, %ebx /* pte offset = 0 */ - movl $1, %ecx /* one private page coming right up */ - fillkpt(R(SMPptpa), $PG_RW) - -/* ... and put the page table table in the pde. */ - movl R(SMPptpa), %eax - movl $MPPTDI, %ebx - movl $1, %ecx - fillkpt(R(IdlePTD), $PG_RW) - -/* Fakeup VA for the local apic to allow early traps. 
*/ - ALLOCPAGES(1) - movl %esi, %eax - movl $(NPTEPG-1), %ebx /* pte offset = NTEPG-1 */ - movl $1, %ecx /* one private pt coming right up */ - fillkpt(R(SMPptpa), $PG_RW) -#endif /* SMP */ - -/* install a pde for temporary double map of bottom of VA */ - movl R(KPTphys), %eax - xorl %ebx, %ebx - movl $NKPT, %ecx - fillkpt(R(IdlePTD), $PG_RW) - -/* install pde's for pt's */ - movl R(KPTphys), %eax - movl $KPTDI, %ebx - movl $NKPT, %ecx - fillkpt(R(IdlePTD), $PG_RW) - -/* install a pde recursively mapping page directory as a page table */ - movl R(IdlePTD), %eax - movl $PTDPTDI, %ebx - movl $NPGPTD,%ecx - fillkpt(R(IdlePTD), $PG_RW) - -#ifdef PAE - movl R(IdlePTD), %eax - xorl %ebx, %ebx - movl $NPGPTD, %ecx - fillkpt(R(IdlePDPT), $0x0) -#endif - - ret + .bss + ALIGN_DATA /* just to be sure */ + .space 0x1000 /* space for bootstack - temporary stack */ +bootstack: diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index e5377d4..51afb57 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -47,9 +47,7 @@ #include "opt_isa.h" #include "opt_maxmem.h" #include "opt_msgbuf.h" -#include "opt_npx.h" #include "opt_perfmon.h" -#include "opt_swtch.h" #include "opt_kstack_pages.h" #include <sys/param.h> @@ -100,104 +98,49 @@ #include <machine/reg.h> #include <machine/clock.h> #include <machine/specialreg.h> -#include <machine/bootinfo.h> #include <machine/md_var.h> -#include <machine/pc/bios.h> -#include <machine/pcb_ext.h> /* pcb.h included via sys/user.h */ #include <machine/proc.h> #ifdef PERFMON #include <machine/perfmon.h> #endif -#ifdef SMP -#include <machine/privatespace.h> -#include <machine/smp.h> -#endif +#include <machine/tss.h> -#include <i386/isa/icu.h> -#include <i386/isa/intr_machdep.h> +#include <amd64/isa/icu.h> +#include <amd64/isa/intr_machdep.h> #include <isa/rtc.h> -#include <machine/vm86.h> #include <sys/ptrace.h> #include <machine/sigframe.h> -extern void init386(int first); +extern void hammer_time(void); extern void 
dblfault_handler(void); extern void printcpuinfo(void); /* XXX header file */ -extern void finishidentcpu(void); +extern void identify_cpu(void); extern void panicifcpuunsupported(void); extern void initializecpu(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) -#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU) -#define CPU_ENABLE_SSE -#endif -#if defined(CPU_DISABLE_SSE) -#undef CPU_ENABLE_SSE -#endif - static void cpu_startup(void *); static void fpstate_drop(struct thread *td); static void get_fpcontext(struct thread *td, mcontext_t *mcp); static int set_fpcontext(struct thread *td, const mcontext_t *mcp); -#ifdef CPU_ENABLE_SSE -static void set_fpregs_xmm(struct save87 *, struct savexmm *); -static void fill_fpregs_xmm(struct savexmm *, struct save87 *); -#endif /* CPU_ENABLE_SSE */ SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) int _udatasel, _ucodesel; u_int atdevbase; -#if defined(SWTCH_OPTIM_STATS) -int stupid_switch; -SYSCTL_INT(_debug, OID_AUTO, stupid_switch, - CTLFLAG_RW, &stupid_switch, 0, ""); -int swtch_optim_stats; -SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats, - CTLFLAG_RW, &swtch_optim_stats, 0, ""); -int tlb_flush_count; -SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count, - CTLFLAG_RW, &tlb_flush_count, 0, ""); -int lazy_flush_count; -SYSCTL_INT(_debug, OID_AUTO, lazy_flush_count, - CTLFLAG_RW, &lazy_flush_count, 0, ""); -int lazy_flush_fixup; -SYSCTL_INT(_debug, OID_AUTO, lazy_flush_fixup, - CTLFLAG_RW, &lazy_flush_fixup, 0, ""); -#ifdef SMP -int lazy_flush_smpfixup; -SYSCTL_INT(_debug, OID_AUTO, lazy_flush_smpfixup, - CTLFLAG_RW, &lazy_flush_smpfixup, 0, ""); -int lazy_flush_smpipi; -SYSCTL_INT(_debug, OID_AUTO, lazy_flush_smpipi, - CTLFLAG_RW, &lazy_flush_smpipi, 0, ""); -int lazy_flush_smpbadcr3; -SYSCTL_INT(_debug, OID_AUTO, lazy_flush_smpbadcr3, - CTLFLAG_RW, &lazy_flush_smpbadcr3, 0, ""); -int lazy_flush_smpmiss; -SYSCTL_INT(_debug, OID_AUTO, 
lazy_flush_smpmiss, - CTLFLAG_RW, &lazy_flush_smpmiss, 0, ""); -#endif -#endif -#ifdef LAZY_SWITCH -int lazy_flush_enable = 1; -SYSCTL_INT(_debug, OID_AUTO, lazy_flush_enable, - CTLFLAG_RW, &lazy_flush_enable, 0, ""); -#endif +u_int64_t modulep; /* phys addr of metadata table */ +u_int64_t physfree; /* first free page after kernel */ +u_int64_t IdlePTD; /* phys addr of kernel PTD */ +u_int64_t IdlePDP; /* phys addr of kernel level 3 */ +u_int64_t IdlePML4; /* phys addr of kernel level 4 */ +struct user *proc0uarea; /* address of proc 0 uarea space */ +vm_offset_t proc0kstack; /* address of proc 0 kstack space */ int cold = 1; -#ifdef COMPAT_43 -static void osendsig(sig_t catcher, int sig, sigset_t *mask, u_long code); -#endif -#ifdef COMPAT_FREEBSD4 -static void freebsd4_sendsig(sig_t catcher, int sig, sigset_t *mask, - u_long code); -#endif - long Maxmem = 0; vm_paddr_t phys_avail[10]; @@ -208,9 +151,7 @@ vm_paddr_t phys_avail[10]; struct kva_md_info kmi; static struct trapframe proc0_tf; -#ifndef SMP static struct pcpu __pcpu; -#endif struct mtx icu_lock; @@ -260,10 +201,8 @@ cpu_startup(dummy) bufinit(); vm_pager_bufferinit(); -#ifndef SMP /* For SMP, we delay the cpu_setregs() until after SMP startup. */ cpu_setregs(); -#endif } /* @@ -276,256 +215,6 @@ cpu_startup(dummy) * frame pointer, it returns to the user * specified pc, psl. */ -#ifdef COMPAT_43 -static void -osendsig(catcher, sig, mask, code) - sig_t catcher; - int sig; - sigset_t *mask; - u_long code; -{ - struct osigframe sf, *fp; - struct proc *p; - struct thread *td; - struct sigacts *psp; - struct trapframe *regs; - int oonstack; - - td = curthread; - p = td->td_proc; - PROC_LOCK_ASSERT(p, MA_OWNED); - psp = p->p_sigacts; - regs = td->td_frame; - oonstack = sigonstack(regs->tf_esp); - - /* Allocate space for the signal handler context. 
*/ - if ((p->p_flag & P_ALTSTACK) && !oonstack && - SIGISMEMBER(psp->ps_sigonstack, sig)) { - fp = (struct osigframe *)(p->p_sigstk.ss_sp + - p->p_sigstk.ss_size - sizeof(struct osigframe)); -#if defined(COMPAT_43) || defined(COMPAT_SUNOS) - p->p_sigstk.ss_flags |= SS_ONSTACK; -#endif - } else - fp = (struct osigframe *)regs->tf_esp - 1; - PROC_UNLOCK(p); - - /* Translate the signal if appropriate. */ - if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) - sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; - - /* Build the argument list for the signal handler. */ - sf.sf_signum = sig; - sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; - PROC_LOCK(p); - if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { - /* Signal handler installed with SA_SIGINFO. */ - sf.sf_arg2 = (register_t)&fp->sf_siginfo; - sf.sf_siginfo.si_signo = sig; - sf.sf_siginfo.si_code = code; - sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; - } else { - /* Old FreeBSD-style arguments. */ - sf.sf_arg2 = code; - sf.sf_addr = regs->tf_err; - sf.sf_ahu.sf_handler = catcher; - } - PROC_UNLOCK(p); - - /* Save most if not all of trap frame. */ - sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; - sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; - sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; - sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; - sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; - sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; - sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; - sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; - sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; - sf.sf_siginfo.si_sc.sc_es = regs->tf_es; - sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; - sf.sf_siginfo.si_sc.sc_gs = rgs(); - sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; - - /* Build the signal context to be used by osigreturn(). */ - sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 
1 : 0; - SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); - sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; - sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; - sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; - sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; - sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; - sf.sf_siginfo.si_sc.sc_err = regs->tf_err; - - /* - * If we're a vm86 process, we want to save the segment registers. - * We also change eflags to be our emulated eflags, not the actual - * eflags. - */ - if (regs->tf_eflags & PSL_VM) { - /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ - struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; - struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; - - sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; - sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; - sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; - sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; - - if (vm86->vm86_has_vme == 0) - sf.sf_siginfo.si_sc.sc_ps = - (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | - (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); - - /* See sendsig() for comments. */ - tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); - } - - /* - * Copy the sigframe out to the user's stack. 
- */ - if (copyout(&sf, fp, sizeof(*fp)) != 0) { -#ifdef DEBUG - printf("process %ld has trashed its stack\n", (long)p->p_pid); -#endif - PROC_LOCK(p); - sigexit(td, SIGILL); - } - - regs->tf_esp = (int)fp; - regs->tf_eip = PS_STRINGS - szosigcode; - regs->tf_eflags &= ~PSL_T; - regs->tf_cs = _ucodesel; - regs->tf_ds = _udatasel; - regs->tf_es = _udatasel; - regs->tf_fs = _udatasel; - load_gs(_udatasel); - regs->tf_ss = _udatasel; - PROC_LOCK(p); -} -#endif /* COMPAT_43 */ - -#ifdef COMPAT_FREEBSD4 -static void -freebsd4_sendsig(catcher, sig, mask, code) - sig_t catcher; - int sig; - sigset_t *mask; - u_long code; -{ - struct sigframe4 sf, *sfp; - struct proc *p; - struct thread *td; - struct sigacts *psp; - struct trapframe *regs; - int oonstack; - - td = curthread; - p = td->td_proc; - PROC_LOCK_ASSERT(p, MA_OWNED); - psp = p->p_sigacts; - regs = td->td_frame; - oonstack = sigonstack(regs->tf_esp); - - /* Save user context. */ - bzero(&sf, sizeof(sf)); - sf.sf_uc.uc_sigmask = *mask; - sf.sf_uc.uc_stack = p->p_sigstk; - sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK) - ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; - sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; - sf.sf_uc.uc_mcontext.mc_gs = rgs(); - bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); - - /* Allocate space for the signal handler context. */ - if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && - SIGISMEMBER(psp->ps_sigonstack, sig)) { - sfp = (struct sigframe4 *)(p->p_sigstk.ss_sp + - p->p_sigstk.ss_size - sizeof(struct sigframe4)); -#if defined(COMPAT_43) || defined(COMPAT_SUNOS) - p->p_sigstk.ss_flags |= SS_ONSTACK; -#endif - } else - sfp = (struct sigframe4 *)regs->tf_esp - 1; - PROC_UNLOCK(p); - - /* Translate the signal if appropriate. */ - if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) - sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; - - /* Build the argument list for the signal handler. 
*/ - sf.sf_signum = sig; - sf.sf_ucontext = (register_t)&sfp->sf_uc; - PROC_LOCK(p); - if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { - /* Signal handler installed with SA_SIGINFO. */ - sf.sf_siginfo = (register_t)&sfp->sf_si; - sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; - - /* Fill in POSIX parts */ - sf.sf_si.si_signo = sig; - sf.sf_si.si_code = code; - sf.sf_si.si_addr = (void *)regs->tf_err; - } else { - /* Old FreeBSD-style arguments. */ - sf.sf_siginfo = code; - sf.sf_addr = regs->tf_err; - sf.sf_ahu.sf_handler = catcher; - } - PROC_UNLOCK(p); - - /* - * If we're a vm86 process, we want to save the segment registers. - * We also change eflags to be our emulated eflags, not the actual - * eflags. - */ - if (regs->tf_eflags & PSL_VM) { - struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; - struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; - - sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; - sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; - sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; - sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; - - if (vm86->vm86_has_vme == 0) - sf.sf_uc.uc_mcontext.mc_eflags = - (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | - (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); - - /* - * Clear PSL_NT to inhibit T_TSSFLT faults on return from - * syscalls made by the signal handler. This just avoids - * wasting time for our lazy fixup of such faults. PSL_NT - * does nothing in vm86 mode, but vm86 programs can set it - * almost legitimately in probes for old cpu types. - */ - tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); - } - - /* - * Copy the sigframe out to the user's stack. 
- */ - if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { -#ifdef DEBUG - printf("process %ld has trashed its stack\n", (long)p->p_pid); -#endif - PROC_LOCK(p); - sigexit(td, SIGILL); - } - - regs->tf_esp = (int)sfp; - regs->tf_eip = PS_STRINGS - szfreebsd4_sigcode; - regs->tf_eflags &= ~PSL_T; - regs->tf_cs = _ucodesel; - regs->tf_ds = _udatasel; - regs->tf_es = _udatasel; - regs->tf_fs = _udatasel; - regs->tf_ss = _udatasel; - PROC_LOCK(p); -} -#endif /* COMPAT_FREEBSD4 */ - void sendsig(catcher, sig, mask, code) sig_t catcher; @@ -545,20 +234,8 @@ sendsig(catcher, sig, mask, code) p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; -#ifdef COMPAT_FREEBSD4 - if (SIGISMEMBER(psp->ps_freebsd4, sig)) { - freebsd4_sendsig(catcher, sig, mask, code); - return; - } -#endif -#ifdef COMPAT_43 - if (SIGISMEMBER(psp->ps_osigset, sig)) { - osendsig(catcher, sig, mask, code); - return; - } -#endif regs = td->td_frame; - oonstack = sigonstack(regs->tf_esp); + oonstack = sigonstack(regs->tf_rsp); /* Save user context. */ bzero(&sf, sizeof(sf)); @@ -567,8 +244,7 @@ sendsig(catcher, sig, mask, code) sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; - sf.sf_uc.uc_mcontext.mc_gs = rgs(); - bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); + bcopy(regs, &sf.sf_uc.uc_mcontext.mc_r15, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext); fpstate_drop(td); @@ -582,9 +258,9 @@ sendsig(catcher, sig, mask, code) p->p_sigstk.ss_flags |= SS_ONSTACK; #endif } else - sp = (char *)regs->tf_esp - sizeof(struct sigframe); + sp = (char *)regs->tf_rsp - sizeof(struct sigframe); /* Align to 16 bytes. */ - sfp = (struct sigframe *)((unsigned int)sp & ~0xF); + sfp = (struct sigframe *)((unsigned long)sp & ~0xF); PROC_UNLOCK(p); /* Translate the signal if appropriate. 
*/ @@ -592,56 +268,27 @@ sendsig(catcher, sig, mask, code) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ - sf.sf_signum = sig; - sf.sf_ucontext = (register_t)&sfp->sf_uc; + regs->tf_rdi = sig; /* arg 1 in %rdi */ + regs->tf_rdx = (register_t)&sfp->sf_uc; /* arg 3 in %rdx */ PROC_LOCK(p); if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ - sf.sf_siginfo = (register_t)&sfp->sf_si; + regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */ sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = code; - sf.sf_si.si_addr = (void *)regs->tf_err; + regs->tf_rcx = regs->tf_err; /* arg 4 in %rcx */ } else { /* Old FreeBSD-style arguments. */ - sf.sf_siginfo = code; - sf.sf_addr = regs->tf_err; + regs->tf_rsi = code; /* arg 2 in %rsi */ + regs->tf_rcx = regs->tf_err; /* arg 4 in %rcx */ sf.sf_ahu.sf_handler = catcher; } PROC_UNLOCK(p); /* - * If we're a vm86 process, we want to save the segment registers. - * We also change eflags to be our emulated eflags, not the actual - * eflags. - */ - if (regs->tf_eflags & PSL_VM) { - struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; - struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; - - sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; - sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; - sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; - sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; - - if (vm86->vm86_has_vme == 0) - sf.sf_uc.uc_mcontext.mc_eflags = - (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | - (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); - - /* - * Clear PSL_NT to inhibit T_TSSFLT faults on return from - * syscalls made by the signal handler. This just avoids - * wasting time for our lazy fixup of such faults. PSL_NT - * does nothing in vm86 mode, but vm86 programs can set it - * almost legitimately in probes for old cpu types. 
- */ - tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); - } - - /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { @@ -652,14 +299,10 @@ sendsig(catcher, sig, mask, code) sigexit(td, SIGILL); } - regs->tf_esp = (int)sfp; - regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); - regs->tf_eflags &= ~PSL_T; + regs->tf_rsp = (long)sfp; + regs->tf_rip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); + regs->tf_rflags &= ~PSL_T; regs->tf_cs = _ucodesel; - regs->tf_ds = _udatasel; - regs->tf_es = _udatasel; - regs->tf_fs = _udatasel; - regs->tf_ss = _udatasel; PROC_LOCK(p); } @@ -674,232 +317,6 @@ sendsig(catcher, sig, mask, code) * * MPSAFE */ -#ifdef COMPAT_43 -int -osigreturn(td, uap) - struct thread *td; - struct osigreturn_args /* { - struct osigcontext *sigcntxp; - } */ *uap; -{ - struct osigcontext sc; - struct trapframe *regs; - struct osigcontext *scp; - struct proc *p = td->td_proc; - int eflags, error; - - regs = td->td_frame; - error = copyin(uap->sigcntxp, &sc, sizeof(sc)); - if (error != 0) - return (error); - scp = ≻ - eflags = scp->sc_ps; - if (eflags & PSL_VM) { - struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; - struct vm86_kernel *vm86; - - /* - * if pcb_ext == 0 or vm86_inited == 0, the user hasn't - * set up the vm86 area, and we can't enter vm86 mode. - */ - if (td->td_pcb->pcb_ext == 0) - return (EINVAL); - vm86 = &td->td_pcb->pcb_ext->ext_vm86; - if (vm86->vm86_inited == 0) - return (EINVAL); - - /* Go back to user mode if both flags are set. 
*/ - if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) - trapsignal(td, SIGBUS, 0); - - if (vm86->vm86_has_vme) { - eflags = (tf->tf_eflags & ~VME_USERCHANGE) | - (eflags & VME_USERCHANGE) | PSL_VM; - } else { - vm86->vm86_eflags = eflags; /* save VIF, VIP */ - eflags = (tf->tf_eflags & ~VM_USERCHANGE) | - (eflags & VM_USERCHANGE) | PSL_VM; - } - tf->tf_vm86_ds = scp->sc_ds; - tf->tf_vm86_es = scp->sc_es; - tf->tf_vm86_fs = scp->sc_fs; - tf->tf_vm86_gs = scp->sc_gs; - tf->tf_ds = _udatasel; - tf->tf_es = _udatasel; - tf->tf_fs = _udatasel; - } else { - /* - * Don't allow users to change privileged or reserved flags. - */ - /* - * XXX do allow users to change the privileged flag PSL_RF. - * The cpu sets PSL_RF in tf_eflags for faults. Debuggers - * should sometimes set it there too. tf_eflags is kept in - * the signal context during signal handling and there is no - * other place to remember it, so the PSL_RF bit may be - * corrupted by the signal handler without us knowing. - * Corruption of the PSL_RF bit at worst causes one more or - * one less debugger trap, so allowing it is fairly harmless. - */ - if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { - return (EINVAL); - } - - /* - * Don't allow users to load a valid privileged %cs. Let the - * hardware check for invalid selectors, excess privilege in - * other selectors, invalid %eip's and invalid %esp's. - */ - if (!CS_SECURE(scp->sc_cs)) { - trapsignal(td, SIGBUS, T_PROTFLT); - return (EINVAL); - } - regs->tf_ds = scp->sc_ds; - regs->tf_es = scp->sc_es; - regs->tf_fs = scp->sc_fs; - } - - /* Restore remaining registers. 
*/ - regs->tf_eax = scp->sc_eax; - regs->tf_ebx = scp->sc_ebx; - regs->tf_ecx = scp->sc_ecx; - regs->tf_edx = scp->sc_edx; - regs->tf_esi = scp->sc_esi; - regs->tf_edi = scp->sc_edi; - regs->tf_cs = scp->sc_cs; - regs->tf_ss = scp->sc_ss; - regs->tf_isp = scp->sc_isp; - regs->tf_ebp = scp->sc_fp; - regs->tf_esp = scp->sc_sp; - regs->tf_eip = scp->sc_pc; - regs->tf_eflags = eflags; - - PROC_LOCK(p); -#if defined(COMPAT_43) || defined(COMPAT_SUNOS) - if (scp->sc_onstack & 1) - p->p_sigstk.ss_flags |= SS_ONSTACK; - else - p->p_sigstk.ss_flags &= ~SS_ONSTACK; -#endif - SIGSETOLD(td->td_sigmask, scp->sc_mask); - SIG_CANTMASK(td->td_sigmask); - signotify(td); - PROC_UNLOCK(p); - return (EJUSTRETURN); -} -#endif /* COMPAT_43 */ - -#ifdef COMPAT_FREEBSD4 -/* - * MPSAFE - */ -int -freebsd4_sigreturn(td, uap) - struct thread *td; - struct freebsd4_sigreturn_args /* { - const ucontext4 *sigcntxp; - } */ *uap; -{ - struct ucontext4 uc; - struct proc *p = td->td_proc; - struct trapframe *regs; - const struct ucontext4 *ucp; - int cs, eflags, error; - - error = copyin(uap->sigcntxp, &uc, sizeof(uc)); - if (error != 0) - return (error); - ucp = &uc; - regs = td->td_frame; - eflags = ucp->uc_mcontext.mc_eflags; - if (eflags & PSL_VM) { - struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; - struct vm86_kernel *vm86; - - /* - * if pcb_ext == 0 or vm86_inited == 0, the user hasn't - * set up the vm86 area, and we can't enter vm86 mode. - */ - if (td->td_pcb->pcb_ext == 0) - return (EINVAL); - vm86 = &td->td_pcb->pcb_ext->ext_vm86; - if (vm86->vm86_inited == 0) - return (EINVAL); - - /* Go back to user mode if both flags are set. 
*/ - if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) - trapsignal(td, SIGBUS, 0); - - if (vm86->vm86_has_vme) { - eflags = (tf->tf_eflags & ~VME_USERCHANGE) | - (eflags & VME_USERCHANGE) | PSL_VM; - } else { - vm86->vm86_eflags = eflags; /* save VIF, VIP */ - eflags = (tf->tf_eflags & ~VM_USERCHANGE) | - (eflags & VM_USERCHANGE) | PSL_VM; - } - bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); - tf->tf_eflags = eflags; - tf->tf_vm86_ds = tf->tf_ds; - tf->tf_vm86_es = tf->tf_es; - tf->tf_vm86_fs = tf->tf_fs; - tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; - tf->tf_ds = _udatasel; - tf->tf_es = _udatasel; - tf->tf_fs = _udatasel; - } else { - /* - * Don't allow users to change privileged or reserved flags. - */ - /* - * XXX do allow users to change the privileged flag PSL_RF. - * The cpu sets PSL_RF in tf_eflags for faults. Debuggers - * should sometimes set it there too. tf_eflags is kept in - * the signal context during signal handling and there is no - * other place to remember it, so the PSL_RF bit may be - * corrupted by the signal handler without us knowing. - * Corruption of the PSL_RF bit at worst causes one more or - * one less debugger trap, so allowing it is fairly harmless. - */ - if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { - printf("freebsd4_sigreturn: eflags = 0x%x\n", eflags); - return (EINVAL); - } - - /* - * Don't allow users to load a valid privileged %cs. Let the - * hardware check for invalid selectors, excess privilege in - * other selectors, invalid %eip's and invalid %esp's. 
- */ - cs = ucp->uc_mcontext.mc_cs; - if (!CS_SECURE(cs)) { - printf("freebsd4_sigreturn: cs = 0x%x\n", cs); - trapsignal(td, SIGBUS, T_PROTFLT); - return (EINVAL); - } - - bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); - } - - PROC_LOCK(p); -#if defined(COMPAT_43) || defined(COMPAT_SUNOS) - if (ucp->uc_mcontext.mc_onstack & 1) - p->p_sigstk.ss_flags |= SS_ONSTACK; - else - p->p_sigstk.ss_flags &= ~SS_ONSTACK; -#endif - - td->td_sigmask = ucp->uc_sigmask; - SIG_CANTMASK(td->td_sigmask); - signotify(td); - PROC_UNLOCK(p); - return (EJUSTRETURN); -} -#endif /* COMPAT_FREEBSD4 */ - -/* - * MPSAFE - */ int sigreturn(td, uap) struct thread *td; @@ -911,86 +328,50 @@ sigreturn(td, uap) struct proc *p = td->td_proc; struct trapframe *regs; const ucontext_t *ucp; - int cs, eflags, error, ret; + long rflags; + int cs, error, ret; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; - eflags = ucp->uc_mcontext.mc_eflags; - if (eflags & PSL_VM) { - struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; - struct vm86_kernel *vm86; - - /* - * if pcb_ext == 0 or vm86_inited == 0, the user hasn't - * set up the vm86 area, and we can't enter vm86 mode. - */ - if (td->td_pcb->pcb_ext == 0) - return (EINVAL); - vm86 = &td->td_pcb->pcb_ext->ext_vm86; - if (vm86->vm86_inited == 0) - return (EINVAL); - - /* Go back to user mode if both flags are set. 
*/ - if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) - trapsignal(td, SIGBUS, 0); - - if (vm86->vm86_has_vme) { - eflags = (tf->tf_eflags & ~VME_USERCHANGE) | - (eflags & VME_USERCHANGE) | PSL_VM; - } else { - vm86->vm86_eflags = eflags; /* save VIF, VIP */ - eflags = (tf->tf_eflags & ~VM_USERCHANGE) | - (eflags & VM_USERCHANGE) | PSL_VM; - } - bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); - tf->tf_eflags = eflags; - tf->tf_vm86_ds = tf->tf_ds; - tf->tf_vm86_es = tf->tf_es; - tf->tf_vm86_fs = tf->tf_fs; - tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; - tf->tf_ds = _udatasel; - tf->tf_es = _udatasel; - tf->tf_fs = _udatasel; - } else { - /* - * Don't allow users to change privileged or reserved flags. - */ - /* - * XXX do allow users to change the privileged flag PSL_RF. - * The cpu sets PSL_RF in tf_eflags for faults. Debuggers - * should sometimes set it there too. tf_eflags is kept in - * the signal context during signal handling and there is no - * other place to remember it, so the PSL_RF bit may be - * corrupted by the signal handler without us knowing. - * Corruption of the PSL_RF bit at worst causes one more or - * one less debugger trap, so allowing it is fairly harmless. - */ - if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { - printf("sigreturn: eflags = 0x%x\n", eflags); - return (EINVAL); - } - - /* - * Don't allow users to load a valid privileged %cs. Let the - * hardware check for invalid selectors, excess privilege in - * other selectors, invalid %eip's and invalid %esp's. - */ - cs = ucp->uc_mcontext.mc_cs; - if (!CS_SECURE(cs)) { - printf("sigreturn: cs = 0x%x\n", cs); - trapsignal(td, SIGBUS, T_PROTFLT); - return (EINVAL); - } + rflags = ucp->uc_mcontext.mc_rflags; + /* + * Don't allow users to change privileged or reserved flags. + */ + /* + * XXX do allow users to change the privileged flag PSL_RF. + * The cpu sets PSL_RF in tf_rflags for faults. Debuggers + * should sometimes set it there too. 
tf_rflags is kept in + * the signal context during signal handling and there is no + * other place to remember it, so the PSL_RF bit may be + * corrupted by the signal handler without us knowing. + * Corruption of the PSL_RF bit at worst causes one more or + * one less debugger trap, so allowing it is fairly harmless. + */ + if (!EFL_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) { + printf("sigreturn: rflags = 0x%x\n", rflags); + return (EINVAL); + } - ret = set_fpcontext(td, &ucp->uc_mcontext); - if (ret != 0) - return (ret); - bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); + /* + * Don't allow users to load a valid privileged %cs. Let the + * hardware check for invalid selectors, excess privilege in + * other selectors, invalid %eip's and invalid %esp's. + */ + cs = ucp->uc_mcontext.mc_cs; + if (!CS_SECURE(cs)) { + printf("sigreturn: cs = 0x%x\n", cs); + trapsignal(td, SIGBUS, T_PROTFLT); + return (EINVAL); } + ret = set_fpcontext(td, &ucp->uc_mcontext); + if (ret != 0) + return (ret); + bcopy(&ucp->uc_mcontext.mc_r15, regs, sizeof(*regs)); + PROC_LOCK(p); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) if (ucp->uc_mcontext.mc_onstack & 1) @@ -1003,6 +384,7 @@ sigreturn(td, uap) SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); + td->td_pcb->pcb_flags |= PCB_FULLCTX; return (EJUSTRETURN); } @@ -1056,11 +438,6 @@ void cpu_idle(void) { -#ifdef SMP - if (mp_grab_cpu_hlt()) - return; -#endif - if (cpu_idle_hlt) { disable_intr(); if (sched_runnable()) { @@ -1087,59 +464,16 @@ exec_setregs(td, entry, stack, ps_strings) u_long ps_strings; { struct trapframe *regs = td->td_frame; - struct pcb *pcb = td->td_pcb; - /* Reset pc->pcb_gs and %gs before possibly invalidating it. 
*/ - pcb->pcb_gs = _udatasel; - load_gs(_udatasel); - - if (td->td_proc->p_md.md_ldt) - user_ldt_free(td); - bzero((char *)regs, sizeof(struct trapframe)); - regs->tf_eip = entry; - regs->tf_esp = stack; - regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); + regs->tf_rip = entry; + /* This strangeness is to ensure alignment after the implied return address */ + regs->tf_rsp = ((stack - 8) & ~0xF) + 8; + regs->tf_rdi = stack; /* argv */ + regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); regs->tf_ss = _udatasel; - regs->tf_ds = _udatasel; - regs->tf_es = _udatasel; - regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; - /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ - regs->tf_ebx = ps_strings; - - /* - * Reset the hardware debug registers if they were in use. - * They won't have any meaning for the newly exec'd process. - */ - if (pcb->pcb_flags & PCB_DBREGS) { - pcb->pcb_dr0 = 0; - pcb->pcb_dr1 = 0; - pcb->pcb_dr2 = 0; - pcb->pcb_dr3 = 0; - pcb->pcb_dr6 = 0; - pcb->pcb_dr7 = 0; - if (pcb == PCPU_GET(curpcb)) { - /* - * Clear the debug registers on the running - * CPU, otherwise they will end up affecting - * the next process we switch to. - */ - reset_dbregs(); - } - pcb->pcb_flags &= ~PCB_DBREGS; - } - - /* - * Initialize the math emulator (if any) for the current process. - * Actually, just clear the bit that says that the emulator has - * been initialized. Initialization is delayed until the process - * traps to the emulator (if it is done at all) mainly because - * emulators don't provide an entry point for initialization. - */ - td->td_pcb->pcb_flags &= ~FP_SOFTFP; - /* * Arrange to trap the next npx or `fwait' instruction (see npx.c * for why fwait must be trapped at least if there is an npx or an @@ -1164,30 +498,18 @@ exec_setregs(td, entry, stack, ps_strings) * soon. */ fpstate_drop(td); - - /* - * XXX - Linux emulator - * Make sure sure edx is 0x0 on entry. Linux binaries depend - * on it. 
- */ - td->td_retval[1] = 0; } void cpu_setregs(void) { - unsigned int cr0; + register_t cr0; cr0 = rcr0(); -#ifdef SMP cr0 |= CR0_NE; /* Done by npxinit() */ -#endif cr0 |= CR0_MP | CR0_TS; /* Done at every execve() too. */ -#ifndef I386_CPU cr0 |= CR0_WP | CR0_AM; -#endif load_cr0(cr0); - load_gs(_udatasel); } static int @@ -1207,16 +529,9 @@ SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, CTLFLAG_RW, &disable_rtc_set, 0, ""); -SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, - CTLFLAG_RD, &bootinfo, bootinfo, ""); - SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, CTLFLAG_RW, &wall_cmos_clock, 0, ""); -u_long bootdev; /* not a dev_t - encoding is different */ -SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, - CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in dev_t format)"); - /* * Initialize 386 and configure to run kernel */ @@ -1225,28 +540,13 @@ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, * Initialize segments & interrupt table */ -int _default_ldt; -union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ +struct user_segment_descriptor gdt[NGDT];/* global descriptor table */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ -union descriptor ldt[NLDT]; /* local descriptor table */ -#ifdef SMP -/* table descriptors - used to load tables by microp */ -struct region_descriptor r_gdt, r_idt; -#endif -int private_tss; /* flag indicating private tss */ - -#if defined(I586_CPU) && !defined(NO_F00F_HACK) -extern int has_f00f_bug; -#endif - -static struct i386tss dblfault_tss; -static char dblfault_stack[PAGE_SIZE]; - -extern struct user *proc0uarea; -extern vm_offset_t proc0kstack; +static char dblfault_stack[PAGE_SIZE] __aligned(16); +struct amd64tss common_tss; /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { @@ -1256,214 +556,94 @@ struct 
soft_segment_descriptor gdt_segs[] = { 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ - 0, 0, + 0, /* long */ 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GCODE_SEL 1 Code Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ - 0, /* segment descriptor priority level */ + SEL_KPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ - 0, 0, - 1, /* default 32 vs 16 bit size */ + 1, /* long */ + 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GDATA_SEL 2 Data Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ - 0, /* segment descriptor priority level */ + SEL_KPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ - 0, 0, - 1, /* default 32 vs 16 bit size */ + 1, /* long */ + 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, -/* GPRIV_SEL 3 SMP Per-Processor Private Data Descriptor */ +/* GUCODE32_SEL 3 32 bit Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ - SDT_MEMRWA, /* segment type */ - 0, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, - 1, /* default 32 vs 16 bit size */ - 1 /* limit granularity (byte/page units)*/ }, -/* GPROC0_SEL 4 Proc 0 Tss Descriptor */ -{ - 0x0, /* segment base address */ - sizeof(struct i386tss)-1,/* length - all address space */ - SDT_SYS386TSS, /* segment type */ - 0, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, - 0, /* unused - default 32 vs 16 bit size */ - 0 /* limit granularity (byte/page units)*/ }, -/* GLDT_SEL 5 LDT Descriptor */ -{ (int) ldt, /* segment base address */ - sizeof(ldt)-1, /* length - all address space */ - SDT_SYSLDT, 
/* segment type */ + SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, - 0, /* unused - default 32 vs 16 bit size */ - 0 /* limit granularity (byte/page units)*/ }, -/* GUSERLDT_SEL 6 User LDT Descriptor per process */ -{ (int) ldt, /* segment base address */ - (512 * sizeof(union descriptor)-1), /* length */ - SDT_SYSLDT, /* segment type */ - 0, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, - 0, /* unused - default 32 vs 16 bit size */ - 0 /* limit granularity (byte/page units)*/ }, -/* GTGATE_SEL 7 Null Descriptor - Placeholder */ -{ 0x0, /* segment base address */ - 0x0, /* length - all address space */ - 0, /* segment type */ - 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ - 0, 0, - 0, /* default 32 vs 16 bit size */ - 0 /* limit granularity (byte/page units)*/ }, -/* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ -{ 0x400, /* segment base address */ - 0xfffff, /* length */ - SDT_MEMRWA, /* segment type */ - 0, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, + 0, /* long */ 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, -/* GPANIC_SEL 9 Panic Tss Descriptor */ -{ (int) &dblfault_tss, /* segment base address */ - sizeof(struct i386tss)-1,/* length - all address space */ - SDT_SYS386TSS, /* segment type */ - 0, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, - 0, /* unused - default 32 vs 16 bit size */ - 0 /* limit granularity (byte/page units)*/ }, -/* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */ -{ 0, /* segment base address (overwritten) */ - 0xfffff, /* length */ - SDT_MEMERA, /* segment type */ - 0, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, - 0, /* default 32 vs 16 bit size */ - 1 /* limit granularity (byte/page 
units)*/ }, -/* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */ -{ 0, /* segment base address (overwritten) */ - 0xfffff, /* length */ - SDT_MEMERA, /* segment type */ - 0, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, - 0, /* default 32 vs 16 bit size */ - 1 /* limit granularity (byte/page units)*/ }, -/* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */ -{ 0, /* segment base address (overwritten) */ - 0xfffff, /* length */ +/* GUDATA_SEL 4 32/64 bit Data Descriptor for user */ +{ 0x0, /* segment base address */ + 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ - 0, /* segment descriptor priority level */ + SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ - 0, 0, + 0, /* long */ 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, -/* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */ -{ 0, /* segment base address (overwritten) */ - 0xfffff, /* length */ - SDT_MEMRWA, /* segment type */ - 0, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, - 0, /* default 32 vs 16 bit size */ - 1 /* limit granularity (byte/page units)*/ }, -/* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */ -{ 0, /* segment base address (overwritten) */ - 0xfffff, /* length */ - SDT_MEMRWA, /* segment type */ - 0, /* segment descriptor priority level */ - 1, /* segment descriptor present */ - 0, 0, - 0, /* default 32 vs 16 bit size */ - 1 /* limit granularity (byte/page units)*/ }, -}; - -static struct soft_segment_descriptor ldt_segs[] = { - /* Null Descriptor - overwritten by call gate */ -{ 0x0, /* segment base address */ - 0x0, /* length - all address space */ - 0, /* segment type */ - 0, /* segment descriptor priority level */ - 0, /* segment descriptor present */ - 0, 0, - 0, /* default 32 vs 16 bit size */ - 0 /* limit granularity (byte/page units)*/ }, - /* Null Descriptor - overwritten by call gate */ -{ 0x0, 
/* segment base address */ - 0x0, /* length - all address space */ - 0, /* segment type */ - 0, /* segment descriptor priority level */ - 0, /* segment descriptor present */ - 0, 0, - 0, /* default 32 vs 16 bit size */ - 0 /* limit granularity (byte/page units)*/ }, - /* Null Descriptor - overwritten by call gate */ -{ 0x0, /* segment base address */ - 0x0, /* length - all address space */ - 0, /* segment type */ - 0, /* segment descriptor priority level */ - 0, /* segment descriptor present */ - 0, 0, - 0, /* default 32 vs 16 bit size */ - 0 /* limit granularity (byte/page units)*/ }, - /* Code Descriptor for user */ +/* GUCODE_SEL 5 64 bit Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ - 0, 0, - 1, /* default 32 vs 16 bit size */ + 1, /* long */ + 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, - /* Null Descriptor - overwritten by call gate */ +/* GPROC0_SEL 6 Proc 0 Tss Descriptor */ +{ + 0x0, /* segment base address */ + sizeof(struct amd64tss)-1,/* length - all address space */ + SDT_SYSTSS, /* segment type */ + SEL_KPL, /* segment descriptor priority level */ + 1, /* segment descriptor present */ + 0, /* long */ + 0, /* unused - default 32 vs 16 bit size */ + 0 /* limit granularity (byte/page units)*/ }, +/* Actually, the TSS is a system descriptor which is double size */ { 0x0, /* segment base address */ - 0x0, /* length - all address space */ + 0x0, /* length */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ - 0, 0, + 0, /* long */ 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, - /* Data Descriptor for user */ -{ 0x0, /* segment base address */ - 0xfffff, /* length - all address space */ - SDT_MEMRWA, /* segment type */ - SEL_UPL, /* segment descriptor priority level 
*/ - 1, /* segment descriptor present */ - 0, 0, - 1, /* default 32 vs 16 bit size */ - 1 /* limit granularity (byte/page units)*/ }, }; void -setidt(idx, func, typ, dpl, selec) +setidt(idx, func, typ, dpl, ist) int idx; inthand_t *func; int typ; int dpl; - int selec; + int ist; { struct gate_descriptor *ip; ip = idt + idx; - ip->gd_looffset = (int)func; - ip->gd_selector = selec; - ip->gd_stkcpy = 0; + ip->gd_looffset = (uintptr_t)func; + ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL); + ip->gd_ist = ist; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; - ip->gd_hioffset = ((int)func)>>16 ; + ip->gd_hioffset = ((uintptr_t)func)>>16 ; } #define IDTVEC(name) __CONCAT(X,name) @@ -1473,24 +653,68 @@ extern inthand_t IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), - IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); + IDTVEC(xmm), IDTVEC(int0x80_syscall), + IDTVEC(fast_syscall), IDTVEC(fast_syscall32); void sdtossd(sd, ssd) - struct segment_descriptor *sd; + struct user_segment_descriptor *sd; struct soft_segment_descriptor *ssd; { + ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; + ssd->ssd_long = sd->sd_long; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } +void +ssdtosd(ssd, sd) + struct soft_segment_descriptor *ssd; + struct user_segment_descriptor *sd; +{ + + sd->sd_lobase = (ssd->ssd_base) & 0xffffff; + sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff; + sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; + sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; + sd->sd_type = ssd->ssd_type; + sd->sd_dpl = ssd->ssd_dpl; + sd->sd_p = ssd->ssd_p; + sd->sd_long = ssd->ssd_long; + sd->sd_def32 = ssd->ssd_def32; + sd->sd_gran = ssd->ssd_gran; +} + +void +ssdtosyssd(ssd, sd) + 
struct soft_segment_descriptor *ssd; + struct system_segment_descriptor *sd; +{ + + sd->sd_lobase = (ssd->ssd_base) & 0xffffff; + sd->sd_hibase = (ssd->ssd_base >> 24) & 0xffffff; + sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; + sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; + sd->sd_type = ssd->ssd_type; + sd->sd_dpl = ssd->ssd_dpl; + sd->sd_p = ssd->ssd_p; + sd->sd_gran = ssd->ssd_gran; +} + + #define PHYSMAP_SIZE (2 * 8) +struct bios_smap { + u_int64_t base; + u_int64_t length; + u_int32_t type; +} __packed; + /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and @@ -1505,116 +729,48 @@ sdtossd(sd, ssd) * XXX first should be vm_paddr_t. */ static void -getmemsize(int first) +getmemsize(caddr_t kmdp, u_int64_t first) { int i, physmap_idx, pa_indx; - int hasbrokenint12; u_int basemem, extmem; - struct vm86frame vmf; - struct vm86context vmc; vm_paddr_t pa, physmap[PHYSMAP_SIZE]; pt_entry_t *pte; char *cp; - struct bios_smap *smap; + struct bios_smap *smapbase, *smap, *smapend; + u_int32_t smapsize; - hasbrokenint12 = 0; - TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); - bzero(&vmf, sizeof(struct vm86frame)); bzero(physmap, sizeof(physmap)); basemem = 0; + physmap_idx = 0; /* - * Some newer BIOSes has broken INT 12H implementation which cause - * kernel panic immediately. In this case, we need to scan SMAP - * with INT 15:E820 first, then determine base memory size. - */ - if (hasbrokenint12) { - goto int15e820; - } - - /* - * Perform "base memory" related probes & setup - */ - vm86_intcall(0x12, &vmf); - basemem = vmf.vmf_ax; - if (basemem > 640) { - printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", - basemem); - basemem = 640; - } - - /* - * XXX if biosbasemem is now < 640, there is a `hole' - * between the end of base memory and the start of - * ISA memory. The hole may be empty or it may - * contain BIOS code or data. 
Map it read/write so - * that the BIOS can write to it. (Memory from 0 to - * the physical end of the kernel is mapped read-only - * to begin with and then parts of it are remapped. - * The parts that aren't remapped form holes that - * remain read-only and are unused by the kernel. - * The base memory area is below the physical end of - * the kernel and right now forms a read-only hole. - * The part of it from PAGE_SIZE to - * (trunc_page(biosbasemem * 1024) - 1) will be - * remapped and used by the kernel later.) + * get memory map from INT 15:E820, kindly supplied by the loader. * - * This code is similar to the code used in - * pmap_mapdev, but since no memory needs to be - * allocated we simply change the mapping. - */ - for (pa = trunc_page(basemem * 1024); - pa < ISA_HOLE_START; pa += PAGE_SIZE) - pmap_kenter(KERNBASE + pa, pa); - - /* - * if basemem != 640, map pages r/w into vm86 page table so - * that the bios can scribble on it. - */ - pte = (pt_entry_t *)vm86paddr; - for (i = basemem / 4; i < 160; i++) - pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; - -int15e820: - /* - * map page 1 R/W into the kernel page table so we can use it - * as a buffer. The kernel will unmap this page later. - */ - pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT); - - /* - * get memory map with INT 15:E820 + * subr_module.c says: + * "Consumer may safely assume that size value precedes data." + * ie: an int32_t immediately precedes smap. 
*/ - vmc.npages = 0; - smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT)); - vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); + smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); + if (smapbase == 0) { + panic("No BIOS smap info from loader!"); + goto deep_shit; + } + smapsize = *((u_int32_t *)smapbase - 1); + smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); - physmap_idx = 0; - vmf.vmf_ebx = 0; - do { - vmf.vmf_eax = 0xE820; - vmf.vmf_edx = SMAP_SIG; - vmf.vmf_ecx = sizeof(struct bios_smap); - i = vm86_datacall(0x15, &vmf, &vmc); - if (i || vmf.vmf_eax != SMAP_SIG) - break; + for (smap = smapbase; smap < smapend; smap++) { if (boothowto & RB_VERBOSE) - printf("SMAP type=%02x base=%016llx len=%016llx\n", + printf("SMAP type=%02x base=%016lx len=%016lx\n", smap->type, smap->base, smap->length); - if (smap->type != 0x01) - goto next_run; - - if (smap->length == 0) - goto next_run; + if (smap->type != 0x01) { + continue; + } -#ifndef PAE - if (smap->base >= 0xffffffff) { - printf("%uK of memory above 4GB ignored\n", - (u_int)(smap->length / 1024)); - goto next_run; + if (smap->length == 0) { +next_run: + continue; } -#endif for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { @@ -1627,7 +783,7 @@ int15e820: if (smap->base == physmap[physmap_idx + 1]) { physmap[physmap_idx + 1] += smap->length; - goto next_run; + continue; } physmap_idx += 2; @@ -1638,12 +794,12 @@ int15e820: } physmap[physmap_idx] = smap->base; physmap[physmap_idx + 1] = smap->base + smap->length; -next_run: ; - } while (vmf.vmf_ebx != 0); + } /* * Perform "base memory" related probes & setup based on SMAP */ +deep_shit: if (basemem == 0) { for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { @@ -1653,6 +809,10 @@ next_run: ; } if (basemem == 0) { + basemem = rtcin(RTC_BASELO) + (rtcin(RTC_BASEHI) << 8); + } + + if (basemem == 0) { basemem = 640; } @@ -1662,36 +822,20 @@ 
next_run: ; basemem = 640; } +#if 0 for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); - - pte = (pt_entry_t *)vm86paddr; - for (i = basemem / 4; i < 160; i++) - pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; +#endif } if (physmap[1] != 0) goto physmap_done; /* - * If we failed above, try memory map with INT 15:E801 + * Prefer the RTC value for extended memory. */ - vmf.vmf_ax = 0xE801; - if (vm86_intcall(0x15, &vmf) == 0) { - extmem = vmf.vmf_cx + vmf.vmf_dx * 64; - } else { -#if 0 - vmf.vmf_ah = 0x88; - vm86_intcall(0x15, &vmf); - extmem = vmf.vmf_ax; -#else - /* - * Prefer the RTC value for extended memory. - */ - extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); -#endif - } + extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); /* * Special hack for chipsets that still remap the 384k hole when @@ -1716,19 +860,10 @@ physmap_done: /* * Now, physmap contains a map of physical memory. */ - -#ifdef SMP - /* make hole for AP bootstrap code */ - physmap[1] = mp_bootaddress(physmap[1] / 1024); - - /* look for the MP hardware - needed for apic addresses */ - i386_mp_probe(); -#endif - /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be - * called something like "Maxphyspage". We may adjust this + * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); @@ -1778,7 +913,7 @@ physmap_done: /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. 
- */ + */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); @@ -1813,7 +948,7 @@ physmap_done: */ if (pa >= 0x100000 && pa < first) continue; - + page_bad = FALSE; /* @@ -1913,17 +1048,97 @@ physmap_done: avail_end = phys_avail[pa_indx]; } +static u_int64_t +allocpages(int n) +{ + u_int64_t ret; + + ret = physfree; + bzero((void *)ret, n * PAGE_SIZE); + physfree += n * PAGE_SIZE; + return (ret); +} + +static void +create_pagetables(void) +{ + u_int64_t p0kpa; + u_int64_t p0upa; + u_int64_t KPTphys; + int i; + + /* Allocate pages */ + KPTphys = allocpages(NKPT); + IdlePML4 = allocpages(NKPML4E); + IdlePDP = allocpages(NKPDPE); + IdlePTD = allocpages(NPGPTD); + p0upa = allocpages(UAREA_PAGES); + p0kpa = allocpages(KSTACK_PAGES); + + proc0uarea = (struct user *)(p0upa + KERNBASE); + proc0kstack = p0kpa + KERNBASE; + + /* Fill in the underlying page table pages */ + /* Read-only from zero to physfree */ + /* XXX not fully used, underneath 2M pages */ + for (i = 0; (i << PAGE_SHIFT) < physfree; i++) { + ((pt_entry_t *)KPTphys)[i] = i << PAGE_SHIFT; + ((pt_entry_t *)KPTphys)[i] |= PG_RW | PG_V; + } + + /* Now map the page tables at their location within PTmap */ + for (i = 0; i < NKPT; i++) { + ((pd_entry_t *)IdlePTD)[i + KPTDI] = KPTphys + (i << PAGE_SHIFT); + ((pd_entry_t *)IdlePTD)[i + KPTDI] |= PG_RW | PG_V; + } + + /* Map from zero to end of allocations under 2M pages */ + /* This replaces some of the PTD entries above */ + for (i = 0; (i << PDRSHIFT) < physfree; i++) { + ((pd_entry_t *)IdlePTD)[i] = i << PDRSHIFT; + ((pd_entry_t *)IdlePTD)[i] |= PG_RW | PG_V | PG_PS; + } + + /* Now map the page tables at their location within PTmap */ + for (i = 0; i < NKPT; i++) { + ((pd_entry_t *)IdlePTD)[i] = KPTphys + (i << PAGE_SHIFT); + ((pd_entry_t *)IdlePTD)[i] |= PG_RW | PG_V; + } + + /* Now map the PTD at the top of the PTmap (ie: PTD[]) */ + for (i = 0; i < NPGPTD; i++) { + ((pd_entry_t *)IdlePTD)[i + PTDPTDI] = IdlePTD + 
(i << PAGE_SHIFT); + ((pd_entry_t *)IdlePTD)[i + PTDPTDI] |= PG_RW | PG_V; + } + + /* And connect up the PTD to the PDP */ + for (i = 0; i < NPGPTD; i++) { + ((pdp_entry_t *)IdlePDP)[i] = IdlePTD + (i << PAGE_SHIFT); + ((pdp_entry_t *)IdlePDP)[i] |= PG_RW | PG_V | PG_U; + } + + /* And connect up the PDP to the PML4 */ + ((pdp_entry_t *)IdlePML4)[0] = IdlePDP; + ((pdp_entry_t *)IdlePML4)[0] |= PG_RW | PG_V | PG_U; +} + void -init386(first) - int first; +hammer_time(void) { - struct gate_descriptor *gdp; - int gsel_tss, metadata_missing, off, x; -#ifndef SMP - /* table descriptors - used to load tables by microp */ + caddr_t kmdp; + int gsel_tss, off, x; struct region_descriptor r_gdt, r_idt; -#endif struct pcpu *pc; + u_int64_t msr; + + /* Turn on PTE NX (no execute) bit */ + msr = rdmsr(MSR_EFER) | EFER_NXE; + wrmsr(MSR_EFER, msr); + create_pagetables(); + + /* XXX do %cr0 as well */ + load_cr4(rcr4() | CR4_PGE | CR4_PSE); + load_cr3(IdlePML4); proc0.p_uarea = proc0uarea; thread0.td_kstack = proc0kstack; @@ -1937,50 +1152,36 @@ init386(first) */ proc_linkup(&proc0, &ksegrp0, &kse0, &thread0); - metadata_missing = 0; - if (bootinfo.bi_modulep) { - preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; - preload_bootstrap_relocate(KERNBASE); - } else { - metadata_missing = 1; - } - if (envmode == 1) - kern_envp = static_env; - else if (bootinfo.bi_envp) - kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; + preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE); + preload_bootstrap_relocate(KERNBASE); + kmdp = preload_search_by_type("elf64 kernel"); + if (kmdp == NULL) + kmdp = preload_search_by_type("elf kernel"); + boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); + kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *) + KERNBASE; /* Init basic tunables, hz etc */ init_param1(); /* - * make gdt memory segments, the code segment goes up to end of the - * page with etext in it, the data segment goes to the end of - * the address space + * make gdt memory 
segments */ - /* - * XXX text protection is temporarily (?) disabled. The limit was - * i386_btop(round_page(etext)) - 1. - */ - gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); - gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); -#ifdef SMP - pc = &SMP_prvspace[0].pcpu; - gdt_segs[GPRIV_SEL].ssd_limit = - atop(sizeof(struct privatespace) - 1); -#else - pc = &__pcpu; - gdt_segs[GPRIV_SEL].ssd_limit = - atop(sizeof(struct pcpu) - 1); -#endif - gdt_segs[GPRIV_SEL].ssd_base = (int) pc; - gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; + gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss; - for (x = 0; x < NGDT; x++) - ssdtosd(&gdt_segs[x], &gdt[x].sd); + for (x = 0; x < NGDT; x++) { + if (x != GPROC0_SEL && x != (GPROC0_SEL + 1)) + ssdtosd(&gdt_segs[x], &gdt[x]); + } + ssdtosyssd(&gdt_segs[GPROC0_SEL], (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; - r_gdt.rd_base = (int) gdt; + r_gdt.rd_base = (long) gdt; lgdt(&r_gdt); + pc = &__pcpu; + + wrmsr(MSR_FSBASE, (u_int64_t)pc); + wrmsr(MSR_GSBASE, (u_int64_t)pc); + wrmsr(MSR_KGSBASE, (u_int64_t)pc); pcpu_init(pc, 0, sizeof(struct pcpu)); PCPU_SET(prvspace, pc); @@ -1998,68 +1199,33 @@ init386(first) mtx_init(&clock_lock, "clk", NULL, MTX_SPIN | MTX_RECURSE); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); - /* make ldt memory segments */ - /* - * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it - * should be spelled ...MAX_USER... 
- */ - ldt_segs[LUCODE_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1); - ldt_segs[LUDATA_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1); - for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) - ssdtosd(&ldt_segs[x], &ldt[x].sd); - - _default_ldt = GSEL(GLDT_SEL, SEL_KPL); - lldt(_default_ldt); - PCPU_SET(currentldt, _default_ldt); - /* exceptions */ for (x = 0; x < NIDT; x++) - setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(1, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(3, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(7, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL - , GSEL(GCODE_SEL, SEL_KPL)); - setidt(8, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); - setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(11, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(14, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(19, 
&IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, - GSEL(GCODE_SEL, SEL_KPL)); + setidt(x, &IDTVEC(rsvd), SDT_SYSTGT, SEL_KPL, 0); + setidt(0, &IDTVEC(div), SDT_SYSTGT, SEL_KPL, 0); + setidt(1, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0); + setidt(2, &IDTVEC(nmi), SDT_SYSTGT, SEL_KPL, 0); + setidt(3, &IDTVEC(bpt), SDT_SYSIGT, SEL_KPL, 0); + setidt(4, &IDTVEC(ofl), SDT_SYSTGT, SEL_KPL, 0); + setidt(5, &IDTVEC(bnd), SDT_SYSTGT, SEL_KPL, 0); + setidt(6, &IDTVEC(ill), SDT_SYSTGT, SEL_KPL, 0); + setidt(7, &IDTVEC(dna), SDT_SYSTGT, SEL_KPL, 0); + setidt(8, (inthand_t *)dblfault_handler, SDT_SYSIGT, SEL_KPL, 1); + setidt(9, &IDTVEC(fpusegm), SDT_SYSTGT, SEL_KPL, 0); + setidt(10, &IDTVEC(tss), SDT_SYSTGT, SEL_KPL, 0); + setidt(11, &IDTVEC(missing), SDT_SYSTGT, SEL_KPL, 0); + setidt(12, &IDTVEC(stk), SDT_SYSTGT, SEL_KPL, 0); + setidt(13, &IDTVEC(prot), SDT_SYSTGT, SEL_KPL, 0); + setidt(14, &IDTVEC(page), SDT_SYSTGT, SEL_KPL, 0); + setidt(15, &IDTVEC(rsvd), SDT_SYSTGT, SEL_KPL, 0); + setidt(16, &IDTVEC(fpu), SDT_SYSTGT, SEL_KPL, 0); + setidt(17, &IDTVEC(align), SDT_SYSTGT, SEL_KPL, 0); + setidt(18, &IDTVEC(mchk), SDT_SYSTGT, SEL_KPL, 0); + setidt(19, &IDTVEC(xmm), SDT_SYSTGT, SEL_KPL, 0); + setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYSTGT, SEL_UPL, 0); r_idt.rd_limit = sizeof(idt0) - 1; - r_idt.rd_base = (int) idt; + r_idt.rd_base = (long) idt; lidt(&r_idt); /* @@ -2067,9 +1233,6 @@ init386(first) */ cninit(); - if (metadata_missing) - printf("WARNING: loader(8) metadata is missing!\n"); - #ifdef DEV_ISA isa_defaultirq(); #endif @@ -2080,44 +1243,31 @@ init386(first) Debugger("Boot flags requested debugger"); #endif - finishidentcpu(); /* Final stage of CPU initialization */ - setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); + identify_cpu(); /* Final stage of CPU initialization */ 
initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! */ - /* Note: -16 is so we can grow the trapframe if we came from vm86 */ - PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + - KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16); - PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); + common_tss.tss_rsp0 = thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb); + /* XXX we need to update tss_rsp0 in cpu_switch */ + /* XXX maybe not yet, everything is still running in supervisor mode */ + + /* doublefault stack space, runs on ist1 */ + common_tss.tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; + gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); - private_tss = 0; - PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); - PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); - PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); ltr(gsel_tss); - dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = - dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; - dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = - dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); -#ifdef PAE - dblfault_tss.tss_cr3 = (int)IdlePDPT; -#else - dblfault_tss.tss_cr3 = (int)IdlePTD; -#endif - dblfault_tss.tss_eip = (int)dblfault_handler; - dblfault_tss.tss_eflags = PSL_KERNEL; - dblfault_tss.tss_ds = dblfault_tss.tss_es = - dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); - dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); - dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); - dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); - - vm86_initialize(); - getmemsize(first); + /* Set up the fast syscall stuff */ + msr = rdmsr(MSR_EFER) | EFER_SCE; + wrmsr(MSR_EFER, msr); + wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); + wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); + msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | + ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); + wrmsr(MSR_STAR, 
msr); + wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); + + getmemsize(kmdp, physfree); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ @@ -2128,35 +1278,14 @@ init386(first) msgbufinit(msgbufp, MSGBUF_SIZE); - /* make a call gate to reenter kernel with */ - gdp = &ldt[LSYS5CALLS_SEL].gd; - - x = (int) &IDTVEC(lcall_syscall); - gdp->gd_looffset = x; - gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); - gdp->gd_stkcpy = 1; - gdp->gd_type = SDT_SYS386CGT; - gdp->gd_dpl = SEL_UPL; - gdp->gd_p = 1; - gdp->gd_hioffset = x >> 16; - - /* XXX does this work? */ - ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; - ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; - /* transfer to user mode */ - _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); - _udatasel = LSEL(LUDATA_SEL, SEL_UPL); + _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); + _udatasel = GSEL(GUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; /* XXXKSE */ -#ifdef PAE - thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; -#else - thread0.td_pcb->pcb_cr3 = (int)IdlePTD; -#endif - thread0.td_pcb->pcb_ext = 0; + thread0.td_pcb->pcb_cr3 = IdlePML4; thread0.td_frame = &proc0_tf; } @@ -2165,56 +1294,17 @@ cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { } -#if defined(I586_CPU) && !defined(NO_F00F_HACK) -static void f00f_hack(void *unused); -SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); - -static void -f00f_hack(void *unused) { - struct gate_descriptor *new_idt; -#ifndef SMP - struct region_descriptor r_idt; -#endif - vm_offset_t tmp; - - if (!has_f00f_bug) - return; - - GIANT_REQUIRED; - - printf("Intel Pentium detected, installing workaround for F00F bug\n"); - - r_idt.rd_limit = sizeof(idt0) - 1; - - tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2); - if (tmp == 0) - panic("kmem_alloc returned 0"); - if (((unsigned int)tmp & (PAGE_SIZE-1)) != 0) - panic("kmem_alloc returned non-page-aligned memory"); - /* Put the first seven entries in the lower page */ - 
new_idt = (struct gate_descriptor*)(tmp + PAGE_SIZE - (7*8)); - bcopy(idt, new_idt, sizeof(idt0)); - r_idt.rd_base = (int)new_idt; - lidt(&r_idt); - idt = new_idt; - if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE, - VM_PROT_READ, FALSE) != KERN_SUCCESS) - panic("vm_map_protect failed"); - return; -} -#endif /* defined(I586_CPU) && !NO_F00F_HACK */ - int ptrace_set_pc(struct thread *td, unsigned long addr) { - td->td_frame->tf_eip = addr; + td->td_frame->tf_rip = addr; return (0); } int ptrace_single_step(struct thread *td) { - td->td_frame->tf_eflags |= PSL_T; + td->td_frame->tf_rflags |= PSL_T; return (0); } @@ -2225,23 +1315,27 @@ fill_regs(struct thread *td, struct reg *regs) struct trapframe *tp; tp = td->td_frame; - regs->r_fs = tp->tf_fs; - regs->r_es = tp->tf_es; - regs->r_ds = tp->tf_ds; - regs->r_edi = tp->tf_edi; - regs->r_esi = tp->tf_esi; - regs->r_ebp = tp->tf_ebp; - regs->r_ebx = tp->tf_ebx; - regs->r_edx = tp->tf_edx; - regs->r_ecx = tp->tf_ecx; - regs->r_eax = tp->tf_eax; - regs->r_eip = tp->tf_eip; + regs->r_r15 = tp->tf_r15; + regs->r_r14 = tp->tf_r14; + regs->r_r13 = tp->tf_r13; + regs->r_r12 = tp->tf_r12; + regs->r_r11 = tp->tf_r11; + regs->r_r10 = tp->tf_r10; + regs->r_r9 = tp->tf_r9; + regs->r_r8 = tp->tf_r8; + regs->r_rdi = tp->tf_rdi; + regs->r_rsi = tp->tf_rsi; + regs->r_rbp = tp->tf_rbp; + regs->r_rbx = tp->tf_rbx; + regs->r_rdx = tp->tf_rdx; + regs->r_rcx = tp->tf_rcx; + regs->r_rax = tp->tf_rax; + regs->r_rip = tp->tf_rip; regs->r_cs = tp->tf_cs; - regs->r_eflags = tp->tf_eflags; - regs->r_esp = tp->tf_esp; + regs->r_rflags = tp->tf_rflags; + regs->r_rsp = tp->tf_rsp; regs->r_ss = tp->tf_ss; pcb = td->td_pcb; - regs->r_gs = pcb->pcb_gs; return (0); } @@ -2252,106 +1346,107 @@ set_regs(struct thread *td, struct reg *regs) struct trapframe *tp; tp = td->td_frame; - if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || + if (!EFL_SECURE(regs->r_rflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); - tp->tf_fs = regs->r_fs; - 
tp->tf_es = regs->r_es; - tp->tf_ds = regs->r_ds; - tp->tf_edi = regs->r_edi; - tp->tf_esi = regs->r_esi; - tp->tf_ebp = regs->r_ebp; - tp->tf_ebx = regs->r_ebx; - tp->tf_edx = regs->r_edx; - tp->tf_ecx = regs->r_ecx; - tp->tf_eax = regs->r_eax; - tp->tf_eip = regs->r_eip; + tp->tf_r15 = regs->r_r15; + tp->tf_r14 = regs->r_r14; + tp->tf_r13 = regs->r_r13; + tp->tf_r12 = regs->r_r12; + tp->tf_r11 = regs->r_r11; + tp->tf_r10 = regs->r_r10; + tp->tf_r9 = regs->r_r9; + tp->tf_r8 = regs->r_r8; + tp->tf_rdi = regs->r_rdi; + tp->tf_rsi = regs->r_rsi; + tp->tf_rbp = regs->r_rbp; + tp->tf_rbx = regs->r_rbx; + tp->tf_rdx = regs->r_rdx; + tp->tf_rcx = regs->r_rcx; + tp->tf_rax = regs->r_rax; + tp->tf_rip = regs->r_rip; tp->tf_cs = regs->r_cs; - tp->tf_eflags = regs->r_eflags; - tp->tf_esp = regs->r_esp; + tp->tf_rflags = regs->r_rflags; + tp->tf_rsp = regs->r_rsp; tp->tf_ss = regs->r_ss; pcb = td->td_pcb; - pcb->pcb_gs = regs->r_gs; return (0); } -#ifdef CPU_ENABLE_SSE +/* XXX check all this stuff! 
*/ +/* externalize from sv_xmm */ static void -fill_fpregs_xmm(sv_xmm, sv_87) - struct savexmm *sv_xmm; - struct save87 *sv_87; +fill_fpregs_xmm(struct savefpu *sv_xmm, struct fpreg *fpregs) { - register struct env87 *penv_87 = &sv_87->sv_env; - register struct envxmm *penv_xmm = &sv_xmm->sv_env; + struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; + struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; - bzero(sv_87, sizeof(*sv_87)); + /* pcb -> fpregs */ + bzero(fpregs, sizeof(*fpregs)); /* FPU control/status */ - penv_87->en_cw = penv_xmm->en_cw; - penv_87->en_sw = penv_xmm->en_sw; - penv_87->en_tw = penv_xmm->en_tw; - penv_87->en_fip = penv_xmm->en_fip; - penv_87->en_fcs = penv_xmm->en_fcs; - penv_87->en_opcode = penv_xmm->en_opcode; - penv_87->en_foo = penv_xmm->en_foo; - penv_87->en_fos = penv_xmm->en_fos; + penv_fpreg->en_cw = penv_xmm->en_cw; + penv_fpreg->en_sw = penv_xmm->en_sw; + penv_fpreg->en_tw = penv_xmm->en_tw; + penv_fpreg->en_opcode = penv_xmm->en_opcode; + penv_fpreg->en_rip = penv_xmm->en_rip; + penv_fpreg->en_rdp = penv_xmm->en_rdp; + penv_fpreg->en_mxcsr = penv_xmm->en_mxcsr; + penv_fpreg->en_mxcsr_mask = penv_xmm->en_mxcsr_mask; /* FPU registers */ for (i = 0; i < 8; ++i) - sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; + bcopy(sv_xmm->sv_fp[i].fp_acc.fp_bytes, fpregs->fpr_acc[i], 10); + + /* SSE registers */ + for (i = 0; i < 16; ++i) + bcopy(sv_xmm->sv_xmm[i].xmm_bytes, fpregs->fpr_xacc[i], 16); } +/* internalize from fpregs into sv_xmm */ static void -set_fpregs_xmm(sv_87, sv_xmm) - struct save87 *sv_87; - struct savexmm *sv_xmm; +set_fpregs_xmm(struct fpreg *fpregs, struct savefpu *sv_xmm) { - register struct env87 *penv_87 = &sv_87->sv_env; - register struct envxmm *penv_xmm = &sv_xmm->sv_env; + struct envxmm *penv_xmm = &sv_xmm->sv_env; + struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; int i; + /* fpregs -> pcb */ /* FPU control/status */ - penv_xmm->en_cw = penv_87->en_cw; - penv_xmm->en_sw = penv_87->en_sw; - 
penv_xmm->en_tw = penv_87->en_tw; - penv_xmm->en_fip = penv_87->en_fip; - penv_xmm->en_fcs = penv_87->en_fcs; - penv_xmm->en_opcode = penv_87->en_opcode; - penv_xmm->en_foo = penv_87->en_foo; - penv_xmm->en_fos = penv_87->en_fos; + penv_xmm->en_cw = penv_fpreg->en_cw; + penv_xmm->en_sw = penv_fpreg->en_sw; + penv_xmm->en_tw = penv_fpreg->en_tw; + penv_xmm->en_opcode = penv_fpreg->en_opcode; + penv_xmm->en_rip = penv_fpreg->en_rip; + penv_xmm->en_rdp = penv_fpreg->en_rdp; + penv_xmm->en_mxcsr = penv_fpreg->en_mxcsr; + penv_xmm->en_mxcsr_mask = penv_fpreg->en_mxcsr_mask; /* FPU registers */ for (i = 0; i < 8; ++i) - sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; + bcopy(fpregs->fpr_acc[i], sv_xmm->sv_fp[i].fp_acc.fp_bytes, 10); + + /* SSE registers */ + for (i = 0; i < 16; ++i) + bcopy(fpregs->fpr_xacc[i], sv_xmm->sv_xmm[i].xmm_bytes, 16); } -#endif /* CPU_ENABLE_SSE */ +/* externalize from td->pcb */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { -#ifdef CPU_ENABLE_SSE - if (cpu_fxsr) { - fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm, - (struct save87 *)fpregs); - return (0); - } -#endif /* CPU_ENABLE_SSE */ - bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); + + fill_fpregs_xmm(&td->td_pcb->pcb_save, fpregs); return (0); } +/* internalize to td->pcb */ int set_fpregs(struct thread *td, struct fpreg *fpregs) { -#ifdef CPU_ENABLE_SSE - if (cpu_fxsr) { - set_fpregs_xmm((struct save87 *)fpregs, - &td->td_pcb->pcb_save.sv_xmm); - return (0); - } -#endif /* CPU_ENABLE_SSE */ - bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs); + + set_fpregs_xmm(fpregs, &td->td_pcb->pcb_save); return (0); } @@ -2366,29 +1461,32 @@ get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) tp = td->td_frame; PROC_LOCK(curthread->td_proc); - mcp->mc_onstack = sigonstack(tp->tf_esp); + mcp->mc_onstack = sigonstack(tp->tf_rsp); PROC_UNLOCK(curthread->td_proc); - mcp->mc_gs = td->td_pcb->pcb_gs; - mcp->mc_fs = tp->tf_fs; - mcp->mc_es = tp->tf_es; - mcp->mc_ds = 
tp->tf_ds; - mcp->mc_edi = tp->tf_edi; - mcp->mc_esi = tp->tf_esi; - mcp->mc_ebp = tp->tf_ebp; - mcp->mc_isp = tp->tf_isp; + mcp->mc_r15 = tp->tf_r15; + mcp->mc_r14 = tp->tf_r14; + mcp->mc_r13 = tp->tf_r13; + mcp->mc_r12 = tp->tf_r12; + mcp->mc_r11 = tp->tf_r11; + mcp->mc_r10 = tp->tf_r10; + mcp->mc_r9 = tp->tf_r9; + mcp->mc_r8 = tp->tf_r8; + mcp->mc_rdi = tp->tf_rdi; + mcp->mc_rsi = tp->tf_rsi; + mcp->mc_rbp = tp->tf_rbp; + mcp->mc_rbx = tp->tf_rbx; + mcp->mc_rcx = tp->tf_rcx; if (clear_ret != 0) { - mcp->mc_eax = 0; - mcp->mc_edx = 0; + mcp->mc_rax = 0; + mcp->mc_rdx = 0; } else { - mcp->mc_eax = tp->tf_eax; - mcp->mc_edx = tp->tf_edx; + mcp->mc_rax = tp->tf_rax; + mcp->mc_rdx = tp->tf_rdx; } - mcp->mc_ebx = tp->tf_ebx; - mcp->mc_ecx = tp->tf_ecx; - mcp->mc_eip = tp->tf_eip; + mcp->mc_rip = tp->tf_rip; mcp->mc_cs = tp->tf_cs; - mcp->mc_eflags = tp->tf_eflags; - mcp->mc_esp = tp->tf_esp; + mcp->mc_rflags = tp->tf_rflags; + mcp->mc_rsp = tp->tf_rsp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp); @@ -2405,29 +1503,34 @@ int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct trapframe *tp; - int eflags, ret; + long rflags; + int ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp)) return (EINVAL); - eflags = (mcp->mc_eflags & PSL_USERCHANGE) | - (tp->tf_eflags & ~PSL_USERCHANGE); + rflags = (mcp->mc_rflags & PSL_USERCHANGE) | + (tp->tf_rflags & ~PSL_USERCHANGE); if ((ret = set_fpcontext(td, mcp)) == 0) { - tp->tf_fs = mcp->mc_fs; - tp->tf_es = mcp->mc_es; - tp->tf_ds = mcp->mc_ds; - tp->tf_edi = mcp->mc_edi; - tp->tf_esi = mcp->mc_esi; - tp->tf_ebp = mcp->mc_ebp; - tp->tf_ebx = mcp->mc_ebx; - tp->tf_edx = mcp->mc_edx; - tp->tf_ecx = mcp->mc_ecx; - tp->tf_eax = mcp->mc_eax; - tp->tf_eip = mcp->mc_eip; - tp->tf_eflags = eflags; - tp->tf_esp = mcp->mc_esp; + tp->tf_r15 = mcp->mc_r15; + tp->tf_r14 = mcp->mc_r14; + tp->tf_r13 = mcp->mc_r13; + tp->tf_r12 = mcp->mc_r12; + tp->tf_r11 = mcp->mc_r11; + tp->tf_r10 = mcp->mc_r10; + 
tp->tf_r9 = mcp->mc_r9; + tp->tf_r8 = mcp->mc_r8; + tp->tf_rdi = mcp->mc_rdi; + tp->tf_rsi = mcp->mc_rsi; + tp->tf_rbp = mcp->mc_rbp; + tp->tf_rbx = mcp->mc_rbx; + tp->tf_rdx = mcp->mc_rdx; + tp->tf_rcx = mcp->mc_rcx; + tp->tf_rax = mcp->mc_rax; + tp->tf_rip = mcp->mc_rip; + tp->tf_rflags = rflags; + tp->tf_rsp = mcp->mc_rsp; tp->tf_ss = mcp->mc_ss; - td->td_pcb->pcb_gs = mcp->mc_gs; ret = 0; } return (ret); @@ -2436,11 +1539,7 @@ set_mcontext(struct thread *td, const mcontext_t *mcp) static void get_fpcontext(struct thread *td, mcontext_t *mcp) { -#ifndef DEV_NPX - mcp->mc_fpformat = _MC_FPFMT_NODEV; - mcp->mc_ownedfp = _MC_FPOWNED_NONE; -#else - union savefpu *addr; + struct savefpu *addr; /* * XXX mc_fpstate might be misaligned, since its declaration is not @@ -2454,34 +1553,28 @@ get_fpcontext(struct thread *td, mcontext_t *mcp) * called, although this requires knowing too much about * npxgetregs()'s internals. */ - addr = (union savefpu *)&mcp->mc_fpstate; - if (td == PCPU_GET(fpcurthread) && -#ifdef CPU_ENABLE_SSE - cpu_fxsr && -#endif - ((uintptr_t)(void *)addr & 0xF)) { + addr = (struct savefpu *)&mcp->mc_fpstate; + if (td == PCPU_GET(fpcurthread) && ((uintptr_t)(void *)addr & 0xF)) { do addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); } mcp->mc_ownedfp = npxgetregs(td, addr); - if (addr != (union savefpu *)&mcp->mc_fpstate) { + if (addr != (struct savefpu *)&mcp->mc_fpstate) { bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2)); } mcp->mc_fpformat = npxformat(); -#endif } static int set_fpcontext(struct thread *td, const mcontext_t *mcp) { - union savefpu *addr; + struct savefpu *addr; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); - else if (mcp->mc_fpformat != _MC_FPFMT_387 && - mcp->mc_fpformat != _MC_FPFMT_XMM) + else if (mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) /* We don't care what state is left in the FPU or PCB. 
*/ @@ -2489,24 +1582,19 @@ set_fpcontext(struct thread *td, const mcontext_t *mcp) else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { /* XXX align as above. */ - addr = (union savefpu *)&mcp->mc_fpstate; + addr = (struct savefpu *)&mcp->mc_fpstate; if (td == PCPU_GET(fpcurthread) && -#ifdef CPU_ENABLE_SSE - cpu_fxsr && -#endif ((uintptr_t)(void *)addr & 0xF)) { do addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate)); } -#ifdef DEV_NPX /* * XXX we violate the dubious requirement that npxsetregs() * be called with interrupts disabled. */ npxsetregs(td, addr); -#endif /* * Don't bother putting things back where they were in the * misaligned case, since we know that the caller won't use @@ -2523,13 +1611,11 @@ fpstate_drop(struct thread *td) register_t s; s = intr_disable(); -#ifdef DEV_NPX if (PCPU_GET(fpcurthread) == td) npxdrop(); -#endif /* * XXX force a full drop of the npx. The above only drops it if we - * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. + * owned it. * * XXX I don't much like npxgetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. 
@@ -2544,185 +1630,17 @@ fpstate_drop(struct thread *td) int fill_dbregs(struct thread *td, struct dbreg *dbregs) { - struct pcb *pcb; - if (td == NULL) { - dbregs->dr[0] = rdr0(); - dbregs->dr[1] = rdr1(); - dbregs->dr[2] = rdr2(); - dbregs->dr[3] = rdr3(); - dbregs->dr[4] = rdr4(); - dbregs->dr[5] = rdr5(); - dbregs->dr[6] = rdr6(); - dbregs->dr[7] = rdr7(); - } else { - pcb = td->td_pcb; - dbregs->dr[0] = pcb->pcb_dr0; - dbregs->dr[1] = pcb->pcb_dr1; - dbregs->dr[2] = pcb->pcb_dr2; - dbregs->dr[3] = pcb->pcb_dr3; - dbregs->dr[4] = 0; - dbregs->dr[5] = 0; - dbregs->dr[6] = pcb->pcb_dr6; - dbregs->dr[7] = pcb->pcb_dr7; - } return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { - struct pcb *pcb; - int i; - u_int32_t mask1, mask2; - - if (td == NULL) { - load_dr0(dbregs->dr[0]); - load_dr1(dbregs->dr[1]); - load_dr2(dbregs->dr[2]); - load_dr3(dbregs->dr[3]); - load_dr4(dbregs->dr[4]); - load_dr5(dbregs->dr[5]); - load_dr6(dbregs->dr[6]); - load_dr7(dbregs->dr[7]); - } else { - /* - * Don't let an illegal value for dr7 get set. Specifically, - * check for undefined settings. Setting these bit patterns - * result in undefined behaviour and can lead to an unexpected - * TRCTRAP. - */ - for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; - i++, mask1 <<= 2, mask2 <<= 2) - if ((dbregs->dr[7] & mask1) == mask2) - return (EINVAL); - - pcb = td->td_pcb; - - /* - * Don't let a process set a breakpoint that is not within the - * process's address space. If a process could do this, it - * could halt the system by setting a breakpoint in the kernel - * (if ddb was enabled). Thus, we need to check to make sure - * that no breakpoints are being enabled for addresses outside - * process's address space, unless, perhaps, we were called by - * uid 0. - * - * XXX - what about when the watched area of the user's - * address space is written into from within the kernel - * ... wouldn't that still cause a breakpoint to be generated - * from within kernel mode? 
- */ - - if (suser(td) != 0) { - if (dbregs->dr[7] & 0x3) { - /* dr0 is enabled */ - if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) - return (EINVAL); - } - - if (dbregs->dr[7] & (0x3<<2)) { - /* dr1 is enabled */ - if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) - return (EINVAL); - } - - if (dbregs->dr[7] & (0x3<<4)) { - /* dr2 is enabled */ - if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) - return (EINVAL); - } - - if (dbregs->dr[7] & (0x3<<6)) { - /* dr3 is enabled */ - if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) - return (EINVAL); - } - } - - pcb->pcb_dr0 = dbregs->dr[0]; - pcb->pcb_dr1 = dbregs->dr[1]; - pcb->pcb_dr2 = dbregs->dr[2]; - pcb->pcb_dr3 = dbregs->dr[3]; - pcb->pcb_dr6 = dbregs->dr[6]; - pcb->pcb_dr7 = dbregs->dr[7]; - - pcb->pcb_flags |= PCB_DBREGS; - } return (0); } -/* - * Return > 0 if a hardware breakpoint has been hit, and the - * breakpoint was in user space. Return 0, otherwise. - */ -int -user_dbreg_trap(void) -{ - u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ - u_int32_t bp; /* breakpoint bits extracted from dr6 */ - int nbp; /* number of breakpoints that triggered */ - caddr_t addr[4]; /* breakpoint addresses */ - int i; - - dr7 = rdr7(); - if ((dr7 & 0x000000ff) == 0) { - /* - * all GE and LE bits in the dr7 register are zero, - * thus the trap couldn't have been caused by the - * hardware debug registers - */ - return 0; - } - - nbp = 0; - dr6 = rdr6(); - bp = dr6 & 0x0000000f; - - if (!bp) { - /* - * None of the breakpoint bits are set meaning this - * trap was not caused by any of the debug registers - */ - return 0; - } - - /* - * at least one of the breakpoints were hit, check to see - * which ones and if any of them are user space addresses - */ - - if (bp & 0x01) { - addr[nbp++] = (caddr_t)rdr0(); - } - if (bp & 0x02) { - addr[nbp++] = (caddr_t)rdr1(); - } - if (bp & 0x04) { - addr[nbp++] = (caddr_t)rdr2(); - } - if (bp & 0x08) { - addr[nbp++] = (caddr_t)rdr3(); - } - - for (i=0; i<nbp; i++) { - if (addr[i] < - (caddr_t)VM_MAXUSER_ADDRESS) { - 
/* - * addr[i] is in user space - */ - return nbp; - } - } - - /* - * None of the breakpoints are in user space. - */ - return 0; -} - - #ifndef DDB void Debugger(const char *msg) @@ -2776,3 +1694,64 @@ outb(u_int port, u_char data) } #endif /* DDB */ + +void +bcopy(const void *src, void *dest, size_t len) +{ + const char *csrc; + char *cdest; + size_t i; + + csrc = (const char *)src; + cdest = (char *)dest; + if (src < dest) { + for (i = len - 1; i != (size_t)-1; i--) + cdest[i] = csrc[i]; + } else { + for (i = 0; i < len; i++) + cdest[i] = csrc[i]; + } +} + +void * +memcpy(void *dest, const void *src, size_t len) +{ + + bcopy(src, dest, len); + return dest; +} + +void +bzero(void *buf, size_t len) +{ + char *cbuf; + size_t i; + + cbuf = (char *)buf; + for (i = 0; i < len; i++) + cbuf[i] = 0; +} + +void +pagezero(void *buf) +{ + + bzero(buf, PAGE_SIZE); +} + +int +bcmp(const void *s1, const void *s2, size_t len) +{ + const char *cs1, *cs2; + int diff; + size_t i; + + cs1 = (const char *)s1; + cs2 = (const char *)s2; + for (i = 0; i < len; i++) { + diff = cs2[i] - cs1[i]; + if (diff) + return diff; + } + return 0; +} diff --git a/sys/amd64/amd64/mem.c b/sys/amd64/amd64/mem.c index 8ec934f..38a6e13 100644 --- a/sys/amd64/amd64/mem.c +++ b/sys/amd64/amd64/mem.c @@ -98,7 +98,7 @@ mmclose(dev_t dev, int flags, int fmt, struct thread *td) { switch (minor(dev)) { case 14: - td->td_frame->tf_eflags &= ~PSL_IOPL; + td->td_frame->tf_rflags &= ~PSL_IOPL; } return (0); } @@ -124,7 +124,7 @@ mmopen(dev_t dev, int flags, int fmt, struct thread *td) error = securelevel_gt(td->td_ucred, 0); if (error != 0) return (error); - td->td_frame->tf_eflags |= PSL_IOPL; + td->td_frame->tf_rflags |= PSL_IOPL; break; } return (0); @@ -135,7 +135,7 @@ static int mmrw(dev_t dev, struct uio *uio, int flags) { int o; - u_int c = 0, v; + u_long c = 0, v; struct iovec *iov; int error = 0; vm_offset_t addr, eaddr; @@ -159,7 +159,7 @@ mmrw(dev_t dev, struct uio *uio, int flags) v &= ~PAGE_MASK; 
pmap_kenter((vm_offset_t)ptvmmap, v); o = (int)uio->uio_offset & PAGE_MASK; - c = (u_int)(PAGE_SIZE - ((int)iov->iov_base & PAGE_MASK)); + c = (u_long)(PAGE_SIZE - ((long)iov->iov_base & PAGE_MASK)); c = min(c, (u_int)(PAGE_SIZE - o)); c = min(c, (u_int)iov->iov_len); error = uiomove((caddr_t)&ptvmmap[o], (int)c, uio); @@ -177,17 +177,17 @@ mmrw(dev_t dev, struct uio *uio, int flags) addr = trunc_page(uio->uio_offset); eaddr = round_page(uio->uio_offset + c); - if (addr < (vm_offset_t)VADDR(PTDPTDI, 0)) + if (addr < (vm_offset_t)VADDR(0, 0, PTDPTDI, 0)) return (EFAULT); for (; addr < eaddr; addr += PAGE_SIZE) if (pmap_extract(kernel_pmap, addr) == 0) return (EFAULT); - if (!kernacc((caddr_t)(int)uio->uio_offset, c, + if (!kernacc((caddr_t)(long)uio->uio_offset, c, uio->uio_rw == UIO_READ ? VM_PROT_READ : VM_PROT_WRITE)) return (EFAULT); - error = uiomove((caddr_t)(int)uio->uio_offset, (int)c, uio); + error = uiomove((caddr_t)(long)uio->uio_offset, (int)c, uio); continue; default: @@ -317,15 +317,6 @@ mem_range_attr_set(struct mem_range_desc *mrd, int *arg) return (mem_range_softc.mr_op->set(&mem_range_softc, mrd, arg)); } -#ifdef SMP -void -mem_range_AP_init(void) -{ - if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP) - (mem_range_softc.mr_op->initAP(&mem_range_softc)); -} -#endif - static int mem_modevent(module_t mod, int type, void *data) { diff --git a/sys/amd64/amd64/nexus.c b/sys/amd64/amd64/nexus.c index 3894222..07d117e 100644 --- a/sys/amd64/amd64/nexus.c +++ b/sys/amd64/amd64/nexus.c @@ -58,21 +58,13 @@ #include <machine/pmap.h> #include <machine/resource.h> -#ifdef APIC_IO -#include <machine/smp.h> -#include <machine/mpapic.h> -#endif #ifdef DEV_ISA #include <isa/isavar.h> -#ifdef PC98 -#include <pc98/pc98/pc98.h> -#else -#include <i386/isa/isa.h> -#endif +#include <amd64/isa/isa.h> #endif -#include <i386/isa/icu.h> -#include <i386/isa/intr_machdep.h> +#include <amd64/isa/icu.h> +#include <amd64/isa/intr_machdep.h> #include <sys/rtprio.h> 
static MALLOC_DEFINE(M_NEXUSDEV, "nexusdev", "Nexus device"); @@ -165,16 +157,12 @@ nexus_probe(device_t dev) * APIC interrupts are global though. * * XXX We depend on the AT PIC driver correctly claiming IRQ 2 - * to prevent its reuse elsewhere in the !APIC_IO case. + * to prevent its reuse elsewhere. */ irq_rman.rm_start = 0; irq_rman.rm_type = RMAN_ARRAY; irq_rman.rm_descr = "Interrupt request lines"; -#ifdef APIC_IO - irq_rman.rm_end = APIC_INTMAPSIZE - 1; -#else irq_rman.rm_end = 15; -#endif if (rman_init(&irq_rman) || rman_manage_region(&irq_rman, irq_rman.rm_start, irq_rman.rm_end)) @@ -186,11 +174,7 @@ nexus_probe(device_t dev) * multiple bridges. (eg: laptops with docking stations) */ drq_rman.rm_start = 0; -#ifdef PC98 - drq_rman.rm_end = 3; -#else drq_rman.rm_end = 7; -#endif drq_rman.rm_type = RMAN_ARRAY; drq_rman.rm_descr = "DMA request lines"; /* XXX drq 0 not available on some machines */ @@ -342,28 +326,11 @@ nexus_alloc_resource(device_t bus, device_t child, int type, int *rid, rman_set_bustag(rv, I386_BUS_SPACE_MEM); } else if (type == SYS_RES_IOPORT) { rman_set_bustag(rv, I386_BUS_SPACE_IO); -#ifndef PC98 rman_set_bushandle(rv, rv->r_start); -#endif } -#ifdef PC98 - if ((type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) && - i386_bus_space_handle_alloc(rv->r_bustag, rv->r_start, count, - &rv->r_bushandle) != 0) { - rman_release_resource(rv); - return 0; - } -#endif - if (needactivate) { if (bus_activate_resource(child, type, *rid, rv)) { -#ifdef PC98 - if (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) { - i386_bus_space_handle_free(rv->r_bustag, - rv->r_bushandle, rv->r_bushandle->bsh_sz); - } -#endif rman_release_resource(rv); return 0; } @@ -399,13 +366,7 @@ nexus_activate_resource(device_t bus, device_t child, int type, int rid, vaddr = (caddr_t) pmap_mapdev(paddr-poffs, psize+poffs) + poffs; } rman_set_virtual(r, vaddr); -#ifdef PC98 - /* PC-98: the type of bus_space_handle_t is the structure. 
*/ - r->r_bushandle->bsh_base = (bus_addr_t) vaddr; -#else - /* IBM-PC: the type of bus_space_handle_t is u_int */ rman_set_bushandle(r, (bus_space_handle_t) vaddr); -#endif } return (rman_activate_resource(r)); } @@ -437,12 +398,6 @@ nexus_release_resource(device_t bus, device_t child, int type, int rid, if (error) return error; } -#ifdef PC98 - if (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) { - i386_bus_space_handle_free(r->r_bustag, r->r_bushandle, - r->r_bushandle->bsh_sz); - } -#endif return (rman_release_resource(r)); } diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 037dafb..20bc3f0 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -99,10 +99,8 @@ * and to when physical maps must be made correct. */ -#include "opt_pmap.h" #include "opt_msgbuf.h" #include "opt_kstack_pages.h" -#include "opt_swtch.h" #include <sys/param.h> #include <sys/systm.h> @@ -116,9 +114,6 @@ #include <sys/user.h> #include <sys/vmmeter.h> #include <sys/sysctl.h> -#ifdef SMP -#include <sys/smp.h> -#endif #include <vm/vm.h> #include <vm/vm_param.h> @@ -135,12 +130,6 @@ #include <machine/cputypes.h> #include <machine/md_var.h> #include <machine/specialreg.h> -#if defined(SMP) || defined(APIC_IO) -#include <machine/smp.h> -#include <machine/apic.h> -#include <machine/segments.h> -#include <machine/tss.h> -#endif /* SMP || APIC_IO */ #define PMAP_KEEP_PDIRS #ifndef PMAP_SHPGPERPROC @@ -185,25 +174,15 @@ struct pmap kernel_pmap_store; LIST_HEAD(pmaplist, pmap); static struct pmaplist allpmaps; static struct mtx allpmaps_lock; -#if defined(SMP) && defined(LAZY_SWITCH) -static struct mtx lazypmap_lock; -#endif vm_paddr_t avail_start; /* PA of first available physical page */ vm_paddr_t avail_end; /* PA of last available physical page */ vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? 
*/ -static int pgeflag; /* PG_G or-in */ -static int pseflag; /* PG_PS or-in */ static int nkpt; vm_offset_t kernel_vm_end; -extern u_int32_t KERNend; - -#ifdef PAE -static uma_zone_t pdptzone; -#endif /* * Data for the pv entry allocation mechanism @@ -230,9 +209,6 @@ struct msgbuf *msgbufp = 0; static pt_entry_t *pt_crashdumpmap; static caddr_t crashdumpmap; -#ifdef SMP -extern pt_entry_t *SMPpt; -#endif static pt_entry_t *PMAP1 = 0; static pt_entry_t *PADDR1 = 0; @@ -257,19 +233,14 @@ static vm_page_t pmap_page_lookup(vm_object_t object, vm_pindex_t pindex); static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); static void *pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); -#ifdef PAE -static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); -#endif - -static pd_entry_t pdir4mb; CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); /* * Move the kernel virtual free pointer to the next - * 4MB. This is used to help improve performance - * by using a large (4MB) page for much of the kernel + * 2MB. This is used to help improve performance + * by using a large (2MB) page for much of the kernel * (.text, .data, .bss) */ static vm_offset_t @@ -277,17 +248,7 @@ pmap_kmem_choose(vm_offset_t addr) { vm_offset_t newaddr = addr; -#ifdef I686_CPU_not /* Problem seems to have gone away */ - /* Deal with un-resolved Pentium4 issues */ - if (cpu_class == CPUCLASS_686 && - strcmp(cpu_vendor, "GenuineIntel") == 0 && - (cpu_id & 0xf00) == 0xf00) - return newaddr; -#endif -#ifndef DISABLE_PSE - if (cpu_feature & CPUID_PSE) - newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); -#endif + newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); return newaddr; } @@ -333,16 +294,12 @@ pmap_bootstrap(firstaddr, loadaddr) /* * Initialize the kernel pmap (which is statically allocated). 
*/ - kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); -#ifdef PAE - kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); -#endif + kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + IdlePTD); + kernel_pmap->pm_pdp = (pdp_entry_t *) (KERNBASE + IdlePDP); + kernel_pmap->pm_pml4 = (pdp_entry_t *) (KERNBASE + IdlePML4); kernel_pmap->pm_active = -1; /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvlist); LIST_INIT(&allpmaps); -#if defined(SMP) && defined(LAZY_SWITCH) - mtx_init(&lazypmap_lock, "lazypmap", NULL, MTX_SPIN); -#endif mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); @@ -398,128 +355,9 @@ pmap_bootstrap(firstaddr, loadaddr) for (i = 0; i < NKPT; i++) PTD[i] = 0; - pgeflag = 0; -#ifndef DISABLE_PG_G - if (cpu_feature & CPUID_PGE) - pgeflag = PG_G; -#endif -#ifdef I686_CPU_not /* Problem seems to have gone away */ - /* Deal with un-resolved Pentium4 issues */ - if (cpu_class == CPUCLASS_686 && - strcmp(cpu_vendor, "GenuineIntel") == 0 && - (cpu_id & 0xf00) == 0xf00) { - printf("Warning: Pentium 4 cpu: PG_G disabled (global flag)\n"); - pgeflag = 0; - } -#endif - -/* - * Initialize the 4MB page size flag - */ - pseflag = 0; -/* - * The 4MB page version of the initial - * kernel page mapping. 
- */ - pdir4mb = 0; - -#ifndef DISABLE_PSE - if (cpu_feature & CPUID_PSE) - pseflag = PG_PS; -#endif -#ifdef I686_CPU_not /* Problem seems to have gone away */ - /* Deal with un-resolved Pentium4 issues */ - if (cpu_class == CPUCLASS_686 && - strcmp(cpu_vendor, "GenuineIntel") == 0 && - (cpu_id & 0xf00) == 0xf00) { - printf("Warning: Pentium 4 cpu: PG_PS disabled (4MB pages)\n"); - pseflag = 0; - } -#endif -#ifndef DISABLE_PSE - if (pseflag) { - pd_entry_t ptditmp; - /* - * Note that we have enabled PSE mode - */ - ptditmp = *(PTmap + i386_btop(KERNBASE)); - ptditmp &= ~(NBPDR - 1); - ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag; - pdir4mb = ptditmp; - } -#endif -#ifndef SMP - /* - * Turn on PGE/PSE. SMP does this later on since the - * 4K page tables are required for AP boot (for now). - * XXX fixme. - */ - pmap_set_opt(); -#endif -#ifdef SMP - if (cpu_apic_address == 0) - panic("pmap_bootstrap: no local apic! (non-SMP hardware?)"); - - /* local apic is mapped on last page */ - SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag | - (cpu_apic_address & PG_FRAME)); -#endif invltlb(); } -/* - * Enable 4MB page mode for MP startup. Turn on PG_G support. - * BSP will run this after all the AP's have started up. - */ -void -pmap_set_opt(void) -{ - pt_entry_t *pte; - vm_offset_t va, endva; - - if (pgeflag && (cpu_feature & CPUID_PGE)) { - load_cr4(rcr4() | CR4_PGE); - invltlb(); /* Insurance */ - } -#ifndef DISABLE_PSE - if (pseflag && (cpu_feature & CPUID_PSE)) { - load_cr4(rcr4() | CR4_PSE); - invltlb(); /* Insurance */ - } -#endif - if (PCPU_GET(cpuid) == 0) { -#ifndef DISABLE_PSE - if (pdir4mb) { - kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb; - invltlb(); /* Insurance */ - } -#endif - if (pgeflag) { - /* Turn on PG_G for text, data, bss pages. 
*/ - va = (vm_offset_t)btext; -#ifndef DISABLE_PSE - if (pseflag && (cpu_feature & CPUID_PSE)) { - if (va < KERNBASE + (1 << PDRSHIFT)) - va = KERNBASE + (1 << PDRSHIFT); - } -#endif - endva = KERNBASE + KERNend; - while (va < endva) { - pte = vtopte(va); - if (*pte) - *pte |= pgeflag; - va += PAGE_SIZE; - } - invltlb(); /* Insurance */ - } - /* - * We do not need to broadcast the invltlb here, because - * each AP does it the moment it is released from the boot - * lock. See ap_init(). - */ - } -} - static void * pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) { @@ -527,15 +365,6 @@ pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) return (void *)kmem_alloc(kernel_map, bytes); } -#ifdef PAE -static void * -pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) -{ - *flags = UMA_SLAB_PRIV; - return (contigmalloc(PAGE_SIZE, NULL, 0, 0x0ULL, 0xffffffffULL, 1, 0)); -} -#endif - /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap @@ -574,12 +403,6 @@ pmap_init(phys_start, phys_end) uma_zone_set_allocf(pvzone, pmap_pv_allocf); uma_prealloc(pvzone, initial_pvs); -#ifdef PAE - pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL, - NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, 0); - uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf); -#endif - /* * Now it is safe to enable pv_table recording. */ @@ -642,125 +465,8 @@ pmap_track_modified(vm_offset_t va) return 0; } -#ifdef I386_CPU -/* - * i386 only has "invalidate everything" and no SMP to worry about. 
- */ -PMAP_INLINE void -pmap_invalidate_page(pmap_t pmap, vm_offset_t va) -{ - - if (pmap == kernel_pmap || pmap->pm_active) - invltlb(); -} - -PMAP_INLINE void -pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) -{ - - if (pmap == kernel_pmap || pmap->pm_active) - invltlb(); -} - -PMAP_INLINE void -pmap_invalidate_all(pmap_t pmap) -{ - - if (pmap == kernel_pmap || pmap->pm_active) - invltlb(); -} -#else /* !I386_CPU */ -#ifdef SMP /* - * For SMP, these functions have to use the IPI mechanism for coherence. - */ -void -pmap_invalidate_page(pmap_t pmap, vm_offset_t va) -{ - u_int cpumask; - u_int other_cpus; - - critical_enter(); - /* - * We need to disable interrupt preemption but MUST NOT have - * interrupts disabled here. - * XXX we may need to hold schedlock to get a coherent pm_active - */ - if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { - invlpg(va); - smp_invlpg(va); - } else { - cpumask = PCPU_GET(cpumask); - other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) - invlpg(va); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg(pmap->pm_active & other_cpus, va); - } - critical_exit(); -} - -void -pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) -{ - u_int cpumask; - u_int other_cpus; - vm_offset_t addr; - - critical_enter(); - /* - * We need to disable interrupt preemption but MUST NOT have - * interrupts disabled here. 
- * XXX we may need to hold schedlock to get a coherent pm_active - */ - if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { - for (addr = sva; addr < eva; addr += PAGE_SIZE) - invlpg(addr); - smp_invlpg_range(sva, eva); - } else { - cpumask = PCPU_GET(cpumask); - other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) - for (addr = sva; addr < eva; addr += PAGE_SIZE) - invlpg(addr); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg_range(pmap->pm_active & other_cpus, - sva, eva); - } - critical_exit(); -} - -void -pmap_invalidate_all(pmap_t pmap) -{ - u_int cpumask; - u_int other_cpus; - -#ifdef SWTCH_OPTIM_STATS - tlb_flush_count++; -#endif - critical_enter(); - /* - * We need to disable interrupt preemption but MUST NOT have - * interrupts disabled here. - * XXX we may need to hold schedlock to get a coherent pm_active - */ - if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { - invltlb(); - smp_invltlb(); - } else { - cpumask = PCPU_GET(cpumask); - other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) - invltlb(); - if (pmap->pm_active & other_cpus) - smp_masked_invltlb(pmap->pm_active & other_cpus); - } - critical_exit(); -} -#else /* !SMP */ -/* - * Normal, non-SMP, 486+ invalidation functions. + * Normal invalidation functions. * We inline these within pmap.c for speed. */ PMAP_INLINE void @@ -788,8 +494,6 @@ pmap_invalidate_all(pmap_t pmap) if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } -#endif /* !SMP */ -#endif /* !I386_CPU */ /* * Are we current address space or kernel? 
@@ -828,7 +532,7 @@ pmap_pte_quick(pmap, va) *PMAP1 = newpf | PG_RW | PG_V; pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR1); } - return PADDR1 + (i386_btop(va) & (NPTEPG - 1)); + return PADDR1 + (amd64_btop(va) & (NPTEPG - 1)); } return (0); } @@ -878,7 +582,7 @@ pmap_kenter(vm_offset_t va, vm_paddr_t pa) pt_entry_t *pte; pte = vtopte(va); - pte_store(pte, pa | PG_RW | PG_V | pgeflag); + pte_store(pte, pa | PG_RW | PG_V | PG_G); } /* @@ -1213,7 +917,7 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) * Do an invltlb to make the invalidated mapping * take effect immediately. */ - pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex); + pteva = VM_MAXUSER_ADDRESS + amd64_ptob(m->pindex); pmap_invalidate_page(pmap, pteva); } @@ -1272,10 +976,9 @@ pmap_pinit0(pmap) struct pmap *pmap; { - pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD); -#ifdef PAE - pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); -#endif + pmap->pm_pdir = (pd_entry_t *)(KERNBASE + IdlePTD); + pmap->pm_pdp = (pdp_entry_t *)(KERNBASE + IdlePDP); + pmap->pm_pml4 = (pml4_entry_t *)(KERNBASE + IdlePML4); pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1293,6 +996,8 @@ pmap_pinit(pmap) register struct pmap *pmap; { vm_page_t ptdpg[NPGPTD]; + vm_page_t pdppg; + vm_page_t pml4pg; vm_paddr_t pa; int i; @@ -1303,14 +1008,10 @@ pmap_pinit(pmap) if (pmap->pm_pdir == NULL) { pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map, NBPTD); -#ifdef PAE - pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); - KASSERT(((vm_offset_t)pmap->pm_pdpt & - ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, - ("pmap_pinit: pdpt misaligned")); - KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), - ("pmap_pinit: pdpt above 4g")); -#endif + pmap->pm_pdp = (pdp_entry_t *)kmem_alloc_pageable(kernel_map, + PAGE_SIZE); + pmap->pm_pml4 = (pml4_entry_t *)kmem_alloc_pageable(kernel_map, + PAGE_SIZE); } /* @@ -1318,7 +1019,7 @@ 
pmap_pinit(pmap) */ if (pmap->pm_pteobj == NULL) pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, PTDPTDI + - NPGPTD); + NPGPTD + 2); /* * allocate the page directory page(s) @@ -1333,31 +1034,51 @@ pmap_pinit(pmap) vm_page_unlock_queues(); } + pml4pg = vm_page_grab(pmap->pm_pteobj, PTDPTDI + NPGPTD, + VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED | VM_ALLOC_ZERO); + vm_page_lock_queues(); + vm_page_flag_clear(pml4pg, PG_BUSY); + pml4pg->valid = VM_PAGE_BITS_ALL; + vm_page_unlock_queues(); + + pdppg = vm_page_grab(pmap->pm_pteobj, PTDPTDI + NPGPTD + 1, + VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED | VM_ALLOC_ZERO); + vm_page_lock_queues(); + vm_page_flag_clear(pdppg, PG_BUSY); + pdppg->valid = VM_PAGE_BITS_ALL; + vm_page_unlock_queues(); + pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); + pmap_qenter((vm_offset_t)pmap->pm_pdp, &pdppg, 1); + pmap_qenter((vm_offset_t)pmap->pm_pml4, &pml4pg, 1); for (i = 0; i < NPGPTD; i++) { if ((ptdpg[i]->flags & PG_ZERO) == 0) bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE); } + if ((pdppg->flags & PG_ZERO) == 0) + bzero(pmap->pm_pdp, PAGE_SIZE); + if ((pml4pg->flags & PG_ZERO) == 0) + bzero(pmap->pm_pml4, PAGE_SIZE); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); /* Wire in kernel global address entries. 
*/ - /* XXX copies current process, does not fill in MPPTDI */ bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t)); -#ifdef SMP - pmap->pm_pdir[MPPTDI] = PTD[MPPTDI]; -#endif /* install self-referential address mapping entry(s) */ for (i = 0; i < NPGPTD; i++) { pa = VM_PAGE_TO_PHYS(ptdpg[i]); pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M; -#ifdef PAE - pmap->pm_pdpt[i] = pa | PG_V; -#endif } + /* Connect ptd pages to pdp */ + for (i = 0; i < NPGPTD; i++) { + pa = VM_PAGE_TO_PHYS(ptdpg[i]); + pmap->pm_pdp[i] = pa | PG_RW | PG_V | PG_U; + } + /* connect pdp to pml4 */ + pmap->pm_pml4[0] = VM_PAGE_TO_PHYS(pdppg) | PG_RW | PG_V | PG_U; pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvlist); @@ -1422,7 +1143,7 @@ _pmap_allocpte(pmap, ptepindex) */ if ((m->flags & PG_ZERO) == 0) { if (pmap_is_current(pmap)) { - pteva = VM_MAXUSER_ADDRESS + i386_ptob(ptepindex); + pteva = VM_MAXUSER_ADDRESS + amd64_ptob(ptepindex); bzero((caddr_t) pteva, PAGE_SIZE); } else { pmap_zero_page(m); @@ -1455,7 +1176,7 @@ pmap_allocpte(pmap_t pmap, vm_offset_t va) ptepa = pmap->pm_pdir[ptepindex]; /* - * This supports switching from a 4MB page to a + * This supports switching from a 2MB page to a * normal 4K page. */ if (ptepa & PG_PS) { @@ -1493,121 +1214,6 @@ pmap_allocpte(pmap_t pmap, vm_offset_t va) * Pmap allocation/deallocation routines. ***************************************************/ -#ifdef LAZY_SWITCH -#ifdef SMP -/* - * Deal with a SMP shootdown of other users of the pmap that we are - * trying to dispose of. This can be a bit hairy. 
- */ -static u_int *lazymask; -static u_int lazyptd; -static volatile u_int lazywait; - -void pmap_lazyfix_action(void); - -void -pmap_lazyfix_action(void) -{ - u_int mymask = PCPU_GET(cpumask); - - if (rcr3() == lazyptd) { - load_cr3(PCPU_GET(curpcb)->pcb_cr3); -#ifdef SWTCH_OPTIM_STATS - atomic_add_int(&lazy_flush_smpfixup, 1); - } else { - if (*lazymask & mymask) - lazy_flush_smpbadcr3++; - else - lazy_flush_smpmiss++; -#endif - } - atomic_clear_int(lazymask, mymask); - atomic_store_rel_int(&lazywait, 1); -} - -static void -pmap_lazyfix_self(u_int mymask) -{ - - if (rcr3() == lazyptd) { - load_cr3(PCPU_GET(curpcb)->pcb_cr3); -#ifdef SWTCH_OPTIM_STATS - lazy_flush_fixup++; - } else { - if (*lazymask & mymask) - lazy_flush_smpbadcr3++; - else - lazy_flush_smpmiss++; -#endif - } - atomic_clear_int(lazymask, mymask); -} - - -static void -pmap_lazyfix(pmap_t pmap) -{ - u_int mymask = PCPU_GET(cpumask); - u_int mask; - register u_int spins; - - while ((mask = pmap->pm_active) != 0) { - spins = 50000000; - mask = mask & -mask; /* Find least significant set bit */ - mtx_lock_spin(&lazypmap_lock); -#ifdef PAE - lazyptd = vtophys(pmap->pm_pdpt); -#else - lazyptd = vtophys(pmap->pm_pdir); -#endif - if (mask == mymask) { - lazymask = &pmap->pm_active; - pmap_lazyfix_self(mymask); - } else { - atomic_store_rel_int((u_int *)&lazymask, - (u_int)&pmap->pm_active); - atomic_store_rel_int(&lazywait, 0); - ipi_selected(mask, IPI_LAZYPMAP); - while (lazywait == 0) { - ia32_pause(); - if (--spins == 0) - break; - } -#ifdef SWTCH_OPTIM_STATS - lazy_flush_smpipi++; -#endif - } - mtx_unlock_spin(&lazypmap_lock); - if (spins == 0) - printf("pmap_lazyfix: spun for 50000000\n"); - } -} - -#else /* SMP */ - -/* - * Cleaning up on uniprocessor is easy. For various reasons, we're - * unlikely to have to even execute this code, including the fact - * that the cleanup is deferred until the parent does a wait(2), which - * means that another userland process has run. 
- */ -static void -pmap_lazyfix(pmap_t pmap) -{ - u_int cr3; - - cr3 = vtophys(pmap->pm_pdir); - if (cr3 == rcr3()) { - load_cr3(PCPU_GET(curpcb)->pcb_cr3); - pmap->pm_active &= ~(PCPU_GET(cpumask)); -#ifdef SWTCH_OPTIM_STATS - lazy_flush_fixup++; -#endif - } -} -#endif /* SMP */ -#endif /* LAZY_SWITCH */ - /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. @@ -1629,33 +1235,33 @@ pmap_release(pmap_t pmap) ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); -#ifdef LAZY_SWITCH - pmap_lazyfix(pmap); -#endif mtx_lock_spin(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) * sizeof(*pmap->pm_pdir)); -#ifdef SMP - pmap->pm_pdir[MPPTDI] = 0; -#endif pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD); vm_page_lock_queues(); for (i = 0; i < NPGPTD; i++) { m = TAILQ_FIRST(&object->memq); -#ifdef PAE - KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME), + KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdp[i] & PG_FRAME), ("pmap_release: got wrong ptd page")); -#endif m->wire_count--; atomic_subtract_int(&cnt.v_wire_count, 1); vm_page_busy(m); vm_page_free_zero(m); } + /* now free pdp and pml4 */ + for (i = 0; i < 2; i++) { + m = TAILQ_FIRST(&object->memq); + m->wire_count--; + atomic_subtract_int(&cnt.v_wire_count, 1); + vm_page_busy(m); + vm_page_free(m); + } KASSERT(TAILQ_EMPTY(&object->memq), ("pmap_release: leaking page table pages")); vm_page_unlock_queues(); @@ -2200,7 +1806,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, opa = origpte & PG_FRAME; if (origpte & PG_PS) - panic("pmap_enter: attempted pmap_enter on 4MB page"); + panic("pmap_enter: attempted pmap_enter on 2MB page"); /* * Mapping has not changed, must be protection or wiring change. 
@@ -2295,7 +1901,7 @@ validate: if (va < VM_MAXUSER_ADDRESS) newpte |= PG_U; if (pmap == kernel_pmap) - newpte |= pgeflag; + newpte |= PG_G; /* * if the mapping or permission bits are different, we need @@ -2353,7 +1959,7 @@ retry: */ if (ptepa) { if (ptepa & PG_PS) - panic("pmap_enter_quick: unexpected mapping into 4MB page"); + panic("pmap_enter_quick: unexpected mapping into 2MB page"); if (pmap->pm_pteobj->root && (pmap->pm_pteobj->root->pindex == ptepindex)) { mpte = pmap->pm_pteobj->root; @@ -2424,11 +2030,7 @@ pmap_kenter_temporary(vm_offset_t pa, int i) va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); pmap_kenter(va, pa); -#ifndef I386_CPU invlpg(va); -#else - invltlb(); -#endif return ((void *)crashdumpmap); } @@ -2455,7 +2057,7 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, * processor address space. Note that some shortcuts * are taken, but the code works. */ - if (pseflag && (object->type == OBJT_DEVICE) && + if ((object->type == OBJT_DEVICE) && ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) { int i; vm_page_t m[1]; @@ -2510,7 +2112,7 @@ retry: return; } - psize = i386_btop(size); + psize = amd64_btop(size); if ((object->type != OBJT_VNODE) || ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) && @@ -2558,7 +2160,7 @@ retry: vm_page_busy(p); vm_page_unlock_queues(); mpte = pmap_enter_quick(pmap, - addr + i386_ptob(tmpidx), p, mpte); + addr + amd64_ptob(tmpidx), p, mpte); vm_page_lock_queues(); vm_page_wakeup(p); } @@ -2801,38 +2403,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, } } -#ifdef SMP - -/* - * pmap_zpi_switchin*() - * - * These functions allow us to avoid doing IPIs alltogether in certain - * temporary page-mapping situations (page zeroing). Instead to deal - * with being preempted and moved onto a different cpu we invalidate - * the page when the scheduler switches us in. This does not occur - * very often so we remain relatively optimal with very little effort. 
- */ -static void -pmap_zpi_switchin12(void) -{ - invlpg((u_int)CADDR1); - invlpg((u_int)CADDR2); -} - -static void -pmap_zpi_switchin2(void) -{ - invlpg((u_int)CADDR2); -} - -static void -pmap_zpi_switchin3(void) -{ - invlpg((u_int)CADDR3); -} - -#endif - /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. @@ -2845,23 +2415,8 @@ pmap_zero_page(vm_page_t m) if (*CMAP2) panic("pmap_zero_page: CMAP2 busy"); *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; -#ifdef I386_CPU - invltlb(); -#else -#ifdef SMP - curthread->td_switchin = pmap_zpi_switchin2; -#endif - invlpg((u_int)CADDR2); -#endif -#if defined(I686_CPU) - if (cpu_class == CPUCLASS_686) - i686_pagezero(CADDR2); - else -#endif - bzero(CADDR2, PAGE_SIZE); -#ifdef SMP - curthread->td_switchin = NULL; -#endif + invlpg((u_long)CADDR2); + pagezero(CADDR2); *CMAP2 = 0; mtx_unlock(&CMAPCADDR12_lock); } @@ -2880,23 +2435,11 @@ pmap_zero_page_area(vm_page_t m, int off, int size) if (*CMAP2) panic("pmap_zero_page: CMAP2 busy"); *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; -#ifdef I386_CPU - invltlb(); -#else -#ifdef SMP - curthread->td_switchin = pmap_zpi_switchin2; -#endif - invlpg((u_int)CADDR2); -#endif -#if defined(I686_CPU) - if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE) - i686_pagezero(CADDR2); + invlpg((u_long)CADDR2); + if (off == 0 && size == PAGE_SIZE) + pagezero(CADDR2); else -#endif bzero((char *)CADDR2 + off, size); -#ifdef SMP - curthread->td_switchin = NULL; -#endif *CMAP2 = 0; mtx_unlock(&CMAPCADDR12_lock); } @@ -2914,23 +2457,8 @@ pmap_zero_page_idle(vm_page_t m) if (*CMAP3) panic("pmap_zero_page: CMAP3 busy"); *CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; -#ifdef I386_CPU - invltlb(); -#else -#ifdef SMP - curthread->td_switchin = pmap_zpi_switchin3; -#endif - invlpg((u_int)CADDR3); -#endif -#if defined(I686_CPU) - if (cpu_class == CPUCLASS_686) - i686_pagezero(CADDR3); - else 
-#endif - bzero(CADDR3, PAGE_SIZE); -#ifdef SMP - curthread->td_switchin = NULL; -#endif + invlpg((u_long)CADDR3); + pagezero(CADDR3); *CMAP3 = 0; } @@ -2951,19 +2479,9 @@ pmap_copy_page(vm_page_t src, vm_page_t dst) panic("pmap_copy_page: CMAP2 busy"); *CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A; *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M; -#ifdef I386_CPU - invltlb(); -#else -#ifdef SMP - curthread->td_switchin = pmap_zpi_switchin12; -#endif - invlpg((u_int)CADDR1); - invlpg((u_int)CADDR2); -#endif + invlpg((u_long)CADDR1); + invlpg((u_long)CADDR2); bcopy(CADDR1, CADDR2, PAGE_SIZE); -#ifdef SMP - curthread->td_switchin = NULL; -#endif *CMAP1 = 0; *CMAP2 = 0; mtx_unlock(&CMAPCADDR12_lock); @@ -3443,20 +2961,12 @@ pmap_activate(struct thread *td) { struct proc *p = td->td_proc; pmap_t pmap; - u_int32_t cr3; + u_int64_t cr3; critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); -#if defined(SMP) - atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); -#else - pmap->pm_active |= 1; -#endif -#ifdef PAE - cr3 = vtophys(pmap->pm_pdpt); -#else - cr3 = vtophys(pmap->pm_pdir); -#endif + pmap->pm_active |= PCPU_GET(cpumask); + cr3 = vtophys(pmap->pm_pml4); /* XXXKSE this is wrong. * pmap_activate is for the current thread on the current cpu */ @@ -3470,9 +2980,6 @@ pmap_activate(struct thread *td) td->td_pcb->pcb_cr3 = cr3; } load_cr3(cr3); -#ifdef SWTCH_OPTIM_STATS - tlb_flush_count++; -#endif critical_exit(); } diff --git a/sys/amd64/amd64/sigtramp.S b/sys/amd64/amd64/sigtramp.S new file mode 100644 index 0000000..a05ea85 --- /dev/null +++ b/sys/amd64/amd64/sigtramp.S @@ -0,0 +1,56 @@ +/*- + * Copyright (c) 2003 Peter Wemm <peter@freeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include <sys/syscall.h> + +#include <machine/asmacros.h> + +#include "assym.s" + + .text +/********************************************************************** + * + * Signal trampoline, copied to top of user stack + * + */ +NON_GPROF_ENTRY(sigcode) + call *SIGF_HANDLER(%rsp) /* call signal handler */ + lea SIGF_UC(%rsp),%rdi /* get ucontext_t */ + pushq $0 /* junk to fake return addr. 
*/ + movq $SYS_sigreturn,%rax + syscall /* enter kernel with args */ +0: hlt /* trap priviliged instruction */ + jmp 0b + + ALIGN_TEXT +esigcode: + + .data + .globl szsigcode +szsigcode: + .long esigcode-sigcode diff --git a/sys/amd64/amd64/sigtramp.s b/sys/amd64/amd64/sigtramp.s new file mode 100644 index 0000000..a05ea85 --- /dev/null +++ b/sys/amd64/amd64/sigtramp.s @@ -0,0 +1,56 @@ +/*- + * Copyright (c) 2003 Peter Wemm <peter@freeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include <sys/syscall.h> + +#include <machine/asmacros.h> + +#include "assym.s" + + .text +/********************************************************************** + * + * Signal trampoline, copied to top of user stack + * + */ +NON_GPROF_ENTRY(sigcode) + call *SIGF_HANDLER(%rsp) /* call signal handler */ + lea SIGF_UC(%rsp),%rdi /* get ucontext_t */ + pushq $0 /* junk to fake return addr. */ + movq $SYS_sigreturn,%rax + syscall /* enter kernel with args */ +0: hlt /* trap priviliged instruction */ + jmp 0b + + ALIGN_TEXT +esigcode: + + .data + .globl szsigcode +szsigcode: + .long esigcode-sigcode diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S index f0f99e4..06a3c4e 100644 --- a/sys/amd64/amd64/support.S +++ b/sys/amd64/amd64/support.S @@ -33,8 +33,6 @@ * $FreeBSD$ */ -#include "opt_npx.h" - #include <machine/asmacros.h> #include <machine/cputypes.h> #include <machine/pmap.h> @@ -42,632 +40,20 @@ #include "assym.s" -#define IDXSHIFT 10 - - .data - .globl bcopy_vector -bcopy_vector: - .long generic_bcopy - .globl bzero_vector -bzero_vector: - .long generic_bzero - .globl copyin_vector -copyin_vector: - .long generic_copyin - .globl copyout_vector -copyout_vector: - .long generic_copyout -#if defined(I586_CPU) && defined(DEV_NPX) -kernel_fpu_lock: - .byte 0xfe - .space 3 -#endif .text -/* - * bcopy family - * void bzero(void *buf, u_int len) - */ - -ENTRY(bzero) - MEXITCOUNT - jmp *bzero_vector - -ENTRY(generic_bzero) - pushl %edi - movl 8(%esp),%edi - movl 12(%esp),%ecx - xorl %eax,%eax - shrl $2,%ecx - cld - rep - stosl - movl 12(%esp),%ecx - andl $3,%ecx - rep - stosb - popl %edi - ret - -#ifdef I486_CPU -ENTRY(i486_bzero) - movl 4(%esp),%edx - movl 8(%esp),%ecx - xorl %eax,%eax -/* - * do 64 byte chunks first - * - * XXX this is probably over-unrolled at least for DX2's - */ -2: - cmpl $64,%ecx - jb 3f - movl %eax,(%edx) - movl %eax,4(%edx) - movl %eax,8(%edx) - movl %eax,12(%edx) - movl %eax,16(%edx) - movl 
%eax,20(%edx) - movl %eax,24(%edx) - movl %eax,28(%edx) - movl %eax,32(%edx) - movl %eax,36(%edx) - movl %eax,40(%edx) - movl %eax,44(%edx) - movl %eax,48(%edx) - movl %eax,52(%edx) - movl %eax,56(%edx) - movl %eax,60(%edx) - addl $64,%edx - subl $64,%ecx - jnz 2b - ret - -/* - * do 16 byte chunks - */ - SUPERALIGN_TEXT -3: - cmpl $16,%ecx - jb 4f - movl %eax,(%edx) - movl %eax,4(%edx) - movl %eax,8(%edx) - movl %eax,12(%edx) - addl $16,%edx - subl $16,%ecx - jnz 3b - ret - -/* - * do 4 byte chunks - */ - SUPERALIGN_TEXT -4: - cmpl $4,%ecx - jb 5f - movl %eax,(%edx) - addl $4,%edx - subl $4,%ecx - jnz 4b - ret - -/* - * do 1 byte chunks - * a jump table seems to be faster than a loop or more range reductions - * - * XXX need a const section for non-text - */ - .data -jtab: - .long do0 - .long do1 - .long do2 - .long do3 - - .text - SUPERALIGN_TEXT -5: - jmp *jtab(,%ecx,4) - - SUPERALIGN_TEXT -do3: - movw %ax,(%edx) - movb %al,2(%edx) - ret - - SUPERALIGN_TEXT -do2: - movw %ax,(%edx) - ret - - SUPERALIGN_TEXT -do1: - movb %al,(%edx) - ret - - SUPERALIGN_TEXT -do0: - ret -#endif - -#if defined(I586_CPU) && defined(DEV_NPX) -ENTRY(i586_bzero) - movl 4(%esp),%edx - movl 8(%esp),%ecx - - /* - * The FPU register method is twice as fast as the integer register - * method unless the target is in the L1 cache and we pre-allocate a - * cache line for it (then the integer register method is 4-5 times - * faster). However, we never pre-allocate cache lines, since that - * would make the integer method 25% or more slower for the common - * case when the target isn't in either the L1 cache or the L2 cache. - * Thus we normally use the FPU register method unless the overhead - * would be too large. - */ - cmpl $256,%ecx /* empirical; clts, fninit, smsw cost a lot */ - jb intreg_i586_bzero - - /* - * The FPU registers may belong to an application or to fastmove() - * or to another invocation of bcopy() or ourself in a higher level - * interrupt or trap handler. 
Preserving the registers is - * complicated since we avoid it if possible at all levels. We - * want to localize the complications even when that increases them. - * Here the extra work involves preserving CR0_TS in TS. - * `fpcurthread != NULL' is supposed to be the condition that all the - * FPU resources belong to an application, but fpcurthread and CR0_TS - * aren't set atomically enough for this condition to work in - * interrupt handlers. - * - * Case 1: FPU registers belong to the application: we must preserve - * the registers if we use them, so we only use the FPU register - * method if the target size is large enough to amortize the extra - * overhead for preserving them. CR0_TS must be preserved although - * it is very likely to end up as set. - * - * Case 2: FPU registers belong to fastmove(): fastmove() currently - * makes the registers look like they belong to an application so - * that cpu_switch() and savectx() don't have to know about it, so - * this case reduces to case 1. - * - * Case 3: FPU registers belong to the kernel: don't use the FPU - * register method. This case is unlikely, and supporting it would - * be more complicated and might take too much stack. - * - * Case 4: FPU registers don't belong to anyone: the FPU registers - * don't need to be preserved, so we always use the FPU register - * method. CR0_TS must be preserved although it is very likely to - * always end up as clear. - */ - cmpl $0,PCPU(FPCURTHREAD) - je i586_bz1 - - /* - * XXX don't use the FPU for cases 1 and 2, since preemptive - * scheduling of ithreads broke these cases. Note that we can - * no longer get here from an interrupt handler, since the - * context sitch to the interrupt handler will have saved the - * FPU state. 
- */ - jmp intreg_i586_bzero - - cmpl $256+184,%ecx /* empirical; not quite 2*108 more */ - jb intreg_i586_bzero - sarb $1,kernel_fpu_lock - jc intreg_i586_bzero - smsw %ax - clts - subl $108,%esp - fnsave 0(%esp) - jmp i586_bz2 - -i586_bz1: - sarb $1,kernel_fpu_lock - jc intreg_i586_bzero - smsw %ax - clts - fninit /* XXX should avoid needing this */ -i586_bz2: - fldz - - /* - * Align to an 8 byte boundary (misalignment in the main loop would - * cost a factor of >= 2). Avoid jumps (at little cost if it is - * already aligned) by always zeroing 8 bytes and using the part up - * to the _next_ alignment position. - */ - fstl 0(%edx) - addl %edx,%ecx /* part of %ecx -= new_%edx - %edx */ - addl $8,%edx - andl $~7,%edx - subl %edx,%ecx - - /* - * Similarly align `len' to a multiple of 8. - */ - fstl -8(%edx,%ecx) - decl %ecx - andl $~7,%ecx - - /* - * This wouldn't be any faster if it were unrolled, since the loop - * control instructions are much faster than the fstl and/or done - * in parallel with it so their overhead is insignificant. - */ -fpureg_i586_bzero_loop: - fstl 0(%edx) - addl $8,%edx - subl $8,%ecx - cmpl $8,%ecx - jae fpureg_i586_bzero_loop - - cmpl $0,PCPU(FPCURTHREAD) - je i586_bz3 - - /* XXX check that the condition for cases 1-2 stayed false. */ -i586_bzero_oops: - int $3 - jmp i586_bzero_oops - - frstor 0(%esp) - addl $108,%esp - lmsw %ax - movb $0xfe,kernel_fpu_lock - ret - -i586_bz3: - fstp %st(0) - lmsw %ax - movb $0xfe,kernel_fpu_lock - ret - -intreg_i586_bzero: - /* - * `rep stos' seems to be the best method in practice for small - * counts. Fancy methods usually take too long to start up due - * to cache and BTB misses. 
- */ - pushl %edi - movl %edx,%edi - xorl %eax,%eax - shrl $2,%ecx - cld - rep - stosl - movl 12(%esp),%ecx - andl $3,%ecx - jne 1f - popl %edi - ret - -1: - rep - stosb - popl %edi - ret -#endif /* I586_CPU && defined(DEV_NPX) */ - -ENTRY(i686_pagezero) - pushl %edi - pushl %ebx - - movl 12(%esp), %edi - movl $1024, %ecx - cld - - ALIGN_TEXT -1: - xorl %eax, %eax - repe - scasl - jnz 2f - - popl %ebx - popl %edi - ret - - ALIGN_TEXT - -2: - incl %ecx - subl $4, %edi - - movl %ecx, %edx - cmpl $16, %ecx - - jge 3f - - movl %edi, %ebx - andl $0x3f, %ebx - shrl %ebx - shrl %ebx - movl $16, %ecx - subl %ebx, %ecx - -3: - subl %ecx, %edx - rep - stosl - - movl %edx, %ecx - testl %edx, %edx - jnz 1b - - popl %ebx - popl %edi - ret - -/* fillw(pat, base, cnt) */ +/* fillw(pat, base, cnt) */ +/* %rdi,%rsi, %rdx */ ENTRY(fillw) - pushl %edi - movl 8(%esp),%eax - movl 12(%esp),%edi - movl 16(%esp),%ecx + movq %rdi,%rax + movq %rsi,%rdi + movq %rdx,%rcx cld rep stosw - popl %edi - ret - -ENTRY(bcopyb) - pushl %esi - pushl %edi - movl 12(%esp),%esi - movl 16(%esp),%edi - movl 20(%esp),%ecx - movl %edi,%eax - subl %esi,%eax - cmpl %ecx,%eax /* overlapping && src < dst? */ - jb 1f - cld /* nope, copy forwards */ - rep - movsb - popl %edi - popl %esi - ret - - ALIGN_TEXT -1: - addl %ecx,%edi /* copy backwards. */ - addl %ecx,%esi - decl %edi - decl %esi - std - rep - movsb - popl %edi - popl %esi - cld ret -ENTRY(bcopy) - MEXITCOUNT - jmp *bcopy_vector - -/* - * generic_bcopy(src, dst, cnt) - * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 - */ -ENTRY(generic_bcopy) - pushl %esi - pushl %edi - movl 12(%esp),%esi - movl 16(%esp),%edi - movl 20(%esp),%ecx - - movl %edi,%eax - subl %esi,%eax - cmpl %ecx,%eax /* overlapping && src < dst? */ - jb 1f - - shrl $2,%ecx /* copy by 32-bit words */ - cld /* nope, copy forwards */ - rep - movsl - movl 20(%esp),%ecx - andl $3,%ecx /* any bytes left? 
*/ - rep - movsb - popl %edi - popl %esi - ret - - ALIGN_TEXT -1: - addl %ecx,%edi /* copy backwards */ - addl %ecx,%esi - decl %edi - decl %esi - andl $3,%ecx /* any fractional bytes? */ - std - rep - movsb - movl 20(%esp),%ecx /* copy remainder by 32-bit words */ - shrl $2,%ecx - subl $3,%esi - subl $3,%edi - rep - movsl - popl %edi - popl %esi - cld - ret - -#if defined(I586_CPU) && defined(DEV_NPX) -ENTRY(i586_bcopy) - pushl %esi - pushl %edi - movl 12(%esp),%esi - movl 16(%esp),%edi - movl 20(%esp),%ecx - - movl %edi,%eax - subl %esi,%eax - cmpl %ecx,%eax /* overlapping && src < dst? */ - jb 1f - - cmpl $1024,%ecx - jb small_i586_bcopy - - sarb $1,kernel_fpu_lock - jc small_i586_bcopy - cmpl $0,PCPU(FPCURTHREAD) - je i586_bc1 - - /* XXX turn off handling of cases 1-2, as above. */ - movb $0xfe,kernel_fpu_lock - jmp small_i586_bcopy - - smsw %dx - clts - subl $108,%esp - fnsave 0(%esp) - jmp 4f - -i586_bc1: - smsw %dx - clts - fninit /* XXX should avoid needing this */ - - ALIGN_TEXT -4: - pushl %ecx -#define DCACHE_SIZE 8192 - cmpl $(DCACHE_SIZE-512)/2,%ecx - jbe 2f - movl $(DCACHE_SIZE-512)/2,%ecx -2: - subl %ecx,0(%esp) - cmpl $256,%ecx - jb 5f /* XXX should prefetch if %ecx >= 32 */ - pushl %esi - pushl %ecx - ALIGN_TEXT -3: - movl 0(%esi),%eax - movl 32(%esi),%eax - movl 64(%esi),%eax - movl 96(%esi),%eax - movl 128(%esi),%eax - movl 160(%esi),%eax - movl 192(%esi),%eax - movl 224(%esi),%eax - addl $256,%esi - subl $256,%ecx - cmpl $256,%ecx - jae 3b - popl %ecx - popl %esi -5: - ALIGN_TEXT -large_i586_bcopy_loop: - fildq 0(%esi) - fildq 8(%esi) - fildq 16(%esi) - fildq 24(%esi) - fildq 32(%esi) - fildq 40(%esi) - fildq 48(%esi) - fildq 56(%esi) - fistpq 56(%edi) - fistpq 48(%edi) - fistpq 40(%edi) - fistpq 32(%edi) - fistpq 24(%edi) - fistpq 16(%edi) - fistpq 8(%edi) - fistpq 0(%edi) - addl $64,%esi - addl $64,%edi - subl $64,%ecx - cmpl $64,%ecx - jae large_i586_bcopy_loop - popl %eax - addl %eax,%ecx - cmpl $64,%ecx - jae 4b - - cmpl 
$0,PCPU(FPCURTHREAD) - je i586_bc2 - - /* XXX check that the condition for cases 1-2 stayed false. */ -i586_bcopy_oops: - int $3 - jmp i586_bcopy_oops - - frstor 0(%esp) - addl $108,%esp -i586_bc2: - lmsw %dx - movb $0xfe,kernel_fpu_lock - -/* - * This is a duplicate of the main part of generic_bcopy. See the comments - * there. Jumping into generic_bcopy would cost a whole 0-1 cycles and - * would mess up high resolution profiling. - */ - ALIGN_TEXT -small_i586_bcopy: - shrl $2,%ecx - cld - rep - movsl - movl 20(%esp),%ecx - andl $3,%ecx - rep - movsb - popl %edi - popl %esi - ret - - ALIGN_TEXT -1: - addl %ecx,%edi - addl %ecx,%esi - decl %edi - decl %esi - andl $3,%ecx - std - rep - movsb - movl 20(%esp),%ecx - shrl $2,%ecx - subl $3,%esi - subl $3,%edi - rep - movsl - popl %edi - popl %esi - cld - ret -#endif /* I586_CPU && defined(DEV_NPX) */ - -/* - * Note: memcpy does not support overlapping copies - */ -ENTRY(memcpy) - pushl %edi - pushl %esi - movl 12(%esp),%edi - movl 16(%esp),%esi - movl 20(%esp),%ecx - movl %edi,%eax - shrl $2,%ecx /* copy by 32-bit words */ - cld /* nope, copy forwards */ - rep - movsl - movl 20(%esp),%ecx - andl $3,%ecx /* any bytes left? */ - rep - movsb - popl %esi - popl %edi - ret - - /*****************************************************************************/ /* copyout and fubyte family */ /*****************************************************************************/ @@ -687,22 +73,13 @@ ENTRY(memcpy) */ /* - * copyout(from_kernel, to_user, len) - MP SAFE (if not I386_CPU) + * copyout(from_kernel, to_user, len) - MP SAFE + * %rdi, %rsi, %rdx */ ENTRY(copyout) - MEXITCOUNT - jmp *copyout_vector - -ENTRY(generic_copyout) - movl PCPU(CURPCB),%eax - movl $copyout_fault,PCB_ONFAULT(%eax) - pushl %esi - pushl %edi - pushl %ebx - movl 16(%esp),%esi - movl 20(%esp),%edi - movl 24(%esp),%ebx - testl %ebx,%ebx /* anything to do? */ + movq PCPU(CURPCB),%rax + movq $copyout_fault,PCB_ONFAULT(%rax) + testq %rdx,%rdx /* anything to do? 
*/ jz done_copyout /* @@ -715,8 +92,8 @@ ENTRY(generic_copyout) /* * First, prevent address wrapping. */ - movl %edi,%eax - addl %ebx,%eax + movq %rsi,%rax + addq %rdx,%rax jc copyout_fault /* * XXX STOP USING VM_MAXUSER_ADDRESS. @@ -724,469 +101,95 @@ ENTRY(generic_copyout) * looks like there is an off by one error, and of course it caused an off * by one error in several places. */ - cmpl $VM_MAXUSER_ADDRESS,%eax + movq $VM_MAXUSER_ADDRESS,%rcx + cmpq %rcx,%rax ja copyout_fault -#ifdef I386_CPU - -/* - * We have to check each PTE for user write permission. - * The checking may cause a page fault, so it is important to set - * up everything for return via copyout_fault before here. - */ - /* compute number of pages */ - movl %edi,%ecx - andl $PAGE_MASK,%ecx - addl %ebx,%ecx - decl %ecx - shrl $IDXSHIFT+2,%ecx - incl %ecx - - /* compute PTE offset for start address */ - movl %edi,%edx - shrl $IDXSHIFT,%edx - andb $0xfc,%dl - -1: - /* check PTE for each page */ - leal PTmap(%edx),%eax - shrl $IDXSHIFT,%eax - andb $0xfc,%al - testb $PG_V,PTmap(%eax) /* PTE page must be valid */ - je 4f - movb PTmap(%edx),%al - andb $PG_V|PG_RW|PG_U,%al /* page must be valid and user writable */ - cmpb $PG_V|PG_RW|PG_U,%al - je 2f - -4: - /* simulate a trap */ - pushl %edx - pushl %ecx - shll $IDXSHIFT,%edx - pushl %edx - call trapwrite /* trapwrite(addr) */ - popl %edx - popl %ecx - popl %edx - - testl %eax,%eax /* if not ok, return EFAULT */ - jnz copyout_fault - -2: - addl $4,%edx - decl %ecx - jnz 1b /* check next page */ -#endif /* I386_CPU */ + xchgq %rdi, %rsi + /* bcopy(%rsi, %rdi, %rdx) */ + movq %rdx,%rcx - /* bcopy(%esi, %edi, %ebx) */ - movl %ebx,%ecx - -#if defined(I586_CPU) && defined(DEV_NPX) - ALIGN_TEXT -slow_copyout: -#endif - shrl $2,%ecx + shrq $3,%rcx cld rep - movsl - movb %bl,%cl - andb $3,%cl + movsq + movb %dl,%cl + andb $7,%cl rep movsb done_copyout: - popl %ebx - popl %edi - popl %esi - xorl %eax,%eax - movl PCPU(CURPCB),%edx - movl %eax,PCB_ONFAULT(%edx) + 
xorq %rax,%rax + movq PCPU(CURPCB),%rdx + movq %rax,PCB_ONFAULT(%rdx) ret ALIGN_TEXT copyout_fault: - popl %ebx - popl %edi - popl %esi - movl PCPU(CURPCB),%edx - movl $0,PCB_ONFAULT(%edx) - movl $EFAULT,%eax + movq PCPU(CURPCB),%rdx + movq $0,PCB_ONFAULT(%rdx) + movq $EFAULT,%rax ret -#if defined(I586_CPU) && defined(DEV_NPX) -ENTRY(i586_copyout) - /* - * Duplicated from generic_copyout. Could be done a bit better. - */ - movl PCPU(CURPCB),%eax - movl $copyout_fault,PCB_ONFAULT(%eax) - pushl %esi - pushl %edi - pushl %ebx - movl 16(%esp),%esi - movl 20(%esp),%edi - movl 24(%esp),%ebx - testl %ebx,%ebx /* anything to do? */ - jz done_copyout - - /* - * Check explicitly for non-user addresses. If 486 write protection - * is being used, this check is essential because we are in kernel - * mode so the h/w does not provide any protection against writing - * kernel addresses. - */ - - /* - * First, prevent address wrapping. - */ - movl %edi,%eax - addl %ebx,%eax - jc copyout_fault -/* - * XXX STOP USING VM_MAXUSER_ADDRESS. - * It is an end address, not a max, so every time it is used correctly it - * looks like there is an off by one error, and of course it caused an off - * by one error in several places. - */ - cmpl $VM_MAXUSER_ADDRESS,%eax - ja copyout_fault - - /* bcopy(%esi, %edi, %ebx) */ -3: - movl %ebx,%ecx - /* - * End of duplicated code. - */ - - cmpl $1024,%ecx - jb slow_copyout - - pushl %ecx - call fastmove - addl $4,%esp - jmp done_copyout -#endif /* I586_CPU && defined(DEV_NPX) */ - /* * copyin(from_user, to_kernel, len) - MP SAFE + * %rdi, %rsi, %rdx */ ENTRY(copyin) - MEXITCOUNT - jmp *copyin_vector - -ENTRY(generic_copyin) - movl PCPU(CURPCB),%eax - movl $copyin_fault,PCB_ONFAULT(%eax) - pushl %esi - pushl %edi - movl 12(%esp),%esi /* caddr_t from */ - movl 16(%esp),%edi /* caddr_t to */ - movl 20(%esp),%ecx /* size_t len */ + movq PCPU(CURPCB),%rax + movq $copyin_fault,PCB_ONFAULT(%rax) + testq %rdx,%rdx /* anything to do? 
*/ + jz done_copyin /* * make sure address is valid */ - movl %esi,%edx - addl %ecx,%edx + movq %rdi,%rax + addq %rdx,%rax jc copyin_fault - cmpl $VM_MAXUSER_ADDRESS,%edx + movq $VM_MAXUSER_ADDRESS,%rcx + cmpq %rcx,%rax ja copyin_fault -#if defined(I586_CPU) && defined(DEV_NPX) - ALIGN_TEXT -slow_copyin: -#endif + xchgq %rdi, %rsi + movq %rdx, %rcx movb %cl,%al - shrl $2,%ecx /* copy longword-wise */ + shrq $3,%rcx /* copy longword-wise */ cld rep - movsl + movsq movb %al,%cl - andb $3,%cl /* copy remaining bytes */ + andb $7,%cl /* copy remaining bytes */ rep movsb -#if defined(I586_CPU) && defined(DEV_NPX) - ALIGN_TEXT done_copyin: -#endif - popl %edi - popl %esi - xorl %eax,%eax - movl PCPU(CURPCB),%edx - movl %eax,PCB_ONFAULT(%edx) + xorq %rax,%rax + movq PCPU(CURPCB),%rdx + movq %rax,PCB_ONFAULT(%rdx) ret ALIGN_TEXT copyin_fault: - popl %edi - popl %esi - movl PCPU(CURPCB),%edx - movl $0,PCB_ONFAULT(%edx) - movl $EFAULT,%eax + movq PCPU(CURPCB),%rdx + movq $0,PCB_ONFAULT(%rdx) + movq $EFAULT,%rax ret -#if defined(I586_CPU) && defined(DEV_NPX) -ENTRY(i586_copyin) - /* - * Duplicated from generic_copyin. Could be done a bit better. - */ - movl PCPU(CURPCB),%eax - movl $copyin_fault,PCB_ONFAULT(%eax) - pushl %esi - pushl %edi - movl 12(%esp),%esi /* caddr_t from */ - movl 16(%esp),%edi /* caddr_t to */ - movl 20(%esp),%ecx /* size_t len */ - - /* - * make sure address is valid - */ - movl %esi,%edx - addl %ecx,%edx - jc copyin_fault - cmpl $VM_MAXUSER_ADDRESS,%edx - ja copyin_fault - /* - * End of duplicated code. - */ - - cmpl $1024,%ecx - jb slow_copyin - - pushl %ebx /* XXX prepare for fastmove_fault */ - pushl %ecx - call fastmove - addl $8,%esp - jmp done_copyin -#endif /* I586_CPU && defined(DEV_NPX) */ - -#if defined(I586_CPU) && defined(DEV_NPX) -/* fastmove(src, dst, len) - src in %esi - dst in %edi - len in %ecx XXX changed to on stack for profiling - uses %eax and %edx for tmp. storage - */ -/* XXX use ENTRY() to get profiling. 
fastmove() is actually a non-entry. */ -ENTRY(fastmove) - pushl %ebp - movl %esp,%ebp - subl $PCB_SAVEFPU_SIZE+3*4,%esp - - movl 8(%ebp),%ecx - cmpl $63,%ecx - jbe fastmove_tail - - testl $7,%esi /* check if src addr is multiple of 8 */ - jnz fastmove_tail - - testl $7,%edi /* check if dst addr is multiple of 8 */ - jnz fastmove_tail - - /* XXX grab FPU context atomically. */ - cli - -/* if (fpcurthread != NULL) { */ - cmpl $0,PCPU(FPCURTHREAD) - je 6f -/* fnsave(&curpcb->pcb_savefpu); */ - movl PCPU(CURPCB),%eax - fnsave PCB_SAVEFPU(%eax) -/* FPCURTHREAD = NULL; */ - movl $0,PCPU(FPCURTHREAD) -/* } */ -6: -/* now we own the FPU. */ - -/* - * The process' FP state is saved in the pcb, but if we get - * switched, the cpu_switch() will store our FP state in the - * pcb. It should be possible to avoid all the copying for - * this, e.g., by setting a flag to tell cpu_switch() to - * save the state somewhere else. - */ -/* tmp = curpcb->pcb_savefpu; */ - movl %ecx,-12(%ebp) - movl %esi,-8(%ebp) - movl %edi,-4(%ebp) - movl %esp,%edi - movl PCPU(CURPCB),%esi - addl $PCB_SAVEFPU,%esi - cld - movl $PCB_SAVEFPU_SIZE>>2,%ecx - rep - movsl - movl -12(%ebp),%ecx - movl -8(%ebp),%esi - movl -4(%ebp),%edi -/* stop_emulating(); */ - clts -/* fpcurthread = curthread; */ - movl PCPU(CURTHREAD),%eax - movl %eax,PCPU(FPCURTHREAD) - movl PCPU(CURPCB),%eax - - /* XXX end of atomic FPU context grab. 
*/ - sti - - movl $fastmove_fault,PCB_ONFAULT(%eax) -4: - movl %ecx,-12(%ebp) - cmpl $1792,%ecx - jbe 2f - movl $1792,%ecx -2: - subl %ecx,-12(%ebp) - cmpl $256,%ecx - jb 5f - movl %ecx,-8(%ebp) - movl %esi,-4(%ebp) - ALIGN_TEXT -3: - movl 0(%esi),%eax - movl 32(%esi),%eax - movl 64(%esi),%eax - movl 96(%esi),%eax - movl 128(%esi),%eax - movl 160(%esi),%eax - movl 192(%esi),%eax - movl 224(%esi),%eax - addl $256,%esi - subl $256,%ecx - cmpl $256,%ecx - jae 3b - movl -8(%ebp),%ecx - movl -4(%ebp),%esi -5: - ALIGN_TEXT -fastmove_loop: - fildq 0(%esi) - fildq 8(%esi) - fildq 16(%esi) - fildq 24(%esi) - fildq 32(%esi) - fildq 40(%esi) - fildq 48(%esi) - fildq 56(%esi) - fistpq 56(%edi) - fistpq 48(%edi) - fistpq 40(%edi) - fistpq 32(%edi) - fistpq 24(%edi) - fistpq 16(%edi) - fistpq 8(%edi) - fistpq 0(%edi) - addl $-64,%ecx - addl $64,%esi - addl $64,%edi - cmpl $63,%ecx - ja fastmove_loop - movl -12(%ebp),%eax - addl %eax,%ecx - cmpl $64,%ecx - jae 4b - - /* XXX ungrab FPU context atomically. */ - cli - -/* curpcb->pcb_savefpu = tmp; */ - movl %ecx,-12(%ebp) - movl %esi,-8(%ebp) - movl %edi,-4(%ebp) - movl PCPU(CURPCB),%edi - addl $PCB_SAVEFPU,%edi - movl %esp,%esi - cld - movl $PCB_SAVEFPU_SIZE>>2,%ecx - rep - movsl - movl -12(%ebp),%ecx - movl -8(%ebp),%esi - movl -4(%ebp),%edi - -/* start_emulating(); */ - smsw %ax - orb $CR0_TS,%al - lmsw %ax -/* fpcurthread = NULL; */ - movl $0,PCPU(FPCURTHREAD) - - /* XXX end of atomic FPU context ungrab. */ - sti - - ALIGN_TEXT -fastmove_tail: - movl PCPU(CURPCB),%eax - movl $fastmove_tail_fault,PCB_ONFAULT(%eax) - - movb %cl,%al - shrl $2,%ecx /* copy longword-wise */ - cld - rep - movsl - movb %al,%cl - andb $3,%cl /* copy remaining bytes */ - rep - movsb - - movl %ebp,%esp - popl %ebp - ret - - ALIGN_TEXT -fastmove_fault: - /* XXX ungrab FPU context atomically. 
*/ - cli - - movl PCPU(CURPCB),%edi - addl $PCB_SAVEFPU,%edi - movl %esp,%esi - cld - movl $PCB_SAVEFPU_SIZE>>2,%ecx - rep - movsl - - smsw %ax - orb $CR0_TS,%al - lmsw %ax - movl $0,PCPU(FPCURTHREAD) - - /* XXX end of atomic FPU context ungrab. */ - sti - -fastmove_tail_fault: - movl %ebp,%esp - popl %ebp - addl $8,%esp - popl %ebx - popl %edi - popl %esi - movl PCPU(CURPCB),%edx - movl $0,PCB_ONFAULT(%edx) - movl $EFAULT,%eax - ret -#endif /* I586_CPU && defined(DEV_NPX) */ - /* * casuptr. Compare and set user pointer. Returns -1 or the current value. + * dst = %rdi, old = %rsi, new = %rdx */ ENTRY(casuptr) - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl 4(%esp),%edx /* dst */ - movl 8(%esp),%eax /* old */ - movl 12(%esp),%ecx /* new */ + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) - cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ + movq $VM_MAXUSER_ADDRESS-4,%rax + cmpq %rax,%rdi /* verify address is valid */ ja fusufault -#if defined(SMP) - lock cmpxchgl %ecx, (%edx) /* Compare and set. */ -#else /* !SMP */ - cmpxchgl %ecx, (%edx) -#endif /* !SMP */ + movq %rsi, %rax /* old */ + cmpxchgq %rdx, (%rdi) /* new = %rdx */ /* * The old value is in %eax. If the store succeeded it will be the @@ -1194,30 +197,45 @@ ENTRY(casuptr) * be the current value. 
*/ - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl $0,PCB_ONFAULT(%ecx) + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) + movq $0,PCB_ONFAULT(%rcx) ret /* * fu{byte,sword,word} - MP SAFE * * Fetch a byte (sword, word) from user memory + * %rdi */ -ENTRY(fuword) - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl 4(%esp),%edx /* from */ +ENTRY(fuword64) + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) - cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ + movq $VM_MAXUSER_ADDRESS-8,%rax + cmpq %rax,%rdi /* verify address is valid */ ja fusufault - movl (%edx),%eax - movl $0,PCB_ONFAULT(%ecx) + movq (%rdi),%rax + movq $0,PCB_ONFAULT(%rcx) ret ENTRY(fuword32) - jmp fuword + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) + + movq $VM_MAXUSER_ADDRESS-4,%rax + cmpq %rax,%rdi /* verify address is valid */ + ja fusufault + +# XXX use the 64 extend + xorq %rax, %rax + movl (%rdi),%eax + movq $0,PCB_ONFAULT(%rcx) + ret + +ENTRY(fuword) + jmp fuword32 /* * These two routines are called from the profiling code, potentially @@ -1227,191 +245,126 @@ ENTRY(fuword32) */ ALTENTRY(suswintr) ENTRY(fuswintr) - movl $-1,%eax + movq $-1,%rax ret /* * fuword16 - MP SAFE */ ENTRY(fuword16) - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl 4(%esp),%edx + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) - cmpl $VM_MAXUSER_ADDRESS-2,%edx + movq $VM_MAXUSER_ADDRESS-2,%rax + cmpq %rax,%rdi ja fusufault - movzwl (%edx),%eax - movl $0,PCB_ONFAULT(%ecx) +# XXX use the 64 extend + xorq %rax, %rax + movzwl (%rdi),%eax + movq $0,PCB_ONFAULT(%rcx) ret /* * fubyte - MP SAFE */ ENTRY(fubyte) - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl 4(%esp),%edx + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) - cmpl $VM_MAXUSER_ADDRESS-1,%edx + movq $VM_MAXUSER_ADDRESS-1,%rax + cmpq %rax,%rdi ja fusufault - movzbl (%edx),%eax - movl $0,PCB_ONFAULT(%ecx) +# XXX use the 64 
extend + xorq %rax, %rax + movzbl (%rdi),%eax + movq $0,PCB_ONFAULT(%rcx) ret ALIGN_TEXT fusufault: - movl PCPU(CURPCB),%ecx - xorl %eax,%eax - movl %eax,PCB_ONFAULT(%ecx) - decl %eax + movq PCPU(CURPCB),%rcx + xorq %rax,%rax + movq %rax,PCB_ONFAULT(%rcx) + decq %rax ret /* - * su{byte,sword,word} - MP SAFE (if not I386_CPU) + * su{byte,sword,word} - MP SAFE * * Write a byte (word, longword) to user memory + * addr = %rdi, value = %rsi */ -ENTRY(suword) - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl 4(%esp),%edx - -#ifdef I386_CPU - - /* XXX - page boundary crossing is still not handled */ - movl %edx,%eax - shrl $IDXSHIFT,%edx - andb $0xfc,%dl - - leal PTmap(%edx),%ecx - shrl $IDXSHIFT,%ecx - andb $0xfc,%cl - testb $PG_V,PTmap(%ecx) /* PTE page must be valid */ - je 4f - movb PTmap(%edx),%dl - andb $PG_V|PG_RW|PG_U,%dl /* page must be valid and user writable */ - cmpb $PG_V|PG_RW|PG_U,%dl - je 1f +ENTRY(suword64) + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) -4: - /* simulate a trap */ - pushl %eax - call trapwrite - popl %edx /* remove junk parameter from stack */ - testl %eax,%eax - jnz fusufault -1: - movl 4(%esp),%edx -#endif - - cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address validity */ + movq $VM_MAXUSER_ADDRESS-8,%rax + cmpq %rax,%rdi /* verify address validity */ ja fusufault - movl 8(%esp),%eax - movl %eax,(%edx) - xorl %eax,%eax - movl PCPU(CURPCB),%ecx - movl %eax,PCB_ONFAULT(%ecx) + movq %rsi,(%rdi) + xorq %rax,%rax + movq PCPU(CURPCB),%rcx + movq %rax,PCB_ONFAULT(%rcx) ret ENTRY(suword32) - jmp suword + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) + + movq $VM_MAXUSER_ADDRESS-4,%rax + cmpq %rax,%rdi /* verify address validity */ + ja fusufault + + movl %esi,(%rdi) + xorq %rax,%rax + movq PCPU(CURPCB),%rcx + movq %rax,PCB_ONFAULT(%rcx) + ret + +ENTRY(suword) + jmp suword32 /* - * suword16 - MP SAFE (if not I386_CPU) + * suword16 - MP SAFE */ ENTRY(suword16) - movl PCPU(CURPCB),%ecx - movl 
$fusufault,PCB_ONFAULT(%ecx) - movl 4(%esp),%edx - -#ifdef I386_CPU - - /* XXX - page boundary crossing is still not handled */ - movl %edx,%eax - shrl $IDXSHIFT,%edx - andb $0xfc,%dl - - leal PTmap(%edx),%ecx - shrl $IDXSHIFT,%ecx - andb $0xfc,%cl - testb $PG_V,PTmap(%ecx) /* PTE page must be valid */ - je 4f - movb PTmap(%edx),%dl - andb $PG_V|PG_RW|PG_U,%dl /* page must be valid and user writable */ - cmpb $PG_V|PG_RW|PG_U,%dl - je 1f + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) -4: - /* simulate a trap */ - pushl %eax - call trapwrite - popl %edx /* remove junk parameter from stack */ - testl %eax,%eax - jnz fusufault -1: - movl 4(%esp),%edx -#endif - - cmpl $VM_MAXUSER_ADDRESS-2,%edx /* verify address validity */ + movq $VM_MAXUSER_ADDRESS-2,%rax + cmpq %rax,%rdi /* verify address validity */ ja fusufault - movw 8(%esp),%ax - movw %ax,(%edx) - xorl %eax,%eax - movl PCPU(CURPCB),%ecx /* restore trashed register */ - movl %eax,PCB_ONFAULT(%ecx) + movw %si,(%rdi) + xorq %rax,%rax + movq PCPU(CURPCB),%rcx /* restore trashed register */ + movq %rax,PCB_ONFAULT(%rcx) ret /* - * subyte - MP SAFE (if not I386_CPU) + * subyte - MP SAFE */ ENTRY(subyte) - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl 4(%esp),%edx - -#ifdef I386_CPU - - movl %edx,%eax - shrl $IDXSHIFT,%edx - andb $0xfc,%dl - - leal PTmap(%edx),%ecx - shrl $IDXSHIFT,%ecx - andb $0xfc,%cl - testb $PG_V,PTmap(%ecx) /* PTE page must be valid */ - je 4f - movb PTmap(%edx),%dl - andb $PG_V|PG_RW|PG_U,%dl /* page must be valid and user writable */ - cmpb $PG_V|PG_RW|PG_U,%dl - je 1f - -4: - /* simulate a trap */ - pushl %eax - call trapwrite - popl %edx /* remove junk parameter from stack */ - testl %eax,%eax - jnz fusufault -1: - movl 4(%esp),%edx -#endif + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) - cmpl $VM_MAXUSER_ADDRESS-1,%edx /* verify address validity */ + movq $VM_MAXUSER_ADDRESS-1,%rax + cmpq %rax,%rdi /* verify address validity */ ja fusufault - 
movb 8(%esp),%al - movb %al,(%edx) - xorl %eax,%eax - movl PCPU(CURPCB),%ecx /* restore trashed register */ - movl %eax,PCB_ONFAULT(%ecx) + movl %esi, %eax + movb %al,(%rdi) + xorq %rax,%rax + movq PCPU(CURPCB),%rcx /* restore trashed register */ + movq %rax,PCB_ONFAULT(%rcx) ret /* * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE + * %rdi, %rsi, %rdx, %rcx * * copy a string from from to to, stop when a 0 character is reached. * return ENAMETOOLONG if string is longer than maxlen, and @@ -1419,32 +372,29 @@ ENTRY(subyte) * return the actual length in *lencopied. */ ENTRY(copyinstr) - pushl %esi - pushl %edi - movl PCPU(CURPCB),%ecx - movl $cpystrflt,PCB_ONFAULT(%ecx) - - movl 12(%esp),%esi /* %esi = from */ - movl 16(%esp),%edi /* %edi = to */ - movl 20(%esp),%edx /* %edx = maxlen */ + movq %rdx, %r8 /* %r8 = maxlen */ + movq %rcx, %r9 /* %r9 = *len */ + xchgq %rdi, %rsi /* %rdi = from, %rsi = to */ + movq PCPU(CURPCB),%rcx + movq $cpystrflt,PCB_ONFAULT(%rcx) - movl $VM_MAXUSER_ADDRESS,%eax + movq $VM_MAXUSER_ADDRESS,%rax /* make sure 'from' is within bounds */ - subl %esi,%eax + subq %rsi,%rax jbe cpystrflt /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */ - cmpl %edx,%eax + cmpq %rdx,%rax jae 1f - movl %eax,%edx - movl %eax,20(%esp) + movq %rax,%rdx + movq %rax,%r8 1: - incl %edx + incq %rdx cld 2: - decl %edx + decq %rdx jz 3f lodsb @@ -1453,50 +403,46 @@ ENTRY(copyinstr) jnz 2b /* Success -- 0 byte reached */ - decl %edx - xorl %eax,%eax + decq %rdx + xorq %rax,%rax jmp cpystrflt_x 3: - /* edx is zero - return ENAMETOOLONG or EFAULT */ - cmpl $VM_MAXUSER_ADDRESS,%esi + /* rdx is zero - return ENAMETOOLONG or EFAULT */ + movq $VM_MAXUSER_ADDRESS,%rax + cmpq %rax,%rsi jae cpystrflt 4: - movl $ENAMETOOLONG,%eax + movq $ENAMETOOLONG,%rax jmp cpystrflt_x cpystrflt: - movl $EFAULT,%eax + movq $EFAULT,%rax cpystrflt_x: /* set *lencopied and return %eax */ - movl PCPU(CURPCB),%ecx - movl $0,PCB_ONFAULT(%ecx) - movl 20(%esp),%ecx - subl %edx,%ecx - movl 
24(%esp),%edx - testl %edx,%edx + movq PCPU(CURPCB),%rcx + movq $0,PCB_ONFAULT(%rcx) + + testq %r9,%r9 jz 1f - movl %ecx,(%edx) + subq %rdx,%r8 + movq %r8,(%r9) 1: - popl %edi - popl %esi ret /* * copystr(from, to, maxlen, int *lencopied) - MP SAFE + * %rdi, %rsi, %rdx, %rcx */ ENTRY(copystr) - pushl %esi - pushl %edi + movq %rdx, %r8 /* %r8 = maxlen */ - movl 12(%esp),%esi /* %esi = from */ - movl 16(%esp),%edi /* %edi = to */ - movl 20(%esp),%edx /* %edx = maxlen */ - incl %edx + xchgq %rdi, %rsi + incq %rdx cld 1: - decl %edx + decq %rdx jz 4f lodsb stosb @@ -1504,159 +450,45 @@ ENTRY(copystr) jnz 1b /* Success -- 0 byte reached */ - decl %edx - xorl %eax,%eax + decq %rdx + xorq %rax,%rax jmp 6f 4: - /* edx is zero -- return ENAMETOOLONG */ - movl $ENAMETOOLONG,%eax + /* rdx is zero -- return ENAMETOOLONG */ + movq $ENAMETOOLONG,%rax 6: - /* set *lencopied and return %eax */ - movl 20(%esp),%ecx - subl %edx,%ecx - movl 24(%esp),%edx - testl %edx,%edx + + testq %rcx, %rcx jz 7f - movl %ecx,(%edx) + /* set *lencopied and return %rax */ + subq %rdx, %r8 + movq %r8, (%rcx) 7: - popl %edi - popl %esi - ret - -ENTRY(bcmp) - pushl %edi - pushl %esi - movl 12(%esp),%edi - movl 16(%esp),%esi - movl 20(%esp),%edx - xorl %eax,%eax - - movl %edx,%ecx - shrl $2,%ecx - cld /* compare forwards */ - repe - cmpsl - jne 1f - - movl %edx,%ecx - andl $3,%ecx - repe - cmpsb - je 2f -1: - incl %eax -2: - popl %esi - popl %edi ret - /* * Handling of special 386 registers and descriptor tables etc + * %rdi */ /* void lgdt(struct region_descriptor *rdp); */ ENTRY(lgdt) /* reload the descriptor table */ - movl 4(%esp),%eax - lgdt (%eax) + lgdt (%rdi) /* flush the prefetch q */ jmp 1f nop 1: - /* reload "stale" selectors */ - movl $KDSEL,%eax + movl $KDSEL, %eax mov %ax,%ds mov %ax,%es + mov %ax,%fs /* Beware, use wrmsr to set 64 bit base */ mov %ax,%gs mov %ax,%ss - movl $KPSEL,%eax - mov %ax,%fs /* reload code selector by turning return into intersegmental return */ - movl (%esp),%eax - 
pushl %eax - movl $KCSEL,4(%esp) - lret - -/* ssdtosd(*ssdp,*sdp) */ -ENTRY(ssdtosd) - pushl %ebx - movl 8(%esp),%ecx - movl 8(%ecx),%ebx - shll $16,%ebx - movl (%ecx),%edx - roll $16,%edx - movb %dh,%bl - movb %dl,%bh - rorl $8,%ebx - movl 4(%ecx),%eax - movw %ax,%dx - andl $0xf0000,%eax - orl %eax,%ebx - movl 12(%esp),%ecx - movl %edx,(%ecx) - movl %ebx,4(%ecx) - popl %ebx - ret - -/* void reset_dbregs() */ -ENTRY(reset_dbregs) - movl $0,%eax - movl %eax,%dr7 /* disable all breapoints first */ - movl %eax,%dr0 - movl %eax,%dr1 - movl %eax,%dr2 - movl %eax,%dr3 - movl %eax,%dr6 - ret - -/*****************************************************************************/ -/* setjump, longjump */ -/*****************************************************************************/ - -ENTRY(setjmp) - movl 4(%esp),%eax - movl %ebx,(%eax) /* save ebx */ - movl %esp,4(%eax) /* save esp */ - movl %ebp,8(%eax) /* save ebp */ - movl %esi,12(%eax) /* save esi */ - movl %edi,16(%eax) /* save edi */ - movl (%esp),%edx /* get rta */ - movl %edx,20(%eax) /* save eip */ - xorl %eax,%eax /* return(0); */ - ret - -ENTRY(longjmp) - movl 4(%esp),%eax - movl (%eax),%ebx /* restore ebx */ - movl 4(%eax),%esp /* restore esp */ - movl 8(%eax),%ebp /* restore ebp */ - movl 12(%eax),%esi /* restore esi */ - movl 16(%eax),%edi /* restore edi */ - movl 20(%eax),%edx /* get rta */ - movl %edx,(%esp) /* put in return frame */ - xorl %eax,%eax /* return(1); */ - incl %eax - ret - -/* - * Support for BB-profiling (gcc -a). The kernbb program will extract - * the data from the kernel. 
- */ - - .data - ALIGN_DATA - .globl bbhead -bbhead: - .long 0 - - .text -NON_GPROF_ENTRY(__bb_init_func) - movl 4(%esp),%eax - movl $1,(%eax) - movl bbhead,%edx - movl %edx,16(%eax) - movl %eax,bbhead - NON_GPROF_RET + popq %rax + pushq $KCSEL + pushq %rax + lretq diff --git a/sys/amd64/amd64/support.s b/sys/amd64/amd64/support.s index f0f99e4..06a3c4e 100644 --- a/sys/amd64/amd64/support.s +++ b/sys/amd64/amd64/support.s @@ -33,8 +33,6 @@ * $FreeBSD$ */ -#include "opt_npx.h" - #include <machine/asmacros.h> #include <machine/cputypes.h> #include <machine/pmap.h> @@ -42,632 +40,20 @@ #include "assym.s" -#define IDXSHIFT 10 - - .data - .globl bcopy_vector -bcopy_vector: - .long generic_bcopy - .globl bzero_vector -bzero_vector: - .long generic_bzero - .globl copyin_vector -copyin_vector: - .long generic_copyin - .globl copyout_vector -copyout_vector: - .long generic_copyout -#if defined(I586_CPU) && defined(DEV_NPX) -kernel_fpu_lock: - .byte 0xfe - .space 3 -#endif .text -/* - * bcopy family - * void bzero(void *buf, u_int len) - */ - -ENTRY(bzero) - MEXITCOUNT - jmp *bzero_vector - -ENTRY(generic_bzero) - pushl %edi - movl 8(%esp),%edi - movl 12(%esp),%ecx - xorl %eax,%eax - shrl $2,%ecx - cld - rep - stosl - movl 12(%esp),%ecx - andl $3,%ecx - rep - stosb - popl %edi - ret - -#ifdef I486_CPU -ENTRY(i486_bzero) - movl 4(%esp),%edx - movl 8(%esp),%ecx - xorl %eax,%eax -/* - * do 64 byte chunks first - * - * XXX this is probably over-unrolled at least for DX2's - */ -2: - cmpl $64,%ecx - jb 3f - movl %eax,(%edx) - movl %eax,4(%edx) - movl %eax,8(%edx) - movl %eax,12(%edx) - movl %eax,16(%edx) - movl %eax,20(%edx) - movl %eax,24(%edx) - movl %eax,28(%edx) - movl %eax,32(%edx) - movl %eax,36(%edx) - movl %eax,40(%edx) - movl %eax,44(%edx) - movl %eax,48(%edx) - movl %eax,52(%edx) - movl %eax,56(%edx) - movl %eax,60(%edx) - addl $64,%edx - subl $64,%ecx - jnz 2b - ret - -/* - * do 16 byte chunks - */ - SUPERALIGN_TEXT -3: - cmpl $16,%ecx - jb 4f - movl %eax,(%edx) - 
movl %eax,4(%edx) - movl %eax,8(%edx) - movl %eax,12(%edx) - addl $16,%edx - subl $16,%ecx - jnz 3b - ret - -/* - * do 4 byte chunks - */ - SUPERALIGN_TEXT -4: - cmpl $4,%ecx - jb 5f - movl %eax,(%edx) - addl $4,%edx - subl $4,%ecx - jnz 4b - ret - -/* - * do 1 byte chunks - * a jump table seems to be faster than a loop or more range reductions - * - * XXX need a const section for non-text - */ - .data -jtab: - .long do0 - .long do1 - .long do2 - .long do3 - - .text - SUPERALIGN_TEXT -5: - jmp *jtab(,%ecx,4) - - SUPERALIGN_TEXT -do3: - movw %ax,(%edx) - movb %al,2(%edx) - ret - - SUPERALIGN_TEXT -do2: - movw %ax,(%edx) - ret - - SUPERALIGN_TEXT -do1: - movb %al,(%edx) - ret - - SUPERALIGN_TEXT -do0: - ret -#endif - -#if defined(I586_CPU) && defined(DEV_NPX) -ENTRY(i586_bzero) - movl 4(%esp),%edx - movl 8(%esp),%ecx - - /* - * The FPU register method is twice as fast as the integer register - * method unless the target is in the L1 cache and we pre-allocate a - * cache line for it (then the integer register method is 4-5 times - * faster). However, we never pre-allocate cache lines, since that - * would make the integer method 25% or more slower for the common - * case when the target isn't in either the L1 cache or the L2 cache. - * Thus we normally use the FPU register method unless the overhead - * would be too large. - */ - cmpl $256,%ecx /* empirical; clts, fninit, smsw cost a lot */ - jb intreg_i586_bzero - - /* - * The FPU registers may belong to an application or to fastmove() - * or to another invocation of bcopy() or ourself in a higher level - * interrupt or trap handler. Preserving the registers is - * complicated since we avoid it if possible at all levels. We - * want to localize the complications even when that increases them. - * Here the extra work involves preserving CR0_TS in TS. 
- * `fpcurthread != NULL' is supposed to be the condition that all the - * FPU resources belong to an application, but fpcurthread and CR0_TS - * aren't set atomically enough for this condition to work in - * interrupt handlers. - * - * Case 1: FPU registers belong to the application: we must preserve - * the registers if we use them, so we only use the FPU register - * method if the target size is large enough to amortize the extra - * overhead for preserving them. CR0_TS must be preserved although - * it is very likely to end up as set. - * - * Case 2: FPU registers belong to fastmove(): fastmove() currently - * makes the registers look like they belong to an application so - * that cpu_switch() and savectx() don't have to know about it, so - * this case reduces to case 1. - * - * Case 3: FPU registers belong to the kernel: don't use the FPU - * register method. This case is unlikely, and supporting it would - * be more complicated and might take too much stack. - * - * Case 4: FPU registers don't belong to anyone: the FPU registers - * don't need to be preserved, so we always use the FPU register - * method. CR0_TS must be preserved although it is very likely to - * always end up as clear. - */ - cmpl $0,PCPU(FPCURTHREAD) - je i586_bz1 - - /* - * XXX don't use the FPU for cases 1 and 2, since preemptive - * scheduling of ithreads broke these cases. Note that we can - * no longer get here from an interrupt handler, since the - * context sitch to the interrupt handler will have saved the - * FPU state. 
- */ - jmp intreg_i586_bzero - - cmpl $256+184,%ecx /* empirical; not quite 2*108 more */ - jb intreg_i586_bzero - sarb $1,kernel_fpu_lock - jc intreg_i586_bzero - smsw %ax - clts - subl $108,%esp - fnsave 0(%esp) - jmp i586_bz2 - -i586_bz1: - sarb $1,kernel_fpu_lock - jc intreg_i586_bzero - smsw %ax - clts - fninit /* XXX should avoid needing this */ -i586_bz2: - fldz - - /* - * Align to an 8 byte boundary (misalignment in the main loop would - * cost a factor of >= 2). Avoid jumps (at little cost if it is - * already aligned) by always zeroing 8 bytes and using the part up - * to the _next_ alignment position. - */ - fstl 0(%edx) - addl %edx,%ecx /* part of %ecx -= new_%edx - %edx */ - addl $8,%edx - andl $~7,%edx - subl %edx,%ecx - - /* - * Similarly align `len' to a multiple of 8. - */ - fstl -8(%edx,%ecx) - decl %ecx - andl $~7,%ecx - - /* - * This wouldn't be any faster if it were unrolled, since the loop - * control instructions are much faster than the fstl and/or done - * in parallel with it so their overhead is insignificant. - */ -fpureg_i586_bzero_loop: - fstl 0(%edx) - addl $8,%edx - subl $8,%ecx - cmpl $8,%ecx - jae fpureg_i586_bzero_loop - - cmpl $0,PCPU(FPCURTHREAD) - je i586_bz3 - - /* XXX check that the condition for cases 1-2 stayed false. */ -i586_bzero_oops: - int $3 - jmp i586_bzero_oops - - frstor 0(%esp) - addl $108,%esp - lmsw %ax - movb $0xfe,kernel_fpu_lock - ret - -i586_bz3: - fstp %st(0) - lmsw %ax - movb $0xfe,kernel_fpu_lock - ret - -intreg_i586_bzero: - /* - * `rep stos' seems to be the best method in practice for small - * counts. Fancy methods usually take too long to start up due - * to cache and BTB misses. 
- */ - pushl %edi - movl %edx,%edi - xorl %eax,%eax - shrl $2,%ecx - cld - rep - stosl - movl 12(%esp),%ecx - andl $3,%ecx - jne 1f - popl %edi - ret - -1: - rep - stosb - popl %edi - ret -#endif /* I586_CPU && defined(DEV_NPX) */ - -ENTRY(i686_pagezero) - pushl %edi - pushl %ebx - - movl 12(%esp), %edi - movl $1024, %ecx - cld - - ALIGN_TEXT -1: - xorl %eax, %eax - repe - scasl - jnz 2f - - popl %ebx - popl %edi - ret - - ALIGN_TEXT - -2: - incl %ecx - subl $4, %edi - - movl %ecx, %edx - cmpl $16, %ecx - - jge 3f - - movl %edi, %ebx - andl $0x3f, %ebx - shrl %ebx - shrl %ebx - movl $16, %ecx - subl %ebx, %ecx - -3: - subl %ecx, %edx - rep - stosl - - movl %edx, %ecx - testl %edx, %edx - jnz 1b - - popl %ebx - popl %edi - ret - -/* fillw(pat, base, cnt) */ +/* fillw(pat, base, cnt) */ +/* %rdi,%rsi, %rdx */ ENTRY(fillw) - pushl %edi - movl 8(%esp),%eax - movl 12(%esp),%edi - movl 16(%esp),%ecx + movq %rdi,%rax + movq %rsi,%rdi + movq %rdx,%rcx cld rep stosw - popl %edi - ret - -ENTRY(bcopyb) - pushl %esi - pushl %edi - movl 12(%esp),%esi - movl 16(%esp),%edi - movl 20(%esp),%ecx - movl %edi,%eax - subl %esi,%eax - cmpl %ecx,%eax /* overlapping && src < dst? */ - jb 1f - cld /* nope, copy forwards */ - rep - movsb - popl %edi - popl %esi - ret - - ALIGN_TEXT -1: - addl %ecx,%edi /* copy backwards. */ - addl %ecx,%esi - decl %edi - decl %esi - std - rep - movsb - popl %edi - popl %esi - cld ret -ENTRY(bcopy) - MEXITCOUNT - jmp *bcopy_vector - -/* - * generic_bcopy(src, dst, cnt) - * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 - */ -ENTRY(generic_bcopy) - pushl %esi - pushl %edi - movl 12(%esp),%esi - movl 16(%esp),%edi - movl 20(%esp),%ecx - - movl %edi,%eax - subl %esi,%eax - cmpl %ecx,%eax /* overlapping && src < dst? */ - jb 1f - - shrl $2,%ecx /* copy by 32-bit words */ - cld /* nope, copy forwards */ - rep - movsl - movl 20(%esp),%ecx - andl $3,%ecx /* any bytes left? 
*/ - rep - movsb - popl %edi - popl %esi - ret - - ALIGN_TEXT -1: - addl %ecx,%edi /* copy backwards */ - addl %ecx,%esi - decl %edi - decl %esi - andl $3,%ecx /* any fractional bytes? */ - std - rep - movsb - movl 20(%esp),%ecx /* copy remainder by 32-bit words */ - shrl $2,%ecx - subl $3,%esi - subl $3,%edi - rep - movsl - popl %edi - popl %esi - cld - ret - -#if defined(I586_CPU) && defined(DEV_NPX) -ENTRY(i586_bcopy) - pushl %esi - pushl %edi - movl 12(%esp),%esi - movl 16(%esp),%edi - movl 20(%esp),%ecx - - movl %edi,%eax - subl %esi,%eax - cmpl %ecx,%eax /* overlapping && src < dst? */ - jb 1f - - cmpl $1024,%ecx - jb small_i586_bcopy - - sarb $1,kernel_fpu_lock - jc small_i586_bcopy - cmpl $0,PCPU(FPCURTHREAD) - je i586_bc1 - - /* XXX turn off handling of cases 1-2, as above. */ - movb $0xfe,kernel_fpu_lock - jmp small_i586_bcopy - - smsw %dx - clts - subl $108,%esp - fnsave 0(%esp) - jmp 4f - -i586_bc1: - smsw %dx - clts - fninit /* XXX should avoid needing this */ - - ALIGN_TEXT -4: - pushl %ecx -#define DCACHE_SIZE 8192 - cmpl $(DCACHE_SIZE-512)/2,%ecx - jbe 2f - movl $(DCACHE_SIZE-512)/2,%ecx -2: - subl %ecx,0(%esp) - cmpl $256,%ecx - jb 5f /* XXX should prefetch if %ecx >= 32 */ - pushl %esi - pushl %ecx - ALIGN_TEXT -3: - movl 0(%esi),%eax - movl 32(%esi),%eax - movl 64(%esi),%eax - movl 96(%esi),%eax - movl 128(%esi),%eax - movl 160(%esi),%eax - movl 192(%esi),%eax - movl 224(%esi),%eax - addl $256,%esi - subl $256,%ecx - cmpl $256,%ecx - jae 3b - popl %ecx - popl %esi -5: - ALIGN_TEXT -large_i586_bcopy_loop: - fildq 0(%esi) - fildq 8(%esi) - fildq 16(%esi) - fildq 24(%esi) - fildq 32(%esi) - fildq 40(%esi) - fildq 48(%esi) - fildq 56(%esi) - fistpq 56(%edi) - fistpq 48(%edi) - fistpq 40(%edi) - fistpq 32(%edi) - fistpq 24(%edi) - fistpq 16(%edi) - fistpq 8(%edi) - fistpq 0(%edi) - addl $64,%esi - addl $64,%edi - subl $64,%ecx - cmpl $64,%ecx - jae large_i586_bcopy_loop - popl %eax - addl %eax,%ecx - cmpl $64,%ecx - jae 4b - - cmpl 
$0,PCPU(FPCURTHREAD) - je i586_bc2 - - /* XXX check that the condition for cases 1-2 stayed false. */ -i586_bcopy_oops: - int $3 - jmp i586_bcopy_oops - - frstor 0(%esp) - addl $108,%esp -i586_bc2: - lmsw %dx - movb $0xfe,kernel_fpu_lock - -/* - * This is a duplicate of the main part of generic_bcopy. See the comments - * there. Jumping into generic_bcopy would cost a whole 0-1 cycles and - * would mess up high resolution profiling. - */ - ALIGN_TEXT -small_i586_bcopy: - shrl $2,%ecx - cld - rep - movsl - movl 20(%esp),%ecx - andl $3,%ecx - rep - movsb - popl %edi - popl %esi - ret - - ALIGN_TEXT -1: - addl %ecx,%edi - addl %ecx,%esi - decl %edi - decl %esi - andl $3,%ecx - std - rep - movsb - movl 20(%esp),%ecx - shrl $2,%ecx - subl $3,%esi - subl $3,%edi - rep - movsl - popl %edi - popl %esi - cld - ret -#endif /* I586_CPU && defined(DEV_NPX) */ - -/* - * Note: memcpy does not support overlapping copies - */ -ENTRY(memcpy) - pushl %edi - pushl %esi - movl 12(%esp),%edi - movl 16(%esp),%esi - movl 20(%esp),%ecx - movl %edi,%eax - shrl $2,%ecx /* copy by 32-bit words */ - cld /* nope, copy forwards */ - rep - movsl - movl 20(%esp),%ecx - andl $3,%ecx /* any bytes left? */ - rep - movsb - popl %esi - popl %edi - ret - - /*****************************************************************************/ /* copyout and fubyte family */ /*****************************************************************************/ @@ -687,22 +73,13 @@ ENTRY(memcpy) */ /* - * copyout(from_kernel, to_user, len) - MP SAFE (if not I386_CPU) + * copyout(from_kernel, to_user, len) - MP SAFE + * %rdi, %rsi, %rdx */ ENTRY(copyout) - MEXITCOUNT - jmp *copyout_vector - -ENTRY(generic_copyout) - movl PCPU(CURPCB),%eax - movl $copyout_fault,PCB_ONFAULT(%eax) - pushl %esi - pushl %edi - pushl %ebx - movl 16(%esp),%esi - movl 20(%esp),%edi - movl 24(%esp),%ebx - testl %ebx,%ebx /* anything to do? */ + movq PCPU(CURPCB),%rax + movq $copyout_fault,PCB_ONFAULT(%rax) + testq %rdx,%rdx /* anything to do? 
*/ jz done_copyout /* @@ -715,8 +92,8 @@ ENTRY(generic_copyout) /* * First, prevent address wrapping. */ - movl %edi,%eax - addl %ebx,%eax + movq %rsi,%rax + addq %rdx,%rax jc copyout_fault /* * XXX STOP USING VM_MAXUSER_ADDRESS. @@ -724,469 +101,95 @@ ENTRY(generic_copyout) * looks like there is an off by one error, and of course it caused an off * by one error in several places. */ - cmpl $VM_MAXUSER_ADDRESS,%eax + movq $VM_MAXUSER_ADDRESS,%rcx + cmpq %rcx,%rax ja copyout_fault -#ifdef I386_CPU - -/* - * We have to check each PTE for user write permission. - * The checking may cause a page fault, so it is important to set - * up everything for return via copyout_fault before here. - */ - /* compute number of pages */ - movl %edi,%ecx - andl $PAGE_MASK,%ecx - addl %ebx,%ecx - decl %ecx - shrl $IDXSHIFT+2,%ecx - incl %ecx - - /* compute PTE offset for start address */ - movl %edi,%edx - shrl $IDXSHIFT,%edx - andb $0xfc,%dl - -1: - /* check PTE for each page */ - leal PTmap(%edx),%eax - shrl $IDXSHIFT,%eax - andb $0xfc,%al - testb $PG_V,PTmap(%eax) /* PTE page must be valid */ - je 4f - movb PTmap(%edx),%al - andb $PG_V|PG_RW|PG_U,%al /* page must be valid and user writable */ - cmpb $PG_V|PG_RW|PG_U,%al - je 2f - -4: - /* simulate a trap */ - pushl %edx - pushl %ecx - shll $IDXSHIFT,%edx - pushl %edx - call trapwrite /* trapwrite(addr) */ - popl %edx - popl %ecx - popl %edx - - testl %eax,%eax /* if not ok, return EFAULT */ - jnz copyout_fault - -2: - addl $4,%edx - decl %ecx - jnz 1b /* check next page */ -#endif /* I386_CPU */ + xchgq %rdi, %rsi + /* bcopy(%rsi, %rdi, %rdx) */ + movq %rdx,%rcx - /* bcopy(%esi, %edi, %ebx) */ - movl %ebx,%ecx - -#if defined(I586_CPU) && defined(DEV_NPX) - ALIGN_TEXT -slow_copyout: -#endif - shrl $2,%ecx + shrq $3,%rcx cld rep - movsl - movb %bl,%cl - andb $3,%cl + movsq + movb %dl,%cl + andb $7,%cl rep movsb done_copyout: - popl %ebx - popl %edi - popl %esi - xorl %eax,%eax - movl PCPU(CURPCB),%edx - movl %eax,PCB_ONFAULT(%edx) + 
xorq %rax,%rax + movq PCPU(CURPCB),%rdx + movq %rax,PCB_ONFAULT(%rdx) ret ALIGN_TEXT copyout_fault: - popl %ebx - popl %edi - popl %esi - movl PCPU(CURPCB),%edx - movl $0,PCB_ONFAULT(%edx) - movl $EFAULT,%eax + movq PCPU(CURPCB),%rdx + movq $0,PCB_ONFAULT(%rdx) + movq $EFAULT,%rax ret -#if defined(I586_CPU) && defined(DEV_NPX) -ENTRY(i586_copyout) - /* - * Duplicated from generic_copyout. Could be done a bit better. - */ - movl PCPU(CURPCB),%eax - movl $copyout_fault,PCB_ONFAULT(%eax) - pushl %esi - pushl %edi - pushl %ebx - movl 16(%esp),%esi - movl 20(%esp),%edi - movl 24(%esp),%ebx - testl %ebx,%ebx /* anything to do? */ - jz done_copyout - - /* - * Check explicitly for non-user addresses. If 486 write protection - * is being used, this check is essential because we are in kernel - * mode so the h/w does not provide any protection against writing - * kernel addresses. - */ - - /* - * First, prevent address wrapping. - */ - movl %edi,%eax - addl %ebx,%eax - jc copyout_fault -/* - * XXX STOP USING VM_MAXUSER_ADDRESS. - * It is an end address, not a max, so every time it is used correctly it - * looks like there is an off by one error, and of course it caused an off - * by one error in several places. - */ - cmpl $VM_MAXUSER_ADDRESS,%eax - ja copyout_fault - - /* bcopy(%esi, %edi, %ebx) */ -3: - movl %ebx,%ecx - /* - * End of duplicated code. - */ - - cmpl $1024,%ecx - jb slow_copyout - - pushl %ecx - call fastmove - addl $4,%esp - jmp done_copyout -#endif /* I586_CPU && defined(DEV_NPX) */ - /* * copyin(from_user, to_kernel, len) - MP SAFE + * %rdi, %rsi, %rdx */ ENTRY(copyin) - MEXITCOUNT - jmp *copyin_vector - -ENTRY(generic_copyin) - movl PCPU(CURPCB),%eax - movl $copyin_fault,PCB_ONFAULT(%eax) - pushl %esi - pushl %edi - movl 12(%esp),%esi /* caddr_t from */ - movl 16(%esp),%edi /* caddr_t to */ - movl 20(%esp),%ecx /* size_t len */ + movq PCPU(CURPCB),%rax + movq $copyin_fault,PCB_ONFAULT(%rax) + testq %rdx,%rdx /* anything to do? 
*/ + jz done_copyin /* * make sure address is valid */ - movl %esi,%edx - addl %ecx,%edx + movq %rdi,%rax + addq %rdx,%rax jc copyin_fault - cmpl $VM_MAXUSER_ADDRESS,%edx + movq $VM_MAXUSER_ADDRESS,%rcx + cmpq %rcx,%rax ja copyin_fault -#if defined(I586_CPU) && defined(DEV_NPX) - ALIGN_TEXT -slow_copyin: -#endif + xchgq %rdi, %rsi + movq %rdx, %rcx movb %cl,%al - shrl $2,%ecx /* copy longword-wise */ + shrq $3,%rcx /* copy longword-wise */ cld rep - movsl + movsq movb %al,%cl - andb $3,%cl /* copy remaining bytes */ + andb $7,%cl /* copy remaining bytes */ rep movsb -#if defined(I586_CPU) && defined(DEV_NPX) - ALIGN_TEXT done_copyin: -#endif - popl %edi - popl %esi - xorl %eax,%eax - movl PCPU(CURPCB),%edx - movl %eax,PCB_ONFAULT(%edx) + xorq %rax,%rax + movq PCPU(CURPCB),%rdx + movq %rax,PCB_ONFAULT(%rdx) ret ALIGN_TEXT copyin_fault: - popl %edi - popl %esi - movl PCPU(CURPCB),%edx - movl $0,PCB_ONFAULT(%edx) - movl $EFAULT,%eax + movq PCPU(CURPCB),%rdx + movq $0,PCB_ONFAULT(%rdx) + movq $EFAULT,%rax ret -#if defined(I586_CPU) && defined(DEV_NPX) -ENTRY(i586_copyin) - /* - * Duplicated from generic_copyin. Could be done a bit better. - */ - movl PCPU(CURPCB),%eax - movl $copyin_fault,PCB_ONFAULT(%eax) - pushl %esi - pushl %edi - movl 12(%esp),%esi /* caddr_t from */ - movl 16(%esp),%edi /* caddr_t to */ - movl 20(%esp),%ecx /* size_t len */ - - /* - * make sure address is valid - */ - movl %esi,%edx - addl %ecx,%edx - jc copyin_fault - cmpl $VM_MAXUSER_ADDRESS,%edx - ja copyin_fault - /* - * End of duplicated code. - */ - - cmpl $1024,%ecx - jb slow_copyin - - pushl %ebx /* XXX prepare for fastmove_fault */ - pushl %ecx - call fastmove - addl $8,%esp - jmp done_copyin -#endif /* I586_CPU && defined(DEV_NPX) */ - -#if defined(I586_CPU) && defined(DEV_NPX) -/* fastmove(src, dst, len) - src in %esi - dst in %edi - len in %ecx XXX changed to on stack for profiling - uses %eax and %edx for tmp. storage - */ -/* XXX use ENTRY() to get profiling. 
fastmove() is actually a non-entry. */ -ENTRY(fastmove) - pushl %ebp - movl %esp,%ebp - subl $PCB_SAVEFPU_SIZE+3*4,%esp - - movl 8(%ebp),%ecx - cmpl $63,%ecx - jbe fastmove_tail - - testl $7,%esi /* check if src addr is multiple of 8 */ - jnz fastmove_tail - - testl $7,%edi /* check if dst addr is multiple of 8 */ - jnz fastmove_tail - - /* XXX grab FPU context atomically. */ - cli - -/* if (fpcurthread != NULL) { */ - cmpl $0,PCPU(FPCURTHREAD) - je 6f -/* fnsave(&curpcb->pcb_savefpu); */ - movl PCPU(CURPCB),%eax - fnsave PCB_SAVEFPU(%eax) -/* FPCURTHREAD = NULL; */ - movl $0,PCPU(FPCURTHREAD) -/* } */ -6: -/* now we own the FPU. */ - -/* - * The process' FP state is saved in the pcb, but if we get - * switched, the cpu_switch() will store our FP state in the - * pcb. It should be possible to avoid all the copying for - * this, e.g., by setting a flag to tell cpu_switch() to - * save the state somewhere else. - */ -/* tmp = curpcb->pcb_savefpu; */ - movl %ecx,-12(%ebp) - movl %esi,-8(%ebp) - movl %edi,-4(%ebp) - movl %esp,%edi - movl PCPU(CURPCB),%esi - addl $PCB_SAVEFPU,%esi - cld - movl $PCB_SAVEFPU_SIZE>>2,%ecx - rep - movsl - movl -12(%ebp),%ecx - movl -8(%ebp),%esi - movl -4(%ebp),%edi -/* stop_emulating(); */ - clts -/* fpcurthread = curthread; */ - movl PCPU(CURTHREAD),%eax - movl %eax,PCPU(FPCURTHREAD) - movl PCPU(CURPCB),%eax - - /* XXX end of atomic FPU context grab. 
*/ - sti - - movl $fastmove_fault,PCB_ONFAULT(%eax) -4: - movl %ecx,-12(%ebp) - cmpl $1792,%ecx - jbe 2f - movl $1792,%ecx -2: - subl %ecx,-12(%ebp) - cmpl $256,%ecx - jb 5f - movl %ecx,-8(%ebp) - movl %esi,-4(%ebp) - ALIGN_TEXT -3: - movl 0(%esi),%eax - movl 32(%esi),%eax - movl 64(%esi),%eax - movl 96(%esi),%eax - movl 128(%esi),%eax - movl 160(%esi),%eax - movl 192(%esi),%eax - movl 224(%esi),%eax - addl $256,%esi - subl $256,%ecx - cmpl $256,%ecx - jae 3b - movl -8(%ebp),%ecx - movl -4(%ebp),%esi -5: - ALIGN_TEXT -fastmove_loop: - fildq 0(%esi) - fildq 8(%esi) - fildq 16(%esi) - fildq 24(%esi) - fildq 32(%esi) - fildq 40(%esi) - fildq 48(%esi) - fildq 56(%esi) - fistpq 56(%edi) - fistpq 48(%edi) - fistpq 40(%edi) - fistpq 32(%edi) - fistpq 24(%edi) - fistpq 16(%edi) - fistpq 8(%edi) - fistpq 0(%edi) - addl $-64,%ecx - addl $64,%esi - addl $64,%edi - cmpl $63,%ecx - ja fastmove_loop - movl -12(%ebp),%eax - addl %eax,%ecx - cmpl $64,%ecx - jae 4b - - /* XXX ungrab FPU context atomically. */ - cli - -/* curpcb->pcb_savefpu = tmp; */ - movl %ecx,-12(%ebp) - movl %esi,-8(%ebp) - movl %edi,-4(%ebp) - movl PCPU(CURPCB),%edi - addl $PCB_SAVEFPU,%edi - movl %esp,%esi - cld - movl $PCB_SAVEFPU_SIZE>>2,%ecx - rep - movsl - movl -12(%ebp),%ecx - movl -8(%ebp),%esi - movl -4(%ebp),%edi - -/* start_emulating(); */ - smsw %ax - orb $CR0_TS,%al - lmsw %ax -/* fpcurthread = NULL; */ - movl $0,PCPU(FPCURTHREAD) - - /* XXX end of atomic FPU context ungrab. */ - sti - - ALIGN_TEXT -fastmove_tail: - movl PCPU(CURPCB),%eax - movl $fastmove_tail_fault,PCB_ONFAULT(%eax) - - movb %cl,%al - shrl $2,%ecx /* copy longword-wise */ - cld - rep - movsl - movb %al,%cl - andb $3,%cl /* copy remaining bytes */ - rep - movsb - - movl %ebp,%esp - popl %ebp - ret - - ALIGN_TEXT -fastmove_fault: - /* XXX ungrab FPU context atomically. 
*/ - cli - - movl PCPU(CURPCB),%edi - addl $PCB_SAVEFPU,%edi - movl %esp,%esi - cld - movl $PCB_SAVEFPU_SIZE>>2,%ecx - rep - movsl - - smsw %ax - orb $CR0_TS,%al - lmsw %ax - movl $0,PCPU(FPCURTHREAD) - - /* XXX end of atomic FPU context ungrab. */ - sti - -fastmove_tail_fault: - movl %ebp,%esp - popl %ebp - addl $8,%esp - popl %ebx - popl %edi - popl %esi - movl PCPU(CURPCB),%edx - movl $0,PCB_ONFAULT(%edx) - movl $EFAULT,%eax - ret -#endif /* I586_CPU && defined(DEV_NPX) */ - /* * casuptr. Compare and set user pointer. Returns -1 or the current value. + * dst = %rdi, old = %rsi, new = %rdx */ ENTRY(casuptr) - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl 4(%esp),%edx /* dst */ - movl 8(%esp),%eax /* old */ - movl 12(%esp),%ecx /* new */ + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) - cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ + movq $VM_MAXUSER_ADDRESS-4,%rax + cmpq %rax,%rdi /* verify address is valid */ ja fusufault -#if defined(SMP) - lock cmpxchgl %ecx, (%edx) /* Compare and set. */ -#else /* !SMP */ - cmpxchgl %ecx, (%edx) -#endif /* !SMP */ + movq %rsi, %rax /* old */ + cmpxchgq %rdx, (%rdi) /* new = %rdx */ /* * The old value is in %eax. If the store succeeded it will be the @@ -1194,30 +197,45 @@ ENTRY(casuptr) * be the current value. 
*/ - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl $0,PCB_ONFAULT(%ecx) + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) + movq $0,PCB_ONFAULT(%rcx) ret /* * fu{byte,sword,word} - MP SAFE * * Fetch a byte (sword, word) from user memory + * %rdi */ -ENTRY(fuword) - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl 4(%esp),%edx /* from */ +ENTRY(fuword64) + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) - cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ + movq $VM_MAXUSER_ADDRESS-8,%rax + cmpq %rax,%rdi /* verify address is valid */ ja fusufault - movl (%edx),%eax - movl $0,PCB_ONFAULT(%ecx) + movq (%rdi),%rax + movq $0,PCB_ONFAULT(%rcx) ret ENTRY(fuword32) - jmp fuword + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) + + movq $VM_MAXUSER_ADDRESS-4,%rax + cmpq %rax,%rdi /* verify address is valid */ + ja fusufault + +# XXX use the 64 extend + xorq %rax, %rax + movl (%rdi),%eax + movq $0,PCB_ONFAULT(%rcx) + ret + +ENTRY(fuword) + jmp fuword32 /* * These two routines are called from the profiling code, potentially @@ -1227,191 +245,126 @@ ENTRY(fuword32) */ ALTENTRY(suswintr) ENTRY(fuswintr) - movl $-1,%eax + movq $-1,%rax ret /* * fuword16 - MP SAFE */ ENTRY(fuword16) - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl 4(%esp),%edx + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) - cmpl $VM_MAXUSER_ADDRESS-2,%edx + movq $VM_MAXUSER_ADDRESS-2,%rax + cmpq %rax,%rdi ja fusufault - movzwl (%edx),%eax - movl $0,PCB_ONFAULT(%ecx) +# XXX use the 64 extend + xorq %rax, %rax + movzwl (%rdi),%eax + movq $0,PCB_ONFAULT(%rcx) ret /* * fubyte - MP SAFE */ ENTRY(fubyte) - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl 4(%esp),%edx + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) - cmpl $VM_MAXUSER_ADDRESS-1,%edx + movq $VM_MAXUSER_ADDRESS-1,%rax + cmpq %rax,%rdi ja fusufault - movzbl (%edx),%eax - movl $0,PCB_ONFAULT(%ecx) +# XXX use the 64 
extend + xorq %rax, %rax + movzbl (%rdi),%eax + movq $0,PCB_ONFAULT(%rcx) ret ALIGN_TEXT fusufault: - movl PCPU(CURPCB),%ecx - xorl %eax,%eax - movl %eax,PCB_ONFAULT(%ecx) - decl %eax + movq PCPU(CURPCB),%rcx + xorq %rax,%rax + movq %rax,PCB_ONFAULT(%rcx) + decq %rax ret /* - * su{byte,sword,word} - MP SAFE (if not I386_CPU) + * su{byte,sword,word} - MP SAFE * * Write a byte (word, longword) to user memory + * addr = %rdi, value = %rsi */ -ENTRY(suword) - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl 4(%esp),%edx - -#ifdef I386_CPU - - /* XXX - page boundary crossing is still not handled */ - movl %edx,%eax - shrl $IDXSHIFT,%edx - andb $0xfc,%dl - - leal PTmap(%edx),%ecx - shrl $IDXSHIFT,%ecx - andb $0xfc,%cl - testb $PG_V,PTmap(%ecx) /* PTE page must be valid */ - je 4f - movb PTmap(%edx),%dl - andb $PG_V|PG_RW|PG_U,%dl /* page must be valid and user writable */ - cmpb $PG_V|PG_RW|PG_U,%dl - je 1f +ENTRY(suword64) + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) -4: - /* simulate a trap */ - pushl %eax - call trapwrite - popl %edx /* remove junk parameter from stack */ - testl %eax,%eax - jnz fusufault -1: - movl 4(%esp),%edx -#endif - - cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address validity */ + movq $VM_MAXUSER_ADDRESS-8,%rax + cmpq %rax,%rdi /* verify address validity */ ja fusufault - movl 8(%esp),%eax - movl %eax,(%edx) - xorl %eax,%eax - movl PCPU(CURPCB),%ecx - movl %eax,PCB_ONFAULT(%ecx) + movq %rsi,(%rdi) + xorq %rax,%rax + movq PCPU(CURPCB),%rcx + movq %rax,PCB_ONFAULT(%rcx) ret ENTRY(suword32) - jmp suword + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) + + movq $VM_MAXUSER_ADDRESS-4,%rax + cmpq %rax,%rdi /* verify address validity */ + ja fusufault + + movl %esi,(%rdi) + xorq %rax,%rax + movq PCPU(CURPCB),%rcx + movq %rax,PCB_ONFAULT(%rcx) + ret + +ENTRY(suword) + jmp suword32 /* - * suword16 - MP SAFE (if not I386_CPU) + * suword16 - MP SAFE */ ENTRY(suword16) - movl PCPU(CURPCB),%ecx - movl 
$fusufault,PCB_ONFAULT(%ecx) - movl 4(%esp),%edx - -#ifdef I386_CPU - - /* XXX - page boundary crossing is still not handled */ - movl %edx,%eax - shrl $IDXSHIFT,%edx - andb $0xfc,%dl - - leal PTmap(%edx),%ecx - shrl $IDXSHIFT,%ecx - andb $0xfc,%cl - testb $PG_V,PTmap(%ecx) /* PTE page must be valid */ - je 4f - movb PTmap(%edx),%dl - andb $PG_V|PG_RW|PG_U,%dl /* page must be valid and user writable */ - cmpb $PG_V|PG_RW|PG_U,%dl - je 1f + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) -4: - /* simulate a trap */ - pushl %eax - call trapwrite - popl %edx /* remove junk parameter from stack */ - testl %eax,%eax - jnz fusufault -1: - movl 4(%esp),%edx -#endif - - cmpl $VM_MAXUSER_ADDRESS-2,%edx /* verify address validity */ + movq $VM_MAXUSER_ADDRESS-2,%rax + cmpq %rax,%rdi /* verify address validity */ ja fusufault - movw 8(%esp),%ax - movw %ax,(%edx) - xorl %eax,%eax - movl PCPU(CURPCB),%ecx /* restore trashed register */ - movl %eax,PCB_ONFAULT(%ecx) + movw %si,(%rdi) + xorq %rax,%rax + movq PCPU(CURPCB),%rcx /* restore trashed register */ + movq %rax,PCB_ONFAULT(%rcx) ret /* - * subyte - MP SAFE (if not I386_CPU) + * subyte - MP SAFE */ ENTRY(subyte) - movl PCPU(CURPCB),%ecx - movl $fusufault,PCB_ONFAULT(%ecx) - movl 4(%esp),%edx - -#ifdef I386_CPU - - movl %edx,%eax - shrl $IDXSHIFT,%edx - andb $0xfc,%dl - - leal PTmap(%edx),%ecx - shrl $IDXSHIFT,%ecx - andb $0xfc,%cl - testb $PG_V,PTmap(%ecx) /* PTE page must be valid */ - je 4f - movb PTmap(%edx),%dl - andb $PG_V|PG_RW|PG_U,%dl /* page must be valid and user writable */ - cmpb $PG_V|PG_RW|PG_U,%dl - je 1f - -4: - /* simulate a trap */ - pushl %eax - call trapwrite - popl %edx /* remove junk parameter from stack */ - testl %eax,%eax - jnz fusufault -1: - movl 4(%esp),%edx -#endif + movq PCPU(CURPCB),%rcx + movq $fusufault,PCB_ONFAULT(%rcx) - cmpl $VM_MAXUSER_ADDRESS-1,%edx /* verify address validity */ + movq $VM_MAXUSER_ADDRESS-1,%rax + cmpq %rax,%rdi /* verify address validity */ ja fusufault - 
movb 8(%esp),%al - movb %al,(%edx) - xorl %eax,%eax - movl PCPU(CURPCB),%ecx /* restore trashed register */ - movl %eax,PCB_ONFAULT(%ecx) + movl %esi, %eax + movb %al,(%rdi) + xorq %rax,%rax + movq PCPU(CURPCB),%rcx /* restore trashed register */ + movq %rax,PCB_ONFAULT(%rcx) ret /* * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE + * %rdi, %rsi, %rdx, %rcx * * copy a string from from to to, stop when a 0 character is reached. * return ENAMETOOLONG if string is longer than maxlen, and @@ -1419,32 +372,29 @@ ENTRY(subyte) * return the actual length in *lencopied. */ ENTRY(copyinstr) - pushl %esi - pushl %edi - movl PCPU(CURPCB),%ecx - movl $cpystrflt,PCB_ONFAULT(%ecx) - - movl 12(%esp),%esi /* %esi = from */ - movl 16(%esp),%edi /* %edi = to */ - movl 20(%esp),%edx /* %edx = maxlen */ + movq %rdx, %r8 /* %r8 = maxlen */ + movq %rcx, %r9 /* %r9 = *len */ + xchgq %rdi, %rsi /* %rdi = from, %rsi = to */ + movq PCPU(CURPCB),%rcx + movq $cpystrflt,PCB_ONFAULT(%rcx) - movl $VM_MAXUSER_ADDRESS,%eax + movq $VM_MAXUSER_ADDRESS,%rax /* make sure 'from' is within bounds */ - subl %esi,%eax + subq %rsi,%rax jbe cpystrflt /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */ - cmpl %edx,%eax + cmpq %rdx,%rax jae 1f - movl %eax,%edx - movl %eax,20(%esp) + movq %rax,%rdx + movq %rax,%r8 1: - incl %edx + incq %rdx cld 2: - decl %edx + decq %rdx jz 3f lodsb @@ -1453,50 +403,46 @@ ENTRY(copyinstr) jnz 2b /* Success -- 0 byte reached */ - decl %edx - xorl %eax,%eax + decq %rdx + xorq %rax,%rax jmp cpystrflt_x 3: - /* edx is zero - return ENAMETOOLONG or EFAULT */ - cmpl $VM_MAXUSER_ADDRESS,%esi + /* rdx is zero - return ENAMETOOLONG or EFAULT */ + movq $VM_MAXUSER_ADDRESS,%rax + cmpq %rax,%rsi jae cpystrflt 4: - movl $ENAMETOOLONG,%eax + movq $ENAMETOOLONG,%rax jmp cpystrflt_x cpystrflt: - movl $EFAULT,%eax + movq $EFAULT,%rax cpystrflt_x: /* set *lencopied and return %eax */ - movl PCPU(CURPCB),%ecx - movl $0,PCB_ONFAULT(%ecx) - movl 20(%esp),%ecx - subl %edx,%ecx - movl 
24(%esp),%edx - testl %edx,%edx + movq PCPU(CURPCB),%rcx + movq $0,PCB_ONFAULT(%rcx) + + testq %r9,%r9 jz 1f - movl %ecx,(%edx) + subq %rdx,%r8 + movq %r8,(%r9) 1: - popl %edi - popl %esi ret /* * copystr(from, to, maxlen, int *lencopied) - MP SAFE + * %rdi, %rsi, %rdx, %rcx */ ENTRY(copystr) - pushl %esi - pushl %edi + movq %rdx, %r8 /* %r8 = maxlen */ - movl 12(%esp),%esi /* %esi = from */ - movl 16(%esp),%edi /* %edi = to */ - movl 20(%esp),%edx /* %edx = maxlen */ - incl %edx + xchgq %rdi, %rsi + incq %rdx cld 1: - decl %edx + decq %rdx jz 4f lodsb stosb @@ -1504,159 +450,45 @@ ENTRY(copystr) jnz 1b /* Success -- 0 byte reached */ - decl %edx - xorl %eax,%eax + decq %rdx + xorq %rax,%rax jmp 6f 4: - /* edx is zero -- return ENAMETOOLONG */ - movl $ENAMETOOLONG,%eax + /* rdx is zero -- return ENAMETOOLONG */ + movq $ENAMETOOLONG,%rax 6: - /* set *lencopied and return %eax */ - movl 20(%esp),%ecx - subl %edx,%ecx - movl 24(%esp),%edx - testl %edx,%edx + + testq %rcx, %rcx jz 7f - movl %ecx,(%edx) + /* set *lencopied and return %rax */ + subq %rdx, %r8 + movq %r8, (%rcx) 7: - popl %edi - popl %esi - ret - -ENTRY(bcmp) - pushl %edi - pushl %esi - movl 12(%esp),%edi - movl 16(%esp),%esi - movl 20(%esp),%edx - xorl %eax,%eax - - movl %edx,%ecx - shrl $2,%ecx - cld /* compare forwards */ - repe - cmpsl - jne 1f - - movl %edx,%ecx - andl $3,%ecx - repe - cmpsb - je 2f -1: - incl %eax -2: - popl %esi - popl %edi ret - /* * Handling of special 386 registers and descriptor tables etc + * %rdi */ /* void lgdt(struct region_descriptor *rdp); */ ENTRY(lgdt) /* reload the descriptor table */ - movl 4(%esp),%eax - lgdt (%eax) + lgdt (%rdi) /* flush the prefetch q */ jmp 1f nop 1: - /* reload "stale" selectors */ - movl $KDSEL,%eax + movl $KDSEL, %eax mov %ax,%ds mov %ax,%es + mov %ax,%fs /* Beware, use wrmsr to set 64 bit base */ mov %ax,%gs mov %ax,%ss - movl $KPSEL,%eax - mov %ax,%fs /* reload code selector by turning return into intersegmental return */ - movl (%esp),%eax - 
pushl %eax - movl $KCSEL,4(%esp) - lret - -/* ssdtosd(*ssdp,*sdp) */ -ENTRY(ssdtosd) - pushl %ebx - movl 8(%esp),%ecx - movl 8(%ecx),%ebx - shll $16,%ebx - movl (%ecx),%edx - roll $16,%edx - movb %dh,%bl - movb %dl,%bh - rorl $8,%ebx - movl 4(%ecx),%eax - movw %ax,%dx - andl $0xf0000,%eax - orl %eax,%ebx - movl 12(%esp),%ecx - movl %edx,(%ecx) - movl %ebx,4(%ecx) - popl %ebx - ret - -/* void reset_dbregs() */ -ENTRY(reset_dbregs) - movl $0,%eax - movl %eax,%dr7 /* disable all breapoints first */ - movl %eax,%dr0 - movl %eax,%dr1 - movl %eax,%dr2 - movl %eax,%dr3 - movl %eax,%dr6 - ret - -/*****************************************************************************/ -/* setjump, longjump */ -/*****************************************************************************/ - -ENTRY(setjmp) - movl 4(%esp),%eax - movl %ebx,(%eax) /* save ebx */ - movl %esp,4(%eax) /* save esp */ - movl %ebp,8(%eax) /* save ebp */ - movl %esi,12(%eax) /* save esi */ - movl %edi,16(%eax) /* save edi */ - movl (%esp),%edx /* get rta */ - movl %edx,20(%eax) /* save eip */ - xorl %eax,%eax /* return(0); */ - ret - -ENTRY(longjmp) - movl 4(%esp),%eax - movl (%eax),%ebx /* restore ebx */ - movl 4(%eax),%esp /* restore esp */ - movl 8(%eax),%ebp /* restore ebp */ - movl 12(%eax),%esi /* restore esi */ - movl 16(%eax),%edi /* restore edi */ - movl 20(%eax),%edx /* get rta */ - movl %edx,(%esp) /* put in return frame */ - xorl %eax,%eax /* return(1); */ - incl %eax - ret - -/* - * Support for BB-profiling (gcc -a). The kernbb program will extract - * the data from the kernel. 
- */ - - .data - ALIGN_DATA - .globl bbhead -bbhead: - .long 0 - - .text -NON_GPROF_ENTRY(__bb_init_func) - movl 4(%esp),%eax - movl $1,(%eax) - movl bbhead,%edx - movl %edx,16(%eax) - movl %eax,bbhead - NON_GPROF_RET + popq %rax + pushq $KCSEL + pushq %rax + lretq diff --git a/sys/amd64/amd64/swtch.s b/sys/amd64/amd64/swtch.s index a19baa7..8cca838 100644 --- a/sys/amd64/amd64/swtch.s +++ b/sys/amd64/amd64/swtch.s @@ -36,9 +36,6 @@ * $FreeBSD$ */ -#include "opt_npx.h" -#include "opt_swtch.h" - #include <machine/asmacros.h> #include "assym.s" @@ -57,37 +54,26 @@ * about its state. This is only a slight optimization and is probably * not worth it anymore. Note that we need to clear the pm_active bits so * we do need the old proc if it still exists. - * 0(%esp) = ret - * 4(%esp) = oldtd - * 8(%esp) = newtd + * %rdi = oldtd + * %rsi = newtd */ ENTRY(cpu_throw) - movl PCPU(CPUID), %esi - movl 4(%esp),%ecx /* Old thread */ - testl %ecx,%ecx /* no thread? */ + xorq %rax, %rax + movl PCPU(CPUID), %eax + testq %rdi,%rdi /* no thread? 
*/ jz 1f /* release bit from old pm_active */ - movl TD_PROC(%ecx), %eax /* thread->td_proc */ - movl P_VMSPACE(%eax), %ebx /* proc->p_vmspace */ -#ifdef SMP - lock -#endif - btrl %esi, VM_PMAP+PM_ACTIVE(%ebx) /* clear old */ + movq TD_PROC(%rdi), %rdx /* oldtd->td_proc */ + movq P_VMSPACE(%rdx), %rdx /* proc->p_vmspace */ + btrq %rax, VM_PMAP+PM_ACTIVE(%rdx) /* clear old */ 1: - movl 8(%esp),%ecx /* New thread */ - movl TD_PCB(%ecx),%edx -#ifdef SWTCH_OPTIM_STATS - incl tlb_flush_count -#endif - movl PCB_CR3(%edx),%eax - movl %eax,%cr3 /* new address space */ + movq TD_PCB(%rsi),%rdx /* newtd->td_pcb */ + movq PCB_CR3(%rdx),%rdx + movq %rdx,%cr3 /* new address space */ /* set bit in new pm_active */ - movl TD_PROC(%ecx),%eax - movl P_VMSPACE(%eax), %ebx -#ifdef SMP - lock -#endif - btsl %esi, VM_PMAP+PM_ACTIVE(%ebx) /* set new */ + movq TD_PROC(%rsi),%rdx + movq P_VMSPACE(%rdx), %rdx + btsq %rax, VM_PMAP+PM_ACTIVE(%rdx) /* set new */ jmp sw1 /* @@ -95,278 +81,184 @@ ENTRY(cpu_throw) * * Save the current thread state, then select the next thread to run * and load its state. - * 0(%esp) = ret - * 4(%esp) = oldtd - * 8(%esp) = newtd + * %rdi = oldtd + * %rsi = newtd */ ENTRY(cpu_switch) /* Switch to new thread. First, save context. */ - movl 4(%esp),%ecx - #ifdef INVARIANTS - testl %ecx,%ecx /* no thread? */ + testq %rdi,%rdi /* no thread? 
*/ jz badsw2 /* no, panic */ #endif - movl TD_PCB(%ecx),%edx + movq TD_PCB(%rdi),%rdx - movl (%esp),%eax /* Hardware registers */ - movl %eax,PCB_EIP(%edx) - movl %ebx,PCB_EBX(%edx) - movl %esp,PCB_ESP(%edx) - movl %ebp,PCB_EBP(%edx) - movl %esi,PCB_ESI(%edx) - movl %edi,PCB_EDI(%edx) - movl %gs,PCB_GS(%edx) - pushfl /* PSL */ - popl PCB_PSL(%edx) + movq (%rsp),%rax /* Hardware registers */ + movq %rax,PCB_RIP(%rdx) + movq %rbx,PCB_RBX(%rdx) + movq %rsp,PCB_RSP(%rdx) + movq %rbp,PCB_RBP(%rdx) + movq %r12,PCB_R12(%rdx) + movq %r13,PCB_R13(%rdx) + movq %r14,PCB_R14(%rdx) + movq %r15,PCB_R15(%rdx) + pushfq /* PSL */ + popq PCB_RFLAGS(%rdx) - /* Test if debug registers should be saved. */ - testl $PCB_DBREGS,PCB_FLAGS(%edx) - jz 1f /* no, skip over */ - movl %dr7,%eax /* yes, do the save */ - movl %eax,PCB_DR7(%edx) - andl $0x0000fc00, %eax /* disable all watchpoints */ - movl %eax,%dr7 - movl %dr6,%eax - movl %eax,PCB_DR6(%edx) - movl %dr3,%eax - movl %eax,PCB_DR3(%edx) - movl %dr2,%eax - movl %eax,PCB_DR2(%edx) - movl %dr1,%eax - movl %eax,PCB_DR1(%edx) - movl %dr0,%eax - movl %eax,PCB_DR0(%edx) -1: - -#ifdef DEV_NPX /* have we used fp, and need a save? */ - cmpl %ecx,PCPU(FPCURTHREAD) + cmpq %rdi,PCPU(FPCURTHREAD) jne 1f - addl $PCB_SAVEFPU,%edx /* h/w bugs make saving complicated */ - pushl %edx + pushq %rdi + pushq %rsi + addq $PCB_SAVEFPU,%rdx /* h/w bugs make saving complicated */ + movq %rdx, %rdi call npxsave /* do it in a big C function */ - popl %eax + popq %rsi + popq %rdi 1: -#endif /* Save is done. Now fire up new thread. Leave old vmspace. */ - movl %ecx,%edi - movl 8(%esp),%ecx /* New thread */ #ifdef INVARIANTS - testl %ecx,%ecx /* no thread? */ + testq %rsi,%rsi /* no thread? 
*/ jz badsw3 /* no, panic */ #endif - movl TD_PCB(%ecx),%edx - movl PCPU(CPUID), %esi + movq TD_PCB(%rsi),%rdx + xorq %rax, %rax + movl PCPU(CPUID), %eax /* switch address space */ - movl PCB_CR3(%edx),%eax -#ifdef LAZY_SWITCH - cmpl $0,lazy_flush_enable - je 1f - cmpl %eax,IdlePTD /* Kernel address space? */ -#ifdef SWTCH_OPTIM_STATS - je 3f -#else - je sw1 -#endif -1: -#endif - movl %cr3,%ebx /* The same address space? */ - cmpl %ebx,%eax -#ifdef SWTCH_OPTIM_STATS - je 2f /* Yes, skip all that cruft */ -#else - je sw1 -#endif -#ifdef SWTCH_OPTIM_STATS - incl tlb_flush_count -#endif - movl %eax,%cr3 /* new address space */ + movq PCB_CR3(%rdx),%rdx + movq %rdx,%cr3 /* new address space */ /* Release bit from old pmap->pm_active */ - movl TD_PROC(%edi), %eax /* oldproc */ - movl P_VMSPACE(%eax), %ebx -#ifdef SMP - lock -#endif - btrl %esi, VM_PMAP+PM_ACTIVE(%ebx) /* clear old */ + movq TD_PROC(%rdi), %rdx /* oldproc */ + movq P_VMSPACE(%rdx), %rdx + btrq %rax, VM_PMAP+PM_ACTIVE(%rdx) /* clear old */ /* Set bit in new pmap->pm_active */ - movl TD_PROC(%ecx),%eax /* newproc */ - movl P_VMSPACE(%eax), %ebx -#ifdef SMP - lock -#endif - btsl %esi, VM_PMAP+PM_ACTIVE(%ebx) /* set new */ - -#ifdef LAZY_SWITCH -#ifdef SWTCH_OPTIM_STATS - jmp sw1 - -2: /* same address space */ - incl swtch_optim_stats - jmp sw1 - -3: /* kernel address space */ - incl lazy_flush_count -#endif -#endif + movq TD_PROC(%rsi),%rdx /* newproc */ + movq P_VMSPACE(%rdx), %rdx + btsq %rax, VM_PMAP+PM_ACTIVE(%rdx) /* set new */ sw1: /* * At this point, we've switched address spaces and are ready * to load up the rest of the next context. */ - cmpl $0, PCB_EXT(%edx) /* has pcb extension? */ - je 1f /* If not, use the default */ - btsl %esi, private_tss /* mark use of private tss */ - movl PCB_EXT(%edx), %edi /* new tss descriptor */ - jmp 2f /* Load it up */ + movq TD_PCB(%rsi),%rdx -1: /* - * Use the common default TSS instead of our own. 
- * Set our stack pointer into the TSS, it's set to just - * below the PCB. In C, common_tss.tss_esp0 = &pcb - 16; - */ - leal -16(%edx), %ebx /* leave space for vm86 */ - movl %ebx, PCPU(COMMON_TSS) + TSS_ESP0 - - /* - * Test this CPU's bit in the bitmap to see if this - * CPU was using a private TSS. - */ - btrl %esi, private_tss /* Already using the common? */ - jae 3f /* if so, skip reloading */ - PCPU_ADDR(COMMON_TSSD, %edi) -2: - /* Move correct tss descriptor into GDT slot, then reload tr. */ - movl PCPU(TSS_GDT), %ebx /* entry in GDT */ - movl 0(%edi), %eax - movl %eax, 0(%ebx) - movl 4(%edi), %eax - movl %eax, 4(%ebx) - movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */ - ltr %si -3: + /* Update the TSS_RSP0 pointer for the next interrupt */ + leaq -16(%rdx), %rbx + movq %rbx, common_tss + COMMON_TSS_RSP0 /* Restore context. */ - movl PCB_EBX(%edx),%ebx - movl PCB_ESP(%edx),%esp - movl PCB_EBP(%edx),%ebp - movl PCB_ESI(%edx),%esi - movl PCB_EDI(%edx),%edi - movl PCB_EIP(%edx),%eax - movl %eax,(%esp) - pushl PCB_PSL(%edx) - popfl - - movl %edx, PCPU(CURPCB) - movl %ecx, PCPU(CURTHREAD) /* into next thread */ + movq PCB_RBX(%rdx),%rbx + movq PCB_RSP(%rdx),%rsp + movq PCB_RBP(%rdx),%rbp + movq PCB_R12(%rdx),%r12 + movq PCB_R13(%rdx),%r13 + movq PCB_R14(%rdx),%r14 + movq PCB_R15(%rdx),%r15 + movq PCB_RIP(%rdx),%rax + movq %rax,(%rsp) + pushq PCB_RFLAGS(%rdx) + popfq + + movq %rdx, PCPU(CURPCB) + movq %rsi, PCPU(CURTHREAD) /* into next thread */ - /* - * Determine the LDT to use and load it if is the default one and - * that is not the current one. - */ - movl TD_PROC(%ecx),%eax - cmpl $0,P_MD+MD_LDT(%eax) - jnz 1f - movl _default_ldt,%eax - cmpl PCPU(CURRENTLDT),%eax - je 2f - lldt _default_ldt - movl %eax,PCPU(CURRENTLDT) - jmp 2f -1: - /* Load the LDT when it is not the default one. */ - pushl %edx /* Preserve pointer to pcb. */ - addl $P_MD,%eax /* Pointer to mdproc is arg. 
*/ - pushl %eax - call set_user_ldt - addl $4,%esp - popl %edx -2: - - /* This must be done after loading the user LDT. */ - .globl cpu_switch_load_gs -cpu_switch_load_gs: - movl PCB_GS(%edx),%gs - - /* Test if debug registers should be restored. */ - testl $PCB_DBREGS,PCB_FLAGS(%edx) - jz 1f - - /* - * Restore debug registers. The special code for dr7 is to - * preserve the current values of its reserved bits. - */ - movl PCB_DR6(%edx),%eax - movl %eax,%dr6 - movl PCB_DR3(%edx),%eax - movl %eax,%dr3 - movl PCB_DR2(%edx),%eax - movl %eax,%dr2 - movl PCB_DR1(%edx),%eax - movl %eax,%dr1 - movl PCB_DR0(%edx),%eax - movl %eax,%dr0 - movl %dr7,%eax - andl $0x0000fc00,%eax - movl PCB_DR7(%edx),%ecx - andl $~0x0000fc00,%ecx - orl %ecx,%eax - movl %eax,%dr7 -1: ret #ifdef INVARIANTS badsw1: - pushal - pushl $sw0_1 + pushq %rax + pushq %rcx + pushq %rdx + pushq %rbx + pushq %rbp + pushq %rsi + pushq %rdi + pushq %r8 + pushq %r9 + pushq %r10 + pushq %r11 + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq $sw0_1 call panic sw0_1: .asciz "cpu_throw: no newthread supplied" badsw2: - pushal - pushl $sw0_2 + pushq %rax + pushq %rcx + pushq %rdx + pushq %rbx + pushq %rbp + pushq %rsi + pushq %rdi + pushq %r8 + pushq %r9 + pushq %r10 + pushq %r11 + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq $sw0_2 call panic sw0_2: .asciz "cpu_switch: no curthread supplied" badsw3: - pushal - pushl $sw0_3 + pushq %rax + pushq %rcx + pushq %rdx + pushq %rbx + pushq %rbp + pushq %rsi + pushq %rdi + pushq %r8 + pushq %r9 + pushq %r10 + pushq %r11 + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq $sw0_3 call panic sw0_3: .asciz "cpu_switch: no newthread supplied" #endif +noswitch: .asciz "cpu_switch: called!" +nothrow: .asciz "cpu_throw: called!" /* * savectx(pcb) * Update pcb, saving current processor state. */ ENTRY(savectx) /* Fetch PCB. */ - movl 4(%esp),%ecx + movq %rdi,%rcx - /* Save caller's return address. Child won't execute this routine. 
*/ - movl (%esp),%eax - movl %eax,PCB_EIP(%ecx) + /* Save caller's return address. */ + movq (%rsp),%rax + movq %rax,PCB_RIP(%rcx) - movl %cr3,%eax - movl %eax,PCB_CR3(%ecx) + movq %cr3,%rax + movq %rax,PCB_CR3(%rcx) - movl %ebx,PCB_EBX(%ecx) - movl %esp,PCB_ESP(%ecx) - movl %ebp,PCB_EBP(%ecx) - movl %esi,PCB_ESI(%ecx) - movl %edi,PCB_EDI(%ecx) - movl %gs,PCB_GS(%ecx) - pushfl - popl PCB_PSL(%ecx) + movq %rbx,PCB_RBX(%rcx) + movq %rsp,PCB_RSP(%rcx) + movq %rbp,PCB_RBP(%rcx) + movq %r12,PCB_R12(%rcx) + movq %r13,PCB_R13(%rcx) + movq %r14,PCB_R14(%rcx) + movq %r15,PCB_R15(%rcx) + pushfq + popq PCB_RFLAGS(%rcx) -#ifdef DEV_NPX /* * If fpcurthread == NULL, then the npx h/w state is irrelevant and the * state had better already be in the pcb. This is true for forks @@ -379,30 +271,25 @@ ENTRY(savectx) * have to handle h/w bugs for reloading. We used to lose the * parent's npx state for forks by forgetting to reload. */ - pushfl + pushfq cli - movl PCPU(FPCURTHREAD),%eax - testl %eax,%eax + movq PCPU(FPCURTHREAD),%rax + testq %rax,%rax je 1f - pushl %ecx - movl TD_PCB(%eax),%eax - leal PCB_SAVEFPU(%eax),%eax - pushl %eax - pushl %eax + pushq %rcx + pushq %rax + movq TD_PCB(%rax),%rdi + leaq PCB_SAVEFPU(%rdi),%rdi call npxsave - addl $4,%esp - popl %eax - popl %ecx + popq %rax + popq %rcx - pushl $PCB_SAVEFPU_SIZE - leal PCB_SAVEFPU(%ecx),%ecx - pushl %ecx - pushl %eax + movq $PCB_SAVEFPU_SIZE,%rdx /* arg 3 */ + leaq PCB_SAVEFPU(%rcx),%rsi /* arg 2 */ + movq %rax,%rdi /* arg 1 */ call bcopy - addl $12,%esp 1: - popfl -#endif /* DEV_NPX */ + popfq ret diff --git a/sys/amd64/amd64/sys_machdep.c b/sys/amd64/amd64/sys_machdep.c index e1ed617..c815a59 100644 --- a/sys/amd64/amd64/sys_machdep.c +++ b/sys/amd64/amd64/sys_machdep.c @@ -35,46 +35,11 @@ * */ -#include "opt_kstack_pages.h" -#include "opt_mac.h" - #include <sys/param.h> #include <sys/systm.h> #include <sys/lock.h> -#include <sys/mac.h> -#include <sys/malloc.h> -#include <sys/mutex.h> #include <sys/proc.h> -#include 
<sys/smp.h> #include <sys/sysproto.h> -#include <sys/user.h> - -#include <vm/vm.h> -#include <vm/pmap.h> -#include <vm/vm_map.h> -#include <vm/vm_extern.h> - -#include <machine/cpu.h> -#include <machine/pcb_ext.h> /* pcb.h included by sys/user.h */ -#include <machine/proc.h> -#include <machine/sysarch.h> - -#include <vm/vm_kern.h> /* for kernel_map */ - -#define MAX_LD 8192 -#define LD_PER_PAGE 512 -#define NEW_MAX_LD(num) ((num + LD_PER_PAGE) & ~(LD_PER_PAGE-1)) -#define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3) - - - -static int i386_get_ldt(struct thread *, char *); -static int i386_set_ldt(struct thread *, char *); -static int i386_get_ioperm(struct thread *, char *); -static int i386_set_ioperm(struct thread *, char *); -#ifdef SMP -static void set_user_ldt_rv(struct thread *); -#endif #ifndef _SYS_SYSPROTO_H_ struct sysarch_args { @@ -90,476 +55,10 @@ sysarch(td, uap) { int error; - mtx_lock(&Giant); switch(uap->op) { - case I386_GET_LDT: - error = i386_get_ldt(td, uap->parms); - break; - - case I386_SET_LDT: - error = i386_set_ldt(td, uap->parms); - break; - case I386_GET_IOPERM: - error = i386_get_ioperm(td, uap->parms); - break; - case I386_SET_IOPERM: - error = i386_set_ioperm(td, uap->parms); - break; - case I386_VM86: - error = vm86_sysarch(td, uap->parms); - break; default: error = EINVAL; break; } - mtx_unlock(&Giant); - return (error); -} - -int -i386_extend_pcb(struct thread *td) -{ - int i, offset; - u_long *addr; - struct pcb_ext *ext; - struct soft_segment_descriptor ssd = { - 0, /* segment base address (overwritten) */ - ctob(IOPAGES + 1) - 1, /* length */ - SDT_SYS386TSS, /* segment type */ - 0, /* priority level */ - 1, /* descriptor present */ - 0, 0, - 0, /* default 32 size */ - 0 /* granularity */ - }; - - if (td->td_proc->p_flag & P_THREADED) - return (EINVAL); /* XXXKSE */ -/* XXXKSE All the code below only works in 1:1 needs changing */ - ext = (struct pcb_ext *)kmem_alloc(kernel_map, ctob(IOPAGES+1)); - if (ext == 0) - return 
(ENOMEM); - bzero(ext, sizeof(struct pcb_ext)); - /* -16 is so we can convert a trapframe into vm86trapframe inplace */ - ext->ext_tss.tss_esp0 = td->td_kstack + ctob(KSTACK_PAGES) - - sizeof(struct pcb) - 16; - ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); - /* - * The last byte of the i/o map must be followed by an 0xff byte. - * We arbitrarily allocate 16 bytes here, to keep the starting - * address on a doubleword boundary. - */ - offset = PAGE_SIZE - 16; - ext->ext_tss.tss_ioopt = - (offset - ((unsigned)&ext->ext_tss - (unsigned)ext)) << 16; - ext->ext_iomap = (caddr_t)ext + offset; - ext->ext_vm86.vm86_intmap = (caddr_t)ext + offset - 32; - - addr = (u_long *)ext->ext_vm86.vm86_intmap; - for (i = 0; i < (ctob(IOPAGES) + 32 + 16) / sizeof(u_long); i++) - *addr++ = ~0; - - ssd.ssd_base = (unsigned)&ext->ext_tss; - ssd.ssd_limit -= ((unsigned)&ext->ext_tss - (unsigned)ext); - ssdtosd(&ssd, &ext->ext_tssd); - - KASSERT(td->td_proc == curthread->td_proc, ("giving TSS to !curproc")); - KASSERT(td->td_pcb->pcb_ext == 0, ("already have a TSS!")); - mtx_lock_spin(&sched_lock); - td->td_pcb->pcb_ext = ext; - - /* switch to the new TSS after syscall completes */ - td->td_flags |= TDF_NEEDRESCHED; - mtx_unlock_spin(&sched_lock); - - return 0; -} - -static int -i386_set_ioperm(td, args) - struct thread *td; - char *args; -{ - int i, error; - struct i386_ioperm_args ua; - char *iomap; - - if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0) - return (error); - -#ifdef MAC - if ((error = mac_check_sysarch_ioperm(td->td_ucred)) != 0) - return (error); -#endif - if ((error = suser(td)) != 0) - return (error); - if ((error = securelevel_gt(td->td_ucred, 0)) != 0) - return (error); - /* - * XXX - * While this is restricted to root, we should probably figure out - * whether any other driver is using this i/o address, as so not to - * cause confusion. This probably requires a global 'usage registry'. 
- */ - - if (td->td_pcb->pcb_ext == 0) - if ((error = i386_extend_pcb(td)) != 0) - return (error); - iomap = (char *)td->td_pcb->pcb_ext->ext_iomap; - - if (ua.start + ua.length > IOPAGES * PAGE_SIZE * NBBY) - return (EINVAL); - - for (i = ua.start; i < ua.start + ua.length; i++) { - if (ua.enable) - iomap[i >> 3] &= ~(1 << (i & 7)); - else - iomap[i >> 3] |= (1 << (i & 7)); - } - return (error); -} - -static int -i386_get_ioperm(td, args) - struct thread *td; - char *args; -{ - int i, state, error; - struct i386_ioperm_args ua; - char *iomap; - - if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0) - return (error); - if (ua.start >= IOPAGES * PAGE_SIZE * NBBY) - return (EINVAL); - - if (td->td_pcb->pcb_ext == 0) { - ua.length = 0; - goto done; - } - - iomap = (char *)td->td_pcb->pcb_ext->ext_iomap; - - i = ua.start; - state = (iomap[i >> 3] >> (i & 7)) & 1; - ua.enable = !state; - ua.length = 1; - - for (i = ua.start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) { - if (state != ((iomap[i >> 3] >> (i & 7)) & 1)) - break; - ua.length++; - } - -done: - error = copyout(&ua, args, sizeof(struct i386_ioperm_args)); return (error); } - -/* - * Update the GDT entry pointing to the LDT to point to the LDT of the - * current process. - * - * This must be called with sched_lock held. Unfortunately, we can't use a - * mtx_assert() here because cpu_switch() calls this function after changing - * curproc but before sched_lock's owner is updated in mi_switch(). 
- */ -void -set_user_ldt(struct mdproc *mdp) -{ - struct proc_ldt *pldt; - - pldt = mdp->md_ldt; -#ifdef SMP - gdt[PCPU_GET(cpuid) * NGDT + GUSERLDT_SEL].sd = pldt->ldt_sd; -#else - gdt[GUSERLDT_SEL].sd = pldt->ldt_sd; -#endif - lldt(GSEL(GUSERLDT_SEL, SEL_KPL)); - PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL)); -} - -#ifdef SMP -static void -set_user_ldt_rv(struct thread *td) -{ - - if (td->td_proc != curthread->td_proc) - return; - - mtx_lock_spin(&sched_lock); - set_user_ldt(&td->td_proc->p_md); - mtx_unlock_spin(&sched_lock); -} -#endif - -/* - * Must be called with either sched_lock free or held but not recursed. - * If it does not return NULL, it will return with it owned. - */ -struct proc_ldt * -user_ldt_alloc(struct mdproc *mdp, int len) -{ - struct proc_ldt *pldt, *new_ldt; - - if (mtx_owned(&sched_lock)) - mtx_unlock_spin(&sched_lock); - mtx_assert(&sched_lock, MA_NOTOWNED); - MALLOC(new_ldt, struct proc_ldt *, sizeof(struct proc_ldt), - M_SUBPROC, M_WAITOK); - - new_ldt->ldt_len = len = NEW_MAX_LD(len); - new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map, - len * sizeof(union descriptor)); - if (new_ldt->ldt_base == NULL) { - FREE(new_ldt, M_SUBPROC); - return NULL; - } - new_ldt->ldt_refcnt = 1; - new_ldt->ldt_active = 0; - - mtx_lock_spin(&sched_lock); - gdt_segs[GUSERLDT_SEL].ssd_base = (unsigned)new_ldt->ldt_base; - gdt_segs[GUSERLDT_SEL].ssd_limit = len * sizeof(union descriptor) - 1; - ssdtosd(&gdt_segs[GUSERLDT_SEL], &new_ldt->ldt_sd); - - if ((pldt = mdp->md_ldt)) { - if (len > pldt->ldt_len) - len = pldt->ldt_len; - bcopy(pldt->ldt_base, new_ldt->ldt_base, - len * sizeof(union descriptor)); - } else { - bcopy(ldt, new_ldt->ldt_base, sizeof(ldt)); - } - return new_ldt; -} - -/* - * Must be called either with sched_lock free or held but not recursed. - * If md_ldt is not NULL, it will return with sched_lock released. 
- */ -void -user_ldt_free(struct thread *td) -{ - struct mdproc *mdp = &td->td_proc->p_md; - struct proc_ldt *pldt = mdp->md_ldt; - - if (pldt == NULL) - return; - - if (!mtx_owned(&sched_lock)) - mtx_lock_spin(&sched_lock); - mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED); - if (td == PCPU_GET(curthread)) { - lldt(_default_ldt); - PCPU_SET(currentldt, _default_ldt); - } - - mdp->md_ldt = NULL; - if (--pldt->ldt_refcnt == 0) { - mtx_unlock_spin(&sched_lock); - kmem_free(kernel_map, (vm_offset_t)pldt->ldt_base, - pldt->ldt_len * sizeof(union descriptor)); - FREE(pldt, M_SUBPROC); - } else - mtx_unlock_spin(&sched_lock); -} - -static int -i386_get_ldt(td, args) - struct thread *td; - char *args; -{ - int error = 0; - struct proc_ldt *pldt = td->td_proc->p_md.md_ldt; - int nldt, num; - union descriptor *lp; - struct i386_ldt_args ua, *uap = &ua; - - if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) < 0) - return(error); - -#ifdef DEBUG - printf("i386_get_ldt: start=%d num=%d descs=%p\n", - uap->start, uap->num, (void *)uap->descs); -#endif - - /* verify range of LDTs exist */ - if ((uap->start < 0) || (uap->num <= 0)) - return(EINVAL); - - if (pldt) { - nldt = pldt->ldt_len; - num = min(uap->num, nldt); - lp = &((union descriptor *)(pldt->ldt_base))[uap->start]; - } else { - nldt = sizeof(ldt)/sizeof(ldt[0]); - num = min(uap->num, nldt); - lp = &ldt[uap->start]; - } - if (uap->start + num > nldt) - return(EINVAL); - - error = copyout(lp, uap->descs, num * sizeof(union descriptor)); - if (!error) - td->td_retval[0] = num; - - return(error); -} - -static int -i386_set_ldt(td, args) - struct thread *td; - char *args; -{ - int error = 0, i, n; - int largest_ld; - struct mdproc *mdp = &td->td_proc->p_md; - struct proc_ldt *pldt = mdp->md_ldt; - struct i386_ldt_args ua, *uap = &ua; - union descriptor *descs; - caddr_t old_ldt_base; - int descs_size, old_ldt_len; - register_t savecrit; - - if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) < 0) - 
return(error); - -#ifdef DEBUG - printf("i386_set_ldt: start=%d num=%d descs=%p\n", - uap->start, uap->num, (void *)uap->descs); -#endif - - /* verify range of descriptors to modify */ - if ((uap->start < 0) || (uap->start >= MAX_LD) || (uap->num < 0) || - (uap->num > MAX_LD)) - { - return(EINVAL); - } - largest_ld = uap->start + uap->num - 1; - if (largest_ld >= MAX_LD) - return(EINVAL); - - /* allocate user ldt */ - if (!pldt || largest_ld >= pldt->ldt_len) { - struct proc_ldt *new_ldt = user_ldt_alloc(mdp, largest_ld); - if (new_ldt == NULL) - return ENOMEM; - if (pldt) { - old_ldt_base = pldt->ldt_base; - old_ldt_len = pldt->ldt_len; - pldt->ldt_sd = new_ldt->ldt_sd; - pldt->ldt_base = new_ldt->ldt_base; - pldt->ldt_len = new_ldt->ldt_len; - mtx_unlock_spin(&sched_lock); - kmem_free(kernel_map, (vm_offset_t)old_ldt_base, - old_ldt_len * sizeof(union descriptor)); - FREE(new_ldt, M_SUBPROC); -#ifndef SMP - mtx_lock_spin(&sched_lock); -#endif - } else { - mdp->md_ldt = pldt = new_ldt; -#ifdef SMP - mtx_unlock_spin(&sched_lock); -#endif - } -#ifdef SMP - /* signal other cpus to reload ldt */ - smp_rendezvous(NULL, (void (*)(void *))set_user_ldt_rv, - NULL, td); -#else - set_user_ldt(mdp); - mtx_unlock_spin(&sched_lock); -#endif - } - - descs_size = uap->num * sizeof(union descriptor); - descs = (union descriptor *)kmem_alloc(kernel_map, descs_size); - if (descs == NULL) - return (ENOMEM); - error = copyin(&uap->descs[0], descs, descs_size); - if (error) { - kmem_free(kernel_map, (vm_offset_t)descs, descs_size); - return (error); - } - /* Check descriptors for access violations */ - for (i = 0, n = uap->start; i < uap->num; i++, n++) { - union descriptor *dp; - dp = &descs[i]; - - switch (dp->sd.sd_type) { - case SDT_SYSNULL: /* system null */ - dp->sd.sd_p = 0; - break; - case SDT_SYS286TSS: /* system 286 TSS available */ - case SDT_SYSLDT: /* system local descriptor table */ - case SDT_SYS286BSY: /* system 286 TSS busy */ - case SDT_SYSTASKGT: /* system task gate 
*/ - case SDT_SYS286IGT: /* system 286 interrupt gate */ - case SDT_SYS286TGT: /* system 286 trap gate */ - case SDT_SYSNULL2: /* undefined by Intel */ - case SDT_SYS386TSS: /* system 386 TSS available */ - case SDT_SYSNULL3: /* undefined by Intel */ - case SDT_SYS386BSY: /* system 386 TSS busy */ - case SDT_SYSNULL4: /* undefined by Intel */ - case SDT_SYS386IGT: /* system 386 interrupt gate */ - case SDT_SYS386TGT: /* system 386 trap gate */ - case SDT_SYS286CGT: /* system 286 call gate */ - case SDT_SYS386CGT: /* system 386 call gate */ - /* I can't think of any reason to allow a user proc - * to create a segment of these types. They are - * for OS use only. - */ - kmem_free(kernel_map, (vm_offset_t)descs, descs_size); - return EACCES; - /*NOTREACHED*/ - - /* memory segment types */ - case SDT_MEMEC: /* memory execute only conforming */ - case SDT_MEMEAC: /* memory execute only accessed conforming */ - case SDT_MEMERC: /* memory execute read conforming */ - case SDT_MEMERAC: /* memory execute read accessed conforming */ - /* Must be "present" if executable and conforming. 
*/ - if (dp->sd.sd_p == 0) { - kmem_free(kernel_map, (vm_offset_t)descs, - descs_size); - return (EACCES); - } - break; - case SDT_MEMRO: /* memory read only */ - case SDT_MEMROA: /* memory read only accessed */ - case SDT_MEMRW: /* memory read write */ - case SDT_MEMRWA: /* memory read write accessed */ - case SDT_MEMROD: /* memory read only expand dwn limit */ - case SDT_MEMRODA: /* memory read only expand dwn lim accessed */ - case SDT_MEMRWD: /* memory read write expand dwn limit */ - case SDT_MEMRWDA: /* memory read write expand dwn lim acessed */ - case SDT_MEME: /* memory execute only */ - case SDT_MEMEA: /* memory execute only accessed */ - case SDT_MEMER: /* memory execute read */ - case SDT_MEMERA: /* memory execute read accessed */ - break; - default: - kmem_free(kernel_map, (vm_offset_t)descs, descs_size); - return(EINVAL); - /*NOTREACHED*/ - } - - /* Only user (ring-3) descriptors may be present. */ - if ((dp->sd.sd_p != 0) && (dp->sd.sd_dpl != SEL_UPL)) { - kmem_free(kernel_map, (vm_offset_t)descs, descs_size); - return (EACCES); - } - } - - /* Fill in range */ - savecrit = intr_disable(); - bcopy(descs, - &((union descriptor *)(pldt->ldt_base))[uap->start], - uap->num * sizeof(union descriptor)); - td->td_retval[0] = uap->start; - intr_restore(savecrit); - kmem_free(kernel_map, (vm_offset_t)descs, descs_size); - return (0); -} diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 2884b86..980e95f 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -47,8 +47,6 @@ #include "opt_ddb.h" #include "opt_isa.h" #include "opt_ktrace.h" -#include "opt_npx.h" -#include "opt_trap.h" #include <sys/param.h> #include <sys/bus.h> @@ -81,39 +79,22 @@ #include <machine/cpu.h> #include <machine/md_var.h> #include <machine/pcb.h> -#ifdef SMP -#include <machine/smp.h> -#endif #include <machine/tss.h> -#include <i386/isa/icu.h> -#include <i386/isa/intr_machdep.h> - -#ifdef POWERFAIL_NMI -#include <sys/syslog.h> -#include <machine/clock.h> 
-#endif - -#include <machine/vm86.h> +#include <amd64/isa/icu.h> +#include <amd64/isa/intr_machdep.h> #include <ddb/ddb.h> #include <sys/sysctl.h> -int (*pmath_emulate)(struct trapframe *); - extern void trap(struct trapframe frame); -#ifdef I386_CPU -extern int trapwrite(unsigned addr); -#endif extern void syscall(struct trapframe frame); static int trap_pfault(struct trapframe *, int, vm_offset_t); static void trap_fatal(struct trapframe *, vm_offset_t); void dblfault_handler(void); -extern inthand_t IDTVEC(lcall_syscall); - #define MAX_TRAP_MSG 28 static char *trap_msg[] = { "", /* 0 unused */ @@ -147,10 +128,6 @@ static char *trap_msg[] = { "machine check trap", /* 28 T_MCHK */ }; -#if defined(I586_CPU) && !defined(NO_F00F_HACK) -extern int has_f00f_bug; -#endif - #ifdef DDB static int ddb_on_nmi = 1; SYSCTL_INT(_machdep, OID_AUTO, ddb_on_nmi, CTLFLAG_RW, @@ -185,9 +162,6 @@ trap(frame) u_int sticks = 0; int i = 0, ucode = 0, type, code; vm_offset_t eva; -#ifdef POWERFAIL_NMI - static int lastalert = 0; -#endif atomic_add_int(&cnt.v_trap, 1); type = frame.tf_trapno; @@ -200,7 +174,7 @@ trap(frame) } #endif - if ((frame.tf_eflags & PSL_I) == 0) { + if ((frame.tf_rflags & PSL_I) == 0) { /* * Buggy application or kernel code has disabled * interrupts and then trapped. Enabling interrupts @@ -208,12 +182,11 @@ trap(frame) * interrupts disabled until they are accidentally * enabled later. */ - if (ISPL(frame.tf_cs) == SEL_UPL || (frame.tf_eflags & PSL_VM)) + if (ISPL(frame.tf_cs) == SEL_UPL) printf( "pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curproc->p_comm, type); - else if (type != T_BPTFLT && type != T_TRCTRAP && - frame.tf_eip != (int)cpu_switch_load_gs) { + else if (type != T_BPTFLT && type != T_TRCTRAP) { /* * XXX not quite right, since this may be for a * multiple fault in user mode. @@ -234,12 +207,6 @@ trap(frame) code = frame.tf_err; if (type == T_PAGEFLT) { /* - * For some Cyrix CPUs, %cr2 is clobbered by - * interrupts. 
This problem is worked around by using - * an interrupt gate for the pagefault handler. We - * are finally ready to read %cr2 and then must - * reenable interrupts. - * * If we get a page fault while holding a spin lock, then * it is most likely a fatal kernel page fault. The kernel * is already going to panic trying to get a sleep lock to @@ -248,9 +215,7 @@ trap(frame) * to the debugger. */ eva = rcr2(); - if (PCPU_GET(spinlocks) == NULL) - enable_intr(); - else + if (PCPU_GET(spinlocks) != NULL) trap_fatal(&frame, eva); } @@ -259,9 +224,7 @@ trap(frame) ether_poll(poll_in_trap); #endif /* DEVICE_POLLING */ - if ((ISPL(frame.tf_cs) == SEL_UPL) || - ((frame.tf_eflags & PSL_VM) && - !(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL))) { + if (ISPL(frame.tf_cs) == SEL_UPL) { /* user trap */ sticks = td->td_sticks; @@ -277,36 +240,19 @@ trap(frame) case T_BPTFLT: /* bpt instruction fault */ case T_TRCTRAP: /* trace trap */ - frame.tf_eflags &= ~PSL_T; + frame.tf_rflags &= ~PSL_T; i = SIGTRAP; break; case T_ARITHTRAP: /* arithmetic trap */ -#ifdef DEV_NPX ucode = npxtrap(); if (ucode == -1) goto userout; -#else - ucode = code; -#endif i = SIGFPE; break; - /* - * The following two traps can happen in - * vm86 mode, and, if so, we want to handle - * them specially. - */ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ - if (frame.tf_eflags & PSL_VM) { - i = vm86_emulate((struct vm86frame *)&frame); - if (i == 0) - goto user; - break; - } - /* FALLTHROUGH */ - case T_SEGNPFLT: /* segment not present fault */ case T_TSSFLT: /* invalid TSS fault */ case T_DOUBLEFLT: /* double fault */ @@ -317,21 +263,6 @@ trap(frame) case T_PAGEFLT: /* page fault */ i = trap_pfault(&frame, TRUE, eva); -#if defined(I586_CPU) && !defined(NO_F00F_HACK) - if (i == -2) { - /* - * The f00f hack workaround has triggered, so - * treat the fault as an illegal instruction - * (T_PRIVINFLT) instead of a page fault. 
- */ - type = frame.tf_trapno = T_PRIVINFLT; - - /* Proceed as in that case. */ - ucode = type; - i = SIGILL; - break; - } -#endif if (i == -1) goto userout; if (i == 0) @@ -347,19 +278,6 @@ trap(frame) #ifdef DEV_ISA case T_NMI: -#ifdef POWERFAIL_NMI -#ifndef TIMER_FREQ -# define TIMER_FREQ 1193182 -#endif - mtx_lock(&Giant); - if (time_second - lastalert > 10) { - log(LOG_WARNING, "NMI: power fail\n"); - sysbeep(TIMER_FREQ/880, hz); - lastalert = time_second; - } - mtx_unlock(&Giant); - goto userout; -#else /* !POWERFAIL_NMI */ /* machine/parity/power fail/"kitchen sink" faults */ /* XXX Giant */ if (isa_nmi(code) == 0) { @@ -377,7 +295,6 @@ trap(frame) } else if (panic_on_nmi) panic("NMI indicates hardware failure"); break; -#endif /* POWERFAIL_NMI */ #endif /* DEV_ISA */ case T_OFLOW: /* integer overflow fault */ @@ -391,26 +308,11 @@ trap(frame) break; case T_DNA: -#ifdef DEV_NPX /* transparent fault (due to context switch "late") */ if (npxdna()) goto userout; -#endif - if (!pmath_emulate) { - i = SIGFPE; - ucode = FPE_FPU_NP_TRAP; - break; - } - mtx_lock(&Giant); - i = (*pmath_emulate)(&frame); - mtx_unlock(&Giant); - if (i == 0) { - if (!(frame.tf_eflags & PSL_T)) - goto userout; - frame.tf_eflags &= ~PSL_T; - i = SIGTRAP; - } - /* else ucode = emulator_only_knows() XXX */ + i = SIGFPE; + ucode = FPE_FPU_NP_TRAP; break; case T_FPOPFLT: /* FPU operand fetch fault */ @@ -434,61 +336,22 @@ trap(frame) goto out; case T_DNA: -#ifdef DEV_NPX /* * The kernel is apparently using npx for copying. * XXX this should be fatal unless the kernel has * registered such use. */ - if (npxdna()) + if (npxdna()) { + printf("npxdna in kernel mode!\n"); goto out; -#endif + } break; - /* - * The following two traps can happen in - * vm86 mode, and, if so, we want to handle - * them specially. 
- */ - case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ - if (frame.tf_eflags & PSL_VM) { - i = vm86_emulate((struct vm86frame *)&frame); - if (i != 0) - /* - * returns to original process - */ - vm86_trap((struct vm86frame *)&frame); - goto out; - } - if (type == T_STKFLT) - break; - - /* FALL THROUGH */ + break; + case T_PROTFLT: /* general protection fault */ case T_SEGNPFLT: /* segment not present fault */ - if (PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL) - break; - - /* - * Invalid %fs's and %gs's can be created using - * procfs or PT_SETREGS or by invalidating the - * underlying LDT entry. This causes a fault - * in kernel mode when the kernel attempts to - * switch contexts. Lose the bad context - * (XXX) so that we can continue, and generate - * a signal. - */ - if (frame.tf_eip == (int)cpu_switch_load_gs) { - PCPU_GET(curpcb)->pcb_gs = 0; -#if 0 - PROC_LOCK(p); - psignal(p, SIGBUS); - PROC_UNLOCK(p); -#endif - goto out; - } - if (td->td_intr_nesting_level != 0) break; @@ -502,26 +365,14 @@ trap(frame) * selectors and pointers when the user changes * them. */ - if (frame.tf_eip == (int)doreti_iret) { - frame.tf_eip = (int)doreti_iret_fault; - goto out; - } - if (frame.tf_eip == (int)doreti_popl_ds) { - frame.tf_eip = (int)doreti_popl_ds_fault; - goto out; - } - if (frame.tf_eip == (int)doreti_popl_es) { - frame.tf_eip = (int)doreti_popl_es_fault; - goto out; - } - if (frame.tf_eip == (int)doreti_popl_fs) { - frame.tf_eip = (int)doreti_popl_fs_fault; + if (frame.tf_rip == (long)doreti_iret) { + frame.tf_rip = (long)doreti_iret_fault; goto out; } if (PCPU_GET(curpcb) != NULL && PCPU_GET(curpcb)->pcb_onfault != NULL) { - frame.tf_eip = - (int)PCPU_GET(curpcb)->pcb_onfault; + frame.tf_rip = + (long)PCPU_GET(curpcb)->pcb_onfault; goto out; } break; @@ -536,50 +387,13 @@ trap(frame) * problem here and not every time the kernel is * entered. 
*/ - if (frame.tf_eflags & PSL_NT) { - frame.tf_eflags &= ~PSL_NT; + if (frame.tf_rflags & PSL_NT) { + frame.tf_rflags &= ~PSL_NT; goto out; } break; case T_TRCTRAP: /* trace trap */ - if (frame.tf_eip == (int)IDTVEC(lcall_syscall)) { - /* - * We've just entered system mode via the - * syscall lcall. Continue single stepping - * silently until the syscall handler has - * saved the flags. - */ - goto out; - } - if (frame.tf_eip == (int)IDTVEC(lcall_syscall) + 1) { - /* - * The syscall handler has now saved the - * flags. Stop single stepping it. - */ - frame.tf_eflags &= ~PSL_T; - goto out; - } - /* - * Ignore debug register trace traps due to - * accesses in the user's address space, which - * can happen under several conditions such as - * if a user sets a watchpoint on a buffer and - * then passes that buffer to a system call. - * We still want to get TRCTRAPS for addresses - * in kernel space because that is useful when - * debugging the kernel. - */ - /* XXX Giant */ - if (user_dbreg_trap() && - !(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL)) { - /* - * Reset breakpoint bits because the - * processor doesn't - */ - load_dr6(rdr6() & 0xfffffff0); - goto out; - } /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ @@ -597,16 +411,6 @@ trap(frame) #ifdef DEV_ISA case T_NMI: -#ifdef POWERFAIL_NMI - mtx_lock(&Giant); - if (time_second - lastalert > 10) { - log(LOG_WARNING, "NMI: power fail\n"); - sysbeep(TIMER_FREQ/880, hz); - lastalert = time_second; - } - mtx_unlock(&Giant); - goto out; -#else /* !POWERFAIL_NMI */ /* XXX Giant */ /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) { @@ -624,7 +428,6 @@ trap(frame) } else if (panic_on_nmi == 0) goto out; /* FALLTHROUGH */ -#endif /* POWERFAIL_NMI */ #endif /* DEV_ISA */ } @@ -677,16 +480,7 @@ trap_pfault(frame, usermode, eva) if (va >= KERNBASE) { /* * Don't allow user-mode faults in kernel address space. 
- * An exception: if the faulting address is the invalid - * instruction entry in the IDT, then the Intel Pentium - * F00F bug workaround was triggered, and we need to - * treat it is as an illegal instruction, and not a page - * fault. */ -#if defined(I586_CPU) && !defined(NO_F00F_HACK) - if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) - return -2; -#endif if (usermode) goto nogo; @@ -742,7 +536,7 @@ nogo: if (td->td_intr_nesting_level == 0 && PCPU_GET(curpcb) != NULL && PCPU_GET(curpcb)->pcb_onfault != NULL) { - frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault; + frame->tf_rip = (long)PCPU_GET(curpcb)->pcb_onfault; return (0); } trap_fatal(frame, eva); @@ -765,18 +559,12 @@ trap_fatal(frame, eva) code = frame->tf_err; type = frame->tf_trapno; - sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg); + sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)], &softseg); if (type <= MAX_TRAP_MSG) printf("\n\nFatal trap %d: %s while in %s mode\n", type, trap_msg[type], - frame->tf_eflags & PSL_VM ? "vm86" : ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); -#ifdef SMP - /* two separate prints in case of a trap on an unmapped page */ - printf("cpuid = %d; ", PCPU_GET(cpuid)); - printf("lapic.id = %08x\n", lapic.id); -#endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%x\n", eva); printf("fault code = %s %s, %s\n", @@ -785,33 +573,31 @@ trap_fatal(frame, eva) code & PGEX_P ? 
"protection violation" : "page not present"); } printf("instruction pointer = 0x%x:0x%x\n", - frame->tf_cs & 0xffff, frame->tf_eip); - if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) { + frame->tf_cs & 0xffff, frame->tf_rip); + if (ISPL(frame->tf_cs) == SEL_UPL) { ss = frame->tf_ss & 0xffff; - esp = frame->tf_esp; + esp = frame->tf_rsp; } else { ss = GSEL(GDATA_SEL, SEL_KPL); - esp = (int)&frame->tf_esp; + esp = (long)&frame->tf_rsp; } printf("stack pointer = 0x%x:0x%x\n", ss, esp); - printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp); + printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_rbp); printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); - printf(" = DPL %d, pres %d, def32 %d, gran %d\n", - softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, + printf(" = DPL %d, pres %d, long %d, def32 %d, gran %d\n", + softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32, softseg.ssd_gran); printf("processor eflags = "); - if (frame->tf_eflags & PSL_T) + if (frame->tf_rflags & PSL_T) printf("trace trap, "); - if (frame->tf_eflags & PSL_I) + if (frame->tf_rflags & PSL_I) printf("interrupt enabled, "); - if (frame->tf_eflags & PSL_NT) + if (frame->tf_rflags & PSL_NT) printf("nested task, "); - if (frame->tf_eflags & PSL_RF) + if (frame->tf_rflags & PSL_RF) printf("resume, "); - if (frame->tf_eflags & PSL_VM) - printf("vm86, "); - printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12); + printf("IOPL = %d\n", (frame->tf_rflags & PSL_IOPL) >> 12); printf("current process = "); if (curproc) { printf("%lu (%s)\n", @@ -841,75 +627,14 @@ trap_fatal(frame, eva) * a frame for a trap/exception onto the stack. This usually occurs * when the stack overflows (such is the case with infinite recursion, * for example). - * - * XXX Note that the current PTD gets replaced by IdlePTD when the - * task switch occurs. 
This means that the stack that was active at - * the time of the double fault is not available at <kstack> unless - * the machine was idle when the double fault occurred. The downside - * of this is that "trace <ebp>" in ddb won't work. */ void dblfault_handler() { - printf("\nFatal double fault:\n"); - printf("eip = 0x%x\n", PCPU_GET(common_tss.tss_eip)); - printf("esp = 0x%x\n", PCPU_GET(common_tss.tss_esp)); - printf("ebp = 0x%x\n", PCPU_GET(common_tss.tss_ebp)); -#ifdef SMP - /* two separate prints in case of a trap on an unmapped page */ - printf("cpuid = %d; ", PCPU_GET(cpuid)); - printf("lapic.id = %08x\n", lapic.id); -#endif + printf("\nFatal double fault\n"); panic("double fault"); } -#ifdef I386_CPU -/* - * Compensate for 386 brain damage (missing URKR). - * This is a little simpler than the pagefault handler in trap() because - * it the page tables have already been faulted in and high addresses - * are thrown out early for other reasons. - */ -int trapwrite(addr) - unsigned addr; -{ - struct thread *td; - struct proc *p; - vm_offset_t va; - struct vmspace *vm; - int rv; - - va = trunc_page((vm_offset_t)addr); - /* - * XXX - MAX is END. Changed > to >= for temp. fix. 
- */ - if (va >= VM_MAXUSER_ADDRESS) - return (1); - - td = curthread; - p = td->td_proc; - vm = p->p_vmspace; - - PROC_LOCK(p); - ++p->p_lock; - PROC_UNLOCK(p); - - /* - * fault the data page - */ - rv = vm_fault(&vm->vm_map, va, VM_PROT_WRITE, VM_FAULT_DIRTY); - - PROC_LOCK(p); - --p->p_lock; - PROC_UNLOCK(p); - - if (rv != KERN_SUCCESS) - return 1; - - return (0); -} -#endif - /* * syscall - system call request C handler * @@ -923,12 +648,14 @@ syscall(frame) struct sysent *callp; struct thread *td = curthread; struct proc *p = td->td_proc; - register_t orig_tf_eflags; + register_t orig_tf_rflags; u_int sticks; int error; int narg; - int args[8]; + register_t args[8]; + register_t *argp; u_int code; + int reg, regcnt; /* * note: PCPU_LAZY_INC() can only be used if we can afford @@ -945,39 +672,28 @@ syscall(frame) } #endif + reg = 0; + regcnt = 6; sticks = td->td_sticks; td->td_frame = &frame; if (td->td_ucred != p->p_ucred) cred_update_thread(td); if (p->p_flag & P_THREADED) thread_user_enter(p, td); - params = (caddr_t)frame.tf_esp + sizeof(int); - code = frame.tf_eax; - orig_tf_eflags = frame.tf_eflags; + params = (caddr_t)frame.tf_rsp + sizeof(register_t); + code = frame.tf_rax; + orig_tf_rflags = frame.tf_rflags; if (p->p_sysent->sv_prepsyscall) { /* * The prep code is MP aware. */ - (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, ¶ms); + (*p->p_sysent->sv_prepsyscall)(&frame, (int *)args, &code, ¶ms); } else { - /* - * Need to check if this is a 32 bit or 64 bit syscall. - * fuword is MP aware. - */ - if (code == SYS_syscall) { - /* - * Code is first argument, followed by actual args. - */ - code = fuword(params); - params += sizeof(int); - } else if (code == SYS___syscall) { - /* - * Like syscall, but code is a quad, so as to maintain - * quad alignment for the rest of the arguments. 
- */ - code = fuword(params); - params += sizeof(quad_t); + if (code == SYS_syscall || code == SYS___syscall) { + code = frame.tf_rdi; + reg++; + regcnt--; } } @@ -994,15 +710,25 @@ syscall(frame) /* * copyin and the ktrsyscall()/ktrsysret() code is MP-aware */ - if (params != NULL && narg != 0) - error = copyin(params, (caddr_t)args, - (u_int)(narg * sizeof(int))); - else + if (narg <= regcnt) { + argp = &frame.tf_rdi; + argp += reg; error = 0; - + } else { + KASSERT(narg <= sizeof(args) / sizeof(args[0]), + ("Too many syscall arguments!")); + KASSERT(params != NULL, ("copyin args with no params!")); + argp = &frame.tf_rdi; + argp += reg; + bcopy(argp, args, sizeof(args[0]) * regcnt); + error = copyin(params, &args[regcnt], + (narg - regcnt) * sizeof(args[0])); + argp = &args[0]; + } + #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) - ktrsyscall(code, narg, args); + ktrsyscall(code, narg, argp); #endif /* @@ -1014,18 +740,18 @@ syscall(frame) if (error == 0) { td->td_retval[0] = 0; - td->td_retval[1] = frame.tf_edx; + td->td_retval[1] = frame.tf_rdx; STOPEVENT(p, S_SCE, narg); - error = (*callp->sy_call)(td, args); + error = (*callp->sy_call)(td, argp); } switch (error) { case 0: - frame.tf_eax = td->td_retval[0]; - frame.tf_edx = td->td_retval[1]; - frame.tf_eflags &= ~PSL_C; + frame.tf_rax = td->td_retval[0]; + frame.tf_rdx = td->td_retval[1]; + frame.tf_rflags &= ~PSL_C; break; case ERESTART: @@ -1033,7 +759,7 @@ syscall(frame) * Reconstruct pc, assuming lcall $X,y is 7 bytes, * int 0x80 is 2 bytes. We saved this in tf_err. */ - frame.tf_eip -= frame.tf_err; + frame.tf_rip -= frame.tf_err; break; case EJUSTRETURN: @@ -1046,8 +772,8 @@ syscall(frame) else error = p->p_sysent->sv_errtbl[error]; } - frame.tf_eax = error; - frame.tf_eflags |= PSL_C; + frame.tf_rax = error; + frame.tf_rflags |= PSL_C; break; } @@ -1060,8 +786,8 @@ syscall(frame) /* * Traced syscall. 
*/ - if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) { - frame.tf_eflags &= ~PSL_T; + if (orig_tf_rflags & PSL_T) { + frame.tf_rflags &= ~PSL_T; trapsignal(td, SIGTRAP, 0); } diff --git a/sys/amd64/amd64/tsc.c b/sys/amd64/amd64/tsc.c index 36ae1b9..e375718 100644 --- a/sys/amd64/amd64/tsc.c +++ b/sys/amd64/amd64/tsc.c @@ -42,7 +42,7 @@ uint64_t tsc_freq; int tsc_is_broken; -u_int tsc_present; +u_int tsc_present = 1; #ifdef SMP static int smp_tsc; @@ -66,14 +66,6 @@ init_TSC(void) { u_int64_t tscval[2]; - if (cpu_feature & CPUID_TSC) - tsc_present = 1; - else - tsc_present = 0; - - if (!tsc_present) - return; - if (bootverbose) printf("Calibrating TSC clock ... "); @@ -83,38 +75,9 @@ init_TSC(void) tsc_freq = tscval[1] - tscval[0]; if (bootverbose) - printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq); - -#ifdef SMP - /* - * We can not use the TSC in SMP mode unless the TSCs on all CPUs - * are somehow synchronized. Some hardware configurations do - * this, but we have no way of determining whether this is the - * case, so we do not use the TSC in multi-processor systems - * unless the user indicated (by setting kern.timecounter.smp_tsc - * to 1) that he believes that his TSCs are synchronized. - */ - if (mp_ncpus > 1 && !smp_tsc) - return; -#endif - - /* - * We can not use the TSC if we support APM. Precise timekeeping - * on an APM'ed machine is at best a fools pursuit, since - * any and all of the time spent in various SMM code can't - * be reliably accounted for. Reading the RTC is your only - * source of reliable time info. The i8254 looses too of course - * but we need to have some kind of time... - * We don't know at this point whether APM is going to be used - * or not, nor when it might be activated. Play it safe. 
- */ - if (power_pm_get_type() == POWER_PM_TYPE_APM) { - if (bootverbose) - printf("TSC timecounter disabled: APM enabled.\n"); - return; - } + printf("TSC clock: %lu Hz\n", tsc_freq); - if (tsc_present && tsc_freq != 0 && !tsc_is_broken) { + if (tsc_freq != 0 && !tsc_is_broken) { tsc_timecounter.tc_frequency = tsc_freq; tc_init(&tsc_timecounter); } @@ -139,7 +102,7 @@ sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS) return (error); } -SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_QUAD | CTLFLAG_RW, +SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_LONG | CTLFLAG_RW, 0, sizeof(u_int), sysctl_machdep_tsc_freq, "IU", ""); static unsigned diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index bdb189f..a4ebe9c 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -41,11 +41,6 @@ * $FreeBSD$ */ -#include "opt_npx.h" -#ifdef PC98 -#include "opt_pc98.h" -#endif -#include "opt_reset.h" #include "opt_isa.h" #include "opt_kstack_pages.h" @@ -61,15 +56,12 @@ #include <sys/kernel.h> #include <sys/ktr.h> #include <sys/mutex.h> -#include <sys/smp.h> #include <sys/sysctl.h> #include <sys/unistd.h> #include <machine/cpu.h> #include <machine/md_var.h> #include <machine/pcb.h> -#include <machine/pcb_ext.h> -#include <machine/vm86.h> #include <vm/vm.h> #include <vm/vm_param.h> @@ -81,18 +73,9 @@ #include <sys/user.h> -#ifdef PC98 -#include <pc98/pc98/pc98.h> -#else -#include <i386/isa/isa.h> -#endif +#include <amd64/isa/isa.h> static void cpu_reset_real(void); -#ifdef SMP -static void cpu_reset_proxy(void); -static u_int cpu_reset_proxyid; -static volatile u_int cpu_reset_proxy_active; -#endif extern int _ucodesel, _udatasel; /* @@ -110,37 +93,17 @@ cpu_fork(td1, p2, td2, flags) register struct proc *p1; struct pcb *pcb2; struct mdproc *mdp2; -#ifdef DEV_NPX register_t savecrit; -#endif p1 = td1->td_proc; - if ((flags & RFPROC) == 0) { - if ((flags & RFMEM) == 0) { - /* unshare user LDT */ - struct mdproc *mdp1 = &p1->p_md; - struct 
proc_ldt *pldt = mdp1->md_ldt; - if (pldt && pldt->ldt_refcnt > 1) { - pldt = user_ldt_alloc(mdp1, pldt->ldt_len); - if (pldt == NULL) - panic("could not copy LDT"); - mdp1->md_ldt = pldt; - set_user_ldt(mdp1); - user_ldt_free(td1); - } - } + if ((flags & RFPROC) == 0) return; - } /* Ensure that p1's pcb is up to date. */ -#ifdef DEV_NPX - if (td1 == curthread) - td1->td_pcb->pcb_gs = rgs(); savecrit = intr_disable(); if (PCPU_GET(fpcurthread) == td1) npxsave(&td1->td_pcb->pcb_save); intr_restore(savecrit); -#endif /* Point the pcb to the top of the stack */ pcb2 = (struct pcb *)(td2->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; @@ -157,63 +120,34 @@ cpu_fork(td1, p2, td2, flags) * Create a new fresh stack for the new process. * Copy the trap frame for the return to user mode as if from a * syscall. This copies most of the user mode register values. - * The -16 is so we can expand the trapframe if we go to vm86. */ - td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb - 16) - 1; + td2->td_frame = (struct trapframe *)td2->td_pcb - 1; bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe)); - td2->td_frame->tf_eax = 0; /* Child returns zero */ - td2->td_frame->tf_eflags &= ~PSL_C; /* success */ - td2->td_frame->tf_edx = 1; + td2->td_frame->tf_rax = 0; /* Child returns zero */ + td2->td_frame->tf_rflags &= ~PSL_C; /* success */ + td2->td_frame->tf_rdx = 1; /* * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. 
*/ -#ifdef PAE - pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdpt); -#else - pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir); -#endif - pcb2->pcb_edi = 0; - pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */ - pcb2->pcb_ebp = 0; - pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *); - pcb2->pcb_ebx = (int)td2; /* fork_trampoline argument */ - pcb2->pcb_eip = (int)fork_trampoline; - pcb2->pcb_psl = td2->td_frame->tf_eflags & ~PSL_I; /* ints disabled */ - pcb2->pcb_gs = rgs(); + pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pml4); + pcb2->pcb_r12 = (register_t)fork_return; /* fork_trampoline argument */ + pcb2->pcb_rbp = 0; + pcb2->pcb_rsp = (register_t)td2->td_frame - sizeof(void *); + pcb2->pcb_rbx = (register_t)td2; /* fork_trampoline argument */ + pcb2->pcb_rip = (register_t)fork_trampoline; + pcb2->pcb_rflags = td2->td_frame->tf_rflags & ~PSL_I; /* ints disabled */ /*- - * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). - * pcb2->pcb_gs: cloned above. - * pcb2->pcb_ext: cleared below. - */ - - /* - * XXX don't copy the i/o pages. this should probably be fixed. */ - pcb2->pcb_ext = 0; - - /* Copy the LDT, if necessary. */ - mtx_lock_spin(&sched_lock); - if (mdp2->md_ldt != 0) { - if (flags & RFMEM) { - mdp2->md_ldt->ldt_refcnt++; - } else { - mdp2->md_ldt = user_ldt_alloc(mdp2, - mdp2->md_ldt->ldt_len); - if (mdp2->md_ldt == NULL) - panic("could not copy LDT"); - } - } - mtx_unlock_spin(&sched_lock); /* * Now, cpu_switch() can schedule the new process. - * pcb_esp is loaded pointing to the cpu_switch() stack frame + * pcb_rsp is loaded pointing to the cpu_switch() stack frame * containing the return address when exiting cpu_switch. * This will normally be to fork_trampoline(), which will have * %ebx loaded with the new proc's pointer. 
fork_trampoline() @@ -238,8 +172,8 @@ cpu_set_fork_handler(td, func, arg) * Note that the trap frame follows the args, so the function * is really called like this: func(arg, frame); */ - td->td_pcb->pcb_esi = (int) func; /* function */ - td->td_pcb->pcb_ebx = (int) arg; /* first arg */ + td->td_pcb->pcb_r12 = (long) func; /* function */ + td->td_pcb->pcb_rbx = (long) arg; /* first arg */ } void @@ -247,51 +181,19 @@ cpu_exit(struct thread *td) { struct mdproc *mdp; - /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ mdp = &td->td_proc->p_md; - if (mdp->md_ldt) { - td->td_pcb->pcb_gs = _udatasel; - load_gs(_udatasel); - user_ldt_free(td); - } - reset_dbregs(); } void cpu_thread_exit(struct thread *td) { - struct pcb *pcb = td->td_pcb; -#ifdef DEV_NPX + npxexit(td); -#endif - if (pcb->pcb_flags & PCB_DBREGS) { - /* - * disable all hardware breakpoints - */ - reset_dbregs(); - pcb->pcb_flags &= ~PCB_DBREGS; - } } void cpu_thread_clean(struct thread *td) { - struct pcb *pcb; - - pcb = td->td_pcb; - if (pcb->pcb_ext != 0) { - /* XXXKSE XXXSMP not SMP SAFE.. what locks do we have? */ - /* if (pcb->pcb_ext->ext_refcount-- == 1) ?? */ - /* - * XXX do we need to move the TSS off the allocated pages - * before freeing them? (not done here) - */ - mtx_lock(&Giant); - kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext, - ctob(IOPAGES + 1)); - mtx_unlock(&Giant); - pcb->pcb_ext = 0; - } } void @@ -306,7 +208,7 @@ cpu_thread_setup(struct thread *td) td->td_pcb = (struct pcb *)(td->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; - td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1; + td->td_frame = (struct trapframe *)td->td_pcb - 1; } /* @@ -319,61 +221,6 @@ cpu_thread_setup(struct thread *td) void cpu_set_upcall(struct thread *td, void *pcb) { - struct pcb *pcb2; - - /* Point the pcb to the top of the stack. */ - pcb2 = td->td_pcb; - - /* - * Copy the upcall pcb. This loads kernel regs. 
- * Those not loaded individually below get their default - * values here. - * - * XXXKSE It might be a good idea to simply skip this as - * the values of the other registers may be unimportant. - * This would remove any requirement for knowing the KSE - * at this time (see the matching comment below for - * more analysis) (need a good safe default). - */ - bcopy(pcb, pcb2, sizeof(*pcb2)); - - /* - * Create a new fresh stack for the new thread. - * The -16 is so we can expand the trapframe if we go to vm86. - * Don't forget to set this stack value into whatever supplies - * the address for the fault handlers. - * The contexts are filled in at the time we actually DO the - * upcall as only then do we know which KSE we got. - */ - td->td_frame = (struct trapframe *)((caddr_t)pcb2 - 16) - 1; - - /* - * Set registers for trampoline to user mode. Leave space for the - * return address on stack. These are the kernel mode register values. - */ -#ifdef PAE - pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdpt); -#else - pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdir); -#endif - pcb2->pcb_edi = 0; - pcb2->pcb_esi = (int)fork_return; /* trampoline arg */ - pcb2->pcb_ebp = 0; - pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */ - pcb2->pcb_ebx = (int)td; /* trampoline arg */ - pcb2->pcb_eip = (int)fork_trampoline; - pcb2->pcb_psl &= ~(PSL_I); /* interrupts must be disabled */ - pcb2->pcb_gs = rgs(); - /* - * If we didn't copy the pcb, we'd need to do the following registers: - * pcb2->pcb_dr*: cloned above. - * pcb2->pcb_savefpu: cloned above. - * pcb2->pcb_flags: cloned above. - * pcb2->pcb_onfault: cloned above (always NULL here?). - * pcb2->pcb_gs: cloned above. XXXKSE ??? - * pcb2->pcb_ext: cleared below. 
- */ - pcb2->pcb_ext = NULL; } /* @@ -384,30 +231,6 @@ cpu_set_upcall(struct thread *td, void *pcb) void cpu_set_upcall_kse(struct thread *td, struct kse_upcall *ku) { - - /* - * Do any extra cleaning that needs to be done. - * The thread may have optional components - * that are not present in a fresh thread. - * This may be a recycled thread so make it look - * as though it's newly allocated. - */ - cpu_thread_clean(td); - - /* - * Set the trap frame to point at the beginning of the uts - * function. - */ - td->td_frame->tf_esp = - (int)ku->ku_stack.ss_sp + ku->ku_stack.ss_size - 16; - td->td_frame->tf_eip = (int)ku->ku_func; - - /* - * Pass the address of the mailbox for this kse to the uts - * function as a parameter on the stack. - */ - suword((void *)(td->td_frame->tf_esp + sizeof(void *)), - (int)ku->ku_mailbox); } void @@ -417,116 +240,29 @@ cpu_wait(p) } /* - * Convert kernel VA to physical address - */ -vm_paddr_t -kvtop(void *addr) -{ - vm_paddr_t pa; - - pa = pmap_kextract((vm_offset_t)addr); - if (pa == 0) - panic("kvtop: zero page frame"); - return (pa); -} - -/* * Force reset the processor by invalidating the entire address space! 
*/ -#ifdef SMP -static void -cpu_reset_proxy() -{ - - cpu_reset_proxy_active = 1; - while (cpu_reset_proxy_active == 1) - ; /* Wait for other cpu to see that we've started */ - stop_cpus((1<<cpu_reset_proxyid)); - printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid); - DELAY(1000000); - cpu_reset_real(); -} -#endif - void cpu_reset() { -#ifdef SMP - if (smp_active == 0) { - cpu_reset_real(); - /* NOTREACHED */ - } else { - - u_int map; - int cnt; - printf("cpu_reset called on cpu#%d\n", PCPU_GET(cpuid)); - - map = PCPU_GET(other_cpus) & ~ stopped_cpus; - - if (map != 0) { - printf("cpu_reset: Stopping other CPUs\n"); - stop_cpus(map); /* Stop all other CPUs */ - } - - if (PCPU_GET(cpuid) == 0) { - DELAY(1000000); - cpu_reset_real(); - /* NOTREACHED */ - } else { - /* We are not BSP (CPU #0) */ - - cpu_reset_proxyid = PCPU_GET(cpuid); - cpustop_restartfunc = cpu_reset_proxy; - cpu_reset_proxy_active = 0; - printf("cpu_reset: Restarting BSP\n"); - started_cpus = (1<<0); /* Restart CPU #0 */ - - cnt = 0; - while (cpu_reset_proxy_active == 0 && cnt < 10000000) - cnt++; /* Wait for BSP to announce restart */ - if (cpu_reset_proxy_active == 0) - printf("cpu_reset: Failed to restart BSP\n"); - enable_intr(); - cpu_reset_proxy_active = 2; - - while (1); - /* NOTREACHED */ - } - } -#else cpu_reset_real(); -#endif } static void cpu_reset_real() { -#ifdef PC98 - /* - * Attempt to do a CPU reset via CPU reset port. - */ - disable_intr(); - if ((inb(0x35) & 0xa0) != 0xa0) { - outb(0x37, 0x0f); /* SHUT0 = 0. */ - outb(0x37, 0x0b); /* SHUT1 = 0. */ - } - outb(0xf0, 0x00); /* Reset. */ -#else /* * Attempt to do a CPU reset via the keyboard controller, * do not turn of the GateA20, as any machine that fails * to do the reset here would then end up in no man's land. 
*/ -#if !defined(BROKEN_KEYBOARD_RESET) outb(IO_KBD + 4, 0xFE); DELAY(500000); /* wait 0.5 sec to see if that did it */ printf("Keyboard reset did not work, attempting CPU shutdown\n"); DELAY(1000000); /* wait 1 sec for printf to complete */ -#endif -#endif /* PC98 */ /* force a shutdown by unmapping entire address space ! */ bzero((caddr_t)PTD, NBPTD); diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC index 5d615ee..bfa88a6 100644 --- a/sys/amd64/conf/GENERIC +++ b/sys/amd64/conf/GENERIC @@ -1,5 +1,5 @@ -# -# GENERIC -- Generic kernel configuration file for FreeBSD/i386 +# ex:ts=8 +# GENERIC -- Generic kernel configuration file for FreeBSD/amd64 # # For more information on this file, please read the handbook section on # Kernel Configuration Files: @@ -18,245 +18,80 @@ # # $FreeBSD$ -machine i386 -cpu I486_CPU -cpu I586_CPU -cpu I686_CPU +machine amd64 +cpu HAMMER ident GENERIC +maxusers 0 + +makeoptions NO_MODULES=not_yet #To statically compile in device wiring instead of /boot/device.hints #hints "GENERIC.hints" #Default places to look for devices. -makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols - options SCHED_4BSD #4BSD scheduler options INET #InterNETworking -options INET6 #IPv6 communications protocols options FFS #Berkeley Fast Filesystem options SOFTUPDATES #Enable FFS soft updates support -options UFS_ACL #Support for access control lists -options UFS_DIRHASH #Improve performance on big directories options MD_ROOT #MD is a potential root device options NFSCLIENT #Network Filesystem Client -options NFSSERVER #Network Filesystem Server +options NFSSERVER options NFS_ROOT #NFS usable as root device, requires NFSCLIENT -options MSDOSFS #MSDOS Filesystem -options CD9660 #ISO 9660 Filesystem -options PROCFS #Process filesystem (requires PSEUDOFS) -options PSEUDOFS #Pseudo-filesystem framework -options COMPAT_43 #Compatible with BSD 4.3 [KEEP THIS!] 
-options COMPAT_FREEBSD4 #Compatible with FreeBSD4 -options SCSI_DELAY=15000 #Delay (in ms) before probing SCSI -options KTRACE #ktrace(1) support -options SYSVSHM #SYSV-style shared memory -options SYSVMSG #SYSV-style message queues -options SYSVSEM #SYSV-style semaphores -options _KPOSIX_PRIORITY_SCHEDULING #Posix P1003_1B real-time extensions -options KBD_INSTALL_CDEV # install a CDEV entry in /dev -options AHC_REG_PRETTY_PRINT # Print register bitfields in debug - # output. Adds ~128k to driver. -options AHD_REG_PRETTY_PRINT # Print register bitfields in debug - # output. Adds ~215k to driver. # Debugging for use in -current -options DDB #Enable the kernel debugger options INVARIANTS #Enable calls of extra sanity checking options INVARIANT_SUPPORT #Extra sanity checks of internal structures, required by INVARIANTS -options WITNESS #Enable checks to detect deadlocks and cycles -options WITNESS_SKIPSPIN #Don't run witness on spinlocks for speed - -# To make an SMP kernel, the next two are needed -#options SMP # Symmetric MultiProcessor Kernel -#options APIC_IO # Symmetric (APIC) I/O device isa -device eisa device pci -# Floppy drives -device fdc - # ATA and ATAPI devices device ata device atadisk # ATA disk drives device atapicd # ATAPI CDROM drives -device atapifd # ATAPI floppy drives -device atapist # ATAPI tape drives -options ATA_STATIC_ID #Static device numbering - -# SCSI Controllers -device ahb # EISA AHA1742 family -device ahc # AHA2940 and onboard AIC7xxx devices -device ahd # AHA39320/29320 and onboard AIC79xx devices -device amd # AMD 53C974 (Tekram DC-390(T)) -device isp # Qlogic family -device mpt # LSI-Logic MPT-Fusion -#device ncr # NCR/Symbios Logic -device sym # NCR/Symbios Logic (newer chipsets + those of `ncr') -device trm # Tekram DC395U/UW/F DC315U adapters -device adv # Advansys SCSI adapters -device adw # Advansys wide SCSI adapters -device aha # Adaptec 154x SCSI adapters -device aic # Adaptec 15[012]x SCSI adapters, AIC-6[23]60. 
-device bt # Buslogic/Mylex MultiMaster SCSI adapters - -device ncv # NCR 53C500 -device nsp # Workbit Ninja SCSI-3 -device stg # TMC 18C30/18C50 - -# RAID controllers interfaced to the SCSI subsystem -device asr # DPT SmartRAID V, VI and Adaptec SCSI RAID -device ciss # Compaq Smart RAID 5* -device dpt # DPT Smartcache III, IV - See NOTES for options! -device iir # Intel Integrated RAID -device mly # Mylex AcceleRAID/eXtremeRAID - -# SCSI peripherals -device scbus # SCSI bus (required) -device ch # SCSI media changers -device da # Direct Access (disks) -device sa # Sequential Access (tape etc) -device cd # CD -device pass # Passthrough device (direct SCSI access) -device ses # SCSI Environmental Services (and SAF-TE) - -# RAID controllers -device aac # Adaptec FSA RAID -device aacp # SCSI passthrough for aac (requires CAM) -device amr # AMI MegaRAID -device ida # Compaq Smart RAID -device mlx # Mylex DAC960 family -device pst # Promise Supertrak SX6000 -device twe # 3ware ATA RAID +options KTRACE # atkbdc0 controls both the keyboard and the PS/2 mouse -device atkbdc # AT keyboard controller -device atkbd # AT keyboard -device psm # PS/2 mouse +#device atkbdc # AT keyboard controller +#device atkbd # AT keyboard +#device psm # PS/2 mouse -device vga # VGA video card driver +#device vga # VGA video card driver -device splash # Splash screen and screen saver support +#device splash # Splash screen and screen saver support # syscons is the default console driver, resembling an SCO console -device sc - -# Enable this for the pcvt (VT220 compatible) console driver -#device vt -#options XSERVER # support for X server on a vt console -#options FAT_CURSOR # start with block cursor +#device sc -device agp # support several AGP chipsets - -# Floating point support - do not disable. -device npx - -# Power management support (see NOTES for more options) -#device apm -# Add suspend/resume support for the i8254. 
-device pmtimer - -# PCCARD (PCMCIA) support -# Pcmcia and cardbus bridge support -device cbb # cardbus (yenta) bridge -#device pcic # ExCA ISA and PCI bridges -device pccard # PC Card (16-bit) bus -device cardbus # CardBus (32-bit) bus +#device agp # support several AGP chipsets # Serial (COM) ports device sio # 8250, 16[45]50 based serial ports # Parallel port -device ppc -device ppbus # Parallel port bus (required) -device lpt # Printer -device plip # TCP/IP over parallel -device ppi # Parallel port interface device -#device vpo # Requires scbus and da - - -# PCI Ethernet NICs. -device de # DEC/Intel DC21x4x (``Tulip'') -device em # Intel PRO/1000 adapter Gigabit Ethernet Card -device txp # 3Com 3cR990 (``Typhoon'') -device vx # 3Com 3c590, 3c595 (``Vortex'') +#device ppc +#device ppbus # Parallel port bus (required) +#device lpt # Printer # PCI Ethernet NICs that use the common MII bus controller code. # NOTE: Be sure to keep the 'device miibus' line in order to use these NICs! device miibus # MII bus support -device dc # DEC/Intel 21143 and various workalikes device fxp # Intel EtherExpress PRO/100B (82557, 82558) -device pcn # AMD Am79C97x PCI 10/100 (precedence over 'lnc') -device rl # RealTek 8129/8139 -device sf # Adaptec AIC-6915 (``Starfire'') -device sis # Silicon Integrated Systems SiS 900/SiS 7016 -device ste # Sundance ST201 (D-Link DFE-550TX) -device tl # Texas Instruments ThunderLAN -device tx # SMC EtherPower II (83c170 ``EPIC'') -device vr # VIA Rhine, Rhine II -device wb # Winbond W89C840F device xl # 3Com 3c90x (``Boomerang'', ``Cyclone'') device bge # Broadcom BCM570xx Gigabit Ethernet -# ISA Ethernet NICs. pccard nics included. 
-device cs # Crystal Semiconductor CS89x0 NIC -# 'device ed' requires 'device miibus' -device ed # NE[12]000, SMC Ultra, 3c503, DS8390 cards -device ex # Intel EtherExpress Pro/10 and Pro/10+ -device ep # Etherlink III based cards -device fe # Fujitsu MB8696x based cards -device ie # EtherExpress 8/16, 3C507, StarLAN 10 etc. -device lnc # NE2100, NE32-VL Lance Ethernet cards -device sn # SMC's 9000 series of ethernet chips -device xe # Xircom pccard ethernet - -# ISA devices that use the old ISA shims -#device le - -# Wireless NIC cards -device wlan # 802.11 support -device an # Aironet 4500/4800 802.11 wireless NICs. -device awi # BayStack 660 and others -device wi # WaveLAN/Intersil/Symbol 802.11 wireless NICs. -#device wl # Older non 802.11 Wavelan wireless NIC. - # Pseudo devices - the number indicates how many units to allocate. device random # Entropy device device loop # Network loopback device ether # Ethernet support -device sl # Kernel SLIP -device ppp # Kernel PPP -device tun # Packet tunnel. -device pty # Pseudo-ttys (telnet etc) device md # Memory "disks" -device gif # IPv6 and IPv4 tunneling -device faith # IPv6-to-IPv4 relaying (translation) - -# The `bpf' device enables the Berkeley Packet Filter. -# Be aware of the administrative consequences of enabling this! 
-device bpf # Berkeley packet filter +device pty +device bpf # USB support -device uhci # UHCI PCI->USB interface -device ohci # OHCI PCI->USB interface -device usb # USB Bus (required) -#device udbp # USB Double Bulk Pipe devices -device ugen # Generic -device uhid # "Human Interface Devices" -device ukbd # Keyboard -device ulpt # Printer -device umass # Disks/Mass storage - Requires scbus and da -device ums # Mouse -device urio # Diamond Rio 500 MP3 player -device uscanner # Scanners -# USB Ethernet, requires mii -device aue # ADMtek USB ethernet -device axe # ASIX Electronics USB ethernet -device cue # CATC USB ethernet -device kue # Kawasaki LSI USB ethernet - -# FireWire support -device firewire # FireWire bus code -device sbp # SCSI over FireWire (Requires scbus and da) -device fwe # Ethernet over FireWire (non-standard!) +#device uhci # UHCI PCI->USB interface +#device ohci # OHCI PCI->USB interface +#device usb # USB Bus (required) +#device ugen # Generic +#device uhid # "Human Interface Devices" +#device ukbd # Keyboard diff --git a/sys/amd64/conf/GENERIC.hints b/sys/amd64/conf/GENERIC.hints index 958f329..6f63de4 100644 --- a/sys/amd64/conf/GENERIC.hints +++ b/sys/amd64/conf/GENERIC.hints @@ -33,8 +33,6 @@ hint.sc.0.at="isa" hint.sc.0.flags="0x100" hint.vt.0.at="isa" hint.vt.0.disabled="1" -hint.apm.0.disabled="1" -hint.apm.0.flags="0x20" hint.pcic.0.at="isa" # hint.pcic.0.irq="10" # Default to polling hint.pcic.0.port="0x3e0" diff --git a/sys/amd64/include/_inttypes.h b/sys/amd64/include/_inttypes.h index 7da589d..e6b2536 100644 --- a/sys/amd64/include/_inttypes.h +++ b/sys/amd64/include/_inttypes.h @@ -49,172 +49,172 @@ #define PRId8 "d" /* int8_t */ #define PRId16 "d" /* int16_t */ #define PRId32 "d" /* int32_t */ -#define PRId64 "lld" /* int64_t */ +#define PRId64 "ld" /* int64_t */ #define PRIdLEAST8 "d" /* int_least8_t */ #define PRIdLEAST16 "d" /* int_least16_t */ #define PRIdLEAST32 "d" /* int_least32_t */ -#define PRIdLEAST64 "lld" /* 
int_least64_t */ +#define PRIdLEAST64 "ld" /* int_least64_t */ #define PRIdFAST8 "d" /* int_fast8_t */ #define PRIdFAST16 "d" /* int_fast16_t */ #define PRIdFAST32 "d" /* int_fast32_t */ -#define PRIdFAST64 "lld" /* int_fast64_t */ +#define PRIdFAST64 "ld" /* int_fast64_t */ #define PRIdMAX "jd" /* intmax_t */ -#define PRIdPTR "d" /* intptr_t */ +#define PRIdPTR "ld" /* intptr_t */ #define PRIi8 "i" /* int8_t */ #define PRIi16 "i" /* int16_t */ #define PRIi32 "i" /* int32_t */ -#define PRIi64 "lli" /* int64_t */ +#define PRIi64 "li" /* int64_t */ #define PRIiLEAST8 "i" /* int_least8_t */ #define PRIiLEAST16 "i" /* int_least16_t */ #define PRIiLEAST32 "i" /* int_least32_t */ -#define PRIiLEAST64 "lli" /* int_least64_t */ +#define PRIiLEAST64 "li" /* int_least64_t */ #define PRIiFAST8 "i" /* int_fast8_t */ #define PRIiFAST16 "i" /* int_fast16_t */ #define PRIiFAST32 "i" /* int_fast32_t */ -#define PRIiFAST64 "lli" /* int_fast64_t */ +#define PRIiFAST64 "li" /* int_fast64_t */ #define PRIiMAX "ji" /* intmax_t */ -#define PRIiPTR "i" /* intptr_t */ +#define PRIiPTR "li" /* intptr_t */ /* fprintf(3) macros for unsigned integers. 
*/ #define PRIo8 "o" /* uint8_t */ #define PRIo16 "o" /* uint16_t */ #define PRIo32 "o" /* uint32_t */ -#define PRIo64 "llo" /* uint64_t */ +#define PRIo64 "lo" /* uint64_t */ #define PRIoLEAST8 "o" /* uint_least8_t */ #define PRIoLEAST16 "o" /* uint_least16_t */ #define PRIoLEAST32 "o" /* uint_least32_t */ -#define PRIoLEAST64 "llo" /* uint_least64_t */ +#define PRIoLEAST64 "lo" /* uint_least64_t */ #define PRIoFAST8 "o" /* uint_fast8_t */ #define PRIoFAST16 "o" /* uint_fast16_t */ #define PRIoFAST32 "o" /* uint_fast32_t */ -#define PRIoFAST64 "llo" /* uint_fast64_t */ +#define PRIoFAST64 "lo" /* uint_fast64_t */ #define PRIoMAX "jo" /* uintmax_t */ -#define PRIoPTR "o" /* uintptr_t */ +#define PRIoPTR "lo" /* uintptr_t */ #define PRIu8 "u" /* uint8_t */ #define PRIu16 "u" /* uint16_t */ #define PRIu32 "u" /* uint32_t */ -#define PRIu64 "llu" /* uint64_t */ +#define PRIu64 "lu" /* uint64_t */ #define PRIuLEAST8 "u" /* uint_least8_t */ #define PRIuLEAST16 "u" /* uint_least16_t */ #define PRIuLEAST32 "u" /* uint_least32_t */ -#define PRIuLEAST64 "llu" /* uint_least64_t */ +#define PRIuLEAST64 "lu" /* uint_least64_t */ #define PRIuFAST8 "u" /* uint_fast8_t */ #define PRIuFAST16 "u" /* uint_fast16_t */ #define PRIuFAST32 "u" /* uint_fast32_t */ -#define PRIuFAST64 "llu" /* uint_fast64_t */ +#define PRIuFAST64 "lu" /* uint_fast64_t */ #define PRIuMAX "ju" /* uintmax_t */ -#define PRIuPTR "u" /* uintptr_t */ +#define PRIuPTR "lu" /* uintptr_t */ #define PRIx8 "x" /* uint8_t */ #define PRIx16 "x" /* uint16_t */ #define PRIx32 "x" /* uint32_t */ -#define PRIx64 "llx" /* uint64_t */ +#define PRIx64 "lx" /* uint64_t */ #define PRIxLEAST8 "x" /* uint_least8_t */ #define PRIxLEAST16 "x" /* uint_least16_t */ #define PRIxLEAST32 "x" /* uint_least32_t */ -#define PRIxLEAST64 "llx" /* uint_least64_t */ +#define PRIxLEAST64 "lx" /* uint_least64_t */ #define PRIxFAST8 "x" /* uint_fast8_t */ #define PRIxFAST16 "x" /* uint_fast16_t */ #define PRIxFAST32 "x" /* uint_fast32_t */ 
-#define PRIxFAST64 "llx" /* uint_fast64_t */ +#define PRIxFAST64 "lx" /* uint_fast64_t */ #define PRIxMAX "jx" /* uintmax_t */ -#define PRIxPTR "x" /* uintptr_t */ +#define PRIxPTR "lx" /* uintptr_t */ #define PRIX8 "X" /* uint8_t */ #define PRIX16 "X" /* uint16_t */ #define PRIX32 "X" /* uint32_t */ -#define PRIX64 "llX" /* uint64_t */ +#define PRIX64 "lX" /* uint64_t */ #define PRIXLEAST8 "X" /* uint_least8_t */ #define PRIXLEAST16 "X" /* uint_least16_t */ #define PRIXLEAST32 "X" /* uint_least32_t */ -#define PRIXLEAST64 "llX" /* uint_least64_t */ +#define PRIXLEAST64 "lX" /* uint_least64_t */ #define PRIXFAST8 "X" /* uint_fast8_t */ #define PRIXFAST16 "X" /* uint_fast16_t */ #define PRIXFAST32 "X" /* uint_fast32_t */ -#define PRIXFAST64 "llX" /* uint_fast64_t */ +#define PRIXFAST64 "lX" /* uint_fast64_t */ #define PRIXMAX "jX" /* uintmax_t */ -#define PRIXPTR "X" /* uintptr_t */ +#define PRIXPTR "lX" /* uintptr_t */ /* fscanf(3) macros for signed integers. */ #define SCNd8 "hhd" /* int8_t */ #define SCNd16 "hd" /* int16_t */ #define SCNd32 "d" /* int32_t */ -#define SCNd64 "lld" /* int64_t */ +#define SCNd64 "ld" /* int64_t */ #define SCNdLEAST8 "hhd" /* int_least8_t */ #define SCNdLEAST16 "hd" /* int_least16_t */ #define SCNdLEAST32 "d" /* int_least32_t */ -#define SCNdLEAST64 "lld" /* int_least64_t */ +#define SCNdLEAST64 "ld" /* int_least64_t */ #define SCNdFAST8 "d" /* int_fast8_t */ #define SCNdFAST16 "d" /* int_fast16_t */ #define SCNdFAST32 "d" /* int_fast32_t */ -#define SCNdFAST64 "lld" /* int_fast64_t */ +#define SCNdFAST64 "ld" /* int_fast64_t */ #define SCNdMAX "jd" /* intmax_t */ -#define SCNdPTR "d" /* intptr_t */ +#define SCNdPTR "ld" /* intptr_t */ #define SCNi8 "hhi" /* int8_t */ #define SCNi16 "hi" /* int16_t */ #define SCNi32 "i" /* int32_t */ -#define SCNi64 "lli" /* int64_t */ +#define SCNi64 "li" /* int64_t */ #define SCNiLEAST8 "hhi" /* int_least8_t */ #define SCNiLEAST16 "hi" /* int_least16_t */ #define SCNiLEAST32 "i" /* int_least32_t 
*/ -#define SCNiLEAST64 "lli" /* int_least64_t */ +#define SCNiLEAST64 "li" /* int_least64_t */ #define SCNiFAST8 "i" /* int_fast8_t */ #define SCNiFAST16 "i" /* int_fast16_t */ #define SCNiFAST32 "i" /* int_fast32_t */ -#define SCNiFAST64 "lli" /* int_fast64_t */ +#define SCNiFAST64 "li" /* int_fast64_t */ #define SCNiMAX "ji" /* intmax_t */ -#define SCNiPTR "i" /* intptr_t */ +#define SCNiPTR "li" /* intptr_t */ /* fscanf(3) macros for unsigned integers. */ #define SCNo8 "hho" /* uint8_t */ #define SCNo16 "ho" /* uint16_t */ #define SCNo32 "o" /* uint32_t */ -#define SCNo64 "llo" /* uint64_t */ +#define SCNo64 "lo" /* uint64_t */ #define SCNoLEAST8 "hho" /* uint_least8_t */ #define SCNoLEAST16 "ho" /* uint_least16_t */ #define SCNoLEAST32 "o" /* uint_least32_t */ -#define SCNoLEAST64 "llo" /* uint_least64_t */ +#define SCNoLEAST64 "lo" /* uint_least64_t */ #define SCNoFAST8 "o" /* uint_fast8_t */ #define SCNoFAST16 "o" /* uint_fast16_t */ #define SCNoFAST32 "o" /* uint_fast32_t */ -#define SCNoFAST64 "llo" /* uint_fast64_t */ +#define SCNoFAST64 "lo" /* uint_fast64_t */ #define SCNoMAX "jo" /* uintmax_t */ -#define SCNoPTR "o" /* uintptr_t */ +#define SCNoPTR "lo" /* uintptr_t */ #define SCNu8 "hhu" /* uint8_t */ #define SCNu16 "hu" /* uint16_t */ #define SCNu32 "u" /* uint32_t */ -#define SCNu64 "llu" /* uint64_t */ +#define SCNu64 "lu" /* uint64_t */ #define SCNuLEAST8 "hhu" /* uint_least8_t */ #define SCNuLEAST16 "hu" /* uint_least16_t */ #define SCNuLEAST32 "u" /* uint_least32_t */ -#define SCNuLEAST64 "llu" /* uint_least64_t */ +#define SCNuLEAST64 "lu" /* uint_least64_t */ #define SCNuFAST8 "u" /* uint_fast8_t */ #define SCNuFAST16 "u" /* uint_fast16_t */ #define SCNuFAST32 "u" /* uint_fast32_t */ -#define SCNuFAST64 "llu" /* uint_fast64_t */ +#define SCNuFAST64 "lu" /* uint_fast64_t */ #define SCNuMAX "ju" /* uintmax_t */ -#define SCNuPTR "u" /* uintptr_t */ +#define SCNuPTR "lu" /* uintptr_t */ #define SCNx8 "hhx" /* uint8_t */ #define SCNx16 "hx" /* 
uint16_t */ #define SCNx32 "x" /* uint32_t */ -#define SCNx64 "llx" /* uint64_t */ +#define SCNx64 "lx" /* uint64_t */ #define SCNxLEAST8 "hhx" /* uint_least8_t */ #define SCNxLEAST16 "hx" /* uint_least16_t */ #define SCNxLEAST32 "x" /* uint_least32_t */ -#define SCNxLEAST64 "llx" /* uint_least64_t */ +#define SCNxLEAST64 "lx" /* uint_least64_t */ #define SCNxFAST8 "x" /* uint_fast8_t */ #define SCNxFAST16 "x" /* uint_fast16_t */ #define SCNxFAST32 "x" /* uint_fast32_t */ -#define SCNxFAST64 "llx" /* uint_fast64_t */ +#define SCNxFAST64 "lx" /* uint_fast64_t */ #define SCNxMAX "jx" /* uintmax_t */ -#define SCNxPTR "x" /* uintptr_t */ +#define SCNxPTR "lx" /* uintptr_t */ #endif /* !_MACHINE_INTTYPES_H_ */ diff --git a/sys/amd64/include/_types.h b/sys/amd64/include/_types.h index 69b6a0d..f61842c 100644 --- a/sys/amd64/include/_types.h +++ b/sys/amd64/include/_types.h @@ -56,6 +56,8 @@ typedef unsigned long __uint64_t; */ typedef __int32_t __clock_t; /* clock()... */ typedef __int64_t __critical_t; +typedef double __double_t; +typedef double __float_t; typedef __int64_t __intfptr_t; typedef __int64_t __intmax_t; typedef __int64_t __intptr_t; @@ -72,7 +74,7 @@ typedef __int64_t __register_t; typedef __int64_t __segsz_t; /* segment size (in pages) */ typedef __uint64_t __size_t; /* sizeof() */ typedef __int64_t __ssize_t; /* byte count or error */ -typedef __int32_t __time_t; /* time()... */ +typedef __int64_t __time_t; /* time()... 
*/ typedef __uint64_t __uintfptr_t; typedef __uint64_t __uintmax_t; typedef __uint64_t __uintptr_t; diff --git a/sys/amd64/include/asm.h b/sys/amd64/include/asm.h index 30008a2..8ea4122 100644 --- a/sys/amd64/include/asm.h +++ b/sys/amd64/include/asm.h @@ -43,19 +43,9 @@ #include <sys/cdefs.h> #ifdef PIC -#define PIC_PROLOGUE \ - pushl %ebx; \ - call 1f; \ -1: \ - popl %ebx; \ - addl $_GLOBAL_OFFSET_TABLE_+[.-1b],%ebx -#define PIC_EPILOGUE \ - popl %ebx #define PIC_PLT(x) x@PLT -#define PIC_GOT(x) x@GOT(%ebx) +#define PIC_GOT(x) x@GOTPCREL(%rip) #else -#define PIC_PROLOGUE -#define PIC_EPILOGUE #define PIC_PLT(x) x #define PIC_GOT(x) x #endif @@ -78,14 +68,14 @@ #ifdef PROF #define ALTENTRY(x) _ENTRY(x); \ - pushl %ebp; movl %esp,%ebp; \ + pushl %rbp; movl %rsp,%rbp; \ call PIC_PLT(HIDENAME(mcount)); \ - popl %ebp; \ + popl %rbp; \ jmp 9f #define ENTRY(x) _ENTRY(x); \ - pushl %ebp; movl %esp,%ebp; \ + pushl %rbp; movl %rsp,%rbp; \ call PIC_PLT(HIDENAME(mcount)); \ - popl %ebp; \ + popl %rbp; \ 9: #else #define ALTENTRY(x) _ENTRY(x) @@ -117,42 +107,38 @@ #define ARCH_DISPATCH(x) \ _START_ENTRY; \ .globl CNAME(x); .type CNAME(x),@function; CNAME(x): ; \ - PIC_PROLOGUE; \ - movl PIC_GOT(AVECNAME(x)),%eax; \ - PIC_EPILOGUE; \ - jmpl *(%eax) + movq PIC_GOT(AVECNAME(x)),%rax; \ + jmpq *(%rax) #define ARCH_SELECT(x) _START_ENTRY; \ .type ASELNAME(x),@function; \ ASELNAME(x): \ - PIC_PROLOGUE; \ call PIC_PLT(CNAME(__get_hw_float)); \ - testl %eax,%eax; \ - movl PIC_GOT(ANAME(x)),%eax; \ + testq %rax,%rax; \ + movq PIC_GOT(ANAME(x)),%rax; \ jne 8f; \ - movl PIC_GOT(GNAME(x)),%eax; \ + movq PIC_GOT(GNAME(x)),%rax; \ 8: \ - movl PIC_GOT(AVECNAME(x)),%edx; \ - movl %eax,(%edx); \ - PIC_EPILOGUE; \ - jmpl *%eax + movq PIC_GOT(AVECNAME(x)),%rdx; \ + movq %rax,(%rdx); \ + jmpq *%rax #else /* !PIC */ #define ARCH_DISPATCH(x) \ _START_ENTRY; \ .globl CNAME(x); .type CNAME(x),@function; CNAME(x): ; \ - jmpl *AVECNAME(x) + jmpw *AVECNAME(x) #define ARCH_SELECT(x) _START_ENTRY; \ 
.type ASELNAME(x),@function; \ ASELNAME(x): \ call CNAME(__get_hw_float); \ - testl %eax,%eax; \ - movl $ANAME(x),%eax; \ + testw %rax,%rax; \ + movw $ANAME(x),%rax; \ jne 8f; \ - movl $GNAME(x),%eax; \ + movw $GNAME(x),%rax; \ 8: \ - movl %eax,AVECNAME(x); \ - jmpl *%eax + movw %rax,AVECNAME(x); \ + jmpw *%rax #endif /* PIC */ #define ARCH_VECTOR(x) .data; .p2align 2; \ diff --git a/sys/amd64/include/asmacros.h b/sys/amd64/include/asmacros.h index 07035ca..9893e5f 100644 --- a/sys/amd64/include/asmacros.h +++ b/sys/amd64/include/asmacros.h @@ -50,11 +50,11 @@ #define CNAME(csym) csym #define HIDENAME(asmsym) .asmsym -#define ALIGN_DATA .p2align 2 /* 4 byte alignment, zero filled */ +#define ALIGN_DATA .p2align 3 /* 8 byte alignment, zero filled */ #ifdef GPROF #define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */ #else -#define ALIGN_TEXT .p2align 2,0x90 /* 4-byte alignment, nop filled */ +#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */ #endif #define SUPERALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */ @@ -64,9 +64,9 @@ #define NON_GPROF_RET .byte 0xc3 /* opcode for `ret' */ #ifdef LOCORE -#define PCPU(member) %fs:PC_ ## member -#define PCPU_ADDR(member, reg) movl %fs:PC_PRVSPACE,reg; \ - addl $PC_ ## member,reg +#define PCPU(member) %gs:PC_ ## member +#define PCPU_ADDR(member, reg) movq %gs:PC_PRVSPACE,reg; \ + addq $PC_ ## member,reg #endif #ifdef GPROF @@ -115,7 +115,7 @@ #define CROSSJUMPTARGET(label) \ ALIGN_TEXT; __CONCAT(to,label): ; MCOUNT; jmp label #define ENTRY(name) GEN_ENTRY(name) ; 9: ; MCOUNT -#define FAKE_MCOUNT(caller) pushl caller ; call __mcount ; popl %ecx +#define FAKE_MCOUNT(caller) pushq caller ; call __mcount ; popl %ecx #define MCOUNT call __mcount #define MCOUNT_LABEL(name) GEN_ENTRY(name) ; nop ; ALIGN_TEXT #define MEXITCOUNT call HIDENAME(mexitcount) diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h index 919bd5a..0c4d9b0 100644 --- a/sys/amd64/include/atomic.h 
+++ b/sys/amd64/include/atomic.h @@ -68,6 +68,7 @@ void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v) int atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src); +int atomic_cmpset_long(volatile u_long *dst, u_long exp, u_long src); #define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \ u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p); \ @@ -81,7 +82,7 @@ void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) * For userland, assume the SMP case and use lock prefixes so that * the binaries will run on both types of systems. */ -#if defined(SMP) || !defined(_KERNEL) +#if !defined(_KERNEL) #define MPLOCKED lock ; #else #define MPLOCKED @@ -117,84 +118,49 @@ extern void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v) #if defined(__GNUC__) -#if defined(I386_CPU) || defined(CPU_DISABLE_CMPXCHG) - static __inline int atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src) { int res = exp; - __asm __volatile( - " pushfl ; " - " cli ; " - " cmpl %0,%2 ; " - " jne 1f ; " - " movl %1,%2 ; " - "1: " - " sete %%al; " + __asm __volatile ( + " " __XSTRING(MPLOCKED) " " + " cmpxchgl %1,%2 ; " + " setz %%al ; " " movzbl %%al,%0 ; " - " popfl ; " + "1: " "# atomic_cmpset_int" : "+a" (res) /* 0 (result) */ : "r" (src), /* 1 */ "m" (*(dst)) /* 2 */ - : "memory"); + : "memory"); return (res); } -#else /* defined(I386_CPU) */ - static __inline int -atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src) +atomic_cmpset_long(volatile u_long *dst, u_long exp, u_long src) { - int res = exp; + long res = exp; __asm __volatile ( " " __XSTRING(MPLOCKED) " " - " cmpxchgl %1,%2 ; " + " cmpxchgq %1,%2 ; " " setz %%al ; " - " movzbl %%al,%0 ; " + " movzbq %%al,%0 ; " "1: " - "# atomic_cmpset_int" - : "+a" (res) /* 0 (result) */ + "# atomic_cmpset_long" + : "+a" (res) /* 0 (result) %rax, XXX check */ : "r" (src), /* 1 */ "m" (*(dst)) /* 2 */ : "memory"); return (res); } - -#endif /* defined(I386_CPU) */ - #endif /* defined(__GNUC__) */ #if defined(__GNUC__) 
-#if defined(I386_CPU) - -/* - * We assume that a = b will do atomic loads and stores. - * - * XXX: This is _NOT_ safe on a P6 or higher because it does not guarantee - * memory ordering. These should only be used on a 386. - */ -#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \ -static __inline u_##TYPE \ -atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ -{ \ - return (*p); \ -} \ - \ -static __inline void \ -atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ -{ \ - *p = v; \ - __asm __volatile("" : : : "memory"); \ -} - -#else /* !defined(I386_CPU) */ - #define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \ static __inline u_##TYPE \ atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ @@ -221,11 +187,10 @@ atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ : : "memory"); \ } -#endif /* defined(I386_CPU) */ - #else /* !defined(__GNUC__) */ extern int atomic_cmpset_int(volatile u_int *, u_int, u_int); +extern int atomic_cmpset_long(volatile u_long *, u_long, u_long); #define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \ extern u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p); \ @@ -250,15 +215,15 @@ ATOMIC_ASM(clear, int, "andl %1,%0", "ir", ~v); ATOMIC_ASM(add, int, "addl %1,%0", "ir", v); ATOMIC_ASM(subtract, int, "subl %1,%0", "ir", v); -ATOMIC_ASM(set, long, "orl %1,%0", "ir", v); -ATOMIC_ASM(clear, long, "andl %1,%0", "ir", ~v); -ATOMIC_ASM(add, long, "addl %1,%0", "ir", v); -ATOMIC_ASM(subtract, long, "subl %1,%0", "ir", v); +ATOMIC_ASM(set, long, "orq %1,%0", "ir", v); +ATOMIC_ASM(clear, long, "andq %1,%0", "ir", ~v); +ATOMIC_ASM(add, long, "addq %1,%0", "ir", v); +ATOMIC_ASM(subtract, long, "subq %1,%0", "ir", v); ATOMIC_STORE_LOAD(char, "cmpxchgb %b0,%1", "xchgb %b1,%0"); ATOMIC_STORE_LOAD(short,"cmpxchgw %w0,%1", "xchgw %w1,%0"); ATOMIC_STORE_LOAD(int, "cmpxchgl %0,%1", "xchgl %1,%0"); -ATOMIC_STORE_LOAD(long, "cmpxchgl %0,%1", "xchgl %1,%0"); +ATOMIC_STORE_LOAD(long, "cmpxchgq %0,%1", "xchgq %1,%0"); #undef ATOMIC_ASM #undef ATOMIC_STORE_LOAD @@ -300,9 +265,6 @@ 
ATOMIC_STORE_LOAD(long, "cmpxchgl %0,%1", "xchgl %1,%0"); #define atomic_add_rel_long atomic_add_long #define atomic_subtract_acq_long atomic_subtract_long #define atomic_subtract_rel_long atomic_subtract_long -#define atomic_cmpset_long atomic_cmpset_int -#define atomic_cmpset_acq_long atomic_cmpset_acq_int -#define atomic_cmpset_rel_long atomic_cmpset_rel_int #define atomic_cmpset_acq_ptr atomic_cmpset_ptr #define atomic_cmpset_rel_ptr atomic_cmpset_ptr @@ -361,39 +323,39 @@ static __inline int atomic_cmpset_ptr(volatile void *dst, void *exp, void *src) { - return (atomic_cmpset_int((volatile u_int *)dst, (u_int)exp, - (u_int)src)); + return (atomic_cmpset_long((volatile u_long *)dst, + (u_long)exp, (u_long)src)); } static __inline void * atomic_load_acq_ptr(volatile void *p) { - return (void *)atomic_load_acq_int((volatile u_int *)p); + return (void *)atomic_load_acq_long((volatile u_long *)p); } static __inline void atomic_store_rel_ptr(volatile void *p, void *v) { - atomic_store_rel_int((volatile u_int *)p, (u_int)v); + atomic_store_rel_long((volatile u_long *)p, (u_long)v); } #define ATOMIC_PTR(NAME) \ static __inline void \ atomic_##NAME##_ptr(volatile void *p, uintptr_t v) \ { \ - atomic_##NAME##_int((volatile u_int *)p, v); \ + atomic_##NAME##_long((volatile u_long *)p, v); \ } \ \ static __inline void \ atomic_##NAME##_acq_ptr(volatile void *p, uintptr_t v) \ { \ - atomic_##NAME##_acq_int((volatile u_int *)p, v);\ + atomic_##NAME##_acq_long((volatile u_long *)p, v);\ } \ \ static __inline void \ atomic_##NAME##_rel_ptr(volatile void *p, uintptr_t v) \ { \ - atomic_##NAME##_rel_int((volatile u_int *)p, v);\ + atomic_##NAME##_rel_long((volatile u_long *)p, v);\ } ATOMIC_PTR(set) @@ -426,8 +388,8 @@ atomic_readandclear_long(volatile u_long *addr) u_long result; __asm __volatile ( - " xorl %0,%0 ; " - " xchgl %1,%0 ; " + " xorq %0,%0 ; " + " xchgq %1,%0 ; " "# atomic_readandclear_int" : "=&r" (result) /* 0 (result) */ : "m" (*addr)); /* 1 (addr) */ diff --git 
a/sys/amd64/include/bus.h b/sys/amd64/include/bus.h index a33569f..9aa9336 100644 --- a/sys/amd64/include/bus.h +++ b/sys/amd64/include/bus.h @@ -34,17 +34,11 @@ #ifndef _I386_BUS_H_ #define _I386_BUS_H_ -#ifdef PC98 -/* NEC PC-98 */ -#include <machine/bus_pc98.h> -#else -/* IBM-PC */ #include <machine/bus_at386.h> -#endif #include <machine/bus_dma.h> /* - * Stream accesses are the same as normal accesses on i386/pc98; there are no + * Stream accesses are the same as normal accesses on i386; there are no * supported bus systems with an endianess different from the host one. */ #define bus_space_read_stream_1(t, h, o) bus_space_read_1((t), (h), (o)) diff --git a/sys/amd64/include/bus_amd64.h b/sys/amd64/include/bus_amd64.h index e1f6e8e..d8ae035 100644 --- a/sys/amd64/include/bus_amd64.h +++ b/sys/amd64/include/bus_amd64.h @@ -92,31 +92,23 @@ /* * Bus address and size types */ -#ifdef PAE typedef uint64_t bus_addr_t; -#else -typedef uint32_t bus_addr_t; -#endif -typedef uint32_t bus_size_t; +typedef uint64_t bus_size_t; #define BUS_SPACE_MAXSIZE_24BIT 0xFFFFFF #define BUS_SPACE_MAXSIZE_32BIT 0xFFFFFFFF #define BUS_SPACE_MAXSIZE 0xFFFFFFFF #define BUS_SPACE_MAXADDR_24BIT 0xFFFFFF #define BUS_SPACE_MAXADDR_32BIT 0xFFFFFFFF -#ifdef PAE #define BUS_SPACE_MAXADDR 0xFFFFFFFFFFFFFFFFULL -#else -#define BUS_SPACE_MAXADDR 0xFFFFFFFF -#endif #define BUS_SPACE_UNRESTRICTED (~0) /* * Access methods for bus resources and address space. */ -typedef int bus_space_tag_t; -typedef u_int bus_space_handle_t; +typedef uint64_t bus_space_tag_t; +typedef uint64_t bus_space_handle_t; /* * Map a region of device bus space into CPU virtual address space. 
@@ -1215,7 +1207,7 @@ bus_space_barrier(bus_space_tag_t tag __unused, bus_space_handle_t bsh __unused, { #ifdef __GNUC__ if (flags & BUS_SPACE_BARRIER_READ) - __asm __volatile("lock; addl $0,0(%%esp)" : : : "memory"); + __asm __volatile("lock; addl $0,0(%%rsp)" : : : "memory"); else __asm __volatile("" : : : "memory"); #endif diff --git a/sys/amd64/include/bus_at386.h b/sys/amd64/include/bus_at386.h index e1f6e8e..d8ae035 100644 --- a/sys/amd64/include/bus_at386.h +++ b/sys/amd64/include/bus_at386.h @@ -92,31 +92,23 @@ /* * Bus address and size types */ -#ifdef PAE typedef uint64_t bus_addr_t; -#else -typedef uint32_t bus_addr_t; -#endif -typedef uint32_t bus_size_t; +typedef uint64_t bus_size_t; #define BUS_SPACE_MAXSIZE_24BIT 0xFFFFFF #define BUS_SPACE_MAXSIZE_32BIT 0xFFFFFFFF #define BUS_SPACE_MAXSIZE 0xFFFFFFFF #define BUS_SPACE_MAXADDR_24BIT 0xFFFFFF #define BUS_SPACE_MAXADDR_32BIT 0xFFFFFFFF -#ifdef PAE #define BUS_SPACE_MAXADDR 0xFFFFFFFFFFFFFFFFULL -#else -#define BUS_SPACE_MAXADDR 0xFFFFFFFF -#endif #define BUS_SPACE_UNRESTRICTED (~0) /* * Access methods for bus resources and address space. */ -typedef int bus_space_tag_t; -typedef u_int bus_space_handle_t; +typedef uint64_t bus_space_tag_t; +typedef uint64_t bus_space_handle_t; /* * Map a region of device bus space into CPU virtual address space. 
@@ -1215,7 +1207,7 @@ bus_space_barrier(bus_space_tag_t tag __unused, bus_space_handle_t bsh __unused, { #ifdef __GNUC__ if (flags & BUS_SPACE_BARRIER_READ) - __asm __volatile("lock; addl $0,0(%%esp)" : : : "memory"); + __asm __volatile("lock; addl $0,0(%%rsp)" : : : "memory"); else __asm __volatile("" : : : "memory"); #endif diff --git a/sys/amd64/include/clock.h b/sys/amd64/include/clock.h index 5ae4fb2..511780f 100644 --- a/sys/amd64/include/clock.h +++ b/sys/amd64/include/clock.h @@ -24,9 +24,6 @@ extern int timer0_max_count; extern uint64_t tsc_freq; extern int tsc_is_broken; extern int wall_cmos_clock; -#ifdef APIC_IO -extern int apic_8254_intr; -#endif /* * Driver to clock driver interface. @@ -37,14 +34,8 @@ int acquire_timer0(int rate, void (*function)(struct clockframe *frame)); int acquire_timer2(int mode); int release_timer0(void); int release_timer2(void); -#ifndef PC98 int rtcin(int val); -#else -int acquire_timer1(int mode); -int release_timer1(void); -#endif int sysbeep(int pitch, int period); -void timer_restore(void); void init_TSC(void); #endif /* _KERNEL */ diff --git a/sys/amd64/include/cpu.h b/sys/amd64/include/cpu.h index d4e72bb..e9b7fff 100644 --- a/sys/amd64/include/cpu.h +++ b/sys/amd64/include/cpu.h @@ -55,16 +55,16 @@ #define cpu_exec(p) /* nothing */ #define cpu_swapin(p) /* nothing */ -#define cpu_getstack(td) ((td)->td_frame->tf_esp) -#define cpu_setstack(td, ap) ((td)->td_frame->tf_esp = (ap)) +#define cpu_getstack(td) ((td)->td_frame->tf_rsp) +#define cpu_setstack(td, ap) ((td)->td_frame->tf_rsp = (ap)) #define TRAPF_USERMODE(framep) \ - ((ISPL((framep)->tf_cs) == SEL_UPL) || ((framep)->tf_eflags & PSL_VM)) -#define TRAPF_PC(framep) ((framep)->tf_eip) + (ISPL((framep)->tf_cs) == SEL_UPL) +#define TRAPF_PC(framep) ((framep)->tf_rip) #define CLKF_USERMODE(framep) \ - ((ISPL((framep)->cf_cs) == SEL_UPL) || ((framep)->cf_eflags & PSL_VM)) -#define CLKF_PC(framep) ((framep)->cf_eip) + (ISPL((framep)->cf_cs) == SEL_UPL) +#define 
CLKF_PC(framep) ((framep)->cf_rip) /* * CTL_MACHDEP definitions. @@ -72,19 +72,9 @@ #define CPU_CONSDEV 1 /* dev_t: console terminal device */ #define CPU_ADJKERNTZ 2 /* int: timezone offset (seconds) */ #define CPU_DISRTCSET 3 /* int: disable resettodr() call */ -#define CPU_BOOTINFO 4 /* struct: bootinfo */ #define CPU_WALLCLOCK 5 /* int: indicates wall CMOS clock */ #define CPU_MAXID 6 /* number of valid machdep ids */ -#define CTL_MACHDEP_NAMES { \ - { 0, 0 }, \ - { "console_device", CTLTYPE_STRUCT }, \ - { "adjkerntz", CTLTYPE_INT }, \ - { "disable_rtc_set", CTLTYPE_INT }, \ - { "bootinfo", CTLTYPE_STRUCT }, \ - { "wall_cmos_clock", CTLTYPE_INT }, \ -} - #ifdef _KERNEL extern char btext[]; extern char etext[]; @@ -99,14 +89,7 @@ void fork_trampoline(void); static __inline u_int64_t get_cyclecount(void) { -#if defined(I386_CPU) || defined(I486_CPU) - struct bintime bt; - if (!tsc_present) { - binuptime(&bt); - return (bt.frac ^ bt.sec); - } -#endif return (rdtsc()); } diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h index c7f9ace..1e1df33 100644 --- a/sys/amd64/include/cpufunc.h +++ b/sys/amd64/include/cpufunc.h @@ -52,10 +52,12 @@ __BEGIN_DECLS #define readb(va) (*(volatile u_int8_t *) (va)) #define readw(va) (*(volatile u_int16_t *) (va)) #define readl(va) (*(volatile u_int32_t *) (va)) +#define readq(va) (*(volatile u_int64_t *) (va)) #define writeb(va, d) (*(volatile u_int8_t *) (va) = (d)) #define writew(va, d) (*(volatile u_int16_t *) (va) = (d)) #define writel(va, d) (*(volatile u_int32_t *) (va) = (d)) +#define writeq(va, d) (*(volatile u_int64_t *) (va) = (d)) #ifdef __GNUC__ @@ -310,40 +312,40 @@ ia32_pause(void) __asm __volatile("pause"); } -static __inline u_int -read_eflags(void) +static __inline u_long +read_rflags(void) { - u_int ef; + u_long rf; - __asm __volatile("pushfl; popl %0" : "=r" (ef)); - return (ef); + __asm __volatile("pushfq; popq %0" : "=r" (rf)); + return (rf); } static __inline u_int64_t rdmsr(u_int msr) { - 
u_int64_t rv; + u_int32_t low, high; - __asm __volatile("rdmsr" : "=A" (rv) : "c" (msr)); - return (rv); + __asm __volatile("rdmsr" : "=a" (low), "=d" (high) : "c" (msr)); + return (low | ((u_int64_t)high << 32)); } static __inline u_int64_t rdpmc(u_int pmc) { - u_int64_t rv; + u_int32_t low, high; - __asm __volatile("rdpmc" : "=A" (rv) : "c" (pmc)); - return (rv); + __asm __volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (pmc)); + return (low | ((u_int64_t)high << 32)); } static __inline u_int64_t rdtsc(void) { - u_int64_t rv; + u_int32_t low, high; - __asm __volatile("rdtsc" : "=A" (rv)); - return (rv); + __asm __volatile("rdtsc" : "=a" (low), "=d" (high)); + return (low | ((u_int64_t)high << 32)); } static __inline void @@ -353,70 +355,74 @@ wbinvd(void) } static __inline void -write_eflags(u_int ef) +write_rflags(u_long rf) { - __asm __volatile("pushl %0; popfl" : : "r" (ef)); + __asm __volatile("pushq %0; popfq" : : "r" (rf)); } static __inline void wrmsr(u_int msr, u_int64_t newval) { - __asm __volatile("wrmsr" : : "A" (newval), "c" (msr)); + u_int32_t low, high; + + low = newval; + high = newval >> 32; + __asm __volatile("wrmsr" : : "a" (low), "d" (high), "c" (msr)); } static __inline void -load_cr0(u_int data) +load_cr0(u_long data) { - __asm __volatile("movl %0,%%cr0" : : "r" (data)); + __asm __volatile("movq %0,%%cr0" : : "r" (data)); } -static __inline u_int +static __inline u_long rcr0(void) { - u_int data; + u_long data; - __asm __volatile("movl %%cr0,%0" : "=r" (data)); + __asm __volatile("movq %%cr0,%0" : "=r" (data)); return (data); } -static __inline u_int +static __inline u_long rcr2(void) { - u_int data; + u_long data; - __asm __volatile("movl %%cr2,%0" : "=r" (data)); + __asm __volatile("movq %%cr2,%0" : "=r" (data)); return (data); } static __inline void -load_cr3(u_int data) +load_cr3(u_long data) { - __asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory"); + __asm __volatile("movq %0,%%cr3" : : "r" (data) : "memory"); } -static __inline 
u_int +static __inline u_long rcr3(void) { - u_int data; + u_long data; - __asm __volatile("movl %%cr3,%0" : "=r" (data)); + __asm __volatile("movq %%cr3,%0" : "=r" (data)); return (data); } static __inline void -load_cr4(u_int data) +load_cr4(u_long data) { - __asm __volatile("movl %0,%%cr4" : : "r" (data)); + __asm __volatile("movq %0,%%cr4" : : "r" (data)); } -static __inline u_int +static __inline u_long rcr4(void) { - u_int data; + u_long data; - __asm __volatile("movl %%cr4,%0" : "=r" (data)); + __asm __volatile("movq %%cr4,%0" : "=r" (data)); return (data); } @@ -435,12 +441,13 @@ invltlb(void) * Only works on 486+ CPUs (i386 does not have PG_G). */ static __inline void -invlpg(u_int addr) +invlpg(u_long addr) { __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); } +/* XXX these are replaced with rdmsr/wrmsr */ static __inline u_int rfs(void) { @@ -490,132 +497,20 @@ ltr(u_short sel) __asm __volatile("ltr %0" : : "r" (sel)); } -static __inline u_int -rdr0(void) -{ - u_int data; - __asm __volatile("movl %%dr0,%0" : "=r" (data)); - return (data); -} - -static __inline void -load_dr0(u_int dr0) -{ - __asm __volatile("movl %0,%%dr0" : : "r" (dr0)); -} - -static __inline u_int -rdr1(void) -{ - u_int data; - __asm __volatile("movl %%dr1,%0" : "=r" (data)); - return (data); -} - -static __inline void -load_dr1(u_int dr1) -{ - __asm __volatile("movl %0,%%dr1" : : "r" (dr1)); -} - -static __inline u_int -rdr2(void) -{ - u_int data; - __asm __volatile("movl %%dr2,%0" : "=r" (data)); - return (data); -} - -static __inline void -load_dr2(u_int dr2) -{ - __asm __volatile("movl %0,%%dr2" : : "r" (dr2)); -} - -static __inline u_int -rdr3(void) -{ - u_int data; - __asm __volatile("movl %%dr3,%0" : "=r" (data)); - return (data); -} - -static __inline void -load_dr3(u_int dr3) -{ - __asm __volatile("movl %0,%%dr3" : : "r" (dr3)); -} - -static __inline u_int -rdr4(void) -{ - u_int data; - __asm __volatile("movl %%dr4,%0" : "=r" (data)); - return (data); -} - 
-static __inline void -load_dr4(u_int dr4) -{ - __asm __volatile("movl %0,%%dr4" : : "r" (dr4)); -} - -static __inline u_int -rdr5(void) -{ - u_int data; - __asm __volatile("movl %%dr5,%0" : "=r" (data)); - return (data); -} - -static __inline void -load_dr5(u_int dr5) -{ - __asm __volatile("movl %0,%%dr5" : : "r" (dr5)); -} - -static __inline u_int -rdr6(void) -{ - u_int data; - __asm __volatile("movl %%dr6,%0" : "=r" (data)); - return (data); -} - -static __inline void -load_dr6(u_int dr6) -{ - __asm __volatile("movl %0,%%dr6" : : "r" (dr6)); -} - -static __inline u_int -rdr7(void) -{ - u_int data; - __asm __volatile("movl %%dr7,%0" : "=r" (data)); - return (data); -} - -static __inline void -load_dr7(u_int dr7) -{ - __asm __volatile("movl %0,%%dr7" : : "r" (dr7)); -} - static __inline register_t intr_disable(void) { - register_t eflags; + register_t rflags; - eflags = read_eflags(); + rflags = read_rflags(); disable_intr(); - return (eflags); + return (rflags); } static __inline void -intr_restore(register_t eflags) +intr_restore(register_t rflags) { - write_eflags(eflags); + write_rflags(rflags); } #else /* !__GNUC__ */ @@ -623,8 +518,8 @@ intr_restore(register_t eflags) int breakpoint(void); u_int bsfl(u_int mask); u_int bsrl(u_int mask); -void cpu_invlpg(u_int addr); -void cpu_invlpg_range(u_int start, u_int end); +void cpu_invlpg(u_long addr); +void cpu_invlpg_range(u_long start, u_long end); void disable_intr(void); void do_cpuid(u_int ax, u_int *p); void enable_intr(void); @@ -664,28 +559,13 @@ u_int rgs(void); u_int64_t rdmsr(u_int msr); u_int64_t rdpmc(u_int pmc); u_int64_t rdtsc(void); -u_int read_eflags(void); +u_int read_rflags(void); void wbinvd(void); -void write_eflags(u_int ef); +void write_rflags(u_int rf); void wrmsr(u_int msr, u_int64_t newval); -u_int rdr0(void); -void load_dr0(u_int dr0); -u_int rdr1(void); -void load_dr1(u_int dr1); -u_int rdr2(void); -void load_dr2(u_int dr2); -u_int rdr3(void); -void load_dr3(u_int dr3); -u_int rdr4(void); 
-void load_dr4(u_int dr4); -u_int rdr5(void); -void load_dr5(u_int dr5); -u_int rdr6(void); -void load_dr6(u_int dr6); -u_int rdr7(void); void load_dr7(u_int dr7); register_t intr_disable(void); -void intr_restore(register_t ef); +void intr_restore(register_t rf); #endif /* __GNUC__ */ diff --git a/sys/amd64/include/cputypes.h b/sys/amd64/include/cputypes.h index 585df67..2048458 100644 --- a/sys/amd64/include/cputypes.h +++ b/sys/amd64/include/cputypes.h @@ -33,32 +33,15 @@ /* * Classes of processor. */ -#define CPUCLASS_286 0 -#define CPUCLASS_386 1 -#define CPUCLASS_486 2 -#define CPUCLASS_586 3 -#define CPUCLASS_686 4 +#define CPUCLASS_X86 0 /* X86 */ +#define CPUCLASS_K8 1 /* K8 AMD64 class */ /* * Kinds of processor. */ -#define CPU_286 0 /* Intel 80286 */ -#define CPU_386SX 1 /* Intel 80386SX */ -#define CPU_386 2 /* Intel 80386DX */ -#define CPU_486SX 3 /* Intel 80486SX */ -#define CPU_486 4 /* Intel 80486DX */ -#define CPU_586 5 /* Intel P.....m (I hate lawyers; it's TM) */ -#define CPU_486DLC 6 /* Cyrix 486DLC */ -#define CPU_686 7 /* Pentium Pro */ -#define CPU_M1SC 8 /* Cyrix M1sc (aka 5x86) */ -#define CPU_M1 9 /* Cyrix M1 (aka 6x86) */ -#define CPU_BLUE 10 /* IBM BlueLighting CPU */ -#define CPU_M2 11 /* Cyrix M2 (aka enhanced 6x86 with MMX */ -#define CPU_NX586 12 /* NexGen (now AMD) 586 */ -#define CPU_CY486DX 13 /* Cyrix 486S/DX/DX2/DX4 */ -#define CPU_PII 14 /* Intel Pentium II */ -#define CPU_PIII 15 /* Intel Pentium III */ -#define CPU_P4 16 /* Intel Pentium 4 */ +#define CPU_X86 0 /* Intel */ +#define CPU_CLAWHAMMER 1 /* AMD Clawhammer */ +#define CPU_SLEDGEHAMMER 2 /* AMD Sledgehammer */ #ifndef LOCORE struct cpu_nameclass { diff --git a/sys/amd64/include/critical.h b/sys/amd64/include/critical.h index 7cc7ff2..dc5119c 100644 --- a/sys/amd64/include/critical.h +++ b/sys/amd64/include/critical.h @@ -23,7 +23,6 @@ __BEGIN_DECLS /* * Prototypes - see <arch>/<arch>/critical.c */ -void cpu_unpend(void); void cpu_critical_fork_exit(void); void 
cpu_thread_link(struct thread *td); @@ -34,12 +33,15 @@ void cpu_thread_link(struct thread *td); * * This routine is called from critical_enter() on the 0->1 transition * of td_critnest, prior to it being incremented to 1. - * - * If new-style critical section handling we do not have to do anything. - * However, as a side effect any interrupts occuring while td_critnest - * is non-zero will be deferred. */ -#define cpu_critical_enter() +static __inline void +cpu_critical_enter(void) +{ + struct thread *td; + + td = curthread; + td->td_md.md_savecrit = intr_disable(); +} /* * cpu_critical_exit: @@ -47,27 +49,14 @@ void cpu_thread_link(struct thread *td); * This routine is called from critical_exit() on a 1->0 transition * of td_critnest, after it has been decremented to 0. We are * exiting the last critical section. - * - * Note that the td->critnest (1->0) transition interrupt race against - * our int_pending/unpend() check below is handled by the interrupt - * code for us, so we do not have to do anything fancy. */ static __inline void cpu_critical_exit(void) { - /* - * We may have to schedule pending interrupts. Create - * conditions similar to an interrupt context and call - * unpend(). - * - * note: we do this even if we are in an interrupt - * nesting level. Deep nesting is protected by - * critical_*() and if we conditionalized it then we - * would have to check int_pending again whenever - * we decrement td_intr_nesting_level to 0. 
- */ - if (PCPU_GET(int_pending)) - cpu_unpend(); + struct thread *td; + + td = curthread; + intr_restore(td->td_md.md_savecrit); } #else /* !__GNUC__ */ diff --git a/sys/amd64/include/db_machdep.h b/sys/amd64/include/db_machdep.h index ba84fc6..5a2e65d 100644 --- a/sys/amd64/include/db_machdep.h +++ b/sys/amd64/include/db_machdep.h @@ -36,13 +36,13 @@ #define i386_saved_state trapframe typedef vm_offset_t db_addr_t; /* address - unsigned */ -typedef int db_expr_t; /* expression - signed */ +typedef long db_expr_t; /* expression - signed */ typedef struct i386_saved_state db_regs_t; extern db_regs_t ddb_regs; /* register state */ #define DDB_REGS (&ddb_regs) -#define PC_REGS(regs) ((db_addr_t)(regs)->tf_eip) +#define PC_REGS(regs) ((db_addr_t)(regs)->tf_rip) #define BKPT_INST 0xcc /* breakpoint instruction */ #define BKPT_SIZE (1) /* size of breakpoint inst */ diff --git a/sys/amd64/include/elf.h b/sys/amd64/include/elf.h index 46d512c..3883d49 100644 --- a/sys/amd64/include/elf.h +++ b/sys/amd64/include/elf.h @@ -30,17 +30,17 @@ #define _MACHINE_ELF_H_ 1 /* - * ELF definitions for the i386 architecture. + * ELF definitions for the AMD64 architecture. */ -#include <sys/elf32.h> /* Definitions common to all 32 bit architectures. */ +#include <sys/elf64.h> /* Definitions common to all 64 bit architectures. */ -#define __ELF_WORD_SIZE 32 /* Used by <sys/elf_generic.h> */ +#define __ELF_WORD_SIZE 64 /* Used by <sys/elf_generic.h> */ #include <sys/elf_generic.h> -#define ELF_ARCH EM_386 +#define ELF_ARCH EM_X86_64 -#define ELF_MACHINE_OK(x) ((x) == EM_386 || (x) == EM_486) +#define ELF_MACHINE_OK(x) ((x) == EM_X86_64) /* * Auxiliary vector entries for passing information to the interpreter. @@ -50,13 +50,13 @@ */ typedef struct { /* Auxiliary vector entry on initial stack */ - int a_type; /* Entry type. */ + long a_type; /* Entry type. */ union { long a_val; /* Integer value. */ void *a_ptr; /* Address. */ void (*a_fcn)(void); /* Function pointer (not used). 
*/ } a_un; -} Elf32_Auxinfo; +} Elf64_Auxinfo; __ElfType(Auxinfo); @@ -98,24 +98,29 @@ __ElfType(Auxinfo); * Relocation types. */ -#define R_386_NONE 0 /* No relocation. */ -#define R_386_32 1 /* Add symbol value. */ -#define R_386_PC32 2 /* Add PC-relative symbol value. */ -#define R_386_GOT32 3 /* Add PC-relative GOT offset. */ -#define R_386_PLT32 4 /* Add PC-relative PLT offset. */ -#define R_386_COPY 5 /* Copy data from shared object. */ -#define R_386_GLOB_DAT 6 /* Set GOT entry to data address. */ -#define R_386_JMP_SLOT 7 /* Set GOT entry to code address. */ -#define R_386_RELATIVE 8 /* Add load address of shared object. */ -#define R_386_GOTOFF 9 /* Add GOT-relative symbol address. */ -#define R_386_GOTPC 10 /* Add PC-relative GOT table address. */ - -#define R_386_COUNT 11 /* Count of defined relocation types. */ +#define R_X86_64_NONE 0 /* No relocation. */ +#define R_X86_64_64 1 /* Add 64 bit symbol value. */ +#define R_X86_64_PC32 2 /* PC-relative 32 bit signed sym value. */ +#define R_X86_64_GOT32 3 /* PC-relative 32 bit GOT offset. */ +#define R_X86_64_PLT32 4 /* PC-relative 32 bit PLT offset. */ +#define R_X86_64_COPY 5 /* Copy data from shared object. */ +#define R_X86_64_GLOB_DAT 6 /* Set GOT entry to data address. */ +#define R_X86_64_JMP_SLOT 7 /* Set GOT entry to code address. */ +#define R_X86_64_RELATIVE 8 /* Add load address of shared object. */ +#define R_X86_64_GOTPCREL 9 /* Add 32 bit signed pcrel offset to GOT. */ +#define R_X86_64_32 10 /* Add 32 bit zero extended symbol value */ +#define R_X86_64_32S 11 /* Add 32 bit sign extended symbol value */ +#define R_X86_64_16 12 /* Add 16 bit zero extended symbol value */ +#define R_X86_64_PC16 13 /* Add 16 bit sign extended pc relative symbol value */ +#define R_X86_64_8 14 /* Add 8 bit zero extended symbol value */ +#define R_X86_64_PC8 15 /* Add 8 bit sign extended pc relative symbol value */ + +#define R_X86_64_COUNT 16 /* Count of defined relocation types. 
*/ /* Define "machine" characteristics */ -#define ELF_TARG_CLASS ELFCLASS32 +#define ELF_TARG_CLASS ELFCLASS64 #define ELF_TARG_DATA ELFDATA2LSB -#define ELF_TARG_MACH EM_386 +#define ELF_TARG_MACH EM_X86_64 #define ELF_TARG_VER 1 #ifdef _KERNEL diff --git a/sys/amd64/include/endian.h b/sys/amd64/include/endian.h index 723a5f3..1766866 100644 --- a/sys/amd64/include/endian.h +++ b/sys/amd64/include/endian.h @@ -74,25 +74,26 @@ __extension__ ({ register __uint32_t __X = (x); \ __asm ("rorl $16, %0" : "+r" (__X)); \ __X; }) -#if defined(_KERNEL) #define __byte_swap_int(x) \ __extension__ ({ register __uint32_t __X = (x); \ __asm ("bswap %0" : "+r" (__X)); \ __X; }) -#endif + +#define __byte_swap_long(x) \ +__extension__ ({ register __uint64_t __X = (x); \ + __asm ("bswap %0" : "+r" (__X)); \ + __X; }) #define __byte_swap_word(x) \ __extension__ ({ register __uint16_t __X = (x); \ - __asm ("xchgb %h0, %b0" : "+q" (__X)); \ + __asm ("xchgb %h0, %b0" : "+Q" (__X)); \ __X; }) static __inline __uint64_t __bswap64(__uint64_t _x) { - return ((_x >> 56) | ((_x >> 40) & 0xff00) | ((_x >> 24) & 0xff0000) | - ((_x >> 8) & 0xff000000) | ((_x << 8) & ((__uint64_t)0xff << 32)) | - ((_x << 24) & ((__uint64_t)0xff << 40)) | - ((_x << 40) & ((__uint64_t)0xff << 48)) | ((_x << 56))); + + return (__byte_swap_long(_x)); } static __inline __uint32_t diff --git a/sys/amd64/include/fpu.h b/sys/amd64/include/fpu.h index 8e0f9bc..db1fe40 100644 --- a/sys/amd64/include/fpu.h +++ b/sys/amd64/include/fpu.h @@ -45,81 +45,38 @@ #ifndef _MACHINE_NPX_H_ #define _MACHINE_NPX_H_ -/* Environment information of floating point unit */ -struct env87 { - long en_cw; /* control word (16bits) */ - long en_sw; /* status word (16bits) */ - long en_tw; /* tag word (16bits) */ - long en_fip; /* floating point instruction pointer */ - u_short en_fcs; /* floating code segment selector */ - u_short en_opcode; /* opcode last executed (11 bits ) */ - long en_foo; /* floating operand offset */ - long en_fos; /* 
floating operand segment selector */ -}; - -/* Contents of each floating point accumulator */ +/* Contents of each x87 floating point accumulator */ struct fpacc87 { -#ifdef dontdef /* too unportable */ - u_long fp_mantlo; /* mantissa low (31:0) */ - u_long fp_manthi; /* mantissa high (63:32) */ - int fp_exp:15; /* exponent */ - int fp_sgn:1; /* mantissa sign */ -#else u_char fp_bytes[10]; -#endif }; -/* Floating point context */ -struct save87 { - struct env87 sv_env; /* floating point control/status */ - struct fpacc87 sv_ac[8]; /* accumulator contents, 0-7 */ - u_char sv_pad0[4]; /* padding for (now unused) saved status word */ - /* - * Bogus padding for emulators. Emulators should use their own - * struct and arrange to store into this struct (ending here) - * before it is inspected for ptracing or for core dumps. Some - * emulators overwrite the whole struct. We have no good way of - * knowing how much padding to leave. Leave just enough for the - * GPL emulator's i387_union (176 bytes total). 
- */ - u_char sv_pad[64]; /* padding; used by emulators */ +/* Contents of each SSE extended accumulator */ +struct xmmacc { + u_char xmm_bytes[16]; }; struct envxmm { u_int16_t en_cw; /* control word (16bits) */ u_int16_t en_sw; /* status word (16bits) */ - u_int16_t en_tw; /* tag word (16bits) */ + u_int8_t en_tw; /* tag word (8bits) */ + u_int8_t en_zero; u_int16_t en_opcode; /* opcode last executed (11 bits ) */ - u_int32_t en_fip; /* floating point instruction pointer */ - u_int16_t en_fcs; /* floating code segment selector */ - u_int16_t en_pad0; /* padding */ - u_int32_t en_foo; /* floating operand offset */ - u_int16_t en_fos; /* floating operand segment selector */ - u_int16_t en_pad1; /* padding */ + u_int64_t en_rip; /* floating point instruction pointer */ + u_int64_t en_rdp; /* floating operand pointer */ u_int32_t en_mxcsr; /* SSE sontorol/status register */ - u_int32_t en_pad2; /* padding */ + u_int32_t en_mxcsr_mask; /* valid bits in mxcsr */ }; -/* Contents of each SSE extended accumulator */ -struct xmmacc { - u_char xmm_bytes[16]; -}; - -struct savexmm { +struct savefpu { struct envxmm sv_env; struct { struct fpacc87 fp_acc; u_char fp_pad[6]; /* padding */ } sv_fp[8]; - struct xmmacc sv_xmm[8]; - u_char sv_pad[224]; + struct xmmacc sv_xmm[16]; + u_char sv_pad[96]; } __aligned(16); -union savefpu { - struct save87 sv_87; - struct savexmm sv_xmm; -}; - /* * The hardware default control word for i387's and later coprocessors is * 0x37F, giving: @@ -144,10 +101,10 @@ int npxdna(void); void npxdrop(void); void npxexit(struct thread *td); int npxformat(void); -int npxgetregs(struct thread *td, union savefpu *addr); +int npxgetregs(struct thread *td, struct savefpu *addr); void npxinit(u_short control); -void npxsave(union savefpu *addr); -void npxsetregs(struct thread *td, union savefpu *addr); +void npxsave(struct savefpu *addr); +void npxsetregs(struct thread *td, struct savefpu *addr); int npxtrap(void); #endif diff --git a/sys/amd64/include/frame.h 
b/sys/amd64/include/frame.h index c32afc4..8f96528 100644 --- a/sys/amd64/include/frame.h +++ b/sys/amd64/include/frame.h @@ -46,116 +46,93 @@ /* * Exception/Trap Stack Frame + * + * The ordering of this is specifically so that we can take the first 6 + * syscall arguments directly from the beginning of the frame. */ struct trapframe { - int tf_fs; - int tf_es; - int tf_ds; - int tf_edi; - int tf_esi; - int tf_ebp; - int tf_isp; - int tf_ebx; - int tf_edx; - int tf_ecx; - int tf_eax; - int tf_trapno; - /* below portion defined in 386 hardware */ - int tf_err; - int tf_eip; - int tf_cs; - int tf_eflags; - /* below only when crossing rings (e.g. user to kernel) */ - int tf_esp; - int tf_ss; -}; - -/* Superset of trap frame, for traps from virtual-8086 mode */ - -struct trapframe_vm86 { - int tf_fs; - int tf_es; - int tf_ds; - int tf_edi; - int tf_esi; - int tf_ebp; - int tf_isp; - int tf_ebx; - int tf_edx; - int tf_ecx; - int tf_eax; - int tf_trapno; - /* below portion defined in 386 hardware */ - int tf_err; - int tf_eip; - int tf_cs; - int tf_eflags; - /* below only when crossing rings (e.g. 
user to kernel) */ - int tf_esp; - int tf_ss; - /* below only when switching out of VM86 mode */ - int tf_vm86_es; - int tf_vm86_ds; - int tf_vm86_fs; - int tf_vm86_gs; + register_t tf_rdi; + register_t tf_rsi; + register_t tf_rdx; + register_t tf_rcx; + register_t tf_r8; + register_t tf_r9; + register_t tf_rax; + register_t tf_rbx; + register_t tf_rbp; + register_t tf_r10; + register_t tf_r11; + register_t tf_r12; + register_t tf_r13; + register_t tf_r14; + register_t tf_r15; + register_t tf_trapno; + /* below portion defined in hardware */ + register_t tf_err; + register_t tf_rip; + register_t tf_cs; + register_t tf_rflags; + register_t tf_rsp; + register_t tf_ss; }; /* Interrupt stack frame */ struct intrframe { - int if_vec; - int if_fs; - int if_es; - int if_ds; - int if_edi; - int if_esi; - int if_ebp; - int :32; - int if_ebx; - int if_edx; - int if_ecx; - int if_eax; - int :32; /* for compat with trap frame - trapno */ - int :32; /* for compat with trap frame - err */ - /* below portion defined in 386 hardware */ - int if_eip; - int if_cs; - int if_eflags; - /* below only when crossing rings (e.g. 
user to kernel) */ - int if_esp; - int if_ss; + register_t if_rdi; + register_t if_rsi; + register_t if_rdx; + register_t if_rcx; + register_t if_r8; + register_t if_r9; + register_t if_rax; + register_t if_rbx; + register_t if_rbp; + register_t if_r10; + register_t if_r11; + register_t if_r12; + register_t if_r13; + register_t if_r14; + register_t if_r15; + register_t :64; /* compat with trap frame - trapno */ + register_t :64; /* compat with trap frame - err */ + /* below portion defined in hardware */ + register_t if_rip; + register_t if_cs; + register_t if_rflags; + register_t if_rsp; + register_t if_ss; }; /* frame of clock (same as interrupt frame) */ struct clockframe { - int cf_vec; - int cf_fs; - int cf_es; - int cf_ds; - int cf_edi; - int cf_esi; - int cf_ebp; - int :32; - int cf_ebx; - int cf_edx; - int cf_ecx; - int cf_eax; - int :32; /* for compat with trap frame - trapno */ - int :32; /* for compat with trap frame - err */ - /* below portion defined in 386 hardware */ - int cf_eip; - int cf_cs; - int cf_eflags; - /* below only when crossing rings (e.g. 
user to kernel) */ - int cf_esp; - int cf_ss; + register_t cf_rdi; + register_t cf_rsi; + register_t cf_rdx; + register_t cf_rcx; + register_t cf_r8; + register_t cf_r9; + register_t cf_rax; + register_t cf_rbx; + register_t cf_rbp; + register_t cf_r10; + register_t cf_r11; + register_t cf_r12; + register_t cf_r13; + register_t cf_r14; + register_t cf_r15; + register_t :64; /* compat with trap frame - trapno */ + register_t :64; /* compat with trap frame - err */ + /* below portion defined in hardware */ + register_t cf_rip; + register_t cf_cs; + register_t cf_rflags; + register_t cf_rsp; + register_t cf_ss; }; int kdb_trap(int, int, struct trapframe *); -extern int (*pmath_emulate)(struct trapframe *); - -#define INTR_TO_TRAPFRAME(frame) ((struct trapframe *)&(frame)->if_fs) #endif /* _MACHINE_FRAME_H_ */ diff --git a/sys/amd64/include/kse.h b/sys/amd64/include/kse.h index feaf535..c96e4f5 100644 --- a/sys/amd64/include/kse.h +++ b/sys/amd64/include/kse.h @@ -36,14 +36,7 @@ #include <machine/ucontext.h> union kse_td_ctx { -#ifdef _KERNEL - struct { - int tf_onstack; - int tf_gs; - struct trapframe tf_tf; - } tfrm; -#endif - mcontext_t mcontext; + int teh_stuff; }; #endif /* MACHINE_KSE_H */ diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h index bf3eca4..c5f0e458 100644 --- a/sys/amd64/include/md_var.h +++ b/sys/amd64/include/md_var.h @@ -36,11 +36,6 @@ * Miscellaneous machine-dependent declarations. 
*/ -extern void (*bcopy_vector)(const void *from, void *to, size_t len); -extern void (*bzero_vector)(void *buf, size_t len); -extern int (*copyin_vector)(const void *udaddr, void *kaddr, size_t len); -extern int (*copyout_vector)(const void *kaddr, void *udaddr, size_t len); - extern long Maxmem; extern u_int atdevbase; /* offset in virtual memory of ISA io mem */ extern int busdma_swi_pending; @@ -51,37 +46,10 @@ extern u_int cpu_high; extern u_int cpu_id; extern u_int cpu_procinfo; extern char cpu_vendor[]; -extern u_int cyrix_did; extern uint16_t *elan_mmcr; extern char kstack[]; -#ifdef PC98 -extern int need_pre_dma_flush; -extern int need_post_dma_flush; -#endif extern char sigcode[]; extern int szsigcode; -#ifdef COMPAT_FREEBSD4 -extern int szfreebsd4_sigcode; -#endif -#ifdef COMPAT_43 -extern int szosigcode; -#endif -#ifdef SWTCH_OPTIM_STATS -extern int stupid_switch; -extern int swtch_optim_stats; -extern int tlb_flush_count; -extern int lazy_flush_count; -extern int lazy_flush_fixup; -#ifdef SMP -extern int lazy_flush_smpfixup; -extern int lazy_flush_smpipi; -extern int lazy_flush_smpbadcr3; -extern int lazy_flush_smpmiss; -#endif -#endif -#ifdef LAZY_SWITCH -extern int lazy_flush_enable; -#endif typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss); struct thread; @@ -94,28 +62,13 @@ void busdma_swi(void); void cpu_halt(void); void cpu_reset(void); void cpu_setregs(void); -void cpu_switch_load_gs(void) __asm(__STRING(cpu_switch_load_gs)); void doreti_iret(void) __asm(__STRING(doreti_iret)); void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault)); -void doreti_popl_ds(void) __asm(__STRING(doreti_popl_ds)); -void doreti_popl_ds_fault(void) __asm(__STRING(doreti_popl_ds_fault)); -void doreti_popl_es(void) __asm(__STRING(doreti_popl_es)); -void doreti_popl_es_fault(void) __asm(__STRING(doreti_popl_es_fault)); -void doreti_popl_fs(void) __asm(__STRING(doreti_popl_fs)); -void doreti_popl_fs_fault(void) 
__asm(__STRING(doreti_popl_fs_fault)); void enable_sse(void); void fillw(int /*u_short*/ pat, void *base, size_t cnt); -void i486_bzero(void *buf, size_t len); -void i586_bcopy(const void *from, void *to, size_t len); -void i586_bzero(void *buf, size_t len); -int i586_copyin(const void *udaddr, void *kaddr, size_t len); -int i586_copyout(const void *kaddr, void *udaddr, size_t len); -void i686_pagezero(void *addr); -void init_AMD_Elan_sc520(void); +void pagezero(void *addr); int is_physical_memory(vm_offset_t addr); -vm_paddr_t kvtop(void *addr); -void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int selec); +void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int ist); void swi_vm(void *); -int user_dbreg_trap(void); #endif /* !_MACHINE_MD_VAR_H_ */ diff --git a/sys/amd64/include/mutex.h b/sys/amd64/include/mutex.h index 87d47e5..eb13945 100644 --- a/sys/amd64/include/mutex.h +++ b/sys/amd64/include/mutex.h @@ -56,20 +56,20 @@ extern struct mtx clock_lock; * locks) in the near future, however. 
*/ #define MTX_LOCK_SPIN(lck, flags) \ - pushl $0 ; \ - pushl $0 ; \ - pushl $flags ; \ - pushl $lck ; \ + pushq $0 ; \ + pushq $0 ; \ + pushq $flags ; \ + pushq $lck ; \ call _mtx_lock_spin_flags ; \ - addl $0x10, %esp ; \ + addq $0x20, %rsp ; \ #define MTX_UNLOCK_SPIN(lck) \ - pushl $0 ; \ - pushl $0 ; \ - pushl $0 ; \ - pushl $lck ; \ + pushq $0 ; \ + pushq $0 ; \ + pushq $0 ; \ + pushq $lck ; \ call _mtx_unlock_spin_flags ; \ - addl $0x10, %esp ; \ + addq $0x20, %rsp ; \ #endif /* !LOCORE */ #endif /* __MACHINE_MUTEX_H */ diff --git a/sys/amd64/include/npx.h b/sys/amd64/include/npx.h index 8e0f9bc..db1fe40 100644 --- a/sys/amd64/include/npx.h +++ b/sys/amd64/include/npx.h @@ -45,81 +45,38 @@ #ifndef _MACHINE_NPX_H_ #define _MACHINE_NPX_H_ -/* Environment information of floating point unit */ -struct env87 { - long en_cw; /* control word (16bits) */ - long en_sw; /* status word (16bits) */ - long en_tw; /* tag word (16bits) */ - long en_fip; /* floating point instruction pointer */ - u_short en_fcs; /* floating code segment selector */ - u_short en_opcode; /* opcode last executed (11 bits ) */ - long en_foo; /* floating operand offset */ - long en_fos; /* floating operand segment selector */ -}; - -/* Contents of each floating point accumulator */ +/* Contents of each x87 floating point accumulator */ struct fpacc87 { -#ifdef dontdef /* too unportable */ - u_long fp_mantlo; /* mantissa low (31:0) */ - u_long fp_manthi; /* mantissa high (63:32) */ - int fp_exp:15; /* exponent */ - int fp_sgn:1; /* mantissa sign */ -#else u_char fp_bytes[10]; -#endif }; -/* Floating point context */ -struct save87 { - struct env87 sv_env; /* floating point control/status */ - struct fpacc87 sv_ac[8]; /* accumulator contents, 0-7 */ - u_char sv_pad0[4]; /* padding for (now unused) saved status word */ - /* - * Bogus padding for emulators. 
Emulators should use their own - * struct and arrange to store into this struct (ending here) - * before it is inspected for ptracing or for core dumps. Some - * emulators overwrite the whole struct. We have no good way of - * knowing how much padding to leave. Leave just enough for the - * GPL emulator's i387_union (176 bytes total). - */ - u_char sv_pad[64]; /* padding; used by emulators */ +/* Contents of each SSE extended accumulator */ +struct xmmacc { + u_char xmm_bytes[16]; }; struct envxmm { u_int16_t en_cw; /* control word (16bits) */ u_int16_t en_sw; /* status word (16bits) */ - u_int16_t en_tw; /* tag word (16bits) */ + u_int8_t en_tw; /* tag word (8bits) */ + u_int8_t en_zero; u_int16_t en_opcode; /* opcode last executed (11 bits ) */ - u_int32_t en_fip; /* floating point instruction pointer */ - u_int16_t en_fcs; /* floating code segment selector */ - u_int16_t en_pad0; /* padding */ - u_int32_t en_foo; /* floating operand offset */ - u_int16_t en_fos; /* floating operand segment selector */ - u_int16_t en_pad1; /* padding */ + u_int64_t en_rip; /* floating point instruction pointer */ + u_int64_t en_rdp; /* floating operand pointer */ u_int32_t en_mxcsr; /* SSE sontorol/status register */ - u_int32_t en_pad2; /* padding */ + u_int32_t en_mxcsr_mask; /* valid bits in mxcsr */ }; -/* Contents of each SSE extended accumulator */ -struct xmmacc { - u_char xmm_bytes[16]; -}; - -struct savexmm { +struct savefpu { struct envxmm sv_env; struct { struct fpacc87 fp_acc; u_char fp_pad[6]; /* padding */ } sv_fp[8]; - struct xmmacc sv_xmm[8]; - u_char sv_pad[224]; + struct xmmacc sv_xmm[16]; + u_char sv_pad[96]; } __aligned(16); -union savefpu { - struct save87 sv_87; - struct savexmm sv_xmm; -}; - /* * The hardware default control word for i387's and later coprocessors is * 0x37F, giving: @@ -144,10 +101,10 @@ int npxdna(void); void npxdrop(void); void npxexit(struct thread *td); int npxformat(void); -int npxgetregs(struct thread *td, union savefpu *addr); +int 
npxgetregs(struct thread *td, struct savefpu *addr); void npxinit(u_short control); -void npxsave(union savefpu *addr); -void npxsetregs(struct thread *td, union savefpu *addr); +void npxsave(struct savefpu *addr); +void npxsetregs(struct thread *td, struct savefpu *addr); int npxtrap(void); #endif diff --git a/sys/amd64/include/param.h b/sys/amd64/include/param.h index 5955022..355e95d 100644 --- a/sys/amd64/include/param.h +++ b/sys/amd64/include/param.h @@ -40,7 +40,7 @@ */ /* - * Machine dependent constants for the AMD64. + * Machine dependent constants for AMD64. */ /* @@ -55,7 +55,7 @@ * */ #ifndef _ALIGNBYTES -#define _ALIGNBYTES (sizeof(int) - 1) +#define _ALIGNBYTES (sizeof(long) - 1) #endif #ifndef _ALIGN #define _ALIGN(p) (((u_long)(p) + _ALIGNBYTES) &~ _ALIGNBYTES) @@ -83,40 +83,71 @@ #define MACHINE_ARCH "amd64" #endif -#ifdef SMP -#define MAXCPU 16 -#else #define MAXCPU 1 -#endif /* SMP */ #define ALIGNBYTES _ALIGNBYTES #define ALIGN(p) _ALIGN(p) #define ALIGNED_POINTER(p,t) _ALIGNED_POINTER((p),(t)) +/* Size of the level 1 page table units */ +#define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t))) #define PAGE_SHIFT 12 /* LOG2(PAGE_SIZE) */ #define PAGE_SIZE (1<<PAGE_SHIFT) /* bytes/page */ #define PAGE_MASK (PAGE_SIZE-1) -#define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t))) - -#define KERNBASE 0x0000000000000000LL /* start of kernel virtual */ -#define BTOPKERNBASE ((u_long)KERNBASE >> PGSHIFT) +/* Size of the level 2 page directory units */ +#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t))) +#define PDRSHIFT 21 /* LOG2(NBPDR) */ +#define NBPDR (1<<PDRSHIFT) /* bytes/page dir */ +#define PDRMASK (NBPDR-1) +/* Size of the level 3 page directory pointer table units */ +#define NPDPEPG (PAGE_SIZE/(sizeof (pdp_entry_t))) +#define PDPSHIFT 30 /* LOG2(NBPDP) */ +#define NBPDP (1<<PDPSHIFT) /* bytes/page dir ptr table */ +#define PDPMASK (NBPDP-1) +/* Size of the level 4 page-map level-4 table units */ +#define NPML4EPG (PAGE_SIZE/(sizeof (pml4_entry_t))) +#define 
PML4SHIFT 39 /* LOG2(NBPML4T) */ +#define NBPML4T (1ul<<PML4SHIFT)/* bytes/page map lev4 table */ +#define PML4MASK (NBPML4T-1) + +#define NKPML4E 1 /* addressable number of page tables/pde's */ +#define NKPDPE 1 /* addressable number of page tables/pde's */ +#define NPGPTD 4 + +#define NBPTD (NPGPTD<<PAGE_SHIFT) +#define NPDEPTD (NBPTD/(sizeof (pd_entry_t))) #define IOPAGES 2 /* pages of i/o permission bitmap */ -#ifndef KSTACK_PAGES -#define KSTACK_PAGES 2 /* pages of kstack (with pcb) */ -#endif +#define KSTACK_PAGES 4 /* pages of kstack (with pcb) */ #define UAREA_PAGES 1 /* holds struct user WITHOUT PCB (see def.) */ -#define KSTACK_GUARD 1 /* compile in the kstack guard page */ +#define KSTACK_GUARD 1 /* compile in the kstack guard page */ + +/* + * Ceiling on amount of swblock kva space, can be changed via + * the kern.maxswzone /boot/loader.conf variable. + */ +#ifndef VM_SWZONE_SIZE_MAX +#define VM_SWZONE_SIZE_MAX (32 * 1024 * 1024) +#endif + +/* + * Ceiling on size of buffer cache (really only affects write queueing, + * the VM page cache is not affected), can be changed via + * the kern.maxbcache /boot/loader.conf variable. 
+ */ +#ifndef VM_BCACHE_SIZE_MAX +#define VM_BCACHE_SIZE_MAX (200 * 1024 * 1024) +#endif /* * Mach derived conversion macros */ #define round_page(x) ((((unsigned long)(x)) + PAGE_MASK) & ~(PAGE_MASK)) #define trunc_page(x) ((unsigned long)(x) & ~(PAGE_MASK)) -#define trunc_4mpage(x) ((unsigned)(x) & ~PDRMASK) -#define round_4mpage(x) ((((unsigned)(x)) + PDRMASK) & ~PDRMASK) +#define trunc_2mpage(x) ((unsigned long)(x) & ~PDRMASK) +#define round_2mpage(x) ((((unsigned long)(x)) + PDRMASK) & ~PDRMASK) #define atop(x) ((unsigned long)(x) >> PAGE_SHIFT) #define ptoa(x) ((unsigned long)(x) << PAGE_SHIFT) @@ -124,7 +155,7 @@ #define amd64_btop(x) ((unsigned long)(x) >> PAGE_SHIFT) #define amd64_ptob(x) ((unsigned long)(x) << PAGE_SHIFT) -#define pgtok(x) ((x) * (PAGE_SIZE / 1024)) +#define pgtok(x) ((unsigned long)(x) * (PAGE_SIZE / 1024)) #endif /* !_MACHINE_PARAM_H_ */ #endif /* !_NO_NAMESPACE_POLLUTION */ diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h index c7a837b..551bf8f 100644 --- a/sys/amd64/include/pcb.h +++ b/sys/amd64/include/pcb.h @@ -46,34 +46,25 @@ #include <machine/npx.h> struct pcb { - int pcb_cr3; - int pcb_edi; - int pcb_esi; - int pcb_ebp; - int pcb_esp; - int pcb_ebx; - int pcb_eip; + register_t padxx[8]; + register_t pcb_cr3; + register_t pcb_r15; + register_t pcb_r14; + register_t pcb_r13; + register_t pcb_r12; + register_t pcb_rbp; + register_t pcb_rsp; + register_t pcb_rbx; + register_t pcb_rip; + register_t pcb_rflags; - int pcb_dr0; - int pcb_dr1; - int pcb_dr2; - int pcb_dr3; - int pcb_dr6; - int pcb_dr7; - - union savefpu pcb_save; - u_int pcb_flags; -#define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */ -#define PCB_DBREGS 0x02 /* process using debug registers */ -#define PCB_NPXTRAP 0x04 /* npx trap pending */ -#define PCB_NPXINITDONE 0x08 /* fpu state is initialized */ -#define PCB_VM86CALL 0x10 /* in vm86 call */ + struct savefpu pcb_save; + u_long pcb_flags; +#define PCB_NPXTRAP 0x01 /* npx trap pending 
*/ +#define PCB_NPXINITDONE 0x02 /* fpu state is initialized */ +#define PCB_FULLCTX 0x04 /* full context restore on sysret */ caddr_t pcb_onfault; /* copyin/out fault recovery */ - int pcb_gs; - struct pcb_ext *pcb_ext; /* optional pcb extension */ - int pcb_psl; /* process status long */ - u_long __pcb_spare[2]; /* adjust to avoid core dump size changes */ }; #ifdef _KERNEL diff --git a/sys/amd64/include/pcb_ext.h b/sys/amd64/include/pcb_ext.h index cf5a911..6d1bde2 100644 --- a/sys/amd64/include/pcb_ext.h +++ b/sys/amd64/include/pcb_ext.h @@ -29,24 +29,4 @@ #ifndef _I386_PCB_EXT_H_ #define _I386_PCB_EXT_H_ -/* - * Extension to the 386 process control block - */ -#include <machine/tss.h> -#include <machine/vm86.h> -#include <machine/segments.h> - -struct pcb_ext { - struct segment_descriptor ext_tssd; /* tss descriptor */ - struct i386tss ext_tss; /* per-process i386tss */ - caddr_t ext_iomap; /* i/o permission bitmap */ - struct vm86_kernel ext_vm86; /* vm86 area */ -}; - -#ifdef _KERNEL - -int i386_extend_pcb(struct thread *); - -#endif - #endif /* _I386_PCB_EXT_H_ */ diff --git a/sys/amd64/include/pci_cfgreg.h b/sys/amd64/include/pci_cfgreg.h index b173531..75840aa 100644 --- a/sys/amd64/include/pci_cfgreg.h +++ b/sys/amd64/include/pci_cfgreg.h @@ -38,12 +38,7 @@ #define CONF1_ENABLE_RES1 0x80000000ul #define CONF2_ENABLE_PORT 0x0cf8 -#ifdef PC98 -#define CONF2_FORWARD_PORT 0x0cf9 -#else #define CONF2_FORWARD_PORT 0x0cfa -#endif - #define CONF2_ENABLE_CHK 0x0e #define CONF2_ENABLE_RES 0x0e @@ -51,4 +46,3 @@ int pci_cfgregopen(void); u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes); void pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes); int pci_cfgintr(int bus, int device, int pin, int oldirq); -int pci_probe_route_table(int bus); diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h index a680645..9543b23 100644 --- a/sys/amd64/include/pcpu.h +++ b/sys/amd64/include/pcpu.h @@ -31,9 +31,6 @@ 
#ifdef _KERNEL -#include <machine/segments.h> -#include <machine/tss.h> - /* * The SMP parts are setup in pmap.c and locore.s for the BSP, and * mp_machdep.c sets up the data for the AP's to "see" when they awake. @@ -42,15 +39,8 @@ * other processors" */ #define PCPU_MD_FIELDS \ - struct pcpu *pc_prvspace; /* Self-reference */ \ - struct i386tss pc_common_tss; \ - struct segment_descriptor pc_common_tssd; \ - struct segment_descriptor *pc_tss_gdt; \ - int pc_currentldt; \ - u_int32_t pc_int_pending; /* master int pending flag */ \ - u_int32_t pc_ipending; /* pending slow interrupts */ \ - u_int32_t pc_fpending; /* pending fast interrupts */ \ - u_int32_t pc_spending /* pending soft interrupts */ + struct pcpu *pc_prvspace; /* Self-reference */ \ + register_t pc_scratch_rsp; /* User %rsp in syscall */ #if defined(lint) @@ -80,7 +70,7 @@ extern struct pcpu *pcpup; #define __PCPU_PTR(name) ({ \ __pcpu_type(name) *__p; \ \ - __asm __volatile("movl %%fs:%1,%0; addl %2,%0" \ + __asm __volatile("movq %%gs:%1,%0; addq %2,%0" \ : "=r" (__p) \ : "m" (*(struct pcpu *)(__pcpu_offset(pc_prvspace))), \ "i" (__pcpu_offset(name))); \ @@ -96,22 +86,28 @@ extern struct pcpu *pcpup; \ if (sizeof(__result) == 1) { \ u_char __b; \ - __asm __volatile("movb %%fs:%1,%0" \ + __asm __volatile("movb %%gs:%1,%0" \ : "=r" (__b) \ : "m" (*(u_char *)(__pcpu_offset(name)))); \ __result = *(__pcpu_type(name) *)&__b; \ } else if (sizeof(__result) == 2) { \ u_short __w; \ - __asm __volatile("movw %%fs:%1,%0" \ + __asm __volatile("movw %%gs:%1,%0" \ : "=r" (__w) \ : "m" (*(u_short *)(__pcpu_offset(name)))); \ __result = *(__pcpu_type(name) *)&__w; \ } else if (sizeof(__result) == 4) { \ u_int __i; \ - __asm __volatile("movl %%fs:%1,%0" \ + __asm __volatile("movl %%gs:%1,%0" \ : "=r" (__i) \ : "m" (*(u_int *)(__pcpu_offset(name)))); \ __result = *(__pcpu_type(name) *)&__i; \ + } else if (sizeof(__result) == 8) { \ + u_long __l; \ + __asm __volatile("movq %%gs:%1,%0" \ + : "=r" (__l) \ + : "m" 
(*(u_long *)(__pcpu_offset(name)))); \ + __result = *(__pcpu_type(name) *)&__l; \ } else { \ __result = *__PCPU_PTR(name); \ } \ @@ -128,21 +124,27 @@ extern struct pcpu *pcpup; if (sizeof(__val) == 1) { \ u_char __b; \ __b = *(u_char *)&__val; \ - __asm __volatile("movb %1,%%fs:%0" \ + __asm __volatile("movb %1,%%gs:%0" \ : "=m" (*(u_char *)(__pcpu_offset(name))) \ : "r" (__b)); \ } else if (sizeof(__val) == 2) { \ u_short __w; \ __w = *(u_short *)&__val; \ - __asm __volatile("movw %1,%%fs:%0" \ + __asm __volatile("movw %1,%%gs:%0" \ : "=m" (*(u_short *)(__pcpu_offset(name))) \ : "r" (__w)); \ } else if (sizeof(__val) == 4) { \ u_int __i; \ __i = *(u_int *)&__val; \ - __asm __volatile("movl %1,%%fs:%0" \ + __asm __volatile("movl %1,%%gs:%0" \ : "=m" (*(u_int *)(__pcpu_offset(name))) \ : "r" (__i)); \ + } else if (sizeof(__val) == 8) { \ + u_long __l; \ + __l = *(u_long *)&__val; \ + __asm __volatile("movq %1,%%gs:%0" \ + : "=m" (*(u_long *)(__pcpu_offset(name))) \ + : "r" (__l)); \ } else { \ *__PCPU_PTR(name) = __val; \ } \ diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index 58e9182..d3e2104 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -83,52 +83,36 @@ #define PGEX_U 0x04 /* access from User mode (UPL) */ /* - * Size of Kernel address space. This is the number of page table pages - * (4MB each) to use for the kernel. 256 pages == 1 Gigabyte. - * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc). + * Size of Kernel address space. This is the number of level 4 (top) + * entries. We use half of them for the kernel due to the 48 bit + * virtual address sign extension. */ -#ifndef KVA_PAGES -#ifdef PAE -#define KVA_PAGES 512 -#else -#define KVA_PAGES 256 -#endif -#endif - +#define KVA_PAGES 1536 + /* - * Pte related macros + * Pte related macros. This is complicated by having to deal with + * the sign extension of the 48th bit. 
*/ -#define VADDR(pdi, pti) ((vm_offset_t)(((pdi)<<PDRSHIFT)|((pti)<<PAGE_SHIFT))) +#define VADDR_SIGN(l4) \ + ((l4) >= NPML4EPG/2 ? ((unsigned long)-1 << 47) : 0ul) +#define VADDR(l4, l3, l2, l1) ( \ + ((unsigned long)(l4) << PML4SHIFT) | VADDR_SIGN(l4) | \ + ((unsigned long)(l3) << PDPSHIFT) | \ + ((unsigned long)(l2) << PDRSHIFT) | \ + ((unsigned long)(l1) << PAGE_SHIFT)) + #ifndef NKPT -#ifdef PAE -#define NKPT 120 /* actual number of kernel page tables */ -#else -#define NKPT 30 /* actual number of kernel page tables */ -#endif -#endif -#ifndef NKPDE -#ifdef SMP -#define NKPDE (KVA_PAGES - 1) /* number of page tables/pde's */ -#else -#define NKPDE (KVA_PAGES) /* number of page tables/pde's */ +#define NKPT 120 /* initial number of kernel page tables */ #endif +#ifndef NKPDE +#define NKPDE (KVA_PAGES) /* number of page tables/pde's */ #endif /* * The *PTDI values control the layout of virtual memory - * - * XXX This works for now, but I am not real happy with it, I'll fix it - * right after I fix locore.s and the magic 28K hole - * - * SMP_PRIVPAGES: The per-cpu address space is 0xff80000 -> 0xffbfffff */ -#ifdef SMP -#define MPPTDI (NPDEPTD-1) /* per cpu ptd entry */ -#define KPTDI (MPPTDI-NKPDE) /* start of kernel virtual pde's */ -#else -#define KPTDI (NPDEPTD-NKPDE)/* start of kernel virtual pde's */ -#endif /* SMP */ +#define KPTDI (NPDEPTD-NKPDE) /* start of kernel virtual pde's */ #define PTDPTDI (KPTDI-NPGPTD) /* ptd entry that points to ptd! 
*/ /* @@ -141,38 +125,37 @@ #include <sys/queue.h> -#ifdef PAE - -typedef uint64_t pdpt_entry_t; -typedef uint64_t pd_entry_t; -typedef uint64_t pt_entry_t; +typedef u_int64_t pd_entry_t; +typedef u_int64_t pt_entry_t; +typedef u_int64_t pdp_entry_t; +typedef u_int64_t pml4_entry_t; +#define PML4ESHIFT (3) +#define PDPESHIFT (3) #define PTESHIFT (3) #define PDESHIFT (3) -#else - -typedef uint32_t pd_entry_t; -typedef uint32_t pt_entry_t; - -#define PTESHIFT (2) -#define PDESHIFT (2) - -#endif - /* * Address of current and alternate address space page table maps * and directories. + * XXX it might be saner to just direct map all of physical memory + * into the kernel using 2MB pages. We have enough space to do + * it (2^47 bits of KVM, while current max physical addressability + * is 2^40 physical bits). Then we can get rid of the evil hole + * in the page tables and the evil overlapping. */ #ifdef _KERNEL -extern pt_entry_t PTmap[]; -extern pd_entry_t PTD[]; -extern pd_entry_t PTDpde[]; - -#ifdef PAE -extern pdpt_entry_t *IdlePDPT; -#endif -extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */ +extern pt_entry_t PTmap[]; +extern pd_entry_t PDmap[]; +extern pdp_entry_t PDPmap[]; +extern pml4_entry_t PML4[]; +extern pdp_entry_t PDP[]; +extern pd_entry_t PTD[]; +extern pd_entry_t PTDpde[]; + +extern u_int64_t IdlePML4; /* physical address of "Idle" state directory */ +extern u_int64_t IdlePDP; /* physical address of "Idle" state directory */ +extern u_int64_t IdlePTD; /* physical address of "Idle" state directory */ #endif #ifdef _KERNEL @@ -182,7 +165,7 @@ extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */ * Note: these work recursively, thus vtopte of a pte will give * the corresponding pde that in turn maps it. 
*/ -#define vtopte(va) (PTmap + i386_btop(va)) +#define vtopte(va) (PTmap + amd64_btop(va)) /* * Routine: pmap_kextract @@ -195,7 +178,8 @@ pmap_kextract(vm_offset_t va) { vm_paddr_t pa; - if ((pa = PTD[va >> PDRSHIFT]) & PG_PS) { + pa = PTD[va >> PDRSHIFT]; + if (pa & PG_PS) { pa = (pa & ~(NBPDR - 1)) | (va & (NBPDR - 1)); } else { pa = *vtopte(va); @@ -206,39 +190,6 @@ pmap_kextract(vm_offset_t va) #define vtophys(va) pmap_kextract(((vm_offset_t) (va))) -#ifdef PAE - -static __inline pt_entry_t -pte_load(pt_entry_t *ptep) -{ - pt_entry_t r; - - __asm __volatile( - "lock; cmpxchg8b %1" - : "=A" (r) - : "m" (*ptep), "a" (0), "d" (0), "b" (0), "c" (0)); - return (r); -} - -static __inline pt_entry_t -pte_load_store(pt_entry_t *ptep, pt_entry_t v) -{ - pt_entry_t r; - - r = *ptep; - __asm __volatile( - "1:\n" - "\tlock; cmpxchg8b %1\n" - "\tjnz 1b" - : "+A" (r) - : "m" (*ptep), "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32))); - return (r); -} - -#define pte_load_clear(ptep) pte_load_store((ptep), (pt_entry_t)0ULL) - -#else /* PAE */ - static __inline pt_entry_t pte_load(pt_entry_t *ptep) { @@ -258,9 +209,7 @@ pte_load_store(pt_entry_t *ptep, pt_entry_t pte) return (r); } -#define pte_load_clear(pte) atomic_readandclear_int(pte) - -#endif /* PAE */ +#define pte_load_clear(pte) atomic_readandclear_long(pte) #define pte_clear(ptep) pte_load_store((ptep), (pt_entry_t)0ULL) #define pte_store(ptep, pte) pte_load_store((ptep), (pt_entry_t)pte) @@ -283,13 +232,11 @@ struct pmap { pd_entry_t *pm_pdir; /* KVA of page directory */ vm_object_t pm_pteobj; /* Container for pte's */ TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ - u_int pm_active; /* active on cpus */ + u_long pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statistics */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ -#ifdef PAE - pdpt_entry_t *pm_pdpt; /* KVA of page director pointer - table */ -#endif + pdp_entry_t *pm_pdp; /* KVA of level 3 page table */ + pml4_entry_t 
*pm_pml4; /* KVA of level 4 page table */ }; #define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list)) @@ -341,7 +288,6 @@ void pmap_kremove(vm_offset_t); void *pmap_mapdev(vm_paddr_t, vm_size_t); void pmap_unmapdev(vm_offset_t, vm_size_t); pt_entry_t *pmap_pte_quick(pmap_t, vm_offset_t) __pure2; -void pmap_set_opt(void); void pmap_invalidate_page(pmap_t, vm_offset_t); void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t); void pmap_invalidate_all(pmap_t); diff --git a/sys/amd64/include/proc.h b/sys/amd64/include/proc.h index bff037a..cd54c24 100644 --- a/sys/amd64/include/proc.h +++ b/sys/amd64/include/proc.h @@ -37,35 +37,14 @@ #ifndef _MACHINE_PROC_H_ #define _MACHINE_PROC_H_ -#include <machine/segments.h> - -struct proc_ldt { - caddr_t ldt_base; - int ldt_len; - int ldt_refcnt; - u_long ldt_active; - struct segment_descriptor ldt_sd; -}; - /* - * Machine-dependent part of the proc structure for i386. + * Machine-dependent part of the proc structure for AMD64. */ struct mdthread { -#ifdef lint - int dummy; -#endif + register_t md_savecrit; }; struct mdproc { - struct proc_ldt *md_ldt; /* (j) per-process ldt */ }; -#ifdef _KERNEL - -void set_user_ldt(struct mdproc *); -struct proc_ldt *user_ldt_alloc(struct mdproc *, int); -void user_ldt_free(struct thread *); - -#endif /* _KERNEL */ - #endif /* !_MACHINE_PROC_H_ */ diff --git a/sys/amd64/include/profile.h b/sys/amd64/include/profile.h index 0df3195..7330ae0 100644 --- a/sys/amd64/include/profile.h +++ b/sys/amd64/include/profile.h @@ -65,14 +65,14 @@ #define MCOUNT_DECL(s) u_long s; #ifdef SMP extern int mcount_lock; -#define MCOUNT_ENTER(s) { s = read_eflags(); disable_intr(); \ +#define MCOUNT_ENTER(s) { s = read_rflags(); disable_intr(); \ while (!atomic_cmpset_acq_int(&mcount_lock, 0, 1)) \ /* nothing */ ; } #define MCOUNT_EXIT(s) { atomic_store_rel_int(&mcount_lock, 0); \ - write_eflags(s); } + write_rflags(s); } #else -#define MCOUNT_ENTER(s) { s = read_eflags(); disable_intr(); } -#define 
MCOUNT_EXIT(s) (write_eflags(s)) +#define MCOUNT_ENTER(s) { s = read_rflags(); disable_intr(); } +#define MCOUNT_EXIT(s) (write_rflags(s)) #endif #endif /* GUPROF */ @@ -94,14 +94,14 @@ mcount() \ * \ * selfpc = pc pushed by call to mcount \ */ \ - asm("movl 4(%%ebp),%0" : "=r" (selfpc)); \ + asm("movq 8(%%rbp),%0" : "=r" (selfpc)); \ /* \ * frompc = pc pushed by call to mcount's caller. \ * The caller's stack frame has already been built, so %ebp is \ * the caller's frame pointer. The caller's raddr is in the \ * caller's frame following the caller's caller's frame pointer.\ */ \ - asm("movl (%%ebp),%0" : "=r" (frompc)); \ + asm("movq (%%rbp),%0" : "=r" (frompc)); \ frompc = ((uintfptr_t *)frompc)[1]; \ _mcount(frompc, selfpc); \ } @@ -113,7 +113,7 @@ mcount() \ } #endif /* __GNUC__ */ -typedef unsigned int uintfptr_t; +typedef unsigned long uintfptr_t; #endif /* _KERNEL */ diff --git a/sys/amd64/include/psl.h b/sys/amd64/include/psl.h index 302d469..04d83ff 100644 --- a/sys/amd64/include/psl.h +++ b/sys/amd64/include/psl.h @@ -55,10 +55,10 @@ #define PSL_IOPL 0x00003000 /* i/o privilege level */ #define PSL_NT 0x00004000 /* nested task bit */ #define PSL_RF 0x00010000 /* resume flag bit */ -#define PSL_VM 0x00020000 /* virtual 8086 mode bit */ +/* #define PSL_VM 0x00020000 */ /* virtual 8086 mode bit */ #define PSL_AC 0x00040000 /* alignment checking */ -#define PSL_VIF 0x00080000 /* virtual interrupt enable */ -#define PSL_VIP 0x00100000 /* virtual interrupt pending */ +/* #define PSL_VIF 0x00080000 */ /* virtual interrupt enable */ +/* #define PSL_VIP 0x00100000 */ /* virtual interrupt pending */ #define PSL_ID 0x00200000 /* identification bit */ /* diff --git a/sys/amd64/include/reg.h b/sys/amd64/include/reg.h index 56f0f91..334dd37 100644 --- a/sys/amd64/include/reg.h +++ b/sys/amd64/include/reg.h @@ -41,66 +41,31 @@ #define _MACHINE_REG_H_ /* - * Indices for registers in `struct trapframe' and `struct regs'. - * - * This interface is deprecated. 
In the kernel, it is only used in FPU - * emulators to convert from register numbers encoded in instructions to - * register values. Everything else just accesses the relevant struct - * members. In userland, debuggers tend to abuse this interface since - * they don't understand that `struct regs' is a struct. I hope they have - * stopped accessing the registers in the trap frame via PT_{READ,WRITE}_U - * and we can stop supporting the user area soon. - */ -#define tFS (0) -#define tES (1) -#define tDS (2) -#define tEDI (3) -#define tESI (4) -#define tEBP (5) -#define tISP (6) -#define tEBX (7) -#define tEDX (8) -#define tECX (9) -#define tEAX (10) -#define tERR (12) -#define tEIP (13) -#define tCS (14) -#define tEFLAGS (15) -#define tESP (16) -#define tSS (17) - -/* - * Indices for registers in `struct regs' only. - * - * Some registers live in the pcb and are only in an "array" with the - * other registers in application interfaces that copy all the registers - * to or from a `struct regs'. - */ -#define tGS (18) - -/* * Register set accessible via /proc/$pid/regs and PT_{SET,GET}REGS. 
*/ struct reg { - unsigned int r_fs; - unsigned int r_es; - unsigned int r_ds; - unsigned int r_edi; - unsigned int r_esi; - unsigned int r_ebp; - unsigned int r_isp; - unsigned int r_ebx; - unsigned int r_edx; - unsigned int r_ecx; - unsigned int r_eax; - unsigned int r_trapno; - unsigned int r_err; - unsigned int r_eip; - unsigned int r_cs; - unsigned int r_eflags; - unsigned int r_esp; - unsigned int r_ss; - unsigned int r_gs; + register_t r_r15; + register_t r_r14; + register_t r_r13; + register_t r_r12; + register_t r_r11; + register_t r_r10; + register_t r_r9; + register_t r_r8; + register_t r_rdi; + register_t r_rsi; + register_t r_rbp; + register_t r_rbx; + register_t r_rdx; + register_t r_rcx; + register_t r_rax; + register_t r_trapno; + register_t r_err; + register_t r_rip; + register_t r_cs; + register_t r_rflags; + register_t r_rsp; + register_t r_ss; }; /* @@ -112,30 +77,15 @@ struct fpreg { * simplified struct. This may be too much detail. Perhaps * an array of unsigned longs is best. */ - unsigned long fpr_env[7]; - unsigned char fpr_acc[8][10]; - unsigned long fpr_ex_sw; - unsigned char fpr_pad[64]; + unsigned long fpr_env[4]; + unsigned char fpr_acc[8][16]; + unsigned char fpr_xacc[16][16]; }; -/* - * Register set accessible via /proc/$pid/dbregs. - */ struct dbreg { - unsigned int dr[8]; /* debug registers */ - /* Index 0-3: debug address registers */ - /* Index 4-5: reserved */ - /* Index 6: debug status */ - /* Index 7: debug control */ + unsigned long grrr; }; -#define DBREG_DR7_EXEC 0x00 /* break on execute */ -#define DBREG_DR7_WRONLY 0x01 /* break on write */ -#define DBREG_DR7_RDWR 0x03 /* break on read or write */ -#define DBREG_DRX(d,x) ((d)->dr[(x)]) /* reference dr0 - dr7 by - register number */ - - #ifdef _KERNEL /* * XXX these interfaces are MI, so they should be declared in a MI place. 
diff --git a/sys/amd64/include/segments.h b/sys/amd64/include/segments.h index 95117da..0f9a59b 100644 --- a/sys/amd64/include/segments.h +++ b/sys/amd64/include/segments.h @@ -42,8 +42,7 @@ #define _MACHINE_SEGMENTS_H_ /* - * 386 Segmentation Data Structures and definitions - * William F. Jolitz (william@ernie.berkeley.edu) 6/20/1989 + * AMD64 Segmentation Data Structures and definitions */ /* @@ -60,60 +59,66 @@ #define GSEL(s,r) (((s)<<3) | r) /* a global selector */ /* - * Memory and System segment descriptors + * User segment descriptors (%cs, %ds etc for compatability apps. 64 bit wide) + * For long-mode apps, %cs only has the conforming bit in sd_type, the sd_dpl, + * sd_p, sd_l and sd_def32 which must be zero). %ds only has sd_p. */ -struct segment_descriptor { - unsigned sd_lolimit:16 ; /* segment extent (lsb) */ - unsigned sd_lobase:24 __packed; /* segment base address (lsb) */ - unsigned sd_type:5 ; /* segment type */ - unsigned sd_dpl:2 ; /* segment descriptor priority level */ - unsigned sd_p:1 ; /* segment descriptor present */ - unsigned sd_hilimit:4 ; /* segment extent (msb) */ - unsigned sd_xx:2 ; /* unused */ - unsigned sd_def32:1 ; /* default 32 vs 16 bit size */ - unsigned sd_gran:1 ; /* limit granularity (byte/page units)*/ - unsigned sd_hibase:8 ; /* segment base address (msb) */ -} ; +struct user_segment_descriptor { + u_int64_t sd_lolimit:16; /* segment extent (lsb) */ + u_int64_t sd_lobase:24; /* segment base address (lsb) */ + u_int64_t sd_type:5; /* segment type */ + u_int64_t sd_dpl:2; /* segment descriptor priority level */ + u_int64_t sd_p:1; /* segment descriptor present */ + u_int64_t sd_hilimit:4; /* segment extent (msb) */ + u_int64_t sd_xx:1; /* unused */ + u_int64_t sd_long:1; /* long mode (cs only) */ + u_int64_t sd_def32:1; /* default 32 vs 16 bit size */ + u_int64_t sd_gran:1; /* limit granularity (byte/page units)*/ + u_int64_t sd_hibase:8; /* segment base address (msb) */ +} __packed; /* - * Gate descriptors (e.g. 
indirect descriptors) + * System segment descriptors (128 bit wide) */ -struct gate_descriptor { - unsigned gd_looffset:16 ; /* gate offset (lsb) */ - unsigned gd_selector:16 ; /* gate segment selector */ - unsigned gd_stkcpy:5 ; /* number of stack wds to cpy */ - unsigned gd_xx:3 ; /* unused */ - unsigned gd_type:5 ; /* segment type */ - unsigned gd_dpl:2 ; /* segment descriptor priority level */ - unsigned gd_p:1 ; /* segment descriptor present */ - unsigned gd_hioffset:16 ; /* gate offset (msb) */ -} ; +struct system_segment_descriptor { + u_int64_t sd_lolimit:16; /* segment extent (lsb) */ + u_int64_t sd_lobase:24; /* segment base address (lsb) */ + u_int64_t sd_type:5; /* segment type */ + u_int64_t sd_dpl:2; /* segment descriptor priority level */ + u_int64_t sd_p:1; /* segment descriptor present */ + u_int64_t sd_hilimit:4; /* segment extent (msb) */ + u_int64_t sd_xx0:3; /* unused */ + u_int64_t sd_gran:1; /* limit granularity (byte/page units)*/ + u_int64_t sd_hibase:40 __packed;/* segment base address (msb) */ + u_int64_t sd_xx1:8; + u_int64_t sd_mbz:5; /* MUST be zero */ + u_int64_t sd_xx2:19; +} __packed; /* - * Generic descriptor + * Gate descriptors (e.g. indirect descriptors, trap, interrupt etc. 128 bit) + * Only interrupt and trap gates have gd_ist. 
*/ -union descriptor { - struct segment_descriptor sd; - struct gate_descriptor gd; -}; +struct gate_descriptor { + u_int64_t gd_looffset:16; /* gate offset (lsb) */ + u_int64_t gd_selector:16; /* gate segment selector */ + u_int64_t gd_ist:3; /* IST table index */ + u_int64_t gd_xx:5; /* unused */ + u_int64_t gd_type:5; /* segment type */ + u_int64_t gd_dpl:2; /* segment descriptor priority level */ + u_int64_t gd_p:1; /* segment descriptor present */ + u_int64_t gd_hioffset:48 __packed; /* gate offset (msb) */ + u_int64_t sd_xx1:32; +} __packed; /* system segments and gate types */ #define SDT_SYSNULL 0 /* system null */ -#define SDT_SYS286TSS 1 /* system 286 TSS available */ -#define SDT_SYSLDT 2 /* system local descriptor table */ -#define SDT_SYS286BSY 3 /* system 286 TSS busy */ -#define SDT_SYS286CGT 4 /* system 286 call gate */ -#define SDT_SYSTASKGT 5 /* system task gate */ -#define SDT_SYS286IGT 6 /* system 286 interrupt gate */ -#define SDT_SYS286TGT 7 /* system 286 trap gate */ -#define SDT_SYSNULL2 8 /* system null again */ -#define SDT_SYS386TSS 9 /* system 386 TSS available */ -#define SDT_SYSNULL3 10 /* system null again */ -#define SDT_SYS386BSY 11 /* system 386 TSS busy */ -#define SDT_SYS386CGT 12 /* system 386 call gate */ -#define SDT_SYSNULL4 13 /* system null again */ -#define SDT_SYS386IGT 14 /* system 386 interrupt gate */ -#define SDT_SYS386TGT 15 /* system 386 trap gate */ +#define SDT_SYSLDT 2 /* system 64 bit local descriptor table */ +#define SDT_SYSTSS 9 /* system available 64 bit TSS */ +#define SDT_SYSBSY 11 /* system busy 64 bit TSS */ +#define SDT_SYSCGT 12 /* system 64 bit call gate */ +#define SDT_SYSIGT 14 /* system 64 bit interrupt gate */ +#define SDT_SYSTGT 15 /* system 64 bit trap gate */ /* memory segment types */ #define SDT_MEMRO 16 /* memory read only */ @@ -139,45 +144,29 @@ union descriptor { * when needed to be used by the 386 hardware */ -struct soft_segment_descriptor { - unsigned ssd_base ; /* segment base address 
*/ - unsigned ssd_limit ; /* segment extent */ - unsigned ssd_type:5 ; /* segment type */ - unsigned ssd_dpl:2 ; /* segment descriptor priority level */ - unsigned ssd_p:1 ; /* segment descriptor present */ - unsigned ssd_xx:4 ; /* unused */ - unsigned ssd_xx1:2 ; /* unused */ - unsigned ssd_def32:1 ; /* default 32 vs 16 bit size */ - unsigned ssd_gran:1 ; /* limit granularity (byte/page units)*/ -}; +struct soft_segment_descriptor { + unsigned long ssd_base; /* segment base address */ + unsigned long ssd_limit; /* segment extent */ + unsigned long ssd_type:5; /* segment type */ + unsigned long ssd_dpl:2; /* segment descriptor priority level */ + unsigned long ssd_p:1; /* segment descriptor present */ + unsigned long ssd_long:1; /* long mode (for %cs) */ + unsigned long ssd_def32:1; /* default 32 vs 16 bit size */ + unsigned long ssd_gran:1; /* limit granularity (byte/page units)*/ +} __packed; /* * region descriptors, used to load gdt/idt tables before segments yet exist. */ struct region_descriptor { - unsigned rd_limit:16; /* segment extent */ - unsigned rd_base:32 __packed; /* base address */ -}; - -/* - * Segment Protection Exception code bits - */ - -#define SEGEX_EXT 0x01 /* recursive or externally induced */ -#define SEGEX_IDT 0x02 /* interrupt descriptor table */ -#define SEGEX_TI 0x04 /* local descriptor table */ - /* other bits are affected descriptor index */ -#define SEGEX_IDX(s) (((s)>>3)&0x1fff) + unsigned long rd_limit:16; /* segment extent */ + unsigned long rd_base:64 __packed; /* base address */ +} __packed; /* * Size of IDT table */ - -#if defined(SMP) || defined(APIC_IO) -#define NIDT 256 /* we use them all */ -#else #define NIDT 129 /* 32 reserved, 16 h/w, 0 s/w, linux's 0x80 */ -#endif /* SMP || APIC_IO */ #define NRSVIDT 32 /* reserved entries for cpu exceptions */ /* @@ -186,47 +175,25 @@ struct region_descriptor { #define GNULL_SEL 0 /* Null Descriptor */ #define GCODE_SEL 1 /* Kernel Code Descriptor */ #define GDATA_SEL 2 /* Kernel Data 
Descriptor */ -#define GPRIV_SEL 3 /* SMP Per-Processor Private Data */ -#define GPROC0_SEL 4 /* Task state process slot zero and up */ -#define GLDT_SEL 5 /* LDT - eventually one per process */ -#define GUSERLDT_SEL 6 /* User LDT */ -#define GTGATE_SEL 7 /* Process task switch gate */ -#define GBIOSLOWMEM_SEL 8 /* BIOS low memory access (must be entry 8) */ -#define GPANIC_SEL 9 /* Task state to consider panic from */ -#define GBIOSCODE32_SEL 10 /* BIOS interface (32bit Code) */ -#define GBIOSCODE16_SEL 11 /* BIOS interface (16bit Code) */ -#define GBIOSDATA_SEL 12 /* BIOS interface (Data) */ -#define GBIOSUTIL_SEL 13 /* BIOS interface (Utility) */ -#define GBIOSARGS_SEL 14 /* BIOS interface (Arguments) */ - -#define NGDT 15 - -/* - * Entries in the Local Descriptor Table (LDT) - */ -#define LSYS5CALLS_SEL 0 /* forced by intel BCS */ -#define LSYS5SIGR_SEL 1 -#define L43BSDCALLS_SEL 2 /* notyet */ -#define LUCODE_SEL 3 -#define LSOL26CALLS_SEL 4 /* Solaris >= 2.6 system call gate */ -#define LUDATA_SEL 5 -/* separate stack, es,fs,gs sels ? 
*/ -/* #define LPOSIXCALLS_SEL 5*/ /* notyet */ -#define LBSDICALLS_SEL 16 /* BSDI system call gate */ -#define NLDT (LBSDICALLS_SEL + 1) +#define GUCODE32_SEL 3 /* User 32 bit code Descriptor */ +#define GUDATA_SEL 4 /* User 32/64 bit Data Descriptor */ +#define GUCODE_SEL 5 /* User 64 bit Code Descriptor */ +#define GPROC0_SEL 6 /* TSS for entering kernel etc */ +/* slot 6 is second half of GPROC0_SEL */ +#define NGDT 8 #ifdef _KERNEL -extern int _default_ldt; -extern union descriptor gdt[]; +extern struct user_segment_descriptor gdt[]; extern struct soft_segment_descriptor gdt_segs[]; extern struct gate_descriptor *idt; -extern union descriptor ldt[NLDT]; void lgdt(struct region_descriptor *rdp); -void sdtossd(struct segment_descriptor *sdp, +void sdtossd(struct user_segment_descriptor *sdp, struct soft_segment_descriptor *ssdp); void ssdtosd(struct soft_segment_descriptor *ssdp, - struct segment_descriptor *sdp); + struct user_segment_descriptor *sdp); +void ssdtosyssd(struct soft_segment_descriptor *ssdp, + struct system_segment_descriptor *sdp); #endif /* _KERNEL */ #endif /* !_MACHINE_SEGMENTS_H_ */ diff --git a/sys/amd64/include/setjmp.h b/sys/amd64/include/setjmp.h index e6e0371..a6a9399 100644 --- a/sys/amd64/include/setjmp.h +++ b/sys/amd64/include/setjmp.h @@ -37,7 +37,7 @@ #include <sys/cdefs.h> -#define _JBLEN 11 /* Size of the jmp_buf on x86. */ +#define _JBLEN 22 /* Size of the jmp_buf on x86. */ /* * jmp_buf and sigjmp_buf are encapsulated in different structs to force diff --git a/sys/amd64/include/sigframe.h b/sys/amd64/include/sigframe.h index 98be731..d104507 100644 --- a/sys/amd64/include/sigframe.h +++ b/sys/amd64/include/sigframe.h @@ -34,74 +34,7 @@ /* * Signal frames, arguments passed to application signal handlers. */ -#ifdef _KERNEL -#ifdef COMPAT_43 -struct osigframe { - /* - * The first four members may be used by applications. 
- */ - - register_t sf_signum; - - /* - * Either 'int' for old-style FreeBSD handler or 'siginfo_t *' - * pointing to sf_siginfo for SA_SIGINFO handlers. - */ - register_t sf_arg2; - - /* Points to sf_siginfo.si_sc. */ - register_t sf_scp; - - register_t sf_addr; - - /* - * The following arguments are not constrained by the - * function call protocol. - * Applications are not supposed to access these members, - * except using the pointers we provide in the first three - * arguments. - */ - - union { - __osiginfohandler_t *sf_action; - __sighandler_t *sf_handler; - } sf_ahu; - - /* In the SA_SIGINFO case, sf_arg2 points here. */ - osiginfo_t sf_siginfo; -}; -#endif -#ifdef COMPAT_FREEBSD4 -/* FreeBSD 4.x */ -struct sigframe4 { - register_t sf_signum; - register_t sf_siginfo; /* code or pointer to sf_si */ - register_t sf_ucontext; /* points to sf_uc */ - register_t sf_addr; /* undocumented 4th arg */ - - union { - __siginfohandler_t *sf_action; - __sighandler_t *sf_handler; - } sf_ahu; - struct ucontext4 sf_uc; /* = *sf_ucontext */ - siginfo_t sf_si; /* = *sf_siginfo (SA_SIGINFO case) */ -}; -#endif -#endif - struct sigframe { - /* - * The first four members may be used by applications. - * - * NOTE: The 4th argument is undocumented, ill commented - * on and seems to be somewhat BSD "standard". Handlers - * installed with sigvec may be using it. 
- */ - register_t sf_signum; - register_t sf_siginfo; /* code or pointer to sf_si */ - register_t sf_ucontext; /* points to sf_uc */ - register_t sf_addr; /* undocumented 4th arg */ - union { __siginfohandler_t *sf_action; __sighandler_t *sf_handler; diff --git a/sys/amd64/include/signal.h b/sys/amd64/include/signal.h index fb2d82c..baa0c4e 100644 --- a/sys/amd64/include/signal.h +++ b/sys/amd64/include/signal.h @@ -44,7 +44,7 @@ * Machine-dependent signal definitions */ -typedef int sig_atomic_t; +typedef long sig_atomic_t; #if __XSI_VISIBLE /* @@ -60,7 +60,6 @@ typedef int sig_atomic_t; /* * Only the kernel should need these old type definitions. */ -#if defined(_KERNEL) && defined(COMPAT_43) /* * Information pushed on stack when a signal is delivered. * This is used by the kernel to restore state following @@ -68,75 +67,46 @@ typedef int sig_atomic_t; * to the handler to allow it to restore state properly if * a non-standard exit is performed. */ -struct osigcontext { - int sc_onstack; /* sigstack state to restore */ - osigset_t sc_mask; /* signal mask to restore */ - int sc_esp; /* machine state follows: */ - int sc_ebp; - int sc_isp; - int sc_eip; - int sc_efl; - int sc_es; - int sc_ds; - int sc_cs; - int sc_ss; - int sc_edi; - int sc_esi; - int sc_ebx; - int sc_edx; - int sc_ecx; - int sc_eax; - int sc_gs; - int sc_fs; - int sc_trapno; - int sc_err; -}; -#endif - /* * The sequence of the fields/registers in struct sigcontext should match * those in mcontext_t. 
*/ struct sigcontext { struct __sigset sc_mask; /* signal mask to restore */ - int sc_onstack; /* sigstack state to restore */ - int sc_gs; /* machine state (struct trapframe) */ - int sc_fs; - int sc_es; - int sc_ds; - int sc_edi; - int sc_esi; - int sc_ebp; - int sc_isp; - int sc_ebx; - int sc_edx; - int sc_ecx; - int sc_eax; - int sc_trapno; - int sc_err; - int sc_eip; - int sc_cs; - int sc_efl; - int sc_esp; - int sc_ss; - int sc_len; /* sizeof(mcontext_t) */ + long sc_onstack; /* sigstack state to restore */ + long sc_r15; /* machine state (struct trapframe) */ + long sc_r14; + long sc_r13; + long sc_r12; + long sc_r11; + long sc_r10; + long sc_r9; + long sc_r8; + long sc_rdi; + long sc_rsi; + long sc_rbp; + long sc_rbx; + long sc_rdx; + long sc_rcx; + long sc_rax; + long sc_trapno; + long sc_err; + long sc_rip; + long sc_cs; + long sc_rflags; + long sc_rsp; + long sc_ss; + long sc_len; /* sizeof(mcontext_t) */ /* * XXX - See <machine/ucontext.h> and <machine/npx.h> for * the following fields. */ - int sc_fpformat; - int sc_ownedfp; - int sc_spare1[1]; - int sc_fpstate[128] __aligned(16); - int sc_spare2[8]; + long sc_fpformat; + long sc_ownedfp; + long sc_spare1[1]; + long sc_fpstate[128] __aligned(16); + long sc_spare2[8]; }; - -#define sc_sp sc_esp -#define sc_fp sc_ebp -#define sc_pc sc_eip -#define sc_ps sc_efl -#define sc_eflags sc_efl - #endif /* __BSD_VISIBLE */ #endif /* !_MACHINE_SIGNAL_H_ */ diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h index 6467365..3d8d117 100644 --- a/sys/amd64/include/smp.h +++ b/sys/amd64/include/smp.h @@ -13,150 +13,4 @@ #ifndef _MACHINE_SMP_H_ #define _MACHINE_SMP_H_ -#ifdef _KERNEL - -#if defined(SMP) && defined(I386_CPU) && !defined(COMPILING_LINT) -#error SMP not supported with I386_CPU -#endif -#if defined(SMP) && !defined(APIC_IO) -# error APIC_IO required for SMP, add "options APIC_IO" to your config file. 
-#endif /* SMP && !APIC_IO */ -#if defined(SMP) && defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT) -#error SMP not supported with CPU_DISABLE_CMPXCHG -#endif - -#if defined(SMP) || defined(APIC_IO) - -#ifndef LOCORE - -/* - * For sending values to POST displays. - * XXX FIXME: where does this really belong, isa.h/isa.c perhaps? - */ -extern int current_postcode; /** XXX currently in mp_machdep.c */ -#define POSTCODE(X) current_postcode = (X), \ - outb(0x80, current_postcode) -#define POSTCODE_LO(X) current_postcode &= 0xf0, \ - current_postcode |= ((X) & 0x0f), \ - outb(0x80, current_postcode) -#define POSTCODE_HI(X) current_postcode &= 0x0f, \ - current_postcode |= (((X) << 4) & 0xf0), \ - outb(0x80, current_postcode) - - -#include <sys/bus.h> /* XXX */ -#include <machine/apic.h> -#include <machine/frame.h> -#include <i386/isa/icu.h> -#include <i386/isa/intr_machdep.h> - -/* - * Interprocessor interrupts for SMP. - */ -#define IPI_INVLTLB XINVLTLB_OFFSET -#define IPI_INVLPG XINVLPG_OFFSET -#define IPI_INVLRNG XINVLRNG_OFFSET -#define IPI_LAZYPMAP XLAZYPMAP_OFFSET -#define IPI_RENDEZVOUS XRENDEZVOUS_OFFSET -#define IPI_AST XCPUAST_OFFSET -#define IPI_STOP XCPUSTOP_OFFSET -#define IPI_HARDCLOCK XHARDCLOCK_OFFSET -#define IPI_STATCLOCK XSTATCLOCK_OFFSET - -/* global data in mpboot.s */ -extern int bootMP_size; - -/* functions in mpboot.s */ -void bootMP(void); - -/* global data in mp_machdep.c */ -extern int bsp_apic_ready; -extern int mp_naps; -extern int mp_nbusses; -extern int mp_napics; -extern int mp_picmode; -extern int boot_cpu_id; -extern vm_offset_t cpu_apic_address; -extern vm_offset_t io_apic_address[]; -extern u_int32_t cpu_apic_versions[]; -extern u_int32_t *io_apic_versions; -extern int cpu_num_to_apic_id[]; -extern int io_num_to_apic_id[]; -extern int apic_id_to_logical[]; -#define APIC_INTMAPSIZE 32 -struct apic_intmapinfo { - int ioapic; - int int_pin; - volatile void *apic_address; - int redirindex; -}; -extern struct apic_intmapinfo 
int_to_apicintpin[]; -extern struct pcb stoppcbs[]; - -/* functions in mp_machdep.c */ -void i386_mp_probe(void); -u_int mp_bootaddress(u_int); -u_int isa_apic_mask(u_int); -int isa_apic_irq(int); -int pci_apic_irq(int, int, int); -int apic_irq(int, int); -int next_apic_irq(int); -int undirect_isa_irq(int); -int undirect_pci_irq(int); -int apic_bus_type(int); -int apic_src_bus_id(int, int); -int apic_src_bus_irq(int, int); -int apic_int_type(int, int); -int apic_trigger(int, int); -int apic_polarity(int, int); -int mp_grab_cpu_hlt(void); -void assign_apic_irq(int apic, int intpin, int irq); -void revoke_apic_irq(int irq); -void bsp_apic_configure(void); -void init_secondary(void); -void forward_statclock(void); -void forwarded_statclock(struct clockframe frame); -void forward_hardclock(void); -void forwarded_hardclock(struct clockframe frame); -void ipi_selected(u_int cpus, u_int ipi); -void ipi_all(u_int ipi); -void ipi_all_but_self(u_int ipi); -void ipi_self(u_int ipi); -#ifdef APIC_INTR_REORDER -void set_lapic_isrloc(int, int); -#endif /* APIC_INTR_REORDER */ -void smp_invlpg(vm_offset_t addr); -void smp_masked_invlpg(u_int mask, vm_offset_t addr); -void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva); -void smp_masked_invlpg_range(u_int mask, vm_offset_t startva, - vm_offset_t endva); -void smp_invltlb(void); -void smp_masked_invltlb(u_int mask); - -/* global data in mpapic.c */ -extern volatile lapic_t lapic; -extern volatile ioapic_t **ioapic; - -/* functions in mpapic.c */ -void apic_dump(char*); -void apic_initialize(void); -void imen_dump(void); -int apic_ipi(int, int, int); -int selected_apic_ipi(u_int, int, int); -int io_apic_setup(int); -void io_apic_setup_intpin(int, int); -void io_apic_set_id(int, int); -int io_apic_get_id(int); -int ext_int_setup(int, int); - -void set_apic_timer(int); -int read_apic_timer(void); -void u_sleep(int); -u_int io_apic_read(int, int); -void io_apic_write(int, int, u_int); - -#endif /* !LOCORE */ -#endif /* SMP && 
!APIC_IO */ - -#endif /* _KERNEL */ #endif /* _MACHINE_SMP_H_ */ diff --git a/sys/amd64/include/specialreg.h b/sys/amd64/include/specialreg.h index 1c6ebe1..d66238c 100644 --- a/sys/amd64/include/specialreg.h +++ b/sys/amd64/include/specialreg.h @@ -75,6 +75,14 @@ #define CR4_XMM 0x00000400 /* enable SIMD/MMX2 to use except 16 */ /* + * Bits in AMD64 special registers. EFER is 64 bits wide. + */ +#define EFER_SCE 0x000000001 /* System Call Extensions (R/W) */ +#define EFER_LME 0x000000100 /* Long mode enable (R/W) */ +#define EFER_LMA 0x000000400 /* Long mode active (R) */ +#define EFER_NXE 0x000000800 /* PTE No-Execute bit enable (R/W) */ + +/* * CPUID instruction features register */ #define CPUID_FPU 0x00000001 @@ -132,6 +140,9 @@ #define MSR_PERFCTR0 0x0c1 #define MSR_PERFCTR1 0x0c2 #define MSR_MTRRcap 0x0fe +#define MSR_SYSENTER_CS 0x174 +#define MSR_SYSENTER_ESP 0x175 +#define MSR_SYSENTER_EIP 0x176 #define MSR_MCG_CAP 0x179 #define MSR_MCG_STATUS 0x17a #define MSR_MCG_CTL 0x17b @@ -147,6 +158,7 @@ #define MSR_MTRR64kBase 0x250 #define MSR_MTRR16kBase 0x258 #define MSR_MTRR4kBase 0x268 +#define MSR_PAT 0x277 #define MSR_MTRRdefType 0x2ff #define MSR_MC0_CTL 0x400 #define MSR_MC0_STATUS 0x401 @@ -176,69 +188,6 @@ #define MTRR_N16K 16 #define MTRR_N4K 64 -/* - * Cyrix configuration registers, accessible as IO ports. 
- */ -#define CCR0 0xc0 /* Configuration control register 0 */ -#define CCR0_NC0 0x01 /* First 64K of each 1M memory region is - non-cacheable */ -#define CCR0_NC1 0x02 /* 640K-1M region is non-cacheable */ -#define CCR0_A20M 0x04 /* Enables A20M# input pin */ -#define CCR0_KEN 0x08 /* Enables KEN# input pin */ -#define CCR0_FLUSH 0x10 /* Enables FLUSH# input pin */ -#define CCR0_BARB 0x20 /* Flushes internal cache when entering hold - state */ -#define CCR0_CO 0x40 /* Cache org: 1=direct mapped, 0=2x set - assoc */ -#define CCR0_SUSPEND 0x80 /* Enables SUSP# and SUSPA# pins */ - -#define CCR1 0xc1 /* Configuration control register 1 */ -#define CCR1_RPL 0x01 /* Enables RPLSET and RPLVAL# pins */ -#define CCR1_SMI 0x02 /* Enables SMM pins */ -#define CCR1_SMAC 0x04 /* System management memory access */ -#define CCR1_MMAC 0x08 /* Main memory access */ -#define CCR1_NO_LOCK 0x10 /* Negate LOCK# */ -#define CCR1_SM3 0x80 /* SMM address space address region 3 */ - -#define CCR2 0xc2 -#define CCR2_WB 0x02 /* Enables WB cache interface pins */ -#define CCR2_SADS 0x02 /* Slow ADS */ -#define CCR2_LOCK_NW 0x04 /* LOCK NW Bit */ -#define CCR2_SUSP_HLT 0x08 /* Suspend on HALT */ -#define CCR2_WT1 0x10 /* WT region 1 */ -#define CCR2_WPR1 0x10 /* Write-protect region 1 */ -#define CCR2_BARB 0x20 /* Flushes write-back cache when entering - hold state. 
*/ -#define CCR2_BWRT 0x40 /* Enables burst write cycles */ -#define CCR2_USE_SUSP 0x80 /* Enables suspend pins */ - -#define CCR3 0xc3 -#define CCR3_SMILOCK 0x01 /* SMM register lock */ -#define CCR3_NMI 0x02 /* Enables NMI during SMM */ -#define CCR3_LINBRST 0x04 /* Linear address burst cycles */ -#define CCR3_SMMMODE 0x08 /* SMM Mode */ -#define CCR3_MAPEN0 0x10 /* Enables Map0 */ -#define CCR3_MAPEN1 0x20 /* Enables Map1 */ -#define CCR3_MAPEN2 0x40 /* Enables Map2 */ -#define CCR3_MAPEN3 0x80 /* Enables Map3 */ - -#define CCR4 0xe8 -#define CCR4_IOMASK 0x07 -#define CCR4_MEM 0x08 /* Enables momory bypassing */ -#define CCR4_DTE 0x10 /* Enables directory table entry cache */ -#define CCR4_FASTFPE 0x20 /* Fast FPU exception */ -#define CCR4_CPUID 0x80 /* Enables CPUID instruction */ - -#define CCR5 0xe9 -#define CCR5_WT_ALLOC 0x01 /* Write-through allocate */ -#define CCR5_SLOP 0x02 /* LOOP instruction slowed down */ -#define CCR5_LBR1 0x10 /* Local bus region 1 */ -#define CCR5_ARREN 0x20 /* Enables ARR region */ - -#define CCR6 0xea - -#define CCR7 0xeb - /* Performance Control Register (5x86 only). 
*/ #define PCR0 0x20 #define PCR0_RSTK 0x01 /* Enables return stack */ @@ -347,21 +296,31 @@ #define AMD_WT_ALLOC_PRE 0x20000 /* programmable range enable */ #define AMD_WT_ALLOC_FRE 0x10000 /* fixed (A0000-FFFFF) range enable */ - -#ifndef LOCORE -static __inline u_char -read_cyrix_reg(u_char reg) -{ - outb(0x22, reg); - return inb(0x23); -} - -static __inline void -write_cyrix_reg(u_char reg, u_char data) -{ - outb(0x22, reg); - outb(0x23, data); -} -#endif +/* X86-64 MSR's */ +#define MSR_EFER 0xc0000080 /* extended features */ +#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target/cs/ss */ +#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target rip */ +#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target rip */ +#define MSR_SF_MASK 0xc0000084 /* syscall flags mask */ +#define MSR_FSBASE 0xc0000100 /* base address of the %fs "segment" */ +#define MSR_GSBASE 0xc0000101 /* base address of the %gs "segment" */ +#define MSR_KGSBASE 0xc0000102 /* base address of the kernel %gs */ +#define MSR_PERFEVSEL0 0xc0010000 +#define MSR_PERFEVSEL1 0xc0010001 +#define MSR_PERFEVSEL2 0xc0010002 +#define MSR_PERFEVSEL3 0xc0010003 +#undef MSR_PERFCTR0 +#undef MSR_PERFCTR1 +#define MSR_PERFCTR0 0xc0010004 +#define MSR_PERFCTR1 0xc0010005 +#define MSR_PERFCTR2 0xc0010006 +#define MSR_PERFCTR3 0xc0010007 +#define MSR_SYSCFG 0xc0010010 +#define MSR_IORRBASE0 0xc0010016 +#define MSR_IORRMASK0 0xc0010017 +#define MSR_IORRBASE1 0xc0010018 +#define MSR_IORRMASK1 0xc0010019 +#define MSR_TOP_MEM 0xc001001a /* boundary for ram below 4G */ +#define MSR_TOP_MEM2 0xc001001d /* boundary for ram above 4G */ #endif /* !_MACHINE_SPECIALREG_H_ */ diff --git a/sys/amd64/include/sysarch.h b/sys/amd64/include/sysarch.h index c5574ff..c33f7b8 100644 --- a/sys/amd64/include/sysarch.h +++ b/sys/amd64/include/sysarch.h @@ -39,46 +39,4 @@ #ifndef _MACHINE_SYSARCH_H_ #define _MACHINE_SYSARCH_H_ -#define I386_GET_LDT 0 -#define I386_SET_LDT 1 - /* I386_IOPL */ -#define I386_GET_IOPERM 3 -#define 
I386_SET_IOPERM 4 - /* xxxxx */ -#define I386_VM86 6 - -struct i386_ldt_args { - unsigned int start; - union descriptor *descs; - unsigned int num; -}; - -struct i386_ioperm_args { - unsigned int start; - unsigned int length; - int enable; -}; - -struct i386_vm86_args { - int sub_op; /* sub-operation to perform */ - char *sub_args; /* args */ -}; - -#ifndef _KERNEL -#include <sys/cdefs.h> - -union descriptor; -struct dbreg; - -__BEGIN_DECLS -int i386_get_ldt(int, union descriptor *, int); -int i386_set_ldt(int, union descriptor *, int); -int i386_get_ioperm(unsigned int, unsigned int *, int *); -int i386_set_ioperm(unsigned int, unsigned int, int); -int i386_vm86(int, void *); -int i386_set_watch(int, unsigned int, int, int, struct dbreg *); -int i386_clr_watch(int, struct dbreg *); -__END_DECLS -#endif - #endif /* !_MACHINE_SYSARCH_H_ */ diff --git a/sys/amd64/include/tss.h b/sys/amd64/include/tss.h index fb2e7a5..aa60ba0 100644 --- a/sys/amd64/include/tss.h +++ b/sys/amd64/include/tss.h @@ -41,37 +41,35 @@ #define _MACHINE_TSS_H_ 1 /* - * Intel 386 Context Data Type + * amd64 Context Data Type + * + * The alignment is pretty messed up here due to reuse of the original 32 bit + * fields. It might be worth trying to set the tss on a +4 byte offset to + * make the 64 bit fields aligned in practice. 
*/ - -struct i386tss { - int tss_link; /* actually 16 bits: top 16 bits must be zero */ - int tss_esp0; /* kernel stack pointer privilege level 0 */ - int tss_ss0; /* actually 16 bits: top 16 bits must be zero */ - int tss_esp1; /* kernel stack pointer privilege level 1 */ - int tss_ss1; /* actually 16 bits: top 16 bits must be zero */ - int tss_esp2; /* kernel stack pointer privilege level 2 */ - int tss_ss2; /* actually 16 bits: top 16 bits must be zero */ - int tss_cr3; /* page table directory */ - int tss_eip; /* program counter */ - int tss_eflags; /* program status longword */ - int tss_eax; - int tss_ecx; - int tss_edx; - int tss_ebx; - int tss_esp; /* user stack pointer */ - int tss_ebp; /* user frame pointer */ - int tss_esi; - int tss_edi; - int tss_es; /* actually 16 bits: top 16 bits must be zero */ - int tss_cs; /* actually 16 bits: top 16 bits must be zero */ - int tss_ss; /* actually 16 bits: top 16 bits must be zero */ - int tss_ds; /* actually 16 bits: top 16 bits must be zero */ - int tss_fs; /* actually 16 bits: top 16 bits must be zero */ - int tss_gs; /* actually 16 bits: top 16 bits must be zero */ - int tss_ldt; /* actually 16 bits: top 16 bits must be zero */ - int tss_ioopt; /* options & io offset bitmap: currently zero */ - /* XXX unimplemented .. 
i/o permission bitmap */ +struct amd64tss { + u_int32_t tss_rsvd0; + u_int64_t tss_rsp0 __packed; /* kernel stack pointer ring 0 */ + u_int64_t tss_rsp1 __packed; /* kernel stack pointer ring 1 */ + u_int64_t tss_rsp2 __packed; /* kernel stack pointer ring 2 */ + u_int32_t tss_rsvd1; + u_int32_t tss_rsvd2; + u_int32_t tss_rsvd3; + u_int64_t tss_ist1 __packed; /* Interrupt stack table 1 */ + u_int64_t tss_ist2 __packed; /* Interrupt stack table 2 */ + u_int64_t tss_ist3 __packed; /* Interrupt stack table 3 */ + u_int64_t tss_ist4 __packed; /* Interrupt stack table 4 */ + u_int64_t tss_ist5 __packed; /* Interrupt stack table 5 */ + u_int64_t tss_ist6 __packed; /* Interrupt stack table 6 */ + u_int64_t tss_ist7 __packed; /* Interrupt stack table 7 */ + u_int32_t tss_rsvd4; + u_int32_t tss_rsvd5; + u_int16_t tss_rsvd6; + u_int16_t tss_iobase; /* io bitmap offset */ }; +#ifdef _KERNEL +extern struct amd64tss common_tss; +#endif + #endif /* _MACHINE_TSS_H_ */ diff --git a/sys/amd64/include/ucontext.h b/sys/amd64/include/ucontext.h index ec3e0ee..1938707 100644 --- a/sys/amd64/include/ucontext.h +++ b/sys/amd64/include/ucontext.h @@ -37,69 +37,44 @@ typedef struct __mcontext { * sigcontext. So that we can support sigcontext * and ucontext_t at the same time. */ - int mc_onstack; /* XXX - sigcontext compat. */ - int mc_gs; /* machine state (struct trapframe) */ - int mc_fs; - int mc_es; - int mc_ds; - int mc_edi; - int mc_esi; - int mc_ebp; - int mc_isp; - int mc_ebx; - int mc_edx; - int mc_ecx; - int mc_eax; - int mc_trapno; - int mc_err; - int mc_eip; - int mc_cs; - int mc_eflags; - int mc_esp; - int mc_ss; + register_t mc_onstack; /* XXX - sigcontext compat. 
*/ + register_t mc_r15; /* machine state (struct trapframe) */ + register_t mc_r14; + register_t mc_r13; + register_t mc_r12; + register_t mc_r11; + register_t mc_r10; + register_t mc_r9; + register_t mc_r8; + register_t mc_rdi; + register_t mc_rsi; + register_t mc_rbp; + register_t mc_rbx; + register_t mc_rdx; + register_t mc_rcx; + register_t mc_rax; + register_t mc_trapno; + register_t mc_err; + register_t mc_rip; + register_t mc_cs; + register_t mc_rflags; + register_t mc_rsp; + register_t mc_ss; - int mc_len; /* sizeof(mcontext_t) */ + long mc_len; /* sizeof(mcontext_t) */ #define _MC_FPFMT_NODEV 0x10000 /* device not present or configured */ -#define _MC_FPFMT_387 0x10001 #define _MC_FPFMT_XMM 0x10002 - int mc_fpformat; + long mc_fpformat; #define _MC_FPOWNED_NONE 0x20000 /* FP state not used */ #define _MC_FPOWNED_FPU 0x20001 /* FP state came from FPU */ #define _MC_FPOWNED_PCB 0x20002 /* FP state came from PCB */ - int mc_ownedfp; - int mc_spare1[1]; /* align next field to 16 bytes */ + long mc_ownedfp; + long mc_spare1[1]; /* align next field to 16 bytes */ /* * See <machine/npx.h> for the internals of mc_fpstate[]. */ - int mc_fpstate[128] __aligned(16); - int mc_spare2[8]; + long mc_fpstate[128] __aligned(16); + long mc_spare2[8]; } mcontext_t; -#if defined(_KERNEL) && defined(COMPAT_FREEBSD4) -struct mcontext4 { - int mc_onstack; /* XXX - sigcontext compat. 
*/ - int mc_gs; /* machine state (struct trapframe) */ - int mc_fs; - int mc_es; - int mc_ds; - int mc_edi; - int mc_esi; - int mc_ebp; - int mc_isp; - int mc_ebx; - int mc_edx; - int mc_ecx; - int mc_eax; - int mc_trapno; - int mc_err; - int mc_eip; - int mc_cs; - int mc_eflags; - int mc_esp; /* machine state */ - int mc_ss; - int mc_fpregs[28]; /* env87 + fpacc87 + u_long */ - int __spare__[17]; -}; -#endif - #endif /* !_MACHINE_UCONTEXT_H_ */ diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h index bb7c7e1..0e9697d 100644 --- a/sys/amd64/include/vmparam.h +++ b/sys/amd64/include/vmparam.h @@ -44,11 +44,9 @@ #define _MACHINE_VMPARAM_H_ 1 /* - * Machine dependent constants for 386. + * Machine dependent constants for AMD64. */ -#define VM_PROT_READ_IS_EXEC /* if you can read -- then you can exec */ - /* * Virtual memory related constants, all in bytes */ @@ -69,8 +67,6 @@ #define SGROWSIZ (128UL*1024) /* amount to grow stack */ #endif -#define USRTEXT (1*PAGE_SIZE) /* base of user text XXX bogus */ - /* * The time for a process to be blocked before being very swappable. 
* This is a number of seconds which the system takes as being a non-trivial @@ -90,23 +86,20 @@ * messy at times, but hey, we'll do anything to save a page :-) */ -#define VM_MAX_KERNEL_ADDRESS VADDR(KPTDI+NKPDE-1, NPTEPG-1) -#define VM_MIN_KERNEL_ADDRESS VADDR(PTDPTDI, PTDPTDI) - -#define KERNBASE VADDR(KPTDI, 0) +#define VM_MAX_KERNEL_ADDRESS VADDR(0, 0, KPTDI+NKPDE-1, NPTEPG-1) +#define VM_MIN_KERNEL_ADDRESS VADDR(0, 0, PTDPTDI, PTDPTDI) -#define KPT_MAX_ADDRESS VADDR(PTDPTDI, KPTDI+NKPT) -#define KPT_MIN_ADDRESS VADDR(PTDPTDI, KPTDI) +#define KERNBASE VADDR(0, 0, KPTDI, 0) -#define UPT_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI) -#define UPT_MIN_ADDRESS VADDR(PTDPTDI, 0) +#define UPT_MAX_ADDRESS VADDR(0, 0, PTDPTDI, PTDPTDI) +#define UPT_MIN_ADDRESS VADDR(0, 0, PTDPTDI, 0) -#define VM_MAXUSER_ADDRESS VADDR(PTDPTDI, 0) +#define VM_MAXUSER_ADDRESS UPT_MIN_ADDRESS #define USRSTACK VM_MAXUSER_ADDRESS -#define VM_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI) -#define VM_MIN_ADDRESS ((vm_offset_t)0) +#define VM_MAX_ADDRESS UPT_MAX_ADDRESS +#define VM_MIN_ADDRESS (0) /* virtual sizes (bytes) for various kernel submaps */ #ifndef VM_KMEM_SIZE diff --git a/sys/amd64/isa/clock.c b/sys/amd64/isa/clock.c index 9e6c489..dbf974e1 100644 --- a/sys/amd64/isa/clock.c +++ b/sys/amd64/isa/clock.c @@ -50,7 +50,6 @@ #include "opt_clock.h" #include "opt_isa.h" -#include "opt_mca.h" #include <sys/param.h> #include <sys/systm.h> @@ -67,39 +66,20 @@ #include <sys/power.h> #include <machine/clock.h> -#include <machine/cputypes.h> #include <machine/frame.h> #include <machine/md_var.h> #include <machine/psl.h> -#ifdef APIC_IO -#include <machine/segments.h> -#endif -#if defined(SMP) || defined(APIC_IO) -#include <machine/smp.h> -#endif /* SMP || APIC_IO */ #include <machine/specialreg.h> -#include <i386/isa/icu.h> -#include <i386/isa/isa.h> +#include <amd64/isa/icu.h> +#include <amd64/isa/isa.h> #include <isa/rtc.h> #ifdef DEV_ISA #include <isa/isavar.h> #endif -#include <i386/isa/timerreg.h> - -#include 
<i386/isa/intr_machdep.h> +#include <amd64/isa/timerreg.h> -#ifdef DEV_MCA -#include <i386/bios/mca_machdep.h> -#endif - -#ifdef APIC_IO -#include <i386/isa/intr_machdep.h> -/* The interrupt triggered by the 8254 (timer) chip */ -int apic_8254_intr; -static u_long read_intr_count(int vec); -static void setup_8254_mixed_mode(void); -#endif +#include <amd64/isa/intr_machdep.h> /* * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we @@ -195,10 +175,6 @@ clkintr(struct clockframe frame) mtx_unlock_spin(&clock_lock); } timer_func(&frame); -#ifdef SMP - if (timer_func == hardclock) - forward_hardclock(); -#endif switch (timer0_state) { case RELEASED: @@ -209,9 +185,6 @@ clkintr(struct clockframe frame) >= hardclock_max_count) { timer0_prescaler_count -= hardclock_max_count; hardclock(&frame); -#ifdef SMP - forward_hardclock(); -#endif } break; @@ -244,17 +217,9 @@ clkintr(struct clockframe frame) timer_func = hardclock; timer0_state = RELEASED; hardclock(&frame); -#ifdef SMP - forward_hardclock(); -#endif } break; } -#ifdef DEV_MCA - /* Reset clock interrupt by asserting bit 7 of port 0x61 */ - if (MCA_system) - outb(0x61, inb(0x61) | 0x80); -#endif } /* @@ -376,9 +341,6 @@ rtcintr(struct clockframe frame) } if (pscnt == psdiv) statclock(&frame); -#ifdef SMP - forward_statclock(); -#endif } } @@ -673,43 +635,6 @@ set_timer_freq(u_int freq, int intr_freq) mtx_unlock_spin(&clock_lock); } -static void -i8254_restore(void) -{ - - mtx_lock_spin(&clock_lock); - outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); - outb(TIMER_CNTR0, timer0_max_count & 0xff); - outb(TIMER_CNTR0, timer0_max_count >> 8); - mtx_unlock_spin(&clock_lock); -} - -static void -rtc_restore(void) -{ - - /* Restore all of the RTC's "status" (actually, control) registers. */ - /* XXX locking is needed for RTC access. 
*/ - writertc(RTC_STATUSB, RTCSB_24HR); - writertc(RTC_STATUSA, rtc_statusa); - writertc(RTC_STATUSB, rtc_statusb); -} - -/* - * Restore all the timers non-atomically (XXX: should be atomically). - * - * This function is called from pmtimer_resume() to restore all the timers. - * This should not be necessary, but there are broken laptops that do not - * restore all the timers on resume. - */ -void -timer_restore(void) -{ - - i8254_restore(); /* restore timer_freq and hz */ - rtc_restore(); /* reenable RTC interrupts */ -} - /* * Initialize 8254 timer 0 early so that it can be used in DELAY(). * XXX initialization of other timers is unintentionally left blank. @@ -902,10 +827,6 @@ void cpu_initclocks() { int diag; -#ifdef APIC_IO - int apic_8254_trial; - void *clkdesc; -#endif /* APIC_IO */ register_t crit; if (statclock_disable) { @@ -923,32 +844,6 @@ cpu_initclocks() } /* Finish initializing 8253 timer 0. */ -#ifdef APIC_IO - - apic_8254_intr = isa_apic_irq(0); - apic_8254_trial = 0; - if (apic_8254_intr >= 0 ) { - if (apic_int_type(0, 0) == 3) - apic_8254_trial = 1; - } else { - /* look for ExtInt on pin 0 */ - if (apic_int_type(0, 0) == 3) { - apic_8254_intr = apic_irq(0, 0); - setup_8254_mixed_mode(); - } else - panic("APIC_IO: Cannot route 8254 interrupt to CPU"); - } - - inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, - INTR_TYPE_CLK | INTR_FAST, &clkdesc); - crit = intr_disable(); - mtx_lock_spin(&icu_lock); - INTREN(1 << apic_8254_intr); - mtx_unlock_spin(&icu_lock); - intr_restore(crit); - -#else /* APIC_IO */ - /* * XXX Check the priority of this interrupt handler. I * couldn't find anything suitable in the BSD/OS code (grog, @@ -962,8 +857,6 @@ cpu_initclocks() mtx_unlock_spin(&icu_lock); intr_restore(crit); -#endif /* APIC_IO */ - /* Initialize RTC. 
*/ writertc(RTC_STATUSA, rtc_statusa); writertc(RTC_STATUSB, RTCSB_24HR); @@ -975,118 +868,17 @@ cpu_initclocks() if (diag != 0) printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS); -#ifdef APIC_IO - if (isa_apic_irq(8) != 8) - panic("APIC RTC != 8"); -#endif /* APIC_IO */ - inthand_add("rtc", 8, (driver_intr_t *)rtcintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); crit = intr_disable(); mtx_lock_spin(&icu_lock); -#ifdef APIC_IO - INTREN(APIC_IRQ8); -#else INTREN(IRQ8); -#endif /* APIC_IO */ mtx_unlock_spin(&icu_lock); intr_restore(crit); writertc(RTC_STATUSB, rtc_statusb); - -#ifdef APIC_IO - if (apic_8254_trial) { - - printf("APIC_IO: Testing 8254 interrupt delivery\n"); - while (read_intr_count(8) < 6) - ; /* nothing */ - if (read_intr_count(apic_8254_intr) < 3) { - /* - * The MP table is broken. - * The 8254 was not connected to the specified pin - * on the IO APIC. - * Workaround: Limited variant of mixed mode. - */ - - crit = intr_disable(); - mtx_lock_spin(&icu_lock); - INTRDIS(1 << apic_8254_intr); - mtx_unlock_spin(&icu_lock); - intr_restore(crit); - inthand_remove(clkdesc); - printf("APIC_IO: Broken MP table detected: " - "8254 is not connected to " - "IOAPIC #%d intpin %d\n", - int_to_apicintpin[apic_8254_intr].ioapic, - int_to_apicintpin[apic_8254_intr].int_pin); - /* - * Revoke current ISA IRQ 0 assignment and - * configure a fallback interrupt routing from - * the 8254 Timer via the 8259 PIC to the - * an ExtInt interrupt line on IOAPIC #0 intpin 0. - * We reuse the low level interrupt handler number. 
- */ - if (apic_irq(0, 0) < 0) { - revoke_apic_irq(apic_8254_intr); - assign_apic_irq(0, 0, apic_8254_intr); - } - apic_8254_intr = apic_irq(0, 0); - setup_8254_mixed_mode(); - inthand_add("clk", apic_8254_intr, - (driver_intr_t *)clkintr, NULL, - INTR_TYPE_CLK | INTR_FAST, NULL); - crit = intr_disable(); - mtx_lock_spin(&icu_lock); - INTREN(1 << apic_8254_intr); - mtx_unlock_spin(&icu_lock); - intr_restore(crit); - } - - } - if (apic_int_type(0, 0) != 3 || - int_to_apicintpin[apic_8254_intr].ioapic != 0 || - int_to_apicintpin[apic_8254_intr].int_pin != 0) - printf("APIC_IO: routing 8254 via IOAPIC #%d intpin %d\n", - int_to_apicintpin[apic_8254_intr].ioapic, - int_to_apicintpin[apic_8254_intr].int_pin); - else - printf("APIC_IO: " - "routing 8254 via 8259 and IOAPIC #0 intpin 0\n"); -#endif - -} - -#ifdef APIC_IO -static u_long -read_intr_count(int vec) -{ - u_long *up; - up = intr_countp[vec]; - if (up) - return *up; - return 0UL; -} - -static void -setup_8254_mixed_mode() -{ - /* - * Allow 8254 timer to INTerrupt 8259: - * re-initialize master 8259: - * reset; prog 4 bytes, single ICU, edge triggered - */ - outb(IO_ICU1, 0x13); - outb(IO_ICU1 + 1, NRSVIDT); /* start vector (unused) */ - outb(IO_ICU1 + 1, 0x00); /* ignore slave */ - outb(IO_ICU1 + 1, 0x03); /* auto EOI, 8086 */ - outb(IO_ICU1 + 1, 0xfe); /* unmask INT0 */ - - /* program IO APIC for type 3 INT on INT0 */ - if (ext_int_setup(0, 0) < 0) - panic("8254 redirect via APIC pin0 impossible!"); } -#endif void cpu_startprofclock(void) @@ -1135,9 +927,9 @@ i8254_get_timecount(struct timecounter *tc) { u_int count; u_int high, low; - u_int eflags; + u_long rflags; - eflags = read_eflags(); + rflags = read_rflags(); mtx_lock_spin(&clock_lock); /* Select timer0 and latch counter value. 
*/ @@ -1148,14 +940,8 @@ i8254_get_timecount(struct timecounter *tc) count = timer0_max_count - ((high << 8) | low); if (count < i8254_lastcount || (!i8254_ticked && (clkintr_pending || - ((count < 20 || (!(eflags & PSL_I) && count < timer0_max_count / 2u)) && -#ifdef APIC_IO -#define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */ - /* XXX this assumes that apic_8254_intr is < 24. */ - (lapic_irr1 & (1 << apic_8254_intr)))) -#else + ((count < 20 || (!(rflags & PSL_I) && count < timer0_max_count / 2u)) && (inb(IO_ICU1) & 1))) -#endif )) { i8254_ticked = 1; i8254_offset += timer0_max_count; diff --git a/sys/amd64/isa/icu.h b/sys/amd64/isa/icu.h index 82e084f..06b8955 100644 --- a/sys/amd64/isa/icu.h +++ b/sys/amd64/isa/icu.h @@ -58,47 +58,14 @@ void INTREN(u_int); void INTRDIS(u_int); -#ifdef APIC_IO -extern unsigned apic_imen; /* APIC interrupt mask enable */ -#else extern unsigned imen; /* interrupt mask enable */ -#endif #endif /* LOCORE */ -#ifdef APIC_IO -/* - * Note: The APIC uses different values for IRQxxx. - * Unfortunately many drivers use the 8259 values as indexes - * into tables, etc. The APIC equivilants are kept as APIC_IRQxxx. - * The 8259 versions have to be used in SMP for legacy operation - * of the drivers. 
- */ -#endif /* APIC_IO */ - /* * Interrupt enable bits - in normal order of priority (which we change) */ -#ifdef PC98 -#define IRQ0 0x0001 -#define IRQ1 0x0002 -#define IRQ2 0x0004 -#define IRQ3 0x0008 -#define IRQ4 0x0010 -#define IRQ5 0x0020 -#define IRQ6 0x0040 -#define IRQ7 0x0080 -#define IRQ_SLAVE 0x0080 -#define IRQ8 0x0100 -#define IRQ9 0x0200 -#define IRQ10 0x0400 -#define IRQ11 0x0800 -#define IRQ12 0x1000 -#define IRQ13 0x2000 -#define IRQ14 0x4000 -#define IRQ15 0x8000 -#else #define IRQ0 0x0001 /* highest priority - timer */ #define IRQ1 0x0002 #define IRQ_SLAVE 0x0004 @@ -116,36 +83,69 @@ extern unsigned imen; /* interrupt mask enable */ #define IRQ5 0x0020 #define IRQ6 0x0040 #define IRQ7 0x0080 /* lowest - parallel printer */ -#endif -/* - * Interrupt Control offset into Interrupt descriptor table (IDT) - */ -#define ICU_OFFSET 32 /* 0-31 are processor exceptions */ +/* Initialization control word 1. Written to even address. */ +#define ICW1_IC4 0x01 /* ICW4 present */ +#define ICW1_SNGL 0x02 /* 1 = single, 0 = cascaded */ +#define ICW1_ADI 0x04 /* 1 = 4, 0 = 8 byte vectors */ +#define ICW1_LTIM 0x08 /* 1 = level trigger, 0 = edge */ +#define ICW1_RESET 0x10 /* must be 1 */ +/* 0x20 - 0x80 - in 8080/8085 mode only */ -#ifdef PC98 -#define ICU_IMR_OFFSET 2 -#define ICU_SLAVEID 7 -#else -#define ICU_IMR_OFFSET 1 -#define ICU_SLAVEID 2 -#endif -#define ICU_EOI 0x20 +/* Initialization control word 2. Written to the odd address. */ +/* No definitions, it is the base vector of the IDT for 8086 mode */ + +/* Initialization control word 3. Written to the odd address. */ +/* For a master PIC, bitfield indicating a slave 8259 on given input */ +/* For slave, lower 3 bits are the slave's ID binary id on master */ -#ifdef APIC_IO +/* Initialization control word 4. Written to the odd address. 
*/ +#define ICW4_8086 0x01 /* 1 = 8086, 0 = 8080 */ +#define ICW4_AEOI 0x02 /* 1 = Auto EOI */ +#define ICW4_MS 0x04 /* 1 = buffered master, 0 = slave */ +#define ICW4_BUF 0x08 /* 1 = enable buffer mode */ +#define ICW4_SFNM 0x10 /* 1 = special fully nested mode */ -/* 32-47: ISA IRQ0-IRQ15, 48-63: IO APIC IRQ16-IRQ31 */ -#define ICU_LEN 32 -#define HWI_MASK 0xffffffff /* bits for h/w interrupts */ -#define NHWI 32 +/* Operation control words. Written after initialization. */ -#else +/* Operation control word type 1 */ +/* + * No definitions. Written to the odd address. Bitmask for interrupts. + * 1 = disabled. + */ +/* Operation control word type 2. Bit 3 (0x08) must be zero. Even address. */ +#define OCW2_L0 0x01 /* Level */ +#define OCW2_L1 0x02 +#define OCW2_L2 0x04 +/* 0x08 must be 0 to select OCW2 vs OCW3 */ +/* 0x10 must be 0 to select OCW2 vs ICW1 */ +#define OCW2_EOI 0x20 /* 1 = EOI */ +#define OCW2_SL 0x40 /* EOI mode */ +#define OCW2_R 0x80 /* EOI mode */ + +/* Operation control word type 3. Bit 3 (0x08) must be set. Even address. 
*/ +#define OCW3_RIS 0x01 +#define OCW3_RR 0x02 +#define OCW3_P 0x04 +/* 0x08 must be 1 to select OCW3 vs OCW2 */ +#define OCW3_SEL 0x08 /* must be 1 */ +/* 0x10 must be 0 to select OCW3 vs ICW1 */ +#define OCW3_SMM 0x20 /* special mode mask */ +#define OCW3_ESMM 0x40 /* enable SMM */ + +/* + * Interrupt Control offset into Interrupt descriptor table (IDT) + */ +#define ICU_OFFSET 32 /* 0-31 are processor exceptions */ #define ICU_LEN 16 /* 32-47 are ISA interrupts */ #define HWI_MASK 0xffff /* bits for h/w interrupts */ #define NHWI 16 -#endif /* APIC_IO */ +#define ICU_IMR_OFFSET 1 +#define ICU_SLAVEID 2 +#define ICU_EOI (OCW2_EOI) /* non-specific EOI */ +#define ICU_SETPRI (OCW2_R | OCW2_SL) /* set rotation priority */ #define INTRCNT_COUNT (1 + ICU_LEN + 2 * ICU_LEN) diff --git a/sys/amd64/isa/icu_ipl.S b/sys/amd64/isa/icu_ipl.S index 8cedbb4..ad883fe 100644 --- a/sys/amd64/isa/icu_ipl.S +++ b/sys/amd64/isa/icu_ipl.S @@ -48,7 +48,7 @@ imen: .long HWI_MASK SUPERALIGN_TEXT ENTRY(INTREN) - movl 4(%esp), %eax + movq %rdi, %rax movl %eax, %ecx notl %eax andl %eax, imen @@ -65,7 +65,7 @@ ENTRY(INTREN) ret ENTRY(INTRDIS) - movl 4(%esp), %eax + movq %rdi, %rax movl %eax, %ecx orl %eax, imen movl imen, %eax diff --git a/sys/amd64/isa/icu_ipl.s b/sys/amd64/isa/icu_ipl.s index 8cedbb4..ad883fe 100644 --- a/sys/amd64/isa/icu_ipl.s +++ b/sys/amd64/isa/icu_ipl.s @@ -48,7 +48,7 @@ imen: .long HWI_MASK SUPERALIGN_TEXT ENTRY(INTREN) - movl 4(%esp), %eax + movq %rdi, %rax movl %eax, %ecx notl %eax andl %eax, imen @@ -65,7 +65,7 @@ ENTRY(INTREN) ret ENTRY(INTRDIS) - movl 4(%esp), %eax + movq %rdi, %rax movl %eax, %ecx orl %eax, imen movl imen, %eax diff --git a/sys/amd64/isa/icu_vector.S b/sys/amd64/isa/icu_vector.S index 1023635..c9fea1a 100644 --- a/sys/amd64/isa/icu_vector.S +++ b/sys/amd64/isa/icu_vector.S @@ -4,151 +4,57 @@ */ #define IRQ_BIT(irq_num) (1 << ((irq_num) % 8)) -#define IRQ_LBIT(irq_num) (1 << (irq_num)) #define IRQ_BYTE(irq_num) ((irq_num) >> 3) -#ifdef 
AUTO_EOI_1 - -#define ENABLE_ICU1 /* use auto-EOI to reduce i/o */ -#define OUTB_ICU1 - -#else - #define ENABLE_ICU1 \ movb $ICU_EOI,%al ; /* as soon as possible send EOI ... */ \ - OUTB_ICU1 /* ... to clear in service bit */ - -#define OUTB_ICU1 \ - outb %al,$IO_ICU1 - -#endif - -#ifdef AUTO_EOI_2 -/* - * The data sheet says no auto-EOI on slave, but it sometimes works. - */ -#define ENABLE_ICU1_AND_2 ENABLE_ICU1 - -#else + outb %al,$IO_ICU1 /* ... to clear in service bit */ #define ENABLE_ICU1_AND_2 \ movb $ICU_EOI,%al ; /* as above */ \ outb %al,$IO_ICU2 ; /* but do second icu first ... */ \ - OUTB_ICU1 /* ... then first icu (if !AUTO_EOI_1) */ - -#endif - -#define PUSH_FRAME \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; /* 8 ints */ \ - pushl %ds ; /* save data and extra segments ... */ \ - pushl %es ; \ - pushl %fs - -#define PUSH_DUMMY \ - pushfl ; /* eflags */ \ - pushl %cs ; /* cs */ \ - pushl 12(%esp) ; /* original caller eip */ \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - subl $11*4,%esp + outb %al,$IO_ICU1 /* ... then first icu */ -#define POP_FRAME \ - popl %fs ; \ - popl %es ; \ - popl %ds ; \ - popal ; \ - addl $4+4,%esp - -#define POP_DUMMY \ - addl $16*4,%esp - -#define MASK_IRQ(icu, irq_num) \ - movb imen + IRQ_BYTE(irq_num),%al ; \ - orb $IRQ_BIT(irq_num),%al ; \ - movb %al,imen + IRQ_BYTE(irq_num) ; \ - outb %al,$icu+ICU_IMR_OFFSET -#define UNMASK_IRQ(icu, irq_num) \ - movb imen + IRQ_BYTE(irq_num),%al ; \ - andb $~IRQ_BIT(irq_num),%al ; \ - movb %al,imen + IRQ_BYTE(irq_num) ; \ - outb %al,$icu+ICU_IMR_OFFSET /* * Macros for interrupt interrupt entry, call to handler, and exit. 
*/ -#define FAST_INTR(irq_num, vec_name, icu, enable_icus) \ +#define FAST_INTR(irq_num, vec_name, enable_icus) \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ - PUSH_FRAME ; \ - mov $KDSEL,%ax ; \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - mov $KPSEL,%ax ; \ - mov %ax,%fs ; \ - FAKE_MCOUNT((12+ACTUALLY_PUSHED)*4(%esp)) ; \ - movl PCPU(CURTHREAD),%ebx ; \ - cmpl $0,TD_CRITNEST(%ebx) ; \ - je 1f ; \ -; \ - movl $1,PCPU(INT_PENDING) ; \ - orl $IRQ_LBIT(irq_num),PCPU(FPENDING) ; \ - MASK_IRQ(icu, irq_num) ; \ - enable_icus ; \ - jmp 10f ; \ -1: ; \ - incl TD_CRITNEST(%ebx) ; \ - incl TD_INTR_NESTING_LEVEL(%ebx) ; \ - pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; \ - addl $4,%esp ; \ - enable_icus ; \ + subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \ + movq %rdi,TF_RDI(%rsp) ; \ + movq %rsi,TF_RSI(%rsp) ; \ + movq %rdx,TF_RDX(%rsp) ; \ + movq %rcx,TF_RCX(%rsp) ; \ + movq %r8,TF_R8(%rsp) ; \ + movq %r9,TF_R9(%rsp) ; \ + movq %rax,TF_RAX(%rsp) ; \ + movq %rbx,TF_RBX(%rsp) ; \ + movq %rbp,TF_RBP(%rsp) ; \ + movq %r10,TF_R10(%rsp) ; \ + movq %r11,TF_R11(%rsp) ; \ + movq %r12,TF_R12(%rsp) ; \ + movq %r13,TF_R13(%rsp) ; \ + movq %r14,TF_R14(%rsp) ; \ + movq %r15,TF_R15(%rsp) ; \ + FAKE_MCOUNT((12)*4(%rsp)) ; \ + call critical_enter ; \ + movq PCPU(CURTHREAD),%rbx ; \ + incl TD_INTR_NESTING_LEVEL(%rbx) ; \ + movq intr_unit + (irq_num) * 8, %rdi ; \ + call *intr_handler + (irq_num) * 8 ; /* do the work ASAP */ \ + enable_icus ; /* (re)enable ASAP (helps edge trigger?) 
*/ \ incl cnt+V_INTR ; /* book-keeping can wait */ \ - movl intr_countp + (irq_num) * 4,%eax ; \ - incl (%eax) ; \ - decl TD_CRITNEST(%ebx) ; \ - cmpl $0,PCPU(INT_PENDING) ; \ - je 2f ; \ -; \ - call i386_unpend ; \ -2: ; \ - decl TD_INTR_NESTING_LEVEL(%ebx) ; \ -10: ; \ + movq intr_countp + (irq_num) * 8,%rax ; \ + incq (%rax) ; \ + decl TD_INTR_NESTING_LEVEL(%rbx) ; \ + call critical_exit ; \ MEXITCOUNT ; \ jmp doreti -/* - * Restart a fast interrupt that was held up by a critical section. - * This routine is called from unpend(). unpend() ensures we are - * in a critical section and deals with the interrupt nesting level - * for us. If we previously masked the irq, we have to unmask it. - * - * We have a choice. We can regenerate the irq using the 'int' - * instruction or we can create a dummy frame and call the interrupt - * handler directly. I've chosen to use the dummy-frame method. - */ -#define FAST_UNPEND(irq_num, vec_name, icu) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ -; \ - pushl %ebp ; \ - movl %esp, %ebp ; \ - PUSH_DUMMY ; \ - pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - addl $4, %esp ; \ - incl cnt+V_INTR ; /* book-keeping can wait */ \ - movl intr_countp + (irq_num) * 4,%eax ; \ - incl (%eax) ; \ - UNMASK_IRQ(icu, irq_num) ; \ - POP_DUMMY ; \ - popl %ebp ; \ - ret - /* * Slow, threaded interrupts. 
* @@ -162,57 +68,56 @@ IDTVEC(vec_name) ; \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ - PUSH_FRAME ; \ - mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - mov $KPSEL,%ax ; \ - mov %ax,%fs ; \ -; \ + subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \ + movq %rdi,TF_RDI(%rsp) ; \ + movq %rsi,TF_RSI(%rsp) ; \ + movq %rdx,TF_RDX(%rsp) ; \ + movq %rcx,TF_RCX(%rsp) ; \ + movq %r8,TF_R8(%rsp) ; \ + movq %r9,TF_R9(%rsp) ; \ + movq %rax,TF_RAX(%rsp) ; \ + movq %rbx,TF_RBX(%rsp) ; \ + movq %rbp,TF_RBP(%rsp) ; \ + movq %r10,TF_R10(%rsp) ; \ + movq %r11,TF_R11(%rsp) ; \ + movq %r12,TF_R12(%rsp) ; \ + movq %r13,TF_R13(%rsp) ; \ + movq %r14,TF_R14(%rsp) ; \ + movq %r15,TF_R15(%rsp) ; \ maybe_extra_ipending ; \ - MASK_IRQ(icu, irq_num) ; \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + orb $IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET ; \ enable_icus ; \ -; \ - movl PCPU(CURTHREAD),%ebx ; \ - cmpl $0,TD_CRITNEST(%ebx) ; \ - je 1f ; \ - movl $1,PCPU(INT_PENDING); \ - orl $IRQ_LBIT(irq_num),PCPU(IPENDING) ; \ - jmp 10f ; \ -1: ; \ - incl TD_INTR_NESTING_LEVEL(%ebx) ; \ -; \ - FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \ - cmpl $0,PCPU(INT_PENDING) ; \ - je 9f ; \ - call i386_unpend ; \ -9: ; \ - pushl $irq_num; /* pass the IRQ */ \ + movq PCPU(CURTHREAD),%rbx ; \ + incl TD_INTR_NESTING_LEVEL(%rbx) ; \ + FAKE_MCOUNT(13*4(%rsp)) ; /* XXX late to avoid double count */ \ + movq $irq_num, %rdi; /* pass the IRQ */ \ call sched_ithd ; \ - addl $4, %esp ; /* discard the parameter */ \ -; \ - decl TD_INTR_NESTING_LEVEL(%ebx) ; \ -10: ; \ + decl TD_INTR_NESTING_LEVEL(%rbx) ; \ MEXITCOUNT ; \ + /* We could usually avoid the following jmp by inlining some of */ \ + /* doreti, but it's probably better to use less cache. 
*/ \ jmp doreti MCOUNT_LABEL(bintr) - FAST_INTR(0,fastintr0, IO_ICU1, ENABLE_ICU1) - FAST_INTR(1,fastintr1, IO_ICU1, ENABLE_ICU1) - FAST_INTR(2,fastintr2, IO_ICU1, ENABLE_ICU1) - FAST_INTR(3,fastintr3, IO_ICU1, ENABLE_ICU1) - FAST_INTR(4,fastintr4, IO_ICU1, ENABLE_ICU1) - FAST_INTR(5,fastintr5, IO_ICU1, ENABLE_ICU1) - FAST_INTR(6,fastintr6, IO_ICU1, ENABLE_ICU1) - FAST_INTR(7,fastintr7, IO_ICU1, ENABLE_ICU1) - FAST_INTR(8,fastintr8, IO_ICU2, ENABLE_ICU1_AND_2) - FAST_INTR(9,fastintr9, IO_ICU2, ENABLE_ICU1_AND_2) - FAST_INTR(10,fastintr10, IO_ICU2, ENABLE_ICU1_AND_2) - FAST_INTR(11,fastintr11, IO_ICU2, ENABLE_ICU1_AND_2) - FAST_INTR(12,fastintr12, IO_ICU2, ENABLE_ICU1_AND_2) - FAST_INTR(13,fastintr13, IO_ICU2, ENABLE_ICU1_AND_2) - FAST_INTR(14,fastintr14, IO_ICU2, ENABLE_ICU1_AND_2) - FAST_INTR(15,fastintr15, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(0,fastintr0, ENABLE_ICU1) + FAST_INTR(1,fastintr1, ENABLE_ICU1) + FAST_INTR(2,fastintr2, ENABLE_ICU1) + FAST_INTR(3,fastintr3, ENABLE_ICU1) + FAST_INTR(4,fastintr4, ENABLE_ICU1) + FAST_INTR(5,fastintr5, ENABLE_ICU1) + FAST_INTR(6,fastintr6, ENABLE_ICU1) + FAST_INTR(7,fastintr7, ENABLE_ICU1) + FAST_INTR(8,fastintr8, ENABLE_ICU1_AND_2) + FAST_INTR(9,fastintr9, ENABLE_ICU1_AND_2) + FAST_INTR(10,fastintr10, ENABLE_ICU1_AND_2) + FAST_INTR(11,fastintr11, ENABLE_ICU1_AND_2) + FAST_INTR(12,fastintr12, ENABLE_ICU1_AND_2) + FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2) + FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2) + FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2) #define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) /* Threaded interrupts */ @@ -233,21 +138,5 @@ MCOUNT_LABEL(bintr) INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2,) INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2,) - FAST_UNPEND(0,fastunpend0, IO_ICU1) - FAST_UNPEND(1,fastunpend1, IO_ICU1) - FAST_UNPEND(2,fastunpend2, IO_ICU1) - FAST_UNPEND(3,fastunpend3, IO_ICU1) - FAST_UNPEND(4,fastunpend4, IO_ICU1) - FAST_UNPEND(5,fastunpend5, IO_ICU1) - FAST_UNPEND(6,fastunpend6, IO_ICU1) - 
FAST_UNPEND(7,fastunpend7, IO_ICU1) - FAST_UNPEND(8,fastunpend8, IO_ICU2) - FAST_UNPEND(9,fastunpend9, IO_ICU2) - FAST_UNPEND(10,fastunpend10, IO_ICU2) - FAST_UNPEND(11,fastunpend11, IO_ICU2) - FAST_UNPEND(12,fastunpend12, IO_ICU2) - FAST_UNPEND(13,fastunpend13, IO_ICU2) - FAST_UNPEND(14,fastunpend14, IO_ICU2) - FAST_UNPEND(15,fastunpend15, IO_ICU2) MCOUNT_LABEL(eintr) diff --git a/sys/amd64/isa/icu_vector.s b/sys/amd64/isa/icu_vector.s index 1023635..c9fea1a 100644 --- a/sys/amd64/isa/icu_vector.s +++ b/sys/amd64/isa/icu_vector.s @@ -4,151 +4,57 @@ */ #define IRQ_BIT(irq_num) (1 << ((irq_num) % 8)) -#define IRQ_LBIT(irq_num) (1 << (irq_num)) #define IRQ_BYTE(irq_num) ((irq_num) >> 3) -#ifdef AUTO_EOI_1 - -#define ENABLE_ICU1 /* use auto-EOI to reduce i/o */ -#define OUTB_ICU1 - -#else - #define ENABLE_ICU1 \ movb $ICU_EOI,%al ; /* as soon as possible send EOI ... */ \ - OUTB_ICU1 /* ... to clear in service bit */ - -#define OUTB_ICU1 \ - outb %al,$IO_ICU1 - -#endif - -#ifdef AUTO_EOI_2 -/* - * The data sheet says no auto-EOI on slave, but it sometimes works. - */ -#define ENABLE_ICU1_AND_2 ENABLE_ICU1 - -#else + outb %al,$IO_ICU1 /* ... to clear in service bit */ #define ENABLE_ICU1_AND_2 \ movb $ICU_EOI,%al ; /* as above */ \ outb %al,$IO_ICU2 ; /* but do second icu first ... */ \ - OUTB_ICU1 /* ... then first icu (if !AUTO_EOI_1) */ - -#endif - -#define PUSH_FRAME \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; /* 8 ints */ \ - pushl %ds ; /* save data and extra segments ... */ \ - pushl %es ; \ - pushl %fs - -#define PUSH_DUMMY \ - pushfl ; /* eflags */ \ - pushl %cs ; /* cs */ \ - pushl 12(%esp) ; /* original caller eip */ \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - subl $11*4,%esp + outb %al,$IO_ICU1 /* ... 
then first icu */ -#define POP_FRAME \ - popl %fs ; \ - popl %es ; \ - popl %ds ; \ - popal ; \ - addl $4+4,%esp - -#define POP_DUMMY \ - addl $16*4,%esp - -#define MASK_IRQ(icu, irq_num) \ - movb imen + IRQ_BYTE(irq_num),%al ; \ - orb $IRQ_BIT(irq_num),%al ; \ - movb %al,imen + IRQ_BYTE(irq_num) ; \ - outb %al,$icu+ICU_IMR_OFFSET -#define UNMASK_IRQ(icu, irq_num) \ - movb imen + IRQ_BYTE(irq_num),%al ; \ - andb $~IRQ_BIT(irq_num),%al ; \ - movb %al,imen + IRQ_BYTE(irq_num) ; \ - outb %al,$icu+ICU_IMR_OFFSET /* * Macros for interrupt interrupt entry, call to handler, and exit. */ -#define FAST_INTR(irq_num, vec_name, icu, enable_icus) \ +#define FAST_INTR(irq_num, vec_name, enable_icus) \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ - PUSH_FRAME ; \ - mov $KDSEL,%ax ; \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - mov $KPSEL,%ax ; \ - mov %ax,%fs ; \ - FAKE_MCOUNT((12+ACTUALLY_PUSHED)*4(%esp)) ; \ - movl PCPU(CURTHREAD),%ebx ; \ - cmpl $0,TD_CRITNEST(%ebx) ; \ - je 1f ; \ -; \ - movl $1,PCPU(INT_PENDING) ; \ - orl $IRQ_LBIT(irq_num),PCPU(FPENDING) ; \ - MASK_IRQ(icu, irq_num) ; \ - enable_icus ; \ - jmp 10f ; \ -1: ; \ - incl TD_CRITNEST(%ebx) ; \ - incl TD_INTR_NESTING_LEVEL(%ebx) ; \ - pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; \ - addl $4,%esp ; \ - enable_icus ; \ + subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \ + movq %rdi,TF_RDI(%rsp) ; \ + movq %rsi,TF_RSI(%rsp) ; \ + movq %rdx,TF_RDX(%rsp) ; \ + movq %rcx,TF_RCX(%rsp) ; \ + movq %r8,TF_R8(%rsp) ; \ + movq %r9,TF_R9(%rsp) ; \ + movq %rax,TF_RAX(%rsp) ; \ + movq %rbx,TF_RBX(%rsp) ; \ + movq %rbp,TF_RBP(%rsp) ; \ + movq %r10,TF_R10(%rsp) ; \ + movq %r11,TF_R11(%rsp) ; \ + movq %r12,TF_R12(%rsp) ; \ + movq %r13,TF_R13(%rsp) ; \ + movq %r14,TF_R14(%rsp) ; \ + movq %r15,TF_R15(%rsp) ; \ + FAKE_MCOUNT((12)*4(%rsp)) ; \ + call critical_enter ; \ + movq PCPU(CURTHREAD),%rbx ; \ + incl TD_INTR_NESTING_LEVEL(%rbx) ; \ + movq intr_unit + (irq_num) * 8, %rdi ; \ + call 
*intr_handler + (irq_num) * 8 ; /* do the work ASAP */ \ + enable_icus ; /* (re)enable ASAP (helps edge trigger?) */ \ incl cnt+V_INTR ; /* book-keeping can wait */ \ - movl intr_countp + (irq_num) * 4,%eax ; \ - incl (%eax) ; \ - decl TD_CRITNEST(%ebx) ; \ - cmpl $0,PCPU(INT_PENDING) ; \ - je 2f ; \ -; \ - call i386_unpend ; \ -2: ; \ - decl TD_INTR_NESTING_LEVEL(%ebx) ; \ -10: ; \ + movq intr_countp + (irq_num) * 8,%rax ; \ + incq (%rax) ; \ + decl TD_INTR_NESTING_LEVEL(%rbx) ; \ + call critical_exit ; \ MEXITCOUNT ; \ jmp doreti -/* - * Restart a fast interrupt that was held up by a critical section. - * This routine is called from unpend(). unpend() ensures we are - * in a critical section and deals with the interrupt nesting level - * for us. If we previously masked the irq, we have to unmask it. - * - * We have a choice. We can regenerate the irq using the 'int' - * instruction or we can create a dummy frame and call the interrupt - * handler directly. I've chosen to use the dummy-frame method. - */ -#define FAST_UNPEND(irq_num, vec_name, icu) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ -; \ - pushl %ebp ; \ - movl %esp, %ebp ; \ - PUSH_DUMMY ; \ - pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - addl $4, %esp ; \ - incl cnt+V_INTR ; /* book-keeping can wait */ \ - movl intr_countp + (irq_num) * 4,%eax ; \ - incl (%eax) ; \ - UNMASK_IRQ(icu, irq_num) ; \ - POP_DUMMY ; \ - popl %ebp ; \ - ret - /* * Slow, threaded interrupts. 
* @@ -162,57 +68,56 @@ IDTVEC(vec_name) ; \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ - PUSH_FRAME ; \ - mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - mov $KPSEL,%ax ; \ - mov %ax,%fs ; \ -; \ + subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \ + movq %rdi,TF_RDI(%rsp) ; \ + movq %rsi,TF_RSI(%rsp) ; \ + movq %rdx,TF_RDX(%rsp) ; \ + movq %rcx,TF_RCX(%rsp) ; \ + movq %r8,TF_R8(%rsp) ; \ + movq %r9,TF_R9(%rsp) ; \ + movq %rax,TF_RAX(%rsp) ; \ + movq %rbx,TF_RBX(%rsp) ; \ + movq %rbp,TF_RBP(%rsp) ; \ + movq %r10,TF_R10(%rsp) ; \ + movq %r11,TF_R11(%rsp) ; \ + movq %r12,TF_R12(%rsp) ; \ + movq %r13,TF_R13(%rsp) ; \ + movq %r14,TF_R14(%rsp) ; \ + movq %r15,TF_R15(%rsp) ; \ maybe_extra_ipending ; \ - MASK_IRQ(icu, irq_num) ; \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + orb $IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET ; \ enable_icus ; \ -; \ - movl PCPU(CURTHREAD),%ebx ; \ - cmpl $0,TD_CRITNEST(%ebx) ; \ - je 1f ; \ - movl $1,PCPU(INT_PENDING); \ - orl $IRQ_LBIT(irq_num),PCPU(IPENDING) ; \ - jmp 10f ; \ -1: ; \ - incl TD_INTR_NESTING_LEVEL(%ebx) ; \ -; \ - FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \ - cmpl $0,PCPU(INT_PENDING) ; \ - je 9f ; \ - call i386_unpend ; \ -9: ; \ - pushl $irq_num; /* pass the IRQ */ \ + movq PCPU(CURTHREAD),%rbx ; \ + incl TD_INTR_NESTING_LEVEL(%rbx) ; \ + FAKE_MCOUNT(13*4(%rsp)) ; /* XXX late to avoid double count */ \ + movq $irq_num, %rdi; /* pass the IRQ */ \ call sched_ithd ; \ - addl $4, %esp ; /* discard the parameter */ \ -; \ - decl TD_INTR_NESTING_LEVEL(%ebx) ; \ -10: ; \ + decl TD_INTR_NESTING_LEVEL(%rbx) ; \ MEXITCOUNT ; \ + /* We could usually avoid the following jmp by inlining some of */ \ + /* doreti, but it's probably better to use less cache. 
*/ \ jmp doreti MCOUNT_LABEL(bintr) - FAST_INTR(0,fastintr0, IO_ICU1, ENABLE_ICU1) - FAST_INTR(1,fastintr1, IO_ICU1, ENABLE_ICU1) - FAST_INTR(2,fastintr2, IO_ICU1, ENABLE_ICU1) - FAST_INTR(3,fastintr3, IO_ICU1, ENABLE_ICU1) - FAST_INTR(4,fastintr4, IO_ICU1, ENABLE_ICU1) - FAST_INTR(5,fastintr5, IO_ICU1, ENABLE_ICU1) - FAST_INTR(6,fastintr6, IO_ICU1, ENABLE_ICU1) - FAST_INTR(7,fastintr7, IO_ICU1, ENABLE_ICU1) - FAST_INTR(8,fastintr8, IO_ICU2, ENABLE_ICU1_AND_2) - FAST_INTR(9,fastintr9, IO_ICU2, ENABLE_ICU1_AND_2) - FAST_INTR(10,fastintr10, IO_ICU2, ENABLE_ICU1_AND_2) - FAST_INTR(11,fastintr11, IO_ICU2, ENABLE_ICU1_AND_2) - FAST_INTR(12,fastintr12, IO_ICU2, ENABLE_ICU1_AND_2) - FAST_INTR(13,fastintr13, IO_ICU2, ENABLE_ICU1_AND_2) - FAST_INTR(14,fastintr14, IO_ICU2, ENABLE_ICU1_AND_2) - FAST_INTR(15,fastintr15, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(0,fastintr0, ENABLE_ICU1) + FAST_INTR(1,fastintr1, ENABLE_ICU1) + FAST_INTR(2,fastintr2, ENABLE_ICU1) + FAST_INTR(3,fastintr3, ENABLE_ICU1) + FAST_INTR(4,fastintr4, ENABLE_ICU1) + FAST_INTR(5,fastintr5, ENABLE_ICU1) + FAST_INTR(6,fastintr6, ENABLE_ICU1) + FAST_INTR(7,fastintr7, ENABLE_ICU1) + FAST_INTR(8,fastintr8, ENABLE_ICU1_AND_2) + FAST_INTR(9,fastintr9, ENABLE_ICU1_AND_2) + FAST_INTR(10,fastintr10, ENABLE_ICU1_AND_2) + FAST_INTR(11,fastintr11, ENABLE_ICU1_AND_2) + FAST_INTR(12,fastintr12, ENABLE_ICU1_AND_2) + FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2) + FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2) + FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2) #define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) /* Threaded interrupts */ @@ -233,21 +138,5 @@ MCOUNT_LABEL(bintr) INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2,) INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2,) - FAST_UNPEND(0,fastunpend0, IO_ICU1) - FAST_UNPEND(1,fastunpend1, IO_ICU1) - FAST_UNPEND(2,fastunpend2, IO_ICU1) - FAST_UNPEND(3,fastunpend3, IO_ICU1) - FAST_UNPEND(4,fastunpend4, IO_ICU1) - FAST_UNPEND(5,fastunpend5, IO_ICU1) - FAST_UNPEND(6,fastunpend6, IO_ICU1) - 
FAST_UNPEND(7,fastunpend7, IO_ICU1) - FAST_UNPEND(8,fastunpend8, IO_ICU2) - FAST_UNPEND(9,fastunpend9, IO_ICU2) - FAST_UNPEND(10,fastunpend10, IO_ICU2) - FAST_UNPEND(11,fastunpend11, IO_ICU2) - FAST_UNPEND(12,fastunpend12, IO_ICU2) - FAST_UNPEND(13,fastunpend13, IO_ICU2) - FAST_UNPEND(14,fastunpend14, IO_ICU2) - FAST_UNPEND(15,fastunpend15, IO_ICU2) MCOUNT_LABEL(eintr) diff --git a/sys/amd64/isa/intr_machdep.c b/sys/amd64/isa/intr_machdep.c index 67b5dea..c148228 100644 --- a/sys/amd64/isa/intr_machdep.c +++ b/sys/amd64/isa/intr_machdep.c @@ -37,9 +37,7 @@ * $FreeBSD$ */ -#include "opt_auto_eoi.h" #include "opt_isa.h" -#include "opt_mca.h" #include <sys/param.h> #include <sys/bus.h> @@ -59,32 +57,14 @@ #include <machine/md_var.h> #include <machine/segments.h> -#if defined(APIC_IO) -#include <machine/smptests.h> /** FAST_HI */ -#include <machine/smp.h> -#include <machine/resource.h> -#endif /* APIC_IO */ -#ifdef PC98 -#include <pc98/pc98/pc98.h> -#include <pc98/pc98/pc98_machdep.h> -#include <pc98/pc98/epsonio.h> -#else -#include <i386/isa/isa.h> -#endif -#include <i386/isa/icu.h> +#include <amd64/isa/isa.h> +#include <amd64/isa/icu.h> #ifdef DEV_ISA #include <isa/isavar.h> #endif -#include <i386/isa/intr_machdep.h> +#include <amd64/isa/intr_machdep.h> #include <sys/interrupt.h> -#ifdef APIC_IO -#include <machine/clock.h> -#endif - -#ifdef DEV_MCA -#include <i386/bios/mca_machdep.h> -#endif /* * Per-interrupt data. 
@@ -105,37 +85,6 @@ static inthand_t *fastintr[ICU_LEN] = { IDTVEC(fastintr10), IDTVEC(fastintr11), IDTVEC(fastintr12), IDTVEC(fastintr13), IDTVEC(fastintr14), IDTVEC(fastintr15), -#if defined(APIC_IO) - IDTVEC(fastintr16), IDTVEC(fastintr17), - IDTVEC(fastintr18), IDTVEC(fastintr19), - IDTVEC(fastintr20), IDTVEC(fastintr21), - IDTVEC(fastintr22), IDTVEC(fastintr23), - IDTVEC(fastintr24), IDTVEC(fastintr25), - IDTVEC(fastintr26), IDTVEC(fastintr27), - IDTVEC(fastintr28), IDTVEC(fastintr29), - IDTVEC(fastintr30), IDTVEC(fastintr31), -#endif /* APIC_IO */ -}; - -static unpendhand_t *fastunpend[ICU_LEN] = { - IDTVEC(fastunpend0), IDTVEC(fastunpend1), - IDTVEC(fastunpend2), IDTVEC(fastunpend3), - IDTVEC(fastunpend4), IDTVEC(fastunpend5), - IDTVEC(fastunpend6), IDTVEC(fastunpend7), - IDTVEC(fastunpend8), IDTVEC(fastunpend9), - IDTVEC(fastunpend10), IDTVEC(fastunpend11), - IDTVEC(fastunpend12), IDTVEC(fastunpend13), - IDTVEC(fastunpend14), IDTVEC(fastunpend15), -#if defined(APIC_IO) - IDTVEC(fastunpend16), IDTVEC(fastunpend17), - IDTVEC(fastunpend18), IDTVEC(fastunpend19), - IDTVEC(fastunpend20), IDTVEC(fastunpend21), - IDTVEC(fastunpend22), IDTVEC(fastunpend23), - IDTVEC(fastunpend24), IDTVEC(fastunpend25), - IDTVEC(fastunpend26), IDTVEC(fastunpend27), - IDTVEC(fastunpend28), IDTVEC(fastunpend29), - IDTVEC(fastunpend30), IDTVEC(fastunpend31), -#endif /* APIC_IO */ }; static inthand_t *slowintr[ICU_LEN] = { @@ -143,12 +92,6 @@ static inthand_t *slowintr[ICU_LEN] = { IDTVEC(intr4), IDTVEC(intr5), IDTVEC(intr6), IDTVEC(intr7), IDTVEC(intr8), IDTVEC(intr9), IDTVEC(intr10), IDTVEC(intr11), IDTVEC(intr12), IDTVEC(intr13), IDTVEC(intr14), IDTVEC(intr15), -#if defined(APIC_IO) - IDTVEC(intr16), IDTVEC(intr17), IDTVEC(intr18), IDTVEC(intr19), - IDTVEC(intr20), IDTVEC(intr21), IDTVEC(intr22), IDTVEC(intr23), - IDTVEC(intr24), IDTVEC(intr25), IDTVEC(intr26), IDTVEC(intr27), - IDTVEC(intr28), IDTVEC(intr29), IDTVEC(intr30), IDTVEC(intr31), -#endif /* APIC_IO */ }; static 
driver_intr_t isa_strayintr; @@ -158,16 +101,11 @@ static void ithread_enable(int vector); static void ithread_disable(int vector); static void init_i8259(void); -#ifdef PC98 -#define NMI_PARITY 0x04 -#define NMI_EPARITY 0x02 -#else #define NMI_PARITY (1 << 7) #define NMI_IOCHAN (1 << 6) #define ENMI_WATCHDOG (1 << 7) #define ENMI_BUSTIMER (1 << 6) #define ENMI_IOSTATUS (1 << 5) -#endif #ifdef DEV_ISA /* @@ -189,27 +127,12 @@ atpic_probe(device_t dev) } /* - * In the APIC_IO case we might be granted IRQ 2, as this is typically - * consumed by chaining between the two PIC components. If we're using - * the APIC, however, this may not be the case, and as such we should - * free the resource. (XXX untested) - * * The generic ISA attachment code will handle allocating any other resources * that we don't explicitly claim here. */ static int atpic_attach(device_t dev) { -#ifdef APIC_IO - int rid; - struct resource *res; - - /* try to allocate our IRQ and then free it */ - rid = 0; - res = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0, 1, 0); - if (res != NULL) - bus_release_resource(dev, SYS_RES_IRQ, rid, res); -#endif return(0); } @@ -245,30 +168,9 @@ isa_nmi(cd) int cd; { int retval = 0; -#ifdef PC98 - int port = inb(0x33); - - log(LOG_CRIT, "NMI PC98 port = %x\n", port); - if (epson_machine_id == 0x20) - epson_outb(0xc16, epson_inb(0xc16) | 0x1); - if (port & NMI_PARITY) { - log(LOG_CRIT, "BASE RAM parity error, likely hardware failure."); - retval = 1; - } else if (port & NMI_EPARITY) { - log(LOG_CRIT, "EXTENDED RAM parity error, likely hardware failure."); - retval = 1; - } else { - log(LOG_CRIT, "\nNMI Resume ??\n"); - } -#else /* IBM-PC */ int isa_port = inb(0x61); - int eisa_port = inb(0x461); - log(LOG_CRIT, "NMI ISA %x, EISA %x\n", isa_port, eisa_port); -#ifdef DEV_MCA - if (MCA_system && mca_bus_nmi()) - return(0); -#endif + log(LOG_CRIT, "NMI ISA STATUS 0x%02x", isa_port); if (isa_port & NMI_PARITY) { log(LOG_CRIT, "RAM parity error, likely hardware 
failure."); @@ -280,30 +182,7 @@ isa_nmi(cd) retval = 1; } - /* - * On a real EISA machine, this will never happen. However it can - * happen on ISA machines which implement XT style floating point - * error handling (very rare). Save them from a meaningless panic. - */ - if (eisa_port == 0xff) - return(retval); - - if (eisa_port & ENMI_WATCHDOG) { - log(LOG_CRIT, "EISA watchdog timer expired, likely hardware failure."); - retval = 1; - } - - if (eisa_port & ENMI_BUSTIMER) { - log(LOG_CRIT, "EISA bus timeout, likely hardware failure."); - retval = 1; - } - - if (eisa_port & ENMI_IOSTATUS) { - log(LOG_CRIT, "EISA I/O port status error."); - retval = 1; - } -#endif - return(retval); + return (retval); } /* @@ -352,72 +231,56 @@ isa_defaultirq() static void init_i8259() { -#ifdef DEV_MCA - if (MCA_system) - outb(IO_ICU1, 0x19); /* reset; program device, four bytes */ - else -#endif - outb(IO_ICU1, 0x11); /* reset; program device, four bytes */ + outb(IO_ICU1, ICW1_RESET | ICW1_IC4); /* reset; program device, four bytes */ outb(IO_ICU1+ICU_IMR_OFFSET, NRSVIDT); /* starting at this vector index */ - outb(IO_ICU1+ICU_IMR_OFFSET, IRQ_SLAVE); /* slave on line 7 */ -#ifdef PC98 -#ifdef AUTO_EOI_1 - outb(IO_ICU1+ICU_IMR_OFFSET, 0x1f); /* (master) auto EOI, 8086 mode */ -#else - outb(IO_ICU1+ICU_IMR_OFFSET, 0x1d); /* (master) 8086 mode */ -#endif -#else /* IBM-PC */ -#ifdef AUTO_EOI_1 - outb(IO_ICU1+ICU_IMR_OFFSET, 2 | 1); /* auto EOI, 8086 mode */ -#else - outb(IO_ICU1+ICU_IMR_OFFSET, 1); /* 8086 mode */ -#endif -#endif /* PC98 */ - outb(IO_ICU1+ICU_IMR_OFFSET, 0xff); /* leave interrupts masked */ - outb(IO_ICU1, 0x0a); /* default to IRR on read */ -#ifndef PC98 - outb(IO_ICU1, 0xc0 | (3 - 1)); /* pri order 3-7, 0-2 (com2 first) */ -#endif /* !PC98 */ - -#ifdef DEV_MCA - if (MCA_system) - outb(IO_ICU2, 0x19); /* reset; program device, four bytes */ - else -#endif - outb(IO_ICU2, 0x11); /* reset; program device, four bytes */ + outb(IO_ICU1+ICU_IMR_OFFSET, IRQ_SLAVE);/* slave 
on line 2 */ + outb(IO_ICU1+ICU_IMR_OFFSET, ICW4_8086);/* 8086 mode */ + outb(IO_ICU1+ICU_IMR_OFFSET, 0xff); /* leave interrupts masked */ + outb(IO_ICU1, OCW3_SEL | OCW3_RIS); /* default to IRR on read */ + outb(IO_ICU1, ICU_SETPRI | 0x2);/* pri order 3-7, 0-2 (com2 first) */ + + outb(IO_ICU2, ICW1_RESET | ICW1_IC4); /* reset; program device, four bytes */ outb(IO_ICU2+ICU_IMR_OFFSET, NRSVIDT+8); /* staring at this vector index */ - outb(IO_ICU2+ICU_IMR_OFFSET, ICU_SLAVEID); /* my slave id is 7 */ -#ifdef PC98 - outb(IO_ICU2+ICU_IMR_OFFSET,9); /* 8086 mode */ -#else /* IBM-PC */ -#ifdef AUTO_EOI_2 - outb(IO_ICU2+ICU_IMR_OFFSET, 2 | 1); /* auto EOI, 8086 mode */ -#else - outb(IO_ICU2+ICU_IMR_OFFSET,1); /* 8086 mode */ -#endif -#endif /* PC98 */ - outb(IO_ICU2+ICU_IMR_OFFSET, 0xff); /* leave interrupts masked */ - outb(IO_ICU2, 0x0a); /* default to IRR on read */ + outb(IO_ICU2+ICU_IMR_OFFSET, ICU_SLAVEID); /* my slave id is 2 */ + outb(IO_ICU2+ICU_IMR_OFFSET, ICW4_8086); /* 8086 mode */ + outb(IO_ICU2+ICU_IMR_OFFSET, 0xff); /* leave interrupts masked */ + outb(IO_ICU2, OCW3_SEL | OCW3_RIS); /* default to IRR on read */ } /* * Caught a stray interrupt, notify */ +static int isaglitch7; +static int isaglitch15; + static void isa_strayintr(vcookiep) void *vcookiep; { int intr = (void **)vcookiep - &intr_unit[0]; - - /* - * XXX TODO print a different message for #7 if it is for a - * glitch. Glitches can be distinguished from real #7's by - * testing that the in-service bit is _not_ set. The test - * must be done before sending an EOI so it can't be done if - * we are using AUTO_EOI_1. 
- */ + int isr; + + /* Determine if it is a stray interrupt or simply a glitch */ + if (intr == 7) { + outb(IO_ICU1, OCW3_SEL); /* select IS register */ + isr = inb(IO_ICU1); + outb(IO_ICU1, OCW3_SEL | OCW3_RIS); /* reselect IIR */ + if ((isr & 0x80) == 0) { + isaglitch7++; + return; + } + } + if (intr == 15) { + outb(IO_ICU2, OCW3_SEL); /* select IS register */ + isr = inb(IO_ICU2); + outb(IO_ICU2, OCW3_SEL | OCW3_RIS); /* reselect IIR */ + if ((isr & 0x80) == 0) { + isaglitch15++; + return; + } + } if (intrcnt[1 + intr] <= 5) log(LOG_ERR, "stray irq %d\n", intr); if (intrcnt[1 + intr] == 5) @@ -498,18 +361,9 @@ found: int icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) { -#ifdef FAST_HI - int select; /* the select register is 8 bits */ - int vector; - u_int32_t value; /* the window register is 32 bits */ -#endif /* FAST_HI */ register_t crit; -#if defined(APIC_IO) - if ((u_int)intr >= ICU_LEN) /* no 8259 SLAVE to ignore */ -#else if ((u_int)intr >= ICU_LEN || intr == ICU_SLAVEID) -#endif /* APIC_IO */ return (EINVAL); #if 0 if (intr_handler[intr] != isa_strayintr) @@ -520,35 +374,9 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) mtx_lock_spin(&icu_lock); intr_handler[intr] = handler; intr_unit[intr] = arg; -#ifdef FAST_HI - if (flags & INTR_FAST) { - vector = TPR_FAST_INTS + intr; - setidt(vector, fastintr[intr], - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - } - else { - vector = TPR_SLOW_INTS + intr; - setidt(vector, slowintr[intr], - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - } -#ifdef APIC_INTR_REORDER - set_lapic_isrloc(intr, vector); -#endif - /* - * Reprogram the vector in the IO APIC. - */ - if (int_to_apicintpin[intr].ioapic >= 0) { - select = int_to_apicintpin[intr].redirindex; - value = io_apic_read(int_to_apicintpin[intr].ioapic, - select) & ~IOART_INTVEC; - io_apic_write(int_to_apicintpin[intr].ioapic, - select, value | vector); - } -#else setidt(ICU_OFFSET + intr, flags & INTR_FAST ? 
fastintr[intr] : slowintr[intr], - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); -#endif /* FAST_HI */ + SDT_SYSIGT, SEL_KPL, 0); INTREN(1 << intr); mtx_unlock_spin(&icu_lock); intr_restore(crit); @@ -576,17 +404,7 @@ icu_unset(intr, handler) intr_countp[intr] = &intrcnt[1 + intr]; intr_handler[intr] = isa_strayintr; intr_unit[intr] = &intr_unit[intr]; -#ifdef FAST_HI_XXX - /* XXX how do I re-create dvp here? */ - setidt(flags & INTR_FAST ? TPR_FAST_INTS + intr : TPR_SLOW_INTS + intr, - slowintr[intr], SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); -#else /* FAST_HI */ -#ifdef APIC_INTR_REORDER - set_lapic_isrloc(intr, ICU_OFFSET + intr); -#endif - setidt(ICU_OFFSET + intr, slowintr[intr], SDT_SYS386IGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); -#endif /* FAST_HI */ + setidt(ICU_OFFSET + intr, slowintr[intr], SDT_SYSIGT, SEL_KPL, 0); mtx_unlock_spin(&icu_lock); intr_restore(crit); return (0); @@ -703,10 +521,3 @@ inthand_remove(void *cookie) return (ithread_remove_handler(cookie)); } - -void -call_fast_unpend(int irq) -{ - fastunpend[irq](); -} - diff --git a/sys/amd64/isa/intr_machdep.h b/sys/amd64/isa/intr_machdep.h index 4cfecb5..26f500e 100644 --- a/sys/amd64/isa/intr_machdep.h +++ b/sys/amd64/isa/intr_machdep.h @@ -42,97 +42,6 @@ #ifdef _KERNEL -#if defined(SMP) || defined(APIC_IO) -/* - * XXX FIXME: rethink location for all IPI vectors. 
- */ - -/* - APIC TPR priority vector levels: - - 0xff (255) +-------------+ - | | 15 (IPIs: Xspuriousint) - 0xf0 (240) +-------------+ - | | 14 - 0xe0 (224) +-------------+ - | | 13 - 0xd0 (208) +-------------+ - | | 12 - 0xc0 (192) +-------------+ - | | 11 - 0xb0 (176) +-------------+ - | | 10 (IPIs: Xcpustop) - 0xa0 (160) +-------------+ - | | 9 (IPIs: Xinvltlb) - 0x90 (144) +-------------+ - | | 8 (linux/BSD syscall, IGNORE FAST HW INTS) - 0x80 (128) +-------------+ - | | 7 (FAST_INTR 16-23) - 0x70 (112) +-------------+ - | | 6 (FAST_INTR 0-15) - 0x60 (96) +-------------+ - | | 5 (IGNORE HW INTS) - 0x50 (80) +-------------+ - | | 4 (2nd IO APIC) - 0x40 (64) +------+------+ - | | | 3 (upper APIC hardware INTs: PCI) - 0x30 (48) +------+------+ - | | 2 (start of hardware INTs: ISA) - 0x20 (32) +-------------+ - | | 1 (exceptions, traps, etc.) - 0x10 (16) +-------------+ - | | 0 (exceptions, traps, etc.) - 0x00 (0) +-------------+ - */ - -/* IDT vector base for regular (aka. slow) and fast interrupts */ -#define TPR_SLOW_INTS 0x20 -#define TPR_FAST_INTS 0x60 -/* XXX note that the AST interrupt is at 0x50 */ - -/* blocking values for local APIC Task Priority Register */ -#define TPR_BLOCK_HWI 0x4f /* hardware INTs */ -#define TPR_IGNORE_HWI 0x5f /* ignore INTs */ -#define TPR_BLOCK_FHWI 0x7f /* hardware FAST INTs */ -#define TPR_IGNORE_FHWI 0x8f /* ignore FAST INTs */ -#define TPR_BLOCK_XINVLTLB 0x9f /* */ -#define TPR_BLOCK_XCPUSTOP 0xaf /* */ -#define TPR_BLOCK_ALL 0xff /* all INTs */ - -#ifdef TEST_TEST1 -/* put a 'fake' HWI in top of APIC prio 0x3x, 32 + 31 = 63 = 0x3f */ -#define XTEST1_OFFSET (ICU_OFFSET + 31) -#endif /** TEST_TEST1 */ - -/* TLB shootdowns */ -#define XINVLTLB_OFFSET (ICU_OFFSET + 112) /* 0x90 */ -#define XINVLPG_OFFSET (ICU_OFFSET + 113) /* 0x91 */ -#define XINVLRNG_OFFSET (ICU_OFFSET + 114) /* 0x92 */ - -/* inter-cpu clock handling */ -#define XHARDCLOCK_OFFSET (ICU_OFFSET + 120) /* 0x98 */ -#define XSTATCLOCK_OFFSET (ICU_OFFSET + 121) /* 
0x99 */ - -/* inter-CPU rendezvous */ -#define XRENDEZVOUS_OFFSET (ICU_OFFSET + 122) /* 0x9A */ - -/* lazy pmap release */ -#define XLAZYPMAP_OFFSET (ICU_OFFSET + 123) /* 0x9B */ - -/* IPI to generate an additional software trap at the target CPU */ -/* XXX in the middle of the interrupt range, overlapping IRQ48 */ -#define XCPUAST_OFFSET (ICU_OFFSET + 48) /* 0x50 */ - -/* IPI to signal CPUs to stop and wait for another CPU to restart them */ -#define XCPUSTOP_OFFSET (ICU_OFFSET + 128) /* 0xA0 */ - -/* - * Note: this vector MUST be xxxx1111, 32 + 223 = 255 = 0xff: - */ -#define XSPURIOUSINT_OFFSET (ICU_OFFSET + 223) - -#endif /* SMP || APIC_IO */ - #ifdef LOCORE /* @@ -184,48 +93,6 @@ unpendhand_t IDTVEC(fastunpend27), IDTVEC(fastunpend28), IDTVEC(fastunpend29), IDTVEC(fastunpend30), IDTVEC(fastunpend31); -#if defined(SMP) || defined(APIC_IO) -inthand_t - IDTVEC(fastintr16), IDTVEC(fastintr17), - IDTVEC(fastintr18), IDTVEC(fastintr19), - IDTVEC(fastintr20), IDTVEC(fastintr21), - IDTVEC(fastintr22), IDTVEC(fastintr23), - IDTVEC(fastintr24), IDTVEC(fastintr25), - IDTVEC(fastintr26), IDTVEC(fastintr27), - IDTVEC(fastintr28), IDTVEC(fastintr29), - IDTVEC(fastintr30), IDTVEC(fastintr31); -inthand_t - IDTVEC(intr16), IDTVEC(intr17), IDTVEC(intr18), IDTVEC(intr19), - IDTVEC(intr20), IDTVEC(intr21), IDTVEC(intr22), IDTVEC(intr23), - IDTVEC(intr24), IDTVEC(intr25), IDTVEC(intr26), IDTVEC(intr27), - IDTVEC(intr28), IDTVEC(intr29), IDTVEC(intr30), IDTVEC(intr31); - -inthand_t - Xinvltlb, /* TLB shootdowns - global */ - Xinvlpg, /* TLB shootdowns - 1 page */ - Xinvlrng, /* TLB shootdowns - page range */ - Xhardclock, /* Forward hardclock() */ - Xstatclock, /* Forward statclock() */ - Xcpuast, /* Additional software trap on other cpu */ - Xcpustop, /* CPU stops & waits for another CPU to restart it */ - Xspuriousint, /* handle APIC "spurious INTs" */ - Xrendezvous, /* handle CPU rendezvous */ - Xlazypmap; /* handle lazy pmap release */ - -#ifdef TEST_TEST1 -inthand_t - Xtest1; 
/* 'fake' HWI at top of APIC prio 0x3x, 32+31 = 0x3f */ -#endif /** TEST_TEST1 */ -#endif /* SMP || APIC_IO */ - -#ifdef APIC_IO -/* - * This is to accommodate "mixed-mode" programming for - * motherboards that don't connect the 8254 to the IO APIC. - */ -#define AUTO_EOI_1 1 -#endif - #define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN) void isa_defaultirq(void); diff --git a/sys/amd64/isa/isa.c b/sys/amd64/isa/isa.c index bd1df32..3a2def3 100644 --- a/sys/amd64/isa/isa.c +++ b/sys/amd64/isa/isa.c @@ -61,9 +61,6 @@ #include <sys/malloc.h> #include <machine/bus.h> #include <sys/rman.h> -#ifdef PC98 -#include <sys/systm.h> -#endif #include <machine/resource.h> @@ -131,119 +128,13 @@ isa_alloc_resource(device_t bus, device_t child, int type, int *rid, start, end, count, flags); } -#ifdef PC98 -/* - * Indirection support. The type of bus_space_handle_t is - * defined in sys/i386/include/bus_pc98.h. - */ -struct resource * -isa_alloc_resourcev(device_t child, int type, int *rid, - bus_addr_t *res, bus_size_t count, u_int flags) -{ - struct isa_device* idev = DEVTOISA(child); - struct resource_list *rl = &idev->id_resources; - - device_t bus = device_get_parent(child); - bus_addr_t start; - struct resource *re; - struct resource **bsre; - int i, j, k, linear_cnt, ressz, bsrid; - - start = bus_get_resource_start(child, type, *rid); - - linear_cnt = count; - ressz = 1; - for (i = 1; i < count; ++i) { - if (res[i] != res[i - 1] + 1) { - if (i < linear_cnt) - linear_cnt = i; - ++ressz; - } - } - - re = isa_alloc_resource(bus, child, type, rid, - start + res[0], start + res[linear_cnt - 1], - linear_cnt, flags); - if (re == NULL) - return NULL; - - bsre = malloc(sizeof (struct resource *) * ressz, M_DEVBUF, M_NOWAIT); - if (bsre == NULL) { - resource_list_release(rl, bus, child, type, *rid, re); - return NULL; - } - bsre[0] = re; - - for (i = linear_cnt, k = 1; i < count; i = j, k++) { - for (j = i + 1; j < count; j++) { - if (res[j] != res[j - 1] + 1) - break; - } - bsrid = 
*rid + k; - bsre[k] = isa_alloc_resource(bus, child, type, &bsrid, - start + res[i], start + res[j - 1], j - i, flags); - if (bsre[k] == NULL) { - for (k--; k >= 0; k--) - resource_list_release(rl, bus, child, type, - *rid + k, bsre[k]); - free(bsre, M_DEVBUF); - return NULL; - } - } - - re->r_bushandle->bsh_res = bsre; - re->r_bushandle->bsh_ressz = ressz; - - return re; -} - -int -isa_load_resourcev(struct resource *re, bus_addr_t *res, bus_size_t count) -{ - bus_addr_t start; - int i; - - if (count > re->r_bushandle->bsh_maxiatsz) { - printf("isa_load_resourcev: map size too large\n"); - return EINVAL; - } - - start = rman_get_start(re); - for (i = 0; i < re->r_bushandle->bsh_maxiatsz; i++) { - if (i < count) - re->r_bushandle->bsh_iat[i] = start + res[i]; - else - re->r_bushandle->bsh_iat[i] = start; - } - - re->r_bushandle->bsh_iatsz = count; - re->r_bushandle->bsh_bam = re->r_bustag->bs_ra; /* relocate access */ - - return 0; -} -#endif /* PC98 */ - int isa_release_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { struct isa_device* idev = DEVTOISA(child); struct resource_list *rl = &idev->id_resources; -#ifdef PC98 - /* - * Indirection support. The type of bus_space_handle_t is - * defined in sys/i386/include/bus_pc98.h. 
- */ - int i; - if (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) { - for (i = 1; i < r->r_bushandle->bsh_ressz; i++) - resource_list_release(rl, bus, child, type, rid + i, - r->r_bushandle->bsh_res[i]); - if (r->r_bushandle->bsh_res != NULL) - free(r->r_bushandle->bsh_res, M_DEVBUF); - } -#endif return resource_list_release(rl, bus, child, type, rid, r); } diff --git a/sys/amd64/isa/isa.h b/sys/amd64/isa/isa.h index cc66da9..0b965fb 100644 --- a/sys/amd64/isa/isa.h +++ b/sys/amd64/isa/isa.h @@ -37,10 +37,6 @@ * $FreeBSD$ */ -#ifdef PC98 -#error isa.h is included from PC-9801 source -#endif - #ifndef _I386_ISA_ISA_H_ #define _I386_ISA_ISA_H_ @@ -151,7 +147,6 @@ #define IO_COMSIZE 8 /* 8250, 16x50 com controllers */ #define IO_DMASIZE 16 /* 8237 DMA controllers */ #define IO_DPGSIZE 32 /* 74LS612 DMA page registers */ -#define IO_EISASIZE 256 /* EISA controllers */ #define IO_FDCSIZE 8 /* Nec765 floppy controllers */ #define IO_GAMSIZE 16 /* AT compatible game controllers */ #define IO_GSCSIZE 8 /* GeniScan GS-4500G hand scanner */ @@ -169,32 +164,4 @@ #endif /* !IO_ISASIZES */ -/* - * Input / Output Memory Physical Addresses - */ -#ifndef IOM_BEGIN -#define IOM_BEGIN 0x0A0000 /* Start of I/O Memory "hole" */ -#define IOM_END 0x100000 /* End of I/O Memory "hole" */ -#define IOM_SIZE (IOM_END - IOM_BEGIN) -#endif /* !IOM_BEGIN */ - -/* - * RAM Physical Address Space (ignoring the above mentioned "hole") - */ -#ifndef RAM_BEGIN -#define RAM_BEGIN 0x0000000 /* Start of RAM Memory */ -#define RAM_END 0x1000000 /* End of RAM Memory */ -#define RAM_SIZE (RAM_END - RAM_BEGIN) -#endif /* !RAM_BEGIN */ - -/* - * Oddball Physical Memory Addresses - */ -#ifndef COMPAQ_RAMRELOC -#define COMPAQ_RAMRELOC 0x80C00000 /* Compaq RAM relocation/diag */ -#define COMPAQ_RAMSETUP 0x80C00002 /* Compaq RAM setup */ -#define WEITEK_FPU 0xC0000000 /* WTL 2167 */ -#define CYRIX_EMC 0xC0000000 /* Cyrix EMC */ -#endif /* !COMPAQ_RAMRELOC */ - #endif /* !_I386_ISA_ISA_H_ */ diff --git 
a/sys/amd64/isa/isa_dma.c b/sys/amd64/isa/isa_dma.c index 381bd75..6be4ae0 100644 --- a/sys/amd64/isa/isa_dma.c +++ b/sys/amd64/isa/isa_dma.c @@ -59,7 +59,7 @@ #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/pmap.h> -#include <i386/isa/isa.h> +#include <amd64/isa/isa.h> #include <dev/ic/i8237.h> #include <isa/isavar.h> @@ -79,6 +79,8 @@ #define DMA2_MODE (IO_DMA2 + 2*11) /* mode register */ #define DMA2_FFC (IO_DMA2 + 2*12) /* clear first/last FF */ +#define ISARAM_END 0x1000000 + static int isa_dmarangecheck(caddr_t va, u_int length, int chan); static caddr_t dma_bouncebuf[8]; @@ -382,7 +384,6 @@ isa_dmarangecheck(caddr_t va, u_int length, int chan) endva = (vm_offset_t)round_page((vm_offset_t)va + length); for (; va < (caddr_t) endva ; va += PAGE_SIZE) { phys = trunc_page(pmap_extract(kernel_pmap, (vm_offset_t)va)); -#define ISARAM_END RAM_END if (phys == 0) panic("isa_dmacheck: no physical page present"); if (phys >= ISARAM_END) diff --git a/sys/amd64/isa/ithread.c b/sys/amd64/isa/ithread.c index ee61984..6455776 100644 --- a/sys/amd64/isa/ithread.c +++ b/sys/amd64/isa/ithread.c @@ -37,8 +37,9 @@ #include <sys/systm.h> #include <sys/vmmeter.h> -#include <i386/isa/icu.h> -#include <i386/isa/intr_machdep.h> +#include <amd64/isa/icu.h> +#include <amd64/isa/intr_machdep.h> +#include <amd64/isa/isa.h> struct int_entropy { struct proc *p; @@ -46,6 +47,8 @@ struct int_entropy { }; static u_int straycount[ICU_LEN]; +static u_int glitchcount7; +static u_int glitchcount15; #define MAX_STRAY_LOG 5 @@ -56,9 +59,9 @@ static u_int straycount[ICU_LEN]; void sched_ithd(void *cookie) { - int irq = (int) cookie; /* IRQ we're handling */ + int irq = (uintptr_t) cookie; /* IRQ we're handling */ struct ithd *ithd = ithds[irq]; /* and the process that does it */ - int error; + int error, isr; /* This used to be in icu_vector.s */ /* @@ -79,7 +82,26 @@ sched_ithd(void *cookie) /* * Log stray interrupts. 
*/ - if (error == EINVAL) + if (error == EINVAL) { + /* Determine if it is a stray interrupt or simply a glitch */ + if (irq == 7) { + outb(IO_ICU1, OCW3_SEL); /* select IS register */ + isr = inb(IO_ICU1); + outb(IO_ICU1, OCW3_SEL | OCW3_RIS); /* reselect IIR */ + if ((isr & 0x80) == 0) { + glitchcount7++; + return; + } + } + if (irq == 15) { + outb(IO_ICU2, OCW3_SEL); /* select IS register */ + isr = inb(IO_ICU2); + outb(IO_ICU2, OCW3_SEL | OCW3_RIS); /* reselect IIR */ + if ((isr & 0x80) == 0) { + glitchcount15++; + return; + } + } if (straycount[irq] < MAX_STRAY_LOG) { printf("stray irq %d\n", irq); if (++straycount[irq] == MAX_STRAY_LOG) @@ -87,4 +109,5 @@ sched_ithd(void *cookie) "got %d stray irq %d's: not logging anymore\n", MAX_STRAY_LOG, irq); } + } } diff --git a/sys/amd64/isa/npx.c b/sys/amd64/isa/npx.c index 8dadb6b..2116701 100644 --- a/sys/amd64/isa/npx.c +++ b/sys/amd64/isa/npx.c @@ -35,11 +35,8 @@ * $FreeBSD$ */ -#include "opt_cpu.h" #include "opt_debug_npx.h" #include "opt_isa.h" -#include "opt_math_emulate.h" -#include "opt_npx.h" #include <sys/param.h> #include <sys/systm.h> @@ -60,66 +57,34 @@ #include <sys/signalvar.h> #include <sys/user.h> -#ifndef SMP -#include <machine/asmacros.h> -#endif #include <machine/cputypes.h> #include <machine/frame.h> #include <machine/md_var.h> #include <machine/pcb.h> #include <machine/psl.h> -#ifndef SMP -#include <machine/clock.h> -#endif #include <machine/resource.h> #include <machine/specialreg.h> #include <machine/segments.h> #include <machine/ucontext.h> -#ifndef SMP -#include <i386/isa/icu.h> -#ifdef PC98 -#include <pc98/pc98/pc98.h> -#else -#include <i386/isa/isa.h> -#endif -#endif -#include <i386/isa/intr_machdep.h> +#include <amd64/isa/intr_machdep.h> #ifdef DEV_ISA #include <isa/isavar.h> #endif -#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU) -#define CPU_ENABLE_SSE -#endif -#if defined(CPU_DISABLE_SSE) -#undef CPU_ENABLE_SSE -#endif - /* * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. 
*/ -/* Configuration flags. */ -#define NPX_DISABLE_I586_OPTIMIZED_BCOPY (1 << 0) -#define NPX_DISABLE_I586_OPTIMIZED_BZERO (1 << 1) -#define NPX_DISABLE_I586_OPTIMIZED_COPYIO (1 << 2) -#define NPX_PREFER_EMULATOR (1 << 3) - #if defined(__GNUC__) && !defined(lint) #define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) #define fnclex() __asm("fnclex") #define fninit() __asm("fninit") -#define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr))) #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr))) -#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop") -#define frstor(addr) __asm("frstor %0" : : "m" (*(addr))) -#ifdef CPU_ENABLE_SSE #define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) -#endif #define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \ : : "n" (CR0_TS) : "ax") #define stop_emulating() __asm("clts") @@ -129,81 +94,31 @@ void fldcw(caddr_t addr); void fnclex(void); void fninit(void); -void fnsave(caddr_t addr); void fnstcw(caddr_t addr); void fnstsw(caddr_t addr); -void fp_divide_by_0(void); -void frstor(caddr_t addr); -#ifdef CPU_ENABLE_SSE void fxsave(caddr_t addr); void fxrstor(caddr_t addr); -#endif void start_emulating(void); void stop_emulating(void); #endif /* __GNUC__ */ -#ifdef CPU_ENABLE_SSE -#define GET_FPU_CW(thread) \ - (cpu_fxsr ? \ - (thread)->td_pcb->pcb_save.sv_xmm.sv_env.en_cw : \ - (thread)->td_pcb->pcb_save.sv_87.sv_env.en_cw) -#define GET_FPU_SW(thread) \ - (cpu_fxsr ? 
\ - (thread)->td_pcb->pcb_save.sv_xmm.sv_env.en_sw : \ - (thread)->td_pcb->pcb_save.sv_87.sv_env.en_sw) -#else /* CPU_ENABLE_SSE */ -#define GET_FPU_CW(thread) \ - (thread->td_pcb->pcb_save.sv_87.sv_env.en_cw) -#define GET_FPU_SW(thread) \ - (thread->td_pcb->pcb_save.sv_87.sv_env.en_sw) -#endif /* CPU_ENABLE_SSE */ +#define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save.sv_env.en_cw) +#define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save.sv_env.en_sw) typedef u_char bool_t; -static void fpusave(union savefpu *); -static void fpurstor(union savefpu *); static int npx_attach(device_t dev); static void npx_identify(driver_t *driver, device_t parent); -#ifndef SMP -static void npx_intr(void *); -#endif static int npx_probe(device_t dev); -#ifdef I586_CPU_XXX -static long timezero(const char *funcname, - void (*func)(void *buf, size_t len)); -#endif /* I586_CPU */ - -int hw_float; /* XXX currently just alias for npx_exists */ +int hw_float = 1; SYSCTL_INT(_hw,HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, &hw_float, 0, "Floatingpoint instructions executed in hardware"); -#ifndef SMP -static volatile u_int npx_intrs_while_probing; -static volatile u_int npx_traps_while_probing; -#endif - -static union savefpu npx_cleanstate; +static struct savefpu npx_cleanstate; static bool_t npx_cleanstate_ready; -static bool_t npx_ex16; -static bool_t npx_exists; -static bool_t npx_irq13; - -#ifndef SMP -alias_for_inthand_t probetrap; -__asm(" \n\ - .text \n\ - .p2align 2,0x90 \n\ - .type " __XSTRING(CNAME(probetrap)) ",@function \n\ -" __XSTRING(CNAME(probetrap)) ": \n\ - ss \n\ - incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\ - fnclex \n\ - iret \n\ -"); -#endif /* SMP */ /* * Identify routine. Create a connection point on our parent for probing. @@ -220,104 +135,23 @@ npx_identify(driver, parent) panic("npx_identify"); } -#ifndef SMP -/* - * Do minimal handling of npx interrupts to convert them to traps. 
- */ -static void -npx_intr(dummy) - void *dummy; -{ - struct thread *td; - -#ifndef SMP - npx_intrs_while_probing++; -#endif - - /* - * The BUSY# latch must be cleared in all cases so that the next - * unmasked npx exception causes an interrupt. - */ -#ifdef PC98 - outb(0xf8, 0); -#else - outb(0xf0, 0); -#endif - - /* - * fpcurthread is normally non-null here. In that case, schedule an - * AST to finish the exception handling in the correct context - * (this interrupt may occur after the thread has entered the - * kernel via a syscall or an interrupt). Otherwise, the npx - * state of the thread that caused this interrupt must have been - * pushed to the thread's pcb, and clearing of the busy latch - * above has finished the (essentially null) handling of this - * interrupt. Control will eventually return to the instruction - * that caused it and it will repeat. We will eventually (usually - * soon) win the race to handle the interrupt properly. - */ - td = PCPU_GET(fpcurthread); - if (td != NULL) { - td->td_pcb->pcb_flags |= PCB_NPXTRAP; - mtx_lock_spin(&sched_lock); - td->td_flags |= TDF_ASTPENDING; - mtx_unlock_spin(&sched_lock); - } -} -#endif /* !SMP */ - /* * Probe routine. Initialize cr0 to give correct behaviour for [f]wait - * whether the device exists or not (XXX should be elsewhere). Set flags - * to tell npxattach() what to do. Modify device struct if npx doesn't - * need to use interrupts. Return 0 if device exists. + * whether the device exists or not (XXX should be elsewhere). + * Modify device struct if npx doesn't need to use interrupts. + * Return 0 if device exists. 
*/ static int npx_probe(dev) device_t dev; { -#ifndef SMP - struct gate_descriptor save_idt_npxtrap; - struct resource *ioport_res, *irq_res; - void *irq_cookie; - int ioport_rid, irq_num, irq_rid; - u_short control; - u_short status; - - save_idt_npxtrap = idt[16]; - setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - ioport_rid = 0; - ioport_res = bus_alloc_resource(dev, SYS_RES_IOPORT, &ioport_rid, - IO_NPX, IO_NPX, IO_NPXSIZE, RF_ACTIVE); - if (ioport_res == NULL) - panic("npx: can't get ports"); -#ifdef PC98 - if (resource_int_value("npx", 0, "irq", &irq_num) != 0) - irq_num = 8; -#else - if (resource_int_value("npx", 0, "irq", &irq_num) != 0) - irq_num = 13; -#endif - irq_rid = 0; - irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &irq_rid, irq_num, - irq_num, 1, RF_ACTIVE); - if (irq_res == NULL) - panic("npx: can't get IRQ"); - if (bus_setup_intr(dev, irq_res, INTR_TYPE_MISC | INTR_FAST, npx_intr, - NULL, &irq_cookie) != 0) - panic("npx: can't create intr"); -#endif /* !SMP */ /* * Partially reset the coprocessor, if any. Some BIOS's don't reset * it after a warm boot. */ -#ifdef PC98 - outb(0xf8,0); -#else outb(0xf1, 0); /* full reset on some systems, NOP on others */ outb(0xf0, 0); /* clear BUSY# latch */ -#endif /* * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT * instructions. We must set the CR0_MP bit and use the CR0_TS @@ -338,116 +172,13 @@ npx_probe(dev) */ stop_emulating(); /* - * Finish resetting the coprocessor, if any. If there is an error - * pending, then we may get a bogus IRQ13, but npx_intr() will handle - * it OK. Bogus halts have never been observed, but we enabled - * IRQ13 and cleared the BUSY# latch early to handle them anyway. + * Finish resetting the coprocessor. */ fninit(); device_set_desc(dev, "math processor"); -#ifdef SMP - - /* - * Exception 16 MUST work for SMP. 
- */ - npx_ex16 = hw_float = npx_exists = 1; - return (0); - -#else /* !SMP */ - - /* - * Don't use fwait here because it might hang. - * Don't use fnop here because it usually hangs if there is no FPU. - */ - DELAY(1000); /* wait for any IRQ13 */ -#ifdef DIAGNOSTIC - if (npx_intrs_while_probing != 0) - printf("fninit caused %u bogus npx interrupt(s)\n", - npx_intrs_while_probing); - if (npx_traps_while_probing != 0) - printf("fninit caused %u bogus npx trap(s)\n", - npx_traps_while_probing); -#endif - /* - * Check for a status of mostly zero. - */ - status = 0x5a5a; - fnstsw(&status); - if ((status & 0xb8ff) == 0) { - /* - * Good, now check for a proper control word. - */ - control = 0x5a5a; - fnstcw(&control); - if ((control & 0x1f3f) == 0x033f) { - hw_float = npx_exists = 1; - /* - * We have an npx, now divide by 0 to see if exception - * 16 works. - */ - control &= ~(1 << 2); /* enable divide by 0 trap */ - fldcw(&control); -#ifdef FPU_ERROR_BROKEN - /* - * FPU error signal doesn't work on some CPU - * accelerator board. - */ - npx_ex16 = 1; - return (0); -#endif - npx_traps_while_probing = npx_intrs_while_probing = 0; - fp_divide_by_0(); - if (npx_traps_while_probing != 0) { - /* - * Good, exception 16 works. - */ - npx_ex16 = 1; - goto no_irq13; - } - if (npx_intrs_while_probing != 0) { - /* - * Bad, we are stuck with IRQ13. - */ - npx_irq13 = 1; - idt[16] = save_idt_npxtrap; - return (0); - } - /* - * Worse, even IRQ13 is broken. Use emulator. - */ - } - } - /* - * Probe failed, but we want to get to npxattach to initialize the - * emulator and say that it has been installed. XXX handle devices - * that aren't really devices better. - */ - /* FALLTHROUGH */ -no_irq13: - idt[16] = save_idt_npxtrap; - bus_teardown_intr(dev, irq_res, irq_cookie); - - /* - * XXX hack around brokenness of bus_teardown_intr(). If we left the - * irq active then we would get it instead of exception 16. 
- */ - { - register_t crit; - - crit = intr_disable(); - mtx_lock_spin(&icu_lock); - INTRDIS(1 << irq_num); - mtx_unlock_spin(&icu_lock); - intr_restore(crit); - } - - bus_release_resource(dev, SYS_RES_IRQ, irq_rid, irq_res); - bus_release_resource(dev, SYS_RES_IOPORT, ioport_rid, ioport_res); return (0); - -#endif /* SMP */ } /* @@ -457,67 +188,19 @@ static int npx_attach(dev) device_t dev; { - int flags; register_t s; - if (resource_int_value("npx", 0, "flags", &flags) != 0) - flags = 0; - - if (flags) - device_printf(dev, "flags 0x%x ", flags); - if (npx_irq13) { - device_printf(dev, "using IRQ 13 interface\n"); - } else { -#if defined(MATH_EMULATE) || defined(GPL_MATH_EMULATE) - if (npx_ex16) { - if (!(flags & NPX_PREFER_EMULATOR)) - device_printf(dev, "INT 16 interface\n"); - else { - device_printf(dev, "FPU exists, but flags request " - "emulator\n"); - hw_float = npx_exists = 0; - } - } else if (npx_exists) { - device_printf(dev, "error reporting broken; using 387 emulator\n"); - hw_float = npx_exists = 0; - } else - device_printf(dev, "387 emulator\n"); -#else - if (npx_ex16) { - device_printf(dev, "INT 16 interface\n"); - if (flags & NPX_PREFER_EMULATOR) { - device_printf(dev, "emulator requested, but none compiled " - "into kernel, using FPU\n"); - } - } else - device_printf(dev, "no 387 emulator in kernel and no FPU!\n"); -#endif - } + device_printf(dev, "INT 16 interface\n"); npxinit(__INITIAL_NPXCW__); if (npx_cleanstate_ready == 0) { s = intr_disable(); stop_emulating(); - fpusave(&npx_cleanstate); + fxsave(&npx_cleanstate); start_emulating(); npx_cleanstate_ready = 1; intr_restore(s); } -#ifdef I586_CPU_XXX - if (cpu_class == CPUCLASS_586 && npx_ex16 && npx_exists && - timezero("i586_bzero()", i586_bzero) < - timezero("bzero()", bzero) * 4 / 5) { - if (!(flags & NPX_DISABLE_I586_OPTIMIZED_BCOPY)) - bcopy_vector = i586_bcopy; - if (!(flags & NPX_DISABLE_I586_OPTIMIZED_BZERO)) - bzero_vector = i586_bzero; - if (!(flags & 
NPX_DISABLE_I586_OPTIMIZED_COPYIO)) { - copyin_vector = i586_copyin; - copyout_vector = i586_copyout; - } - } -#endif - return (0); /* XXX unused */ } @@ -528,11 +211,9 @@ void npxinit(control) u_short control; { - static union savefpu dummy; + static struct savefpu dummy; register_t savecrit; - if (!npx_exists) - return; /* * fninit has the same h/w bugs as fnsave. Use the detoxified * fnsave to throw away any junk in the fpu. npxsave() initializes @@ -541,11 +222,8 @@ npxinit(control) savecrit = intr_disable(); npxsave(&dummy); stop_emulating(); -#ifdef CPU_ENABLE_SSE /* XXX npxsave() doesn't actually initialize the fpu in the SSE case. */ - if (cpu_fxsr) - fninit(); -#endif + fninit(); fldcw(&control); start_emulating(); intr_restore(savecrit); @@ -558,6 +236,9 @@ void npxexit(td) struct thread *td; { +#ifdef NPX_DEBUG + u_int masked_exceptions; +#endif register_t savecrit; savecrit = intr_disable(); @@ -565,20 +246,16 @@ npxexit(td) npxsave(&PCPU_GET(curpcb)->pcb_save); intr_restore(savecrit); #ifdef NPX_DEBUG - if (npx_exists) { - u_int masked_exceptions; - - masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f; - /* - * Log exceptions that would have trapped with the old - * control word (overflow, divide by 0, and invalid operand). - */ - if (masked_exceptions & 0x0d) - log(LOG_ERR, - "pid %d (%s) exited with masked floating point exceptions 0x%02x\n", - td->td_proc->p_pid, td->td_proc->p_comm, - masked_exceptions); - } + masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f; + /* + * Log exceptions that would have trapped with the old + * control word (overflow, divide by 0, and invalid operand). 
+ */ + if (masked_exceptions & 0x0d) + log(LOG_ERR, +"pid %d (%s) exited with masked floating point exceptions 0x%02x\n", + td->td_proc->p_pid, td->td_proc->p_comm, + masked_exceptions); #endif } @@ -586,13 +263,7 @@ int npxformat() { - if (!npx_exists) - return (_MC_FPFMT_NODEV); -#ifdef CPU_ENABLE_SSE - if (cpu_fxsr) - return (_MC_FPFMT_XMM); -#endif - return (_MC_FPFMT_387); + return (_MC_FPFMT_XMM); } /* @@ -789,11 +460,6 @@ npxtrap() register_t savecrit; u_short control, status; - if (!npx_exists) { - printf("npxtrap: fpcurthread = %p, curthread = %p, npx_exists = %d\n", - PCPU_GET(fpcurthread), curthread, npx_exists); - panic("npxtrap from nowhere"); - } savecrit = intr_disable(); /* @@ -832,8 +498,6 @@ npxdna() register_t s; u_short control; - if (!npx_exists) - return (0); if (PCPU_GET(fpcurthread) == curthread) { printf("npxdna: fpcurthread == curthread %d times\n", ++err_count); @@ -867,20 +531,15 @@ npxdna() pcb->pcb_flags |= PCB_NPXINITDONE; } else { /* - * The following frstor may cause an IRQ13 when the state + * The following frstor may cause a trap when the state * being restored has a pending error. The error will * appear to have been triggered by the current (npx) user * instruction even when that instruction is a no-wait * instruction that should not trigger an error (e.g., - * fnclex). On at least one 486 system all of the no-wait * instructions are broken the same as frstor, so our - * treatment does not amplify the breakage. On at least - * one 386/Cyrix 387 system, fnclex works correctly while - * frstor and fnsave are broken, so our treatment breaks - * fnclex if it is the first FPU instruction after a context - * switch. + * treatment does not amplify the breakage. 
*/ - fpurstor(&pcb->pcb_save); + fxrstor(&pcb->pcb_save); } intr_restore(s); @@ -912,11 +571,11 @@ npxdna() */ void npxsave(addr) - union savefpu *addr; + struct savefpu *addr; { stop_emulating(); - fpusave(addr); + fxsave(addr); start_emulating(); PCPU_SET(fpcurthread, NULL); @@ -944,13 +603,10 @@ npxdrop() int npxgetregs(td, addr) struct thread *td; - union savefpu *addr; + struct savefpu *addr; { register_t s; - if (!npx_exists) - return (_MC_FPOWNED_NONE); - if ((td->td_pcb->pcb_flags & PCB_NPXINITDONE) == 0) { if (npx_cleanstate_ready) bcopy(&npx_cleanstate, addr, sizeof(npx_cleanstate)); @@ -960,16 +616,7 @@ npxgetregs(td, addr) } s = intr_disable(); if (td == PCPU_GET(fpcurthread)) { - fpusave(addr); -#ifdef CPU_ENABLE_SSE - if (!cpu_fxsr) -#endif - /* - * fnsave initializes the FPU and destroys whatever - * context it contains. Make sure the FPU owner - * starts with a clean state next time. - */ - npxdrop(); + fxsave(addr); intr_restore(s); return (_MC_FPOWNED_FPU); } else { @@ -985,16 +632,13 @@ npxgetregs(td, addr) void npxsetregs(td, addr) struct thread *td; - union savefpu *addr; + struct savefpu *addr; { register_t s; - if (!npx_exists) - return; - s = intr_disable(); if (td == PCPU_GET(fpcurthread)) { - fpurstor(addr); + fxrstor(addr); intr_restore(s); } else { intr_restore(s); @@ -1003,62 +647,6 @@ npxsetregs(td, addr) curthread->td_pcb->pcb_flags |= PCB_NPXINITDONE; } -static void -fpusave(addr) - union savefpu *addr; -{ - -#ifdef CPU_ENABLE_SSE - if (cpu_fxsr) - fxsave(addr); - else -#endif - fnsave(addr); -} - -static void -fpurstor(addr) - union savefpu *addr; -{ - -#ifdef CPU_ENABLE_SSE - if (cpu_fxsr) - fxrstor(addr); - else -#endif - frstor(addr); -} - -#ifdef I586_CPU_XXX -static long -timezero(funcname, func) - const char *funcname; - void (*func)(void *buf, size_t len); - -{ - void *buf; -#define BUFSIZE 1048576 - long usec; - struct timeval finish, start; - - buf = malloc(BUFSIZE, M_TEMP, M_NOWAIT); - if (buf == NULL) - return (BUFSIZE); 
- microtime(&start); - (*func)(buf, BUFSIZE); - microtime(&finish); - usec = 1000000 * (finish.tv_sec - start.tv_sec) + - finish.tv_usec - start.tv_usec; - if (usec <= 0) - usec = 1; - if (bootverbose) - printf("%s bandwidth = %u kBps\n", funcname, - (u_int32_t)(((BUFSIZE >> 10) * 1000000) / usec)); - free(buf, M_TEMP); - return (usec); -} -#endif /* I586_CPU */ - static device_method_t npx_methods[] = { /* Device interface */ DEVMETHOD(device_identify, npx_identify), @@ -1080,13 +668,13 @@ static driver_t npx_driver = { static devclass_t npx_devclass; -#ifdef DEV_ISA /* * We prefer to attach to the root nexus so that the usual case (exception 16) * doesn't describe the processor as being `on isa'. */ DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0); +#ifdef DEV_ISA /* * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. */ @@ -1132,7 +720,5 @@ static driver_t npxisa_driver = { static devclass_t npxisa_devclass; DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0); -#ifndef PC98 DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0); -#endif #endif /* DEV_ISA */ diff --git a/sys/amd64/isa/timerreg.h b/sys/amd64/isa/timerreg.h index 0bfd7fc..9810826 100644 --- a/sys/amd64/isa/timerreg.h +++ b/sys/amd64/isa/timerreg.h @@ -84,15 +84,9 @@ * Macros for specifying values to be written into a mode register. 
*/ #define TIMER_CNTR0 (IO_TIMER1 + 0) /* timer 0 counter port */ -#ifdef PC98 -#define TIMER_CNTR1 0x3fdb /* timer 1 counter port */ -#define TIMER_CNTR2 (IO_TIMER1 + 4) /* timer 2 counter port */ -#define TIMER_MODE (IO_TIMER1 + 6) /* timer mode port */ -#else #define TIMER_CNTR1 (IO_TIMER1 + 1) /* timer 1 counter port */ #define TIMER_CNTR2 (IO_TIMER1 + 2) /* timer 2 counter port */ #define TIMER_MODE (IO_TIMER1 + 3) /* timer mode port */ -#endif #define TIMER_SEL0 0x00 /* select counter 0 */ #define TIMER_SEL1 0x40 /* select counter 1 */ #define TIMER_SEL2 0x80 /* select counter 2 */ diff --git a/sys/amd64/isa/vector.S b/sys/amd64/isa/vector.S index 0bed76f..0fc5334 100644 --- a/sys/amd64/isa/vector.S +++ b/sys/amd64/isa/vector.S @@ -3,37 +3,9 @@ * $FreeBSD$ */ -/* - * modified for PC98 by Kakefuda - */ - -#include "opt_auto_eoi.h" - -#include <i386/isa/icu.h> -#ifdef PC98 -#include <pc98/pc98/pc98.h> -#else -#include <i386/isa/isa.h> -#endif -#include <i386/isa/intr_machdep.h> - -#define FAST_INTR_HANDLER_USES_ES 1 -#ifdef FAST_INTR_HANDLER_USES_ES -#define ACTUALLY_PUSHED 1 -#define MAYBE_MOVW_AX_ES movw %ax,%es -#define MAYBE_POPL_ES popl %es -#define MAYBE_PUSHL_ES pushl %es -#else -/* - * We can usually skip loading %es for fastintr handlers. %es should - * only be used for string instructions, and fastintr handlers shouldn't - * do anything slow enough to justify using a string instruction. - */ -#define ACTUALLY_PUSHED 0 -#define MAYBE_MOVW_AX_ES -#define MAYBE_POPL_ES -#define MAYBE_PUSHL_ES -#endif +#include <amd64/isa/icu.h> +#include <amd64/isa/isa.h> +#include <amd64/isa/intr_machdep.h> .data ALIGN_DATA @@ -47,12 +19,12 @@ .globl intrcnt, eintrcnt intrcnt: - .space INTRCNT_COUNT * 4 + .space INTRCNT_COUNT * 8 eintrcnt: .globl intrnames, eintrnames intrnames: - .space INTRCNT_COUNT * 16 + .space INTRCNT_COUNT * 32 eintrnames: .text @@ -101,8 +73,4 @@ eintrnames: * loading segregs. 
*/ -#ifdef APIC_IO -#include "i386/isa/apic_vector.s" -#else -#include "i386/isa/icu_vector.s" -#endif /* APIC_IO */ +#include "amd64/isa/icu_vector.s" diff --git a/sys/amd64/isa/vector.s b/sys/amd64/isa/vector.s index 0bed76f..0fc5334 100644 --- a/sys/amd64/isa/vector.s +++ b/sys/amd64/isa/vector.s @@ -3,37 +3,9 @@ * $FreeBSD$ */ -/* - * modified for PC98 by Kakefuda - */ - -#include "opt_auto_eoi.h" - -#include <i386/isa/icu.h> -#ifdef PC98 -#include <pc98/pc98/pc98.h> -#else -#include <i386/isa/isa.h> -#endif -#include <i386/isa/intr_machdep.h> - -#define FAST_INTR_HANDLER_USES_ES 1 -#ifdef FAST_INTR_HANDLER_USES_ES -#define ACTUALLY_PUSHED 1 -#define MAYBE_MOVW_AX_ES movw %ax,%es -#define MAYBE_POPL_ES popl %es -#define MAYBE_PUSHL_ES pushl %es -#else -/* - * We can usually skip loading %es for fastintr handlers. %es should - * only be used for string instructions, and fastintr handlers shouldn't - * do anything slow enough to justify using a string instruction. - */ -#define ACTUALLY_PUSHED 0 -#define MAYBE_MOVW_AX_ES -#define MAYBE_POPL_ES -#define MAYBE_PUSHL_ES -#endif +#include <amd64/isa/icu.h> +#include <amd64/isa/isa.h> +#include <amd64/isa/intr_machdep.h> .data ALIGN_DATA @@ -47,12 +19,12 @@ .globl intrcnt, eintrcnt intrcnt: - .space INTRCNT_COUNT * 4 + .space INTRCNT_COUNT * 8 eintrcnt: .globl intrnames, eintrnames intrnames: - .space INTRCNT_COUNT * 16 + .space INTRCNT_COUNT * 32 eintrnames: .text @@ -101,8 +73,4 @@ eintrnames: * loading segregs. 
*/ -#ifdef APIC_IO -#include "i386/isa/apic_vector.s" -#else -#include "i386/isa/icu_vector.s" -#endif /* APIC_IO */ +#include "amd64/isa/icu_vector.s" diff --git a/sys/amd64/pci/pci_bus.c b/sys/amd64/pci/pci_bus.c index 9e28868..4465e7a 100644 --- a/sys/amd64/pci/pci_bus.c +++ b/sys/amd64/pci/pci_bus.c @@ -44,14 +44,10 @@ #include <machine/pci_cfgreg.h> #include <machine/segments.h> #include <machine/cputypes.h> -#include <machine/pc/bios.h> #include <machine/md_var.h> #include "pcib_if.h" -static int pcibios_pcib_route_interrupt(device_t pcib, device_t dev, - int pin); - static int nexus_pcib_maxslots(device_t dev) { @@ -81,7 +77,9 @@ nexus_pcib_write_config(device_t dev, int bus, int slot, int func, static int nexus_pcib_route_interrupt(device_t pcib, device_t dev, int pin) { - return (pcibios_pcib_route_interrupt(pcib, dev, pin)); + + /* No routing possible */ + return (PCI_INVALID_IRQ); } static const char * @@ -605,67 +603,3 @@ static driver_t pcibus_pnp_driver = { static devclass_t pcibus_pnp_devclass; DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0); - - -/* - * Provide a PCI-PCI bridge driver for PCI busses behind PCI-PCI bridges - * that appear in the PCIBIOS Interrupt Routing Table to use the routing - * table for interrupt routing when possible. 
- */ -static int pcibios_pcib_probe(device_t bus); - -static device_method_t pcibios_pcib_pci_methods[] = { - /* Device interface */ - DEVMETHOD(device_probe, pcibios_pcib_probe), - DEVMETHOD(device_attach, pcib_attach), - DEVMETHOD(device_shutdown, bus_generic_shutdown), - DEVMETHOD(device_suspend, bus_generic_suspend), - DEVMETHOD(device_resume, bus_generic_resume), - - /* Bus interface */ - DEVMETHOD(bus_print_child, bus_generic_print_child), - DEVMETHOD(bus_read_ivar, pcib_read_ivar), - DEVMETHOD(bus_write_ivar, pcib_write_ivar), - DEVMETHOD(bus_alloc_resource, pcib_alloc_resource), - DEVMETHOD(bus_release_resource, bus_generic_release_resource), - DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), - DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), - DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), - DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), - - /* pcib interface */ - DEVMETHOD(pcib_maxslots, pcib_maxslots), - DEVMETHOD(pcib_read_config, pcib_read_config), - DEVMETHOD(pcib_write_config, pcib_write_config), - DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt), - - {0, 0} -}; - -static driver_t pcibios_pcib_driver = { - "pcib", - pcibios_pcib_pci_methods, - sizeof(struct pcib_softc), -}; - -DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, pcib_devclass, 0, 0); - -static int -pcibios_pcib_probe(device_t dev) -{ - - if ((pci_get_class(dev) != PCIC_BRIDGE) || - (pci_get_subclass(dev) != PCIS_BRIDGE_PCI)) - return (ENXIO); - if (pci_probe_route_table(pcib_get_bus(dev)) == 0) - return (ENXIO); - device_set_desc(dev, "PCIBIOS PCI-PCI bridge"); - return (-2000); -} - -static int -pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin) -{ - return(pci_cfgintr(pci_get_bus(dev), pci_get_slot(dev), pin, - pci_get_irq(dev))); -} diff --git a/sys/amd64/pci/pci_cfgreg.c b/sys/amd64/pci/pci_cfgreg.c index 4a174bd..2352c30 100644 --- a/sys/amd64/pci/pci_cfgreg.c +++ b/sys/amd64/pci/pci_cfgreg.c @@ 
-44,76 +44,18 @@ #include <dev/pci/pcireg.h> #include <isa/isavar.h> #include <machine/pci_cfgreg.h> -#include <machine/segments.h> -#include <machine/pc/bios.h> - -#ifdef APIC_IO -#include <machine/smp.h> -#endif /* APIC_IO */ #include "pcib_if.h" -#define PRVERB(a) do { \ - if (bootverbose) \ - printf a ; \ -} while(0) - static int cfgmech; static int devmax; -static int pci_cfgintr_valid(struct PIR_entry *pe, int pin, int irq); -static int pci_cfgintr_unique(struct PIR_entry *pe, int pin); -static int pci_cfgintr_linked(struct PIR_entry *pe, int pin); -static int pci_cfgintr_search(struct PIR_entry *pe, int bus, int device, int matchpin, int pin); -static int pci_cfgintr_virgin(struct PIR_entry *pe, int pin); - -static void pci_print_irqmask(u_int16_t irqs); -static void pci_print_route_table(struct PIR_table *prt, int size); static int pcireg_cfgread(int bus, int slot, int func, int reg, int bytes); static void pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes); static int pcireg_cfgopen(void); -static struct PIR_table *pci_route_table; -static int pci_route_count; - static struct mtx pcicfg_mtx; -/* - * Some BIOS writers seem to want to ignore the spec and put - * 0 in the intline rather than 255 to indicate none. Some use - * numbers in the range 128-254 to indicate something strange and - * apparently undocumented anywhere. Assume these are completely bogus - * and map them to 255, which means "none". 
- */ -static __inline__ int -pci_i386_map_intline(int line) -{ - if (line == 0 || line >= 128) - return (PCI_INVALID_IRQ); - return (line); -} - -static u_int16_t -pcibios_get_version(void) -{ - struct bios_regs args; - - if (PCIbios.ventry == 0) { - PRVERB(("pcibios: No call entry point\n")); - return (0); - } - args.eax = PCIBIOS_BIOS_PRESENT; - if (bios32(&args, PCIbios.ventry, GSEL(GCODE_SEL, SEL_KPL))) { - PRVERB(("pcibios: BIOS_PRESENT call failed\n")); - return (0); - } - if (args.edx != 0x20494350) { - PRVERB(("pcibios: BIOS_PRESENT didn't return 'PCI ' in edx\n")); - return (0); - } - return (args.ebx & 0xffff); -} - /* * Initialise access to PCI configuration space */ @@ -121,59 +63,14 @@ int pci_cfgregopen(void) { static int opened = 0; - u_long sigaddr; - static struct PIR_table *pt; - u_int16_t v; - u_int8_t ck, *cv; - int i; if (opened) - return(1); - + return (1); if (pcireg_cfgopen() == 0) - return(0); - - v = pcibios_get_version(); - if (v > 0) - printf("pcibios: BIOS version %x.%02x\n", (v & 0xff00) >> 8, - v & 0xff); - - /* - * Look for the interrupt routing table. - * - * We use PCI BIOS's PIR table if it's available $PIR is the - * standard way to do this. Sadly, some machines are not - * standards conforming and have _PIR instead. We shrug and cope - * by looking for both. 
- */ - if (pcibios_get_version() >= 0x0210 && pt == NULL) { - sigaddr = bios_sigsearch(0, "$PIR", 4, 16, 0); - if (sigaddr == 0) - sigaddr = bios_sigsearch(0, "_PIR", 4, 16, 0); - if (sigaddr != 0) { - pt = (struct PIR_table *)(uintptr_t) - BIOS_PADDRTOVADDR(sigaddr); - for (cv = (u_int8_t *)pt, ck = 0, i = 0; - i < (pt->pt_header.ph_length); i++) { - ck += cv[i]; - } - if (ck == 0 && pt->pt_header.ph_length > - sizeof(struct PIR_header)) { - pci_route_table = pt; - pci_route_count = (pt->pt_header.ph_length - - sizeof(struct PIR_header)) / - sizeof(struct PIR_entry); - printf("Using $PIR table, %d entries at %p\n", - pci_route_count, pci_route_table); - if (bootverbose) - pci_print_route_table(pci_route_table, - pci_route_count); - } - } - } + return (0); mtx_init(&pcicfg_mtx, "pcicfg", NULL, MTX_SPIN); opened = 1; - return(1); + return (1); } /* @@ -183,60 +80,22 @@ u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes) { uint32_t line; -#ifdef APIC_IO - uint32_t pin; - - /* - * If we are using the APIC, the contents of the intline - * register will probably be wrong (since they are set up for - * use with the PIC. Rather than rewrite these registers - * (maybe that would be smarter) we trap attempts to read them - * and translate to our private vector numbers. - */ - if ((reg == PCIR_INTLINE) && (bytes == 1)) { - - pin = pcireg_cfgread(bus, slot, func, PCIR_INTPIN, 1); - line = pcireg_cfgread(bus, slot, func, PCIR_INTLINE, 1); - if (pin != 0) { - int airq; - - airq = pci_apic_irq(bus, slot, pin); - if (airq >= 0) { - /* PCI specific entry found in MP table */ - if (airq != line) - undirect_pci_irq(line); - return(airq); - } else { - /* - * PCI interrupts might be redirected - * to the ISA bus according to some MP - * tables. Use the same methods as - * used by the ISA devices devices to - * find the proper IOAPIC int pin. - */ - airq = isa_apic_irq(line); - if ((airq >= 0) && (airq != line)) { - /* XXX: undirect_pci_irq() ? 
*/ - undirect_isa_irq(line); - return(airq); - } - } - } - return(line); - } -#else /* * Some BIOS writers seem to want to ignore the spec and put - * 0 in the intline rather than 255 to indicate none. The rest of - * the code uses 255 as an invalid IRQ. + * 0 in the intline rather than 255 to indicate none. Some use + * numbers in the range 128-254 to indicate something strange and + * apparently undocumented anywhere. Assume these are completely bogus + * and map them to 255, which the rest of the PCI code recognizes as + * an invalid IRQ. */ if (reg == PCIR_INTLINE && bytes == 1) { line = pcireg_cfgread(bus, slot, func, PCIR_INTLINE, 1); - return pci_i386_map_intline(line); + if (line == 0 || line >= 128) + line = PCI_INVALID_IRQ; + return (line); } -#endif /* APIC_IO */ - return(pcireg_cfgread(bus, slot, func, reg, bytes)); + return (pcireg_cfgread(bus, slot, func, reg, bytes)); } /* @@ -255,342 +114,10 @@ pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes) int pci_cfgintr(int bus, int device, int pin, int oldirq) { - struct PIR_entry *pe; - int i, irq; - struct bios_regs args; - u_int16_t v; - int already = 0; - int errok = 0; - - v = pcibios_get_version(); - if (v < 0x0210) { - PRVERB(( - "pci_cfgintr: BIOS %x.%02x doesn't support interrupt routing\n", - (v & 0xff00) >> 8, v & 0xff)); - return (PCI_INVALID_IRQ); - } - if ((bus < 0) || (bus > 255) || (device < 0) || (device > 255) || - (pin < 1) || (pin > 4)) - return(PCI_INVALID_IRQ); - - /* - * Scan the entry table for a contender - */ - for (i = 0, pe = &pci_route_table->pt_entry[0]; i < pci_route_count; - i++, pe++) { - if ((bus != pe->pe_bus) || (device != pe->pe_device)) - continue; - /* - * A link of 0 means that this intpin is not connected to - * any other device's interrupt pins and is not connected to - * any of the Interrupt Router's interrupt pins, so we can't - * route it. 
- */ - if (pe->pe_intpin[pin - 1].link == 0) - continue; - - if (pci_cfgintr_valid(pe, pin, oldirq)) { - printf("pci_cfgintr: %d:%d INT%c BIOS irq %d\n", bus, - device, 'A' + pin - 1, oldirq); - return (oldirq); - } - - /* - * We try to find a linked interrupt, then we look to see - * if the interrupt is uniquely routed, then we look for - * a virgin interrupt. The virgin interrupt should return - * an interrupt we can route, but if that fails, maybe we - * should try harder to route a different interrupt. - * However, experience has shown that that's rarely the - * failure mode we see. - */ - irq = pci_cfgintr_linked(pe, pin); - if (irq != PCI_INVALID_IRQ) - already = 1; - if (irq == PCI_INVALID_IRQ) { - irq = pci_cfgintr_unique(pe, pin); - if (irq != PCI_INVALID_IRQ) - errok = 1; - } - if (irq == PCI_INVALID_IRQ) - irq = pci_cfgintr_virgin(pe, pin); - if (irq == PCI_INVALID_IRQ) - break; - - /* - * Ask the BIOS to route the interrupt. If we picked an - * interrupt that failed, we should really try other - * choices that the BIOS offers us. - * - * For uniquely routed interrupts, we need to try - * to route them on some machines. Yet other machines - * fail to route, so we have to pretend that in that - * case it worked. Isn't pc hardware fun? - * - * NOTE: if we want to whack hardware to do this, then - * I think the right way to do that would be to have - * bridge drivers that do this. I'm not sure that the - * $PIR table would be valid for those interrupt - * routers. 
- */ - args.eax = PCIBIOS_ROUTE_INTERRUPT; - args.ebx = (bus << 8) | (device << 3); - /* pin value is 0xa - 0xd */ - args.ecx = (irq << 8) | (0xa + pin - 1); - if (!already && - bios32(&args, PCIbios.ventry, GSEL(GCODE_SEL, SEL_KPL)) && - !errok) { - PRVERB(("pci_cfgintr: ROUTE_INTERRUPT failed.\n")); - return(PCI_INVALID_IRQ); - } - printf("pci_cfgintr: %d:%d INT%c routed to irq %d\n", bus, - device, 'A' + pin - 1, irq); - return(irq); - } - - PRVERB(("pci_cfgintr: can't route an interrupt to %d:%d INT%c\n", bus, - device, 'A' + pin - 1)); - return(PCI_INVALID_IRQ); -} - -/* - * Check to see if an existing IRQ setting is valid. - */ -static int -pci_cfgintr_valid(struct PIR_entry *pe, int pin, int irq) -{ - uint32_t irqmask; - if (!PCI_INTERRUPT_VALID(irq)) - return (0); - irqmask = pe->pe_intpin[pin - 1].irqs; - if (irqmask & (1 << irq)) { - PRVERB(("pci_cfgintr_valid: BIOS irq %d is valid\n", irq)); - return (1); - } - return (0); -} - -/* - * Look to see if the routing table claims this pin is uniquely routed. - */ -static int -pci_cfgintr_unique(struct PIR_entry *pe, int pin) -{ - int irq; - uint32_t irqmask; - - irqmask = pe->pe_intpin[pin - 1].irqs; - if (irqmask != 0 && powerof2(irqmask)) { - irq = ffs(irqmask) - 1; - PRVERB(("pci_cfgintr_unique: hard-routed to irq %d\n", irq)); - return(irq); - } - return(PCI_INVALID_IRQ); -} - -/* - * Look for another device which shares the same link byte and - * already has a unique IRQ, or which has had one routed already. - */ -static int -pci_cfgintr_linked(struct PIR_entry *pe, int pin) -{ - struct PIR_entry *oe; - struct PIR_intpin *pi; - int i, j, irq; - - /* - * Scan table slots. 
- */ - for (i = 0, oe = &pci_route_table->pt_entry[0]; i < pci_route_count; - i++, oe++) { - /* scan interrupt pins */ - for (j = 0, pi = &oe->pe_intpin[0]; j < 4; j++, pi++) { - - /* don't look at the entry we're trying to match */ - if ((pe == oe) && (i == (pin - 1))) - continue; - /* compare link bytes */ - if (pi->link != pe->pe_intpin[pin - 1].link) - continue; - /* link destination mapped to a unique interrupt? */ - if (pi->irqs != 0 && powerof2(pi->irqs)) { - irq = ffs(pi->irqs) - 1; - PRVERB(("pci_cfgintr_linked: linked (%x) to hard-routed irq %d\n", - pi->link, irq)); - return(irq); - } - - /* - * look for the real PCI device that matches this - * table entry - */ - irq = pci_cfgintr_search(pe, oe->pe_bus, oe->pe_device, - j, pin); - if (irq != PCI_INVALID_IRQ) - return(irq); - } - } - return(PCI_INVALID_IRQ); -} - -/* - * Scan for the real PCI device at (bus)/(device) using intpin (matchpin) and - * see if it has already been assigned an interrupt. - */ -static int -pci_cfgintr_search(struct PIR_entry *pe, int bus, int device, int matchpin, int pin) -{ - devclass_t pci_devclass; - device_t *pci_devices; - int pci_count; - device_t *pci_children; - int pci_childcount; - device_t *busp, *childp; - int i, j, irq; - - /* - * Find all the PCI busses. - */ - pci_count = 0; - if ((pci_devclass = devclass_find("pci")) != NULL) - devclass_get_devices(pci_devclass, &pci_devices, &pci_count); - - /* - * Scan all the PCI busses/devices looking for this one. 
- */ - irq = PCI_INVALID_IRQ; - for (i = 0, busp = pci_devices; (i < pci_count) && (irq == PCI_INVALID_IRQ); - i++, busp++) { - pci_childcount = 0; - device_get_children(*busp, &pci_children, &pci_childcount); - - for (j = 0, childp = pci_children; j < pci_childcount; j++, - childp++) { - if ((pci_get_bus(*childp) == bus) && - (pci_get_slot(*childp) == device) && - (pci_get_intpin(*childp) == matchpin)) { - irq = pci_i386_map_intline(pci_get_irq(*childp)); - if (irq != PCI_INVALID_IRQ) - PRVERB(("pci_cfgintr_search: linked (%x) to configured irq %d at %d:%d:%d\n", - pe->pe_intpin[pin - 1].link, irq, - pci_get_bus(*childp), - pci_get_slot(*childp), - pci_get_function(*childp))); - break; - } - } - if (pci_children != NULL) - free(pci_children, M_TEMP); - } - if (pci_devices != NULL) - free(pci_devices, M_TEMP); - return(irq); -} - -/* - * Pick a suitable IRQ from those listed as routable to this device. - */ -static int -pci_cfgintr_virgin(struct PIR_entry *pe, int pin) -{ - int irq, ibit; - - /* - * first scan the set of PCI-only interrupts and see if any of these - * are routable - */ - for (irq = 0; irq < 16; irq++) { - ibit = (1 << irq); - - /* can we use this interrupt? 
*/ - if ((pci_route_table->pt_header.ph_pci_irqs & ibit) && - (pe->pe_intpin[pin - 1].irqs & ibit)) { - PRVERB(("pci_cfgintr_virgin: using routable PCI-only interrupt %d\n", irq)); - return(irq); - } - } - - /* life is tough, so just pick an interrupt */ - for (irq = 0; irq < 16; irq++) { - ibit = (1 << irq); - if (pe->pe_intpin[pin - 1].irqs & ibit) { - PRVERB(("pci_cfgintr_virgin: using routable interrupt %d\n", irq)); - return(irq); - } - } - return(PCI_INVALID_IRQ); -} - -static void -pci_print_irqmask(u_int16_t irqs) -{ - int i, first; - - if (irqs == 0) { - printf("none"); - return; - } - first = 1; - for (i = 0; i < 16; i++, irqs >>= 1) - if (irqs & 1) { - if (!first) - printf(" "); - else - first = 0; - printf("%d", i); - } -} - -/* - * Dump the contents of a PCI BIOS Interrupt Routing Table to the console. - */ -static void -pci_print_route_table(struct PIR_table *prt, int size) -{ - struct PIR_entry *entry; - struct PIR_intpin *intpin; - int i, pin; - - printf("PCI-Only Interrupts: "); - pci_print_irqmask(prt->pt_header.ph_pci_irqs); - printf("\nLocation Bus Device Pin Link IRQs\n"); - entry = &prt->pt_entry[0]; - for (i = 0; i < size; i++, entry++) { - intpin = &entry->pe_intpin[0]; - for (pin = 0; pin < 4; pin++, intpin++) - if (intpin->link != 0) { - if (entry->pe_slot == 0) - printf("embedded "); - else - printf("slot %-3d ", entry->pe_slot); - printf(" %3d %3d %c 0x%02x ", - entry->pe_bus, entry->pe_device, - 'A' + pin, intpin->link); - pci_print_irqmask(intpin->irqs); - printf("\n"); - } - } -} - -/* - * See if any interrupts for a given PCI bus are routed in the PIR. Don't - * even bother looking if the BIOS doesn't support routing anyways. 
- */ -int -pci_probe_route_table(int bus) -{ - int i; - u_int16_t v; - - v = pcibios_get_version(); - if (v < 0x0210) - return (0); - for (i = 0; i < pci_route_count; i++) - if (pci_route_table->pt_entry[i].pe_bus == bus) - return (1); - return (0); + printf("pci_cfgintr: can't route an interrupt to %d:%d INT%c without ACPI\n", bus, + device, 'A' + pin - 1); + return (PCI_INVALID_IRQ); } /* @@ -819,4 +346,3 @@ pcireg_cfgopen(void) devmax = 0; return (cfgmech); } - |