summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
authorRenato Botelho <renato@netgate.com>2016-02-03 18:01:26 -0200
committerRenato Botelho <renato@netgate.com>2016-02-03 18:01:26 -0200
commitaf0758169e63a4e7f6024c241d9254a8bc09908d (patch)
treea31903fc62c875e8d23a391beb8ae57ed0092c45 /sys
parent79f27b5150f7b79a6f1bcd30e9233f1abb9c3e36 (diff)
parent6114d518f71115abacc5d610c4d668ef6e0b2f37 (diff)
downloadFreeBSD-src-af0758169e63a4e7f6024c241d9254a8bc09908d.zip
FreeBSD-src-af0758169e63a4e7f6024c241d9254a8bc09908d.tar.gz
Merge remote-tracking branch 'origin/stable/10' into devel
Diffstat (limited to 'sys')
-rw-r--r--sys/amd64/amd64/fpu.c21
-rw-r--r--sys/amd64/include/vmm.h35
-rw-r--r--sys/amd64/include/vmm_dev.h38
-rw-r--r--sys/amd64/vmm/amd/svm.c2
-rw-r--r--sys/amd64/vmm/intel/vmx.c2
-rw-r--r--sys/amd64/vmm/io/ppt.c16
-rw-r--r--sys/amd64/vmm/vmm.c471
-rw-r--r--sys/amd64/vmm/vmm_dev.c398
-rw-r--r--sys/amd64/vmm/vmm_instruction_emul.c63
-rw-r--r--sys/amd64/vmm/vmm_mem.c32
-rw-r--r--sys/amd64/vmm/vmm_mem.h2
-rw-r--r--sys/boot/common/bootstrap.h7
-rw-r--r--sys/boot/common/console.c93
-rw-r--r--sys/boot/common/interp_forth.c12
-rw-r--r--sys/boot/common/load_elf.c2
-rw-r--r--sys/boot/common/load_elf_obj.c4
-rw-r--r--sys/boot/common/misc.c8
-rw-r--r--sys/boot/common/module.c52
-rw-r--r--sys/boot/common/part.c12
-rw-r--r--sys/boot/common/self_reloc.c4
-rw-r--r--sys/boot/common/ufsread.c47
-rw-r--r--sys/boot/efi/Makefile10
-rw-r--r--sys/boot/efi/boot1/Makefile32
-rw-r--r--sys/boot/efi/boot1/boot1.c407
-rw-r--r--sys/boot/efi/boot1/boot_module.h110
-rw-r--r--sys/boot/efi/boot1/ufs_module.c185
-rw-r--r--sys/boot/efi/boot1/zfs_module.c199
-rw-r--r--sys/boot/efi/include/efi_nii.h4
-rw-r--r--sys/boot/efi/include/efiapi.h44
-rw-r--r--sys/boot/efi/include/eficon.h6
-rw-r--r--sys/boot/efi/include/eficonsctl.h2
-rw-r--r--sys/boot/efi/include/efidevp.h10
-rw-r--r--sys/boot/efi/include/efierr.h2
-rw-r--r--sys/boot/efi/include/efifpswa.h2
-rw-r--r--sys/boot/efi/include/efigop.h5
-rw-r--r--sys/boot/efi/include/efilib.h5
-rw-r--r--sys/boot/efi/include/efinet.h2
-rw-r--r--sys/boot/efi/include/efiprot.h28
-rw-r--r--sys/boot/efi/include/efipxebc.h4
-rw-r--r--sys/boot/efi/include/efiser.h2
-rw-r--r--sys/boot/efi/libefi/Makefile1
-rw-r--r--sys/boot/efi/libefi/efi_console.c2
-rw-r--r--sys/boot/efi/libefi/efipart.c1
-rw-r--r--sys/boot/efi/libefi/handles.c24
-rw-r--r--sys/boot/efi/libefi/libefi.c2
-rw-r--r--sys/boot/efi/loader/Makefile21
-rw-r--r--sys/boot/efi/loader/arch/amd64/elf64_freebsd.c2
-rw-r--r--sys/boot/efi/loader/arch/amd64/framebuffer.c2
-rw-r--r--sys/boot/efi/loader/arch/amd64/reloc.c9
-rw-r--r--sys/boot/efi/loader/autoload.c2
-rw-r--r--sys/boot/efi/loader/bootinfo.c6
-rw-r--r--sys/boot/efi/loader/conf.c9
-rw-r--r--sys/boot/efi/loader/copy.c2
-rw-r--r--sys/boot/efi/loader/devicename.c61
-rw-r--r--sys/boot/efi/loader/loader_efi.h2
-rw-r--r--sys/boot/efi/loader/main.c156
-rw-r--r--sys/boot/fdt/fdt_loader_cmd.c2
-rw-r--r--sys/boot/ficl/i386/sysdep.c2
-rw-r--r--sys/boot/i386/libi386/smbios.c4
-rw-r--r--sys/boot/zfs/zfsimpl.c8
-rw-r--r--sys/cam/ctl/ctl.c15
-rw-r--r--sys/cam/scsi/scsi_ch.c24
-rw-r--r--sys/cam/scsi/scsi_enc.c28
-rw-r--r--sys/cam/scsi/scsi_pass.c32
-rw-r--r--sys/cam/scsi/scsi_pt.c27
-rw-r--r--sys/cam/scsi/scsi_sa.c63
-rw-r--r--sys/cam/scsi/scsi_sg.c29
-rw-r--r--sys/conf/files2
-rw-r--r--sys/dev/acpica/acpi.c30
-rw-r--r--sys/dev/ixgbe/if_ix.c1249
-rw-r--r--sys/dev/ixgbe/if_ixv.c141
-rw-r--r--sys/dev/ixgbe/ix_txrx.c76
-rw-r--r--sys/dev/ixgbe/ixgbe.h223
-rw-r--r--sys/dev/ixgbe/ixgbe_mbx.h27
-rw-r--r--sys/dev/ixgbe/ixgbe_vf.c59
-rw-r--r--sys/dev/ixl/if_ixl.c6
-rw-r--r--sys/dev/netmap/ixgbe_netmap.h23
-rw-r--r--sys/dev/pci/pci.c59
-rw-r--r--sys/dev/pty/pty.c13
-rw-r--r--sys/fs/ext2fs/ext2_inode_cnv.c12
-rw-r--r--sys/i386/isa/npx.c11
-rw-r--r--sys/kern/kern_conf.c98
-rw-r--r--sys/kern/subr_bus.c230
-rw-r--r--sys/kern/subr_hints.c28
-rw-r--r--sys/kern/tty.c99
-rw-r--r--sys/mips/include/asm.h29
-rw-r--r--sys/netinet/sctp_input.c47
-rw-r--r--sys/netinet/sctp_output.c139
-rw-r--r--sys/netinet/sctp_output.h3
-rw-r--r--sys/netinet/sctp_pcb.c17
-rw-r--r--sys/netinet/sctp_var.h2
-rw-r--r--sys/netinet/tcp_output.c2
-rw-r--r--sys/netinet/tcp_subr.c2
-rw-r--r--sys/netinet/tcp_timer.c8
-rw-r--r--sys/netinet/tcp_timer.h4
-rw-r--r--sys/sys/ata.h1
-rw-r--r--sys/sys/bus.h48
-rw-r--r--sys/sys/conf.h30
-rw-r--r--sys/sys/elf_common.h5
-rw-r--r--sys/sys/param.h2
100 files changed, 4453 insertions, 1188 deletions
diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index f30c073..d9c967c 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -318,13 +318,15 @@ fpuinitstate(void *arg __unused)
cpu_mxcsr_mask = 0xFFBF;
/*
- * The fninit instruction does not modify XMM registers. The
- * fpusave call dumped the garbage contained in the registers
- * after reset to the initial state saved. Clear XMM
- * registers file image to make the startup program state and
- * signal handler XMM register content predictable.
+ * The fninit instruction does not modify XMM registers or x87
+ * registers (MM/ST). The fpusave call dumped the garbage
+ * contained in the registers after reset to the initial state
+ * saved. Clear XMM and x87 registers file image to make the
+ * startup program state and signal handler XMM/x87 register
+ * content predictable.
*/
- bzero(&fpu_initialstate->sv_xmm[0], sizeof(struct xmmacc));
+ bzero(fpu_initialstate->sv_fp, sizeof(fpu_initialstate->sv_fp));
+ bzero(fpu_initialstate->sv_xmm, sizeof(fpu_initialstate->sv_xmm));
/*
* Create a table describing the layout of the CPU Extended
@@ -375,7 +377,7 @@ fpuexit(struct thread *td)
}
int
-fpuformat()
+fpuformat(void)
{
return (_MC_FPFMT_XMM);
@@ -661,7 +663,8 @@ fpudna(void)
* fpu_initialstate, to ignite the XSAVEOPT
* tracking engine.
*/
- bcopy(fpu_initialstate, curpcb->pcb_save, cpu_max_ext_state_size);
+ bcopy(fpu_initialstate, curpcb->pcb_save,
+ cpu_max_ext_state_size);
fpurestore(curpcb->pcb_save);
if (curpcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
fldcw(curpcb->pcb_initial_fpucw);
@@ -676,7 +679,7 @@ fpudna(void)
}
void
-fpudrop()
+fpudrop(void)
{
struct thread *td;
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index 1a4e5ab..f2de960 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -108,7 +108,6 @@ enum x2apic_state {
struct vm;
struct vm_exception;
-struct vm_memory_segment;
struct seg_desc;
struct vm_exit;
struct vm_run;
@@ -175,17 +174,33 @@ int vm_create(const char *name, struct vm **retvm);
void vm_destroy(struct vm *vm);
int vm_reinit(struct vm *vm);
const char *vm_name(struct vm *vm);
-int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len);
+
+/*
+ * APIs that modify the guest memory map require all vcpus to be frozen.
+ */
+int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
+ size_t len, int prot, int flags);
+int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
+void vm_free_memseg(struct vm *vm, int ident);
int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
-void *vm_gpa_hold(struct vm *, vm_paddr_t gpa, size_t len, int prot,
- void **cookie);
+int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
+int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
+
+/*
+ * APIs that inspect the guest memory map require only a *single* vcpu to
+ * be frozen. This acts like a read lock on the guest memory map since any
+ * modification requires *all* vcpus to be frozen.
+ */
+int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
+ vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
+int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
+ struct vm_object **objptr);
+void *vm_gpa_hold(struct vm *, int vcpuid, vm_paddr_t gpa, size_t len,
+ int prot, void **cookie);
void vm_gpa_release(void *cookie);
-int vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
- struct vm_memory_segment *seg);
-int vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
- vm_offset_t *offset, struct vm_object **object);
-boolean_t vm_mem_allocated(struct vm *vm, vm_paddr_t gpa);
+bool vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa);
+
int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
@@ -302,8 +317,6 @@ vcpu_should_yield(struct vm *vm, int vcpu)
void *vcpu_stats(struct vm *vm, int vcpu);
void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
struct vmspace *vm_get_vmspace(struct vm *vm);
-int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
-int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
struct vatpic *vm_atpic(struct vm *vm);
struct vatpit *vm_atpit(struct vm *vm);
struct vpmtmr *vm_pmtmr(struct vm *vm);
diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h
index 9d031a9..1af75a3 100644
--- a/sys/amd64/include/vmm_dev.h
+++ b/sys/amd64/include/vmm_dev.h
@@ -34,10 +34,22 @@ void vmmdev_init(void);
int vmmdev_cleanup(void);
#endif
-struct vm_memory_segment {
- vm_paddr_t gpa; /* in */
+struct vm_memmap {
+ vm_paddr_t gpa;
+ int segid; /* memory segment */
+ vm_ooffset_t segoff; /* offset into memory segment */
+ size_t len; /* mmap length */
+ int prot; /* RWX */
+ int flags;
+};
+#define VM_MEMMAP_F_WIRED 0x01
+#define VM_MEMMAP_F_IOMMU 0x02
+
+#define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? (m)->name : NULL)
+struct vm_memseg {
+ int segid;
size_t len;
- int wired;
+ char name[SPECNAMELEN + 1];
};
struct vm_register {
@@ -214,10 +226,14 @@ enum {
IOCNUM_REINIT = 5,
/* memory apis */
- IOCNUM_MAP_MEMORY = 10,
- IOCNUM_GET_MEMORY_SEG = 11,
+ IOCNUM_MAP_MEMORY = 10, /* deprecated */
+ IOCNUM_GET_MEMORY_SEG = 11, /* deprecated */
IOCNUM_GET_GPA_PMAP = 12,
IOCNUM_GLA2GPA = 13,
+ IOCNUM_ALLOC_MEMSEG = 14,
+ IOCNUM_GET_MEMSEG = 15,
+ IOCNUM_MMAP_MEMSEG = 16,
+ IOCNUM_MMAP_GETNEXT = 17,
/* register/state accessors */
IOCNUM_SET_REGISTER = 20,
@@ -278,10 +294,14 @@ enum {
_IOW('v', IOCNUM_SUSPEND, struct vm_suspend)
#define VM_REINIT \
_IO('v', IOCNUM_REINIT)
-#define VM_MAP_MEMORY \
- _IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment)
-#define VM_GET_MEMORY_SEG \
- _IOWR('v', IOCNUM_GET_MEMORY_SEG, struct vm_memory_segment)
+#define VM_ALLOC_MEMSEG \
+ _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg)
+#define VM_GET_MEMSEG \
+ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg)
+#define VM_MMAP_MEMSEG \
+ _IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap)
+#define VM_MMAP_GETNEXT \
+ _IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap)
#define VM_SET_REGISTER \
_IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
#define VM_GET_REGISTER \
diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
index abca613..ca5141a 100644
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -1477,7 +1477,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
VCPU_CTR2(svm_sc->vm, vcpu, "nested page fault with "
"reserved bits set: info1(%#lx) info2(%#lx)",
info1, info2);
- } else if (vm_mem_allocated(svm_sc->vm, info2)) {
+ } else if (vm_mem_allocated(svm_sc->vm, vcpu, info2)) {
vmexit->exitcode = VM_EXITCODE_PAGING;
vmexit->u.paging.gpa = info2;
vmexit->u.paging.fault_type = npf_fault_type(info1);
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 5bbfe99..c5cafa8 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -2426,7 +2426,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
* this must be an instruction that accesses MMIO space.
*/
gpa = vmcs_gpa();
- if (vm_mem_allocated(vmx->vm, gpa) ||
+ if (vm_mem_allocated(vmx->vm, vcpu, gpa) ||
apic_access_fault(vmx, vcpu, gpa)) {
vmexit->exitcode = VM_EXITCODE_PAGING;
vmexit->inst_length = 0;
diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c
index b789f77..692190a 100644
--- a/sys/amd64/vmm/io/ppt.c
+++ b/sys/amd64/vmm/io/ppt.c
@@ -76,11 +76,17 @@ struct pptintr_arg { /* pptintr(pptintr_arg) */
uint64_t msg_data;
};
+struct pptseg {
+ vm_paddr_t gpa;
+ size_t len;
+ int wired;
+};
+
struct pptdev {
device_t dev;
struct vm *vm; /* owner of this device */
TAILQ_ENTRY(pptdev) next;
- struct vm_memory_segment mmio[MAX_MMIOSEGS];
+ struct pptseg mmio[MAX_MMIOSEGS];
struct {
int num_msgs; /* guest state */
@@ -207,14 +213,14 @@ static void
ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
{
int i;
- struct vm_memory_segment *seg;
+ struct pptseg *seg;
for (i = 0; i < MAX_MMIOSEGS; i++) {
seg = &ppt->mmio[i];
if (seg->len == 0)
continue;
(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
- bzero(seg, sizeof(struct vm_memory_segment));
+ bzero(seg, sizeof(struct pptseg));
}
}
@@ -324,7 +330,7 @@ ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
{
int i;
struct pptdev *ppt;
- struct vm_memory_segment *seg;
+ struct pptseg *seg;
TAILQ_FOREACH(ppt, &pptdev_list, next) {
if (ppt->vm != vm)
@@ -410,7 +416,7 @@ ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
int i, error;
- struct vm_memory_segment *seg;
+ struct pptseg *seg;
struct pptdev *ppt;
ppt = ppt_find(bus, slot, func);
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index c3c7bb1..31e35d2 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -120,12 +120,21 @@ struct vcpu {
#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED)
struct mem_seg {
+ size_t len;
+ bool sysmem;
+ struct vm_object *object;
+};
+#define VM_MAX_MEMSEGS 2
+
+struct mem_map {
vm_paddr_t gpa;
size_t len;
- boolean_t wired;
- vm_object_t object;
+ vm_ooffset_t segoff;
+ int segid;
+ int prot;
+ int flags;
};
-#define VM_MAX_MEMORY_SEGMENTS 2
+#define VM_MAX_MEMMAPS 4
/*
* Initialization:
@@ -151,8 +160,8 @@ struct vm {
void *rendezvous_arg; /* (x) rendezvous func/arg */
vm_rendezvous_func_t rendezvous_func;
struct mtx rendezvous_mtx; /* (o) rendezvous lock */
- int num_mem_segs; /* (o) guest memory segments */
- struct mem_seg mem_segs[VM_MAX_MEMORY_SEGMENTS];
+ struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
+ struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
struct vmspace *vmspace; /* (o) guest's address space */
char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */
struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */
@@ -224,6 +233,8 @@ TUNABLE_INT("hw.vmm.force_iommu", &vmm_force_iommu);
SYSCTL_INT(_hw_vmm, OID_AUTO, force_iommu, CTLFLAG_RDTUN, &vmm_force_iommu, 0,
"Force use of I/O MMU even if no passthrough devices were found.");
+static void vm_free_memmap(struct vm *vm, int ident);
+static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr);
#ifdef KTR
@@ -444,7 +455,6 @@ vm_create(const char *name, struct vm **retvm)
vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
strcpy(vm->name, name);
- vm->num_mem_segs = 0;
vm->vmspace = vmspace;
mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
@@ -455,18 +465,9 @@ vm_create(const char *name, struct vm **retvm)
}
static void
-vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
-{
-
- if (seg->object != NULL)
- vmm_mem_free(vm->vmspace, seg->gpa, seg->len);
-
- bzero(seg, sizeof(*seg));
-}
-
-static void
vm_cleanup(struct vm *vm, bool destroy)
{
+ struct mem_map *mm;
int i;
ppt_unassign_all(vm);
@@ -489,11 +490,23 @@ vm_cleanup(struct vm *vm, bool destroy)
VMCLEANUP(vm->cookie);
- if (destroy) {
- for (i = 0; i < vm->num_mem_segs; i++)
- vm_free_mem_seg(vm, &vm->mem_segs[i]);
+ /*
+ * System memory is removed from the guest address space only when
+ * the VM is destroyed. This is because the mapping remains the same
+ * across VM reset.
+ *
+ * Device memory can be relocated by the guest (e.g. using PCI BARs)
+ * so those mappings are removed on a VM reset.
+ */
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (destroy || !sysmem_mapping(vm, mm))
+ vm_free_memmap(vm, i);
+ }
- vm->num_mem_segs = 0;
+ if (destroy) {
+ for (i = 0; i < VM_MAX_MEMSEGS; i++)
+ vm_free_memseg(vm, i);
VMSPACE_FREE(vm->vmspace);
vm->vmspace = NULL;
@@ -551,146 +564,243 @@ vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
return (0);
}
-boolean_t
-vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
+/*
+ * Return 'true' if 'gpa' is allocated in the guest address space.
+ *
+ * This function is called in the context of a running vcpu which acts as
+ * an implicit lock on 'vm->mem_maps[]'.
+ */
+bool
+vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa)
{
+ struct mem_map *mm;
int i;
- vm_paddr_t gpabase, gpalimit;
- for (i = 0; i < vm->num_mem_segs; i++) {
- gpabase = vm->mem_segs[i].gpa;
- gpalimit = gpabase + vm->mem_segs[i].len;
- if (gpa >= gpabase && gpa < gpalimit)
- return (TRUE); /* 'gpa' is regular memory */
+#ifdef INVARIANTS
+ int hostcpu, state;
+ state = vcpu_get_state(vm, vcpuid, &hostcpu);
+ KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
+ ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
+#endif
+
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
+ return (true); /* 'gpa' is sysmem or devmem */
}
if (ppt_is_mmio(vm, gpa))
- return (TRUE); /* 'gpa' is pci passthru mmio */
+ return (true); /* 'gpa' is pci passthru mmio */
- return (FALSE);
+ return (false);
}
int
-vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
+vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
{
- int available, allocated;
struct mem_seg *seg;
- vm_object_t object;
- vm_paddr_t g;
+ vm_object_t obj;
- if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
+ if (ident < 0 || ident >= VM_MAX_MEMSEGS)
return (EINVAL);
-
- available = allocated = 0;
- g = gpa;
- while (g < gpa + len) {
- if (vm_mem_allocated(vm, g))
- allocated++;
- else
- available++;
- g += PAGE_SIZE;
- }
-
- /*
- * If there are some allocated and some available pages in the address
- * range then it is an error.
- */
- if (allocated && available)
+ if (len == 0 || (len & PAGE_MASK))
return (EINVAL);
- /*
- * If the entire address range being requested has already been
- * allocated then there isn't anything more to do.
- */
- if (allocated && available == 0)
- return (0);
-
- if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
- return (E2BIG);
-
- seg = &vm->mem_segs[vm->num_mem_segs];
+ seg = &vm->mem_segs[ident];
+ if (seg->object != NULL) {
+ if (seg->len == len && seg->sysmem == sysmem)
+ return (EEXIST);
+ else
+ return (EINVAL);
+ }
- if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL)
+ obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
+ if (obj == NULL)
return (ENOMEM);
- seg->gpa = gpa;
seg->len = len;
- seg->object = object;
- seg->wired = FALSE;
+ seg->object = obj;
+ seg->sysmem = sysmem;
+ return (0);
+}
- vm->num_mem_segs++;
+int
+vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
+ vm_object_t *objptr)
+{
+ struct mem_seg *seg;
+
+ if (ident < 0 || ident >= VM_MAX_MEMSEGS)
+ return (EINVAL);
+ seg = &vm->mem_segs[ident];
+ if (len)
+ *len = seg->len;
+ if (sysmem)
+ *sysmem = seg->sysmem;
+ if (objptr)
+ *objptr = seg->object;
return (0);
}
-static vm_paddr_t
-vm_maxmem(struct vm *vm)
+void
+vm_free_memseg(struct vm *vm, int ident)
{
- int i;
- vm_paddr_t gpa, maxmem;
+ struct mem_seg *seg;
- maxmem = 0;
- for (i = 0; i < vm->num_mem_segs; i++) {
- gpa = vm->mem_segs[i].gpa + vm->mem_segs[i].len;
- if (gpa > maxmem)
- maxmem = gpa;
+ KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
+ ("%s: invalid memseg ident %d", __func__, ident));
+
+ seg = &vm->mem_segs[ident];
+ if (seg->object != NULL) {
+ vm_object_deallocate(seg->object);
+ bzero(seg, sizeof(struct mem_seg));
}
- return (maxmem);
}
-static void
-vm_gpa_unwire(struct vm *vm)
+int
+vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
+ size_t len, int prot, int flags)
{
- int i, rv;
struct mem_seg *seg;
+ struct mem_map *m, *map;
+ vm_ooffset_t last;
+ int i, error;
- for (i = 0; i < vm->num_mem_segs; i++) {
- seg = &vm->mem_segs[i];
- if (!seg->wired)
- continue;
+ if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
+ return (EINVAL);
+
+ if (flags & ~VM_MEMMAP_F_WIRED)
+ return (EINVAL);
+
+ if (segid < 0 || segid >= VM_MAX_MEMSEGS)
+ return (EINVAL);
- rv = vm_map_unwire(&vm->vmspace->vm_map,
- seg->gpa, seg->gpa + seg->len,
- VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
- KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment "
- "%#lx/%ld could not be unwired: %d",
- vm_name(vm), seg->gpa, seg->len, rv));
+ seg = &vm->mem_segs[segid];
+ if (seg->object == NULL)
+ return (EINVAL);
+
+ last = first + len;
+ if (first < 0 || first >= last || last > seg->len)
+ return (EINVAL);
+
+ if ((gpa | first | last) & PAGE_MASK)
+ return (EINVAL);
+
+ map = NULL;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ m = &vm->mem_maps[i];
+ if (m->len == 0) {
+ map = m;
+ break;
+ }
+ }
- seg->wired = FALSE;
+ if (map == NULL)
+ return (ENOSPC);
+
+ error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
+ len, 0, VMFS_NO_SPACE, prot, prot, 0);
+ if (error != KERN_SUCCESS)
+ return (EFAULT);
+
+ vm_object_reference(seg->object);
+
+ if (flags & VM_MEMMAP_F_WIRED) {
+ error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
+ VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
+ if (error != KERN_SUCCESS) {
+ vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
+ return (EFAULT);
+ }
}
+
+ map->gpa = gpa;
+ map->len = len;
+ map->segoff = first;
+ map->segid = segid;
+ map->prot = prot;
+ map->flags = flags;
+ return (0);
}
-static int
-vm_gpa_wire(struct vm *vm)
+int
+vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
+ vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
{
- int i, rv;
- struct mem_seg *seg;
+ struct mem_map *mm, *mmnext;
+ int i;
- for (i = 0; i < vm->num_mem_segs; i++) {
- seg = &vm->mem_segs[i];
- if (seg->wired)
+ mmnext = NULL;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (mm->len == 0 || mm->gpa < *gpa)
continue;
+ if (mmnext == NULL || mm->gpa < mmnext->gpa)
+ mmnext = mm;
+ }
- /* XXX rlimits? */
- rv = vm_map_wire(&vm->vmspace->vm_map,
- seg->gpa, seg->gpa + seg->len,
- VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
- if (rv != KERN_SUCCESS)
- break;
-
- seg->wired = TRUE;
+ if (mmnext != NULL) {
+ *gpa = mmnext->gpa;
+ if (segid)
+ *segid = mmnext->segid;
+ if (segoff)
+ *segoff = mmnext->segoff;
+ if (len)
+ *len = mmnext->len;
+ if (prot)
+ *prot = mmnext->prot;
+ if (flags)
+ *flags = mmnext->flags;
+ return (0);
+ } else {
+ return (ENOENT);
}
+}
- if (i < vm->num_mem_segs) {
- /*
- * Undo the wiring before returning an error.
- */
- vm_gpa_unwire(vm);
- return (EAGAIN);
+static void
+vm_free_memmap(struct vm *vm, int ident)
+{
+ struct mem_map *mm;
+ int error;
+
+ mm = &vm->mem_maps[ident];
+ if (mm->len) {
+ error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
+ mm->gpa + mm->len);
+ KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
+ __func__, error));
+ bzero(mm, sizeof(struct mem_map));
}
+}
- return (0);
+static __inline bool
+sysmem_mapping(struct vm *vm, struct mem_map *mm)
+{
+
+ if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
+ return (true);
+ else
+ return (false);
+}
+
+static vm_paddr_t
+sysmem_maxaddr(struct vm *vm)
+{
+ struct mem_map *mm;
+ vm_paddr_t maxaddr;
+ int i;
+
+ maxaddr = 0;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (sysmem_mapping(vm, mm)) {
+ if (maxaddr < mm->gpa + mm->len)
+ maxaddr = mm->gpa + mm->len;
+ }
+ }
+ return (maxaddr);
}
static void
@@ -698,20 +808,36 @@ vm_iommu_modify(struct vm *vm, boolean_t map)
{
int i, sz;
vm_paddr_t gpa, hpa;
- struct mem_seg *seg;
+ struct mem_map *mm;
void *vp, *cookie, *host_domain;
sz = PAGE_SIZE;
host_domain = iommu_host_domain();
- for (i = 0; i < vm->num_mem_segs; i++) {
- seg = &vm->mem_segs[i];
- KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired",
- vm_name(vm), seg->gpa, seg->len));
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (!sysmem_mapping(vm, mm))
+ continue;
- gpa = seg->gpa;
- while (gpa < seg->gpa + seg->len) {
- vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE,
+ if (map) {
+ KASSERT((mm->flags & VM_MEMMAP_F_IOMMU) == 0,
+ ("iommu map found invalid memmap %#lx/%#lx/%#x",
+ mm->gpa, mm->len, mm->flags));
+ if ((mm->flags & VM_MEMMAP_F_WIRED) == 0)
+ continue;
+ mm->flags |= VM_MEMMAP_F_IOMMU;
+ } else {
+ if ((mm->flags & VM_MEMMAP_F_IOMMU) == 0)
+ continue;
+ mm->flags &= ~VM_MEMMAP_F_IOMMU;
+ KASSERT((mm->flags & VM_MEMMAP_F_WIRED) != 0,
+ ("iommu unmap found invalid memmap %#lx/%#lx/%#x",
+ mm->gpa, mm->len, mm->flags));
+ }
+
+ gpa = mm->gpa;
+ while (gpa < mm->gpa + mm->len) {
+ vp = vm_gpa_hold(vm, -1, gpa, PAGE_SIZE, VM_PROT_WRITE,
&cookie);
KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
vm_name(vm), gpa));
@@ -753,10 +879,9 @@ vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
if (error)
return (error);
- if (ppt_assigned_devices(vm) == 0) {
+ if (ppt_assigned_devices(vm) == 0)
vm_iommu_unmap(vm);
- vm_gpa_unwire(vm);
- }
+
return (0);
}
@@ -766,23 +891,12 @@ vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
int error;
vm_paddr_t maxaddr;
- /*
- * Virtual machines with pci passthru devices get special treatment:
- * - the guest physical memory is wired
- * - the iommu is programmed to do the 'gpa' to 'hpa' translation
- *
- * We need to do this before the first pci passthru device is attached.
- */
+ /* Set up the IOMMU to do the 'gpa' to 'hpa' translation */
if (ppt_assigned_devices(vm) == 0) {
KASSERT(vm->iommu == NULL,
("vm_assign_pptdev: iommu must be NULL"));
- maxaddr = vm_maxmem(vm);
+ maxaddr = sysmem_maxaddr(vm);
vm->iommu = iommu_create_domain(maxaddr);
-
- error = vm_gpa_wire(vm);
- if (error)
- return (error);
-
vm_iommu_map(vm);
}
@@ -791,18 +905,43 @@ vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
}
void *
-vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
+vm_gpa_hold(struct vm *vm, int vcpuid, vm_paddr_t gpa, size_t len, int reqprot,
void **cookie)
{
- int count, pageoff;
+ int i, count, pageoff;
+ struct mem_map *mm;
vm_page_t m;
-
+#ifdef INVARIANTS
+ /*
+ * All vcpus are frozen by ioctls that modify the memory map
+ * (e.g. VM_MMAP_MEMSEG). Therefore 'vm->memmap[]' stability is
+ * guaranteed if at least one vcpu is in the VCPU_FROZEN state.
+ */
+ int state;
+ KASSERT(vcpuid >= -1 || vcpuid < VM_MAXCPU, ("%s: invalid vcpuid %d",
+ __func__, vcpuid));
+ for (i = 0; i < VM_MAXCPU; i++) {
+ if (vcpuid != -1 && vcpuid != i)
+ continue;
+ state = vcpu_get_state(vm, i, NULL);
+ KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
+ __func__, state));
+ }
+#endif
pageoff = gpa & PAGE_MASK;
if (len > PAGE_SIZE - pageoff)
panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
- count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
- trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
+ count = 0;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (sysmem_mapping(vm, mm) && gpa >= mm->gpa &&
+ gpa < mm->gpa + mm->len) {
+ count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
+ trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
+ break;
+ }
+ }
if (count == 1) {
*cookie = m;
@@ -824,50 +963,6 @@ vm_gpa_release(void *cookie)
}
int
-vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
- struct vm_memory_segment *seg)
-{
- int i;
-
- for (i = 0; i < vm->num_mem_segs; i++) {
- if (gpabase == vm->mem_segs[i].gpa) {
- seg->gpa = vm->mem_segs[i].gpa;
- seg->len = vm->mem_segs[i].len;
- seg->wired = vm->mem_segs[i].wired;
- return (0);
- }
- }
- return (-1);
-}
-
-int
-vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
- vm_offset_t *offset, struct vm_object **object)
-{
- int i;
- size_t seg_len;
- vm_paddr_t seg_gpa;
- vm_object_t seg_obj;
-
- for (i = 0; i < vm->num_mem_segs; i++) {
- if ((seg_obj = vm->mem_segs[i].object) == NULL)
- continue;
-
- seg_gpa = vm->mem_segs[i].gpa;
- seg_len = vm->mem_segs[i].len;
-
- if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) {
- *offset = gpa - seg_gpa;
- *object = seg_obj;
- vm_object_reference(seg_obj);
- return (0);
- }
- }
-
- return (EINVAL);
-}
-
-int
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
{
@@ -2425,8 +2520,8 @@ vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
}
for (idx = 0; idx < nused; idx++) {
- hva = vm_gpa_hold(vm, copyinfo[idx].gpa, copyinfo[idx].len,
- prot, &cookie);
+ hva = vm_gpa_hold(vm, vcpuid, copyinfo[idx].gpa,
+ copyinfo[idx].len, prot, &cookie);
if (hva == NULL)
break;
copyinfo[idx].hva = hva;
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
index e3e140a..5cb4150 100644
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
+#include <vm/vm_object.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
@@ -60,10 +61,19 @@ __FBSDID("$FreeBSD$");
#include "io/vhpet.h"
#include "io/vrtc.h"
+struct devmem_softc {
+ int segid;
+ char *name;
+ struct cdev *cdev;
+ struct vmmdev_softc *sc;
+ SLIST_ENTRY(devmem_softc) link;
+};
+
struct vmmdev_softc {
struct vm *vm; /* vm instance cookie */
struct cdev *cdev;
SLIST_ENTRY(vmmdev_softc) link;
+ SLIST_HEAD(, devmem_softc) devmem;
int flags;
};
#define VSC_LINKED 0x01
@@ -76,6 +86,63 @@ static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
SYSCTL_DECL(_hw_vmm);
+static int devmem_create_cdev(const char *vmname, int id, char *devmem);
+static void devmem_destroy(void *arg);
+
+static int
+vcpu_lock_one(struct vmmdev_softc *sc, int vcpu)
+{
+ int error;
+
+ if (vcpu < 0 || vcpu >= VM_MAXCPU)
+ return (EINVAL);
+
+ error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
+ return (error);
+}
+
+static void
+vcpu_unlock_one(struct vmmdev_softc *sc, int vcpu)
+{
+ enum vcpu_state state;
+
+ state = vcpu_get_state(sc->vm, vcpu, NULL);
+ if (state != VCPU_FROZEN) {
+ panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm),
+ vcpu, state);
+ }
+
+ vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
+}
+
+static int
+vcpu_lock_all(struct vmmdev_softc *sc)
+{
+ int error, vcpu;
+
+ for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
+ error = vcpu_lock_one(sc, vcpu);
+ if (error)
+ break;
+ }
+
+ if (error) {
+ while (--vcpu >= 0)
+ vcpu_unlock_one(sc, vcpu);
+ }
+
+ return (error);
+}
+
+static void
+vcpu_unlock_all(struct vmmdev_softc *sc)
+{
+ int vcpu;
+
+ for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
+ vcpu_unlock_one(sc, vcpu);
+}
+
static struct vmmdev_softc *
vmmdev_lookup(const char *name)
{
@@ -108,12 +175,16 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
void *hpa, *cookie;
struct vmmdev_softc *sc;
- static char zerobuf[PAGE_SIZE];
-
- error = 0;
sc = vmmdev_lookup2(cdev);
if (sc == NULL)
- error = ENXIO;
+ return (ENXIO);
+
+ /*
+ * Get a read lock on the guest memory map by freezing any vcpu.
+ */
+ error = vcpu_lock_one(sc, VM_MAXCPU - 1);
+ if (error)
+ return (error);
prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
while (uio->uio_resid > 0 && error == 0) {
@@ -129,10 +200,11 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
* Since this device does not support lseek(2), dd(1) will
* read(2) blocks of data to simulate the lseek(2).
*/
- hpa = vm_gpa_hold(sc->vm, gpa, c, prot, &cookie);
+ hpa = vm_gpa_hold(sc->vm, VM_MAXCPU - 1, gpa, c, prot, &cookie);
if (hpa == NULL) {
if (uio->uio_rw == UIO_READ)
- error = uiomove(zerobuf, c, uio);
+ error = uiomove(__DECONST(void *, zero_region),
+ c, uio);
else
error = EFAULT;
} else {
@@ -140,6 +212,70 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
vm_gpa_release(cookie);
}
}
+ vcpu_unlock_one(sc, VM_MAXCPU - 1);
+ return (error);
+}
+
+CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= SPECNAMELEN + 1);
+
+static int
+get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
+{
+ struct devmem_softc *dsc;
+ int error;
+ bool sysmem;
+
+ error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
+ if (error || mseg->len == 0)
+ return (error);
+
+ if (!sysmem) {
+ SLIST_FOREACH(dsc, &sc->devmem, link) {
+ if (dsc->segid == mseg->segid)
+ break;
+ }
+ KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
+ __func__, mseg->segid));
+ error = copystr(dsc->name, mseg->name, SPECNAMELEN + 1, NULL);
+ } else {
+ bzero(mseg->name, sizeof(mseg->name));
+ }
+
+ return (error);
+}
+
+static int
+alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
+{
+ char *name;
+ int error;
+ bool sysmem;
+
+ error = 0;
+ name = NULL;
+ sysmem = true;
+
+ if (VM_MEMSEG_NAME(mseg)) {
+ sysmem = false;
+ name = malloc(SPECNAMELEN + 1, M_VMMDEV, M_WAITOK);
+ error = copystr(VM_MEMSEG_NAME(mseg), name, SPECNAMELEN + 1, 0);
+ if (error)
+ goto done;
+ }
+
+ error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
+ if (error)
+ goto done;
+
+ if (VM_MEMSEG_NAME(mseg)) {
+ error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
+ if (error)
+ vm_free_memseg(sc->vm, mseg->segid);
+ else
+ name = NULL; /* freed when 'cdev' is destroyed */
+ }
+done:
+ free(name, M_VMMDEV);
return (error);
}
@@ -150,7 +286,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
int error, vcpu, state_changed, size;
cpuset_t *cpuset;
struct vmmdev_softc *sc;
- struct vm_memory_segment *seg;
struct vm_register *vmreg;
struct vm_seg_desc *vmsegdesc;
struct vm_run *vmrun;
@@ -177,6 +312,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct vm_intinfo *vmii;
struct vm_rtc_time *rtctime;
struct vm_rtc_data *rtcdata;
+ struct vm_memmap *mm;
sc = vmmdev_lookup2(cdev);
if (sc == NULL)
@@ -211,43 +347,41 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
* Assumes that the first field of the ioctl data is the vcpu.
*/
vcpu = *(int *)data;
- if (vcpu < 0 || vcpu >= VM_MAXCPU) {
- error = EINVAL;
- goto done;
- }
-
- error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
+ error = vcpu_lock_one(sc, vcpu);
if (error)
goto done;
-
state_changed = 1;
break;
case VM_MAP_PPTDEV_MMIO:
case VM_BIND_PPTDEV:
case VM_UNBIND_PPTDEV:
- case VM_MAP_MEMORY:
+ case VM_ALLOC_MEMSEG:
+ case VM_MMAP_MEMSEG:
case VM_REINIT:
/*
* ioctls that operate on the entire virtual machine must
* prevent all vcpus from running.
*/
- error = 0;
- for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
- error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
- if (error)
- break;
- }
-
- if (error) {
- while (--vcpu >= 0)
- vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
+ error = vcpu_lock_all(sc);
+ if (error)
goto done;
- }
-
state_changed = 2;
break;
+ case VM_GET_MEMSEG:
+ case VM_MMAP_GETNEXT:
+ /*
+ * Lock a vcpu to make sure that the memory map cannot be
+ * modified while it is being inspected.
+ */
+ vcpu = VM_MAXCPU - 1;
+ error = vcpu_lock_one(sc, vcpu);
+ if (error)
+ goto done;
+ state_changed = 1;
+ break;
+
default:
break;
}
@@ -372,15 +506,21 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
error = vatpic_set_irq_trigger(sc->vm,
isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger);
break;
- case VM_MAP_MEMORY:
- seg = (struct vm_memory_segment *)data;
- error = vm_malloc(sc->vm, seg->gpa, seg->len);
+ case VM_MMAP_GETNEXT:
+ mm = (struct vm_memmap *)data;
+ error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
+ &mm->segoff, &mm->len, &mm->prot, &mm->flags);
break;
- case VM_GET_MEMORY_SEG:
- seg = (struct vm_memory_segment *)data;
- seg->len = 0;
- (void)vm_gpabase2memseg(sc->vm, seg->gpa, seg);
- error = 0;
+ case VM_MMAP_MEMSEG:
+ mm = (struct vm_memmap *)data;
+ error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
+ mm->len, mm->prot, mm->flags);
+ break;
+ case VM_ALLOC_MEMSEG:
+ error = alloc_memseg(sc, (struct vm_memseg *)data);
+ break;
+ case VM_GET_MEMSEG:
+ error = get_memseg(sc, (struct vm_memseg *)data);
break;
case VM_GET_REGISTER:
vmreg = (struct vm_register *)data;
@@ -505,12 +645,10 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
break;
}
- if (state_changed == 1) {
- vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
- } else if (state_changed == 2) {
- for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
- vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
- }
+ if (state_changed == 1)
+ vcpu_unlock_one(sc, vcpu);
+ else if (state_changed == 2)
+ vcpu_unlock_all(sc);
done:
/* Make sure that no handler returns a bogus value like ERESTART */
@@ -519,26 +657,79 @@ done:
}
static int
-vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
- vm_size_t size, struct vm_object **object, int nprot)
+vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
+ struct vm_object **objp, int nprot)
{
- int error;
struct vmmdev_softc *sc;
+ vm_paddr_t gpa;
+ size_t len;
+ vm_ooffset_t segoff, first, last;
+ int error, found, segid;
+ bool sysmem;
+
+ first = *offset;
+ last = first + mapsize;
+ if ((nprot & PROT_EXEC) || first < 0 || first >= last)
+ return (EINVAL);
sc = vmmdev_lookup2(cdev);
- if (sc != NULL && (nprot & PROT_EXEC) == 0)
- error = vm_get_memobj(sc->vm, *offset, size, offset, object);
- else
- error = EINVAL;
+ if (sc == NULL) {
+ /* virtual machine is in the process of being created */
+ return (EINVAL);
+ }
+ /*
+ * Get a read lock on the guest memory map by freezing any vcpu.
+ */
+ error = vcpu_lock_one(sc, VM_MAXCPU - 1);
+ if (error)
+ return (error);
+
+ gpa = 0;
+ found = 0;
+ while (!found) {
+ error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
+ NULL, NULL);
+ if (error)
+ break;
+
+ if (first >= gpa && last <= gpa + len)
+ found = 1;
+ else
+ gpa += len;
+ }
+
+ if (found) {
+ error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
+ KASSERT(error == 0 && *objp != NULL,
+ ("%s: invalid memory segment %d", __func__, segid));
+ if (sysmem) {
+ vm_object_reference(*objp);
+ *offset = segoff + (first - gpa);
+ } else {
+ error = EINVAL;
+ }
+ }
+ vcpu_unlock_one(sc, VM_MAXCPU - 1);
return (error);
}
static void
vmmdev_destroy(void *arg)
{
-
struct vmmdev_softc *sc = arg;
+ struct devmem_softc *dsc;
+ int error;
+
+ error = vcpu_lock_all(sc);
+ KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
+
+ while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
+ KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
+ SLIST_REMOVE_HEAD(&sc->devmem, link);
+ free(dsc->name, M_VMMDEV);
+ free(dsc, M_VMMDEV);
+ }
if (sc->cdev != NULL)
destroy_dev(sc->cdev);
@@ -560,6 +751,7 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
int error;
char buf[VM_MAX_NAMELEN];
+ struct devmem_softc *dsc;
struct vmmdev_softc *sc;
struct cdev *cdev;
@@ -578,22 +770,30 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
/*
* The 'cdev' will be destroyed asynchronously when 'si_threadcount'
* goes down to 0 so we should not do it again in the callback.
+ *
+ * Setting 'sc->cdev' to NULL is also used to indicate that the VM
+ * is scheduled for destruction.
*/
cdev = sc->cdev;
sc->cdev = NULL;
mtx_unlock(&vmmdev_mtx);
/*
- * Schedule the 'cdev' to be destroyed:
+ * Schedule all cdevs to be destroyed:
*
- * - any new operations on this 'cdev' will return an error (ENXIO).
+ * - any new operations on the 'cdev' will return an error (ENXIO).
*
* - when the 'si_threadcount' dwindles down to zero the 'cdev' will
* be destroyed and the callback will be invoked in a taskqueue
* context.
+ *
+ * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
*/
+ SLIST_FOREACH(dsc, &sc->devmem, link) {
+ KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
+ destroy_dev_sched_cb(dsc->cdev, devmem_destroy, dsc);
+ }
destroy_dev_sched_cb(cdev, vmmdev_destroy, sc);
-
return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW,
@@ -634,6 +834,7 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
sc->vm = vm;
+ SLIST_INIT(&sc->devmem);
/*
* Lookup the name again just in case somebody sneaked in when we
@@ -687,3 +888,96 @@ vmmdev_cleanup(void)
return (error);
}
+
+static int
+devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
+ struct vm_object **objp, int nprot)
+{
+ struct devmem_softc *dsc;
+ vm_ooffset_t first, last;
+ size_t seglen;
+ int error;
+ bool sysmem;
+
+ dsc = cdev->si_drv1;
+ if (dsc == NULL) {
+ /* 'cdev' has been created but is not ready for use */
+ return (ENXIO);
+ }
+
+ first = *offset;
+ last = *offset + len;
+ if ((nprot & PROT_EXEC) || first < 0 || first >= last)
+ return (EINVAL);
+
+ error = vcpu_lock_one(dsc->sc, VM_MAXCPU - 1);
+ if (error)
+ return (error);
+
+ error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
+ KASSERT(error == 0 && !sysmem && *objp != NULL,
+ ("%s: invalid devmem segment %d", __func__, dsc->segid));
+
+ vcpu_unlock_one(dsc->sc, VM_MAXCPU - 1);
+
+ if (seglen >= last) {
+ vm_object_reference(*objp);
+ return (0);
+ } else {
+ return (EINVAL);
+ }
+}
+
+static struct cdevsw devmemsw = {
+ .d_name = "devmem",
+ .d_version = D_VERSION,
+ .d_mmap_single = devmem_mmap_single,
+};
+
+static int
+devmem_create_cdev(const char *vmname, int segid, char *devname)
+{
+ struct devmem_softc *dsc;
+ struct vmmdev_softc *sc;
+ struct cdev *cdev;
+ int error;
+
+ error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
+ UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
+ if (error)
+ return (error);
+
+ dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
+
+ mtx_lock(&vmmdev_mtx);
+ sc = vmmdev_lookup(vmname);
+ KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
+ if (sc->cdev == NULL) {
+ /* virtual machine is being created or destroyed */
+ mtx_unlock(&vmmdev_mtx);
+ free(dsc, M_VMMDEV);
+ destroy_dev_sched_cb(cdev, NULL, 0);
+ return (ENODEV);
+ }
+
+ dsc->segid = segid;
+ dsc->name = devname;
+ dsc->cdev = cdev;
+ dsc->sc = sc;
+ SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
+ mtx_unlock(&vmmdev_mtx);
+
+ /* The 'cdev' is ready for use after 'si_drv1' is initialized */
+ cdev->si_drv1 = dsc;
+ return (0);
+}
+
+static void
+devmem_destroy(void *arg)
+{
+ struct devmem_softc *dsc = arg;
+
+ KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
+ dsc->cdev = NULL;
+ dsc->sc = NULL;
+}
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index 758b7e8..ae5330f 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -1677,12 +1677,12 @@ ptp_release(void **cookie)
}
static void *
-ptp_hold(struct vm *vm, vm_paddr_t ptpphys, size_t len, void **cookie)
+ptp_hold(struct vm *vm, int vcpu, vm_paddr_t ptpphys, size_t len, void **cookie)
{
void *ptr;
ptp_release(cookie);
- ptr = vm_gpa_hold(vm, ptpphys, len, VM_PROT_RW, cookie);
+ ptr = vm_gpa_hold(vm, vcpu, ptpphys, len, VM_PROT_RW, cookie);
return (ptr);
}
@@ -1729,7 +1729,8 @@ restart:
/* Zero out the lower 12 bits. */
ptpphys &= ~0xfff;
- ptpbase32 = ptp_hold(vm, ptpphys, PAGE_SIZE, &cookie);
+ ptpbase32 = ptp_hold(vm, vcpuid, ptpphys, PAGE_SIZE,
+ &cookie);
if (ptpbase32 == NULL)
goto error;
@@ -1788,7 +1789,8 @@ restart:
/* Zero out the lower 5 bits and the upper 32 bits */
ptpphys &= 0xffffffe0UL;
- ptpbase = ptp_hold(vm, ptpphys, sizeof(*ptpbase) * 4, &cookie);
+ ptpbase = ptp_hold(vm, vcpuid, ptpphys, sizeof(*ptpbase) * 4,
+ &cookie);
if (ptpbase == NULL)
goto error;
@@ -1811,7 +1813,7 @@ restart:
/* Zero out the lower 12 bits and the upper 12 bits */
ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;
- ptpbase = ptp_hold(vm, ptpphys, PAGE_SIZE, &cookie);
+ ptpbase = ptp_hold(vm, vcpuid, ptpphys, PAGE_SIZE, &cookie);
if (ptpbase == NULL)
goto error;
@@ -2319,10 +2321,13 @@ decode_moffset(struct vie *vie)
* page table fault matches with our instruction decoding.
*/
static int
-verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
+verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie,
+ enum vm_cpu_mode cpu_mode)
{
int error;
- uint64_t base, idx, gla2;
+ uint64_t base, segbase, idx, gla2;
+ enum vm_reg_name seg;
+ struct seg_desc desc;
/* Skip 'gla' verification */
if (gla == VIE_INVALID_GLA)
@@ -2355,14 +2360,48 @@ verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
}
}
- /* XXX assuming that the base address of the segment is 0 */
- gla2 = base + vie->scale * idx + vie->displacement;
+ /*
+ * From "Specifying a Segment Selector", Intel SDM, Vol 1
+ *
+ * In 64-bit mode, segmentation is generally (but not
+ * completely) disabled. The exceptions are the FS and GS
+ * segments.
+ *
+ * In legacy IA-32 mode, when the ESP or EBP register is used
+ * as the base, the SS segment is the default segment. For
+ * other data references, except when relative to stack or
+ * string destination the DS segment is the default. These
+ * can be overridden to allow other segments to be accessed.
+ */
+ if (vie->segment_override)
+ seg = vie->segment_register;
+ else if (vie->base_register == VM_REG_GUEST_RSP ||
+ vie->base_register == VM_REG_GUEST_RBP)
+ seg = VM_REG_GUEST_SS;
+ else
+ seg = VM_REG_GUEST_DS;
+ if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS &&
+ seg != VM_REG_GUEST_GS) {
+ segbase = 0;
+ } else {
+ error = vm_get_seg_desc(vm, cpuid, seg, &desc);
+ if (error) {
+ printf("verify_gla: error %d getting segment"
+ " descriptor %d", error,
+ vie->segment_register);
+ return (-1);
+ }
+ segbase = desc.base;
+ }
+
+ gla2 = segbase + base + vie->scale * idx + vie->displacement;
gla2 &= size2mask[vie->addrsize];
if (gla != gla2) {
- printf("verify_gla mismatch: "
+ printf("verify_gla mismatch: segbase(0x%0lx)"
"base(0x%0lx), scale(%d), index(0x%0lx), "
"disp(0x%0lx), gla(0x%0lx), gla2(0x%0lx)\n",
- base, vie->scale, idx, vie->displacement, gla, gla2);
+ segbase, base, vie->scale, idx, vie->displacement,
+ gla, gla2);
return (-1);
}
@@ -2396,7 +2435,7 @@ vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
return (-1);
if ((vie->op.op_flags & VIE_OP_F_NO_GLA_VERIFICATION) == 0) {
- if (verify_gla(vm, cpuid, gla, vie))
+ if (verify_gla(vm, cpuid, gla, vie, cpu_mode))
return (-1);
}
diff --git a/sys/amd64/vmm/vmm_mem.c b/sys/amd64/vmm/vmm_mem.c
index 1019f2b..c9be6c9 100644
--- a/sys/amd64/vmm/vmm_mem.c
+++ b/sys/amd64/vmm/vmm_mem.c
@@ -114,38 +114,6 @@ vmm_mmio_free(struct vmspace *vmspace, vm_paddr_t gpa, size_t len)
vm_map_remove(&vmspace->vm_map, gpa, gpa + len);
}
-vm_object_t
-vmm_mem_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len)
-{
- int error;
- vm_object_t obj;
-
- if (gpa & PAGE_MASK)
- panic("vmm_mem_alloc: invalid gpa %#lx", gpa);
-
- if (len == 0 || (len & PAGE_MASK) != 0)
- panic("vmm_mem_alloc: invalid allocation size %lu", len);
-
- obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
- if (obj != NULL) {
- error = vm_map_find(&vmspace->vm_map, obj, 0, &gpa, len, 0,
- VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0);
- if (error != KERN_SUCCESS) {
- vm_object_deallocate(obj);
- obj = NULL;
- }
- }
-
- return (obj);
-}
-
-void
-vmm_mem_free(struct vmspace *vmspace, vm_paddr_t gpa, size_t len)
-{
-
- vm_map_remove(&vmspace->vm_map, gpa, gpa + len);
-}
-
vm_paddr_t
vmm_mem_maxaddr(void)
{
diff --git a/sys/amd64/vmm/vmm_mem.h b/sys/amd64/vmm/vmm_mem.h
index a375070..7773faa 100644
--- a/sys/amd64/vmm/vmm_mem.h
+++ b/sys/amd64/vmm/vmm_mem.h
@@ -33,10 +33,8 @@ struct vmspace;
struct vm_object;
int vmm_mem_init(void);
-struct vm_object *vmm_mem_alloc(struct vmspace *, vm_paddr_t gpa, size_t size);
struct vm_object *vmm_mmio_alloc(struct vmspace *, vm_paddr_t gpa, size_t len,
vm_paddr_t hpa);
-void vmm_mem_free(struct vmspace *, vm_paddr_t gpa, size_t size);
void vmm_mmio_free(struct vmspace *, vm_paddr_t gpa, size_t size);
vm_paddr_t vmm_mem_maxaddr(void);
diff --git a/sys/boot/common/bootstrap.h b/sys/boot/common/bootstrap.h
index de77c7b..8804f01 100644
--- a/sys/boot/common/bootstrap.h
+++ b/sys/boot/common/bootstrap.h
@@ -56,7 +56,10 @@ typedef int (bootblk_cmd_t)(int argc, char *argv[]);
extern char *command_errmsg;
extern char command_errbuf[]; /* XXX blah, length */
#define CMD_OK 0
-#define CMD_ERROR 1
+#define CMD_WARN 1
+#define CMD_ERROR 2
+#define CMD_CRIT 3
+#define CMD_FATAL 4
/* interp.c */
void interact(void);
@@ -233,7 +236,7 @@ int mod_loadkld(const char *name, int argc, char *argv[]);
struct preloaded_file *file_alloc(void);
struct preloaded_file *file_findfile(const char *name, const char *type);
struct file_metadata *file_findmetadata(struct preloaded_file *fp, int type);
-struct preloaded_file *file_loadraw(char *name, char *type, int insert);
+struct preloaded_file *file_loadraw(const char *name, char *type, int insert);
void file_discard(struct preloaded_file *fp);
void file_addmetadata(struct preloaded_file *fp, int type, size_t size, void *p);
int file_addmodule(struct preloaded_file *fp, char *modname, int version,
diff --git a/sys/boot/common/console.c b/sys/boot/common/console.c
index 6656eab..f4ffc56 100644
--- a/sys/boot/common/console.c
+++ b/sys/boot/common/console.c
@@ -38,7 +38,7 @@ __FBSDID("$FreeBSD$");
static int cons_set(struct env_var *ev, int flags, const void *value);
static int cons_find(const char *name);
static int cons_check(const char *string);
-static void cons_change(const char *string);
+static int cons_change(const char *string);
static int twiddle_set(struct env_var *ev, int flags, const void *value);
/*
@@ -47,12 +47,12 @@ static int twiddle_set(struct env_var *ev, int flags, const void *value);
* as active. Also create the console variable.
*/
void
-cons_probe(void)
+cons_probe(void)
{
int cons;
int active;
char *prefconsole;
-
+
/* We want a callback to install the new value when this var changes. */
env_setenv("twiddle_divisor", EV_VOLATILE, "1", twiddle_set, env_nounset);
@@ -162,54 +162,69 @@ cons_find(const char *name)
static int
cons_set(struct env_var *ev, int flags, const void *value)
{
- int cons;
-
- if ((value == NULL) || (cons_check(value) == -1)) {
- if (value != NULL)
- printf("no such console!\n");
- printf("Available consoles:\n");
- for (cons = 0; consoles[cons] != NULL; cons++)
- printf(" %s\n", consoles[cons]->c_name);
- return(CMD_ERROR);
+ int ret;
+
+ if ((value == NULL) || (cons_check(value) == 0)) {
+ /*
+ * Return CMD_OK instead of CMD_ERROR to prevent forth syntax error,
+ * which would prevent it processing any further loader.conf entries.
+ */
+ return (CMD_OK);
}
- cons_change(value);
+ ret = cons_change(value);
+ if (ret != CMD_OK)
+ return (ret);
env_setenv(ev->ev_name, flags | EV_NOHOOK, value, NULL, NULL);
- return(CMD_OK);
+ return (CMD_OK);
}
/*
- * Check that all of the consoles listed in *string are valid consoles
+ * Check that at least one the consoles listed in *string is valid
*/
static int
cons_check(const char *string)
{
- int cons;
+ int cons, found, failed;
char *curpos, *dup, *next;
dup = next = strdup(string);
- cons = -1;
+ found = failed = 0;
while (next != NULL) {
curpos = strsep(&next, " ,");
if (*curpos != '\0') {
cons = cons_find(curpos);
- if (cons == -1)
- break;
+ if (cons == -1) {
+ printf("console %s is invalid!\n", curpos);
+ failed++;
+ } else {
+ found++;
+ }
}
}
free(dup);
- return (cons);
+
+ if (found == 0)
+ printf("no valid consoles!\n");
+
+ if (found == 0 || failed != 0) {
+ printf("Available consoles:\n");
+ for (cons = 0; consoles[cons] != NULL; cons++)
+ printf(" %s\n", consoles[cons]->c_name);
+ }
+
+ return (found);
}
/*
- * Activate all of the consoles listed in *string and disable all the others.
+ * Activate all the valid consoles listed in *string and disable all others.
*/
-static void
+static int
cons_change(const char *string)
{
- int cons;
+ int cons, active;
char *curpos, *dup, *next;
/* Disable all consoles */
@@ -219,6 +234,7 @@ cons_change(const char *string)
/* Enable selected consoles */
dup = next = strdup(string);
+ active = 0;
while (next != NULL) {
curpos = strsep(&next, " ,");
if (*curpos == '\0')
@@ -227,14 +243,37 @@ cons_change(const char *string)
if (cons >= 0) {
consoles[cons]->c_flags |= C_ACTIVEIN | C_ACTIVEOUT;
consoles[cons]->c_init(0);
- if ((consoles[cons]->c_flags & (C_PRESENTIN | C_PRESENTOUT)) !=
- (C_PRESENTIN | C_PRESENTOUT))
- printf("console %s failed to initialize\n",
- consoles[cons]->c_name);
+ if ((consoles[cons]->c_flags & (C_PRESENTIN | C_PRESENTOUT)) ==
+ (C_PRESENTIN | C_PRESENTOUT)) {
+ active++;
+ continue;
+ }
+
+ if (active != 0) {
+ /* If no consoles have initialised we wouldn't see this. */
+ printf("console %s failed to initialize\n", consoles[cons]->c_name);
+ }
}
}
free(dup);
+
+ if (active == 0) {
+ /* All requested consoles failed to initialise, try to recover. */
+ for (cons = 0; consoles[cons] != NULL; cons++) {
+ consoles[cons]->c_flags |= C_ACTIVEIN | C_ACTIVEOUT;
+ consoles[cons]->c_init(0);
+ if ((consoles[cons]->c_flags &
+ (C_PRESENTIN | C_PRESENTOUT)) ==
+ (C_PRESENTIN | C_PRESENTOUT))
+ active++;
+ }
+
+ if (active == 0)
+ return (CMD_ERROR); /* Recovery failed. */
+ }
+
+ return (CMD_OK);
}
/*
diff --git a/sys/boot/common/interp_forth.c b/sys/boot/common/interp_forth.c
index 1c37e2b..d5d1a55f 100644
--- a/sys/boot/common/interp_forth.c
+++ b/sys/boot/common/interp_forth.c
@@ -138,13 +138,23 @@ bf_command(FICL_VM *vm)
} else {
result=BF_PARSE;
}
+
+ switch (result) {
+ case CMD_CRIT:
+ printf("%s\n", command_errmsg);
+ break;
+ case CMD_FATAL:
+ panic("%s\n", command_errmsg);
+ }
+
free(line);
/*
* If there was error during nested ficlExec(), we may no longer have
* valid environment to return. Throw all exceptions from here.
*/
- if (result != 0)
+ if (result != CMD_OK)
vmThrow(vm, result);
+
/* This is going to be thrown!!! */
stackPushINT(vm->pStack,result);
}
diff --git a/sys/boot/common/load_elf.c b/sys/boot/common/load_elf.c
index 200a29c..6a88e39 100644
--- a/sys/boot/common/load_elf.c
+++ b/sys/boot/common/load_elf.c
@@ -856,7 +856,7 @@ __elfN(parse_modmetadata)(struct preloaded_file *fp, elf_file_t ef,
error = __elfN(reloc_ptr)(fp, ef, v, &md, sizeof(md));
if (error == EOPNOTSUPP) {
md.md_cval += ef->off;
- md.md_data += ef->off;
+ md.md_data = (void *)((uintptr_t)md.md_data + ef->off);
} else if (error != 0)
return (error);
#endif
diff --git a/sys/boot/common/load_elf_obj.c b/sys/boot/common/load_elf_obj.c
index 3d8b02a..626f2d9 100644
--- a/sys/boot/common/load_elf_obj.c
+++ b/sys/boot/common/load_elf_obj.c
@@ -519,10 +519,8 @@ __elfN(obj_symaddr)(struct elf_file *ef, Elf_Size symidx)
{
Elf_Sym sym;
Elf_Addr base;
- int symcnt;
- symcnt = ef->e_shdr[ef->symtabindex].sh_size / sizeof(Elf_Sym);
- if (symidx >= symcnt)
+ if (symidx >= ef->e_shdr[ef->symtabindex].sh_size / sizeof(Elf_Sym))
return (0);
COPYOUT(ef->e_shdr[ef->symtabindex].sh_addr + symidx * sizeof(Elf_Sym),
&sym, sizeof(sym));
diff --git a/sys/boot/common/misc.c b/sys/boot/common/misc.c
index c4c36ea..228dfaf 100644
--- a/sys/boot/common/misc.c
+++ b/sys/boot/common/misc.c
@@ -118,14 +118,12 @@ kern_bzero(vm_offset_t dest, size_t len)
int
kern_pread(int fd, vm_offset_t dest, size_t len, off_t off)
{
- ssize_t nread;
if (lseek(fd, off, SEEK_SET) == -1) {
printf("\nlseek failed\n");
return (-1);
}
- nread = archsw.arch_readin(fd, dest, len);
- if (nread != len) {
+ if ((size_t)archsw.arch_readin(fd, dest, len) != len) {
printf("\nreadin failed\n");
return (-1);
}
@@ -140,7 +138,6 @@ void *
alloc_pread(int fd, off_t off, size_t len)
{
void *buf;
- ssize_t nread;
buf = malloc(len);
if (buf == NULL) {
@@ -152,8 +149,7 @@ alloc_pread(int fd, off_t off, size_t len)
free(buf);
return (NULL);
}
- nread = read(fd, buf, len);
- if (nread != len) {
+ if ((size_t)read(fd, buf, len) != len) {
printf("\nread failed\n");
free(buf);
return (NULL);
diff --git a/sys/boot/common/module.c b/sys/boot/common/module.c
index 2af9c20..52406ef 100644
--- a/sys/boot/common/module.c
+++ b/sys/boot/common/module.c
@@ -101,6 +101,7 @@ COMMAND_SET(load, "load", "load a kernel or module", command_load);
static int
command_load(int argc, char *argv[])
{
+ struct preloaded_file *fp;
char *typestr;
int dofile, dokld, ch, error;
@@ -110,7 +111,7 @@ command_load(int argc, char *argv[])
typestr = NULL;
if (argc == 1) {
command_errmsg = "no filename specified";
- return(CMD_ERROR);
+ return (CMD_CRIT);
}
while ((ch = getopt(argc, argv, "kt:")) != -1) {
switch(ch) {
@@ -124,7 +125,7 @@ command_load(int argc, char *argv[])
case '?':
default:
/* getopt has already reported an error */
- return(CMD_OK);
+ return (CMD_OK);
}
}
argv += (optind - 1);
@@ -136,26 +137,46 @@ command_load(int argc, char *argv[])
if (dofile) {
if ((argc != 2) || (typestr == NULL) || (*typestr == 0)) {
command_errmsg = "invalid load type";
- return(CMD_ERROR);
+ return (CMD_CRIT);
}
- return (file_loadraw(argv[1], typestr, 1) ? CMD_OK : CMD_ERROR);
+
+ fp = file_findfile(argv[1], typestr);
+ if (fp) {
+ sprintf(command_errbuf, "warning: file '%s' already loaded", argv[1]);
+ return (CMD_WARN);
+ }
+
+ if (file_loadraw(argv[1], typestr, 1) != NULL)
+ return (CMD_OK);
+
+ /* Failing to load mfs_root is never going to end well! */
+ if (strcmp("mfs_root", typestr) == 0)
+ return (CMD_FATAL);
+
+ return (CMD_ERROR);
}
/*
* Do we have explicit KLD load ?
*/
if (dokld || file_havepath(argv[1])) {
error = mod_loadkld(argv[1], argc - 2, argv + 2);
- if (error == EEXIST)
+ if (error == EEXIST) {
sprintf(command_errbuf, "warning: KLD '%s' already loaded", argv[1]);
- return (error == 0 ? CMD_OK : CMD_ERROR);
+ return (CMD_WARN);
+ }
+
+ return (error == 0 ? CMD_OK : CMD_CRIT);
}
/*
* Looks like a request for a module.
*/
error = mod_load(argv[1], NULL, argc - 2, argv + 2);
- if (error == EEXIST)
+ if (error == EEXIST) {
sprintf(command_errbuf, "warning: module '%s' already loaded", argv[1]);
- return (error == 0 ? CMD_OK : CMD_ERROR);
+ return (CMD_WARN);
+ }
+
+ return (error == 0 ? CMD_OK : CMD_CRIT);
}
COMMAND_SET(load_geli, "load_geli", "load a geli key", command_load_geli);
@@ -358,14 +379,14 @@ file_load_dependencies(struct preloaded_file *base_file)
}
/*
- * We've been asked to load (name) as (type), so just suck it in,
+ * We've been asked to load (fname) as (type), so just suck it in,
* no arguments or anything.
*/
struct preloaded_file *
-file_loadraw(char *name, char *type, int insert)
+file_loadraw(const char *fname, char *type, int insert)
{
struct preloaded_file *fp;
- char *cp;
+ char *name;
int fd, got;
vm_offset_t laddr;
@@ -376,12 +397,11 @@ file_loadraw(char *name, char *type, int insert)
}
/* locate the file on the load path */
- cp = file_search(name, NULL);
- if (cp == NULL) {
- sprintf(command_errbuf, "can't find '%s'", name);
+ name = file_search(fname, NULL);
+ if (name == NULL) {
+ sprintf(command_errbuf, "can't find '%s'", fname);
return(NULL);
}
- name = cp;
if ((fd = open(name, O_RDONLY)) < 0) {
sprintf(command_errbuf, "can't open '%s': %s", name, strerror(errno));
@@ -962,7 +982,7 @@ moduledir_rebuild(void)
{
struct moduledir *mdp, *mtmp;
const char *path, *cp, *ep;
- int cplen;
+ size_t cplen;
path = getenv("module_path");
if (path == NULL)
diff --git a/sys/boot/common/part.c b/sys/boot/common/part.c
index 518df1a..5e9217e 100644
--- a/sys/boot/common/part.c
+++ b/sys/boot/common/part.c
@@ -102,7 +102,7 @@ static struct parttypes {
const char *
parttype2str(enum partition_type type)
{
- int i;
+ size_t i;
for (i = 0; i < sizeof(ptypes) / sizeof(ptypes[0]); i++)
if (ptypes[i].type == type)
@@ -203,7 +203,7 @@ gpt_checktbl(const struct gpt_hdr *hdr, u_char *tbl, size_t size,
uint64_t lba_last)
{
struct gpt_ent *ent;
- int i, cnt;
+ uint32_t i, cnt;
cnt = size / hdr->hdr_entsz;
if (hdr->hdr_entries <= cnt) {
@@ -234,8 +234,8 @@ ptable_gptread(struct ptable *table, void *dev, diskread_t dread)
struct gpt_ent *ent;
u_char *buf, *tbl;
uint64_t offset;
- int pri, sec, i;
- size_t size;
+ int pri, sec;
+ size_t size, i;
buf = malloc(table->sectorsize);
if (buf == NULL)
@@ -358,7 +358,7 @@ mbr_parttype(uint8_t type)
return (PART_UNKNOWN);
}
-struct ptable*
+static struct ptable*
ptable_ebrread(struct ptable *table, void *dev, diskread_t dread)
{
struct dos_partition *dp;
@@ -435,7 +435,7 @@ bsd_parttype(uint8_t type)
return (PART_UNKNOWN);
}
-struct ptable*
+static struct ptable*
ptable_bsdread(struct ptable *table, void *dev, diskread_t dread)
{
struct disklabel *dl;
diff --git a/sys/boot/common/self_reloc.c b/sys/boot/common/self_reloc.c
index 9864a48..29b9c5f 100644
--- a/sys/boot/common/self_reloc.c
+++ b/sys/boot/common/self_reloc.c
@@ -61,6 +61,8 @@ __FBSDID("$FreeBSD$");
#define RELOC_TYPE_RELATIVE R_386_RELATIVE
#endif
+void self_reloc(Elf_Addr baseaddr, ElfW_Dyn *dynamic);
+
/*
* A simple elf relocator.
*/
@@ -118,6 +120,6 @@ self_reloc(Elf_Addr baseaddr, ElfW_Dyn *dynamic)
/* XXX: do we need other relocations ? */
break;
}
- rel = (ElfW_Rel *) ((caddr_t) rel + relent);
+ rel = (ElfW_Rel *)(void *)((caddr_t) rel + relent);
}
}
diff --git a/sys/boot/common/ufsread.c b/sys/boot/common/ufsread.c
index d0ca57a..6f499f9 100644
--- a/sys/boot/common/ufsread.c
+++ b/sys/boot/common/ufsread.c
@@ -165,7 +165,7 @@ static int sblock_try[] = SBLOCKSEARCH;
#endif
static ssize_t
-fsread(ufs_ino_t inode, void *buf, size_t nbyte)
+fsread_size(ufs_ino_t inode, void *buf, size_t nbyte, size_t *fsizep)
{
#ifndef UFS2_ONLY
static struct ufs1_dinode dp1;
@@ -185,10 +185,21 @@ fsread(ufs_ino_t inode, void *buf, size_t nbyte)
static ufs2_daddr_t blkmap, indmap;
u_int u;
+ /* Basic parameter validation. */
+ if ((buf == NULL && nbyte != 0) || dmadat == NULL)
+ return (-1);
+
blkbuf = dmadat->blkbuf;
indbuf = dmadat->indbuf;
- if (!dsk_meta) {
+
+ /*
+ * Force probe if inode is zero to ensure we have a valid fs, otherwise
+ * when probing multiple paritions, reads from subsequent parititions
+ * will incorrectly succeed.
+ */
+ if (!dsk_meta || inode == 0) {
inomap = 0;
+ dsk_meta = 0;
for (n = 0; sblock_try[n] != -1; n++) {
if (dskread(dmadat->sbbuf, sblock_try[n] / DEV_BSIZE,
SBLOCKSIZE / DEV_BSIZE))
@@ -207,7 +218,7 @@ fsread(ufs_ino_t inode, void *buf, size_t nbyte)
#endif
) &&
fs.fs_bsize <= MAXBSIZE &&
- fs.fs_bsize >= sizeof(struct fs))
+ fs.fs_bsize >= (int32_t)sizeof(struct fs))
break;
}
if (sblock_try[n] == -1) {
@@ -224,18 +235,18 @@ fsread(ufs_ino_t inode, void *buf, size_t nbyte)
return -1;
n = INO_TO_VBO(n, inode);
#if defined(UFS1_ONLY)
- memcpy(&dp1, (struct ufs1_dinode *)blkbuf + n,
- sizeof(struct ufs1_dinode));
+ memcpy(&dp1, (struct ufs1_dinode *)(void *)blkbuf + n,
+ sizeof(dp1));
#elif defined(UFS2_ONLY)
- memcpy(&dp2, (struct ufs2_dinode *)blkbuf + n,
- sizeof(struct ufs2_dinode));
+ memcpy(&dp2, (struct ufs2_dinode *)(void *)blkbuf + n,
+ sizeof(dp2));
#else
if (fs.fs_magic == FS_UFS1_MAGIC)
- memcpy(&dp1, (struct ufs1_dinode *)blkbuf + n,
- sizeof(struct ufs1_dinode));
+ memcpy(&dp1, (struct ufs1_dinode *)(void *)blkbuf + n,
+ sizeof(dp1));
else
- memcpy(&dp2, (struct ufs2_dinode *)blkbuf + n,
- sizeof(struct ufs2_dinode));
+ memcpy(&dp2, (struct ufs2_dinode *)(void *)blkbuf + n,
+ sizeof(dp2));
#endif
inomap = inode;
fs_off = 0;
@@ -283,7 +294,7 @@ fsread(ufs_ino_t inode, void *buf, size_t nbyte)
return -1;
vbaddr = fsbtodb(&fs, addr2) + (off >> VBLKSHIFT) * DBPERVBLK;
vboff = off & VBLKMASK;
- n = sblksize(&fs, size, lbn) - (off & ~VBLKMASK);
+ n = sblksize(&fs, (off_t)size, lbn) - (off & ~VBLKMASK);
if (n > VBLKSIZE)
n = VBLKSIZE;
if (blkmap != vbaddr) {
@@ -299,5 +310,17 @@ fsread(ufs_ino_t inode, void *buf, size_t nbyte)
fs_off += n;
nb -= n;
}
+
+ if (fsizep != NULL)
+ *fsizep = size;
+
return nbyte;
}
+
+static ssize_t
+fsread(ufs_ino_t inode, void *buf, size_t nbyte)
+{
+
+ return fsread_size(inode, buf, nbyte, NULL);
+}
+
diff --git a/sys/boot/efi/Makefile b/sys/boot/efi/Makefile
index 5dfb648..acf5a49 100644
--- a/sys/boot/efi/Makefile
+++ b/sys/boot/efi/Makefile
@@ -1,9 +1,15 @@
# $FreeBSD$
-SUBDIR= libefi
+.include <bsd.own.mk>
+
+# In-tree GCC does not support __attribute__((ms_abi)).
+.if ${COMPILER_TYPE} != "gcc"
.if ${MACHINE_CPUARCH} == "amd64"
-SUBDIR+= loader boot1
+SUBDIR+= libefi loader boot1
.endif
+.endif # ${COMPILER_TYPE} != "gcc"
+
.include <bsd.subdir.mk>
+
diff --git a/sys/boot/efi/boot1/Makefile b/sys/boot/efi/boot1/Makefile
index 1f59c33..5c2c239 100644
--- a/sys/boot/efi/boot1/Makefile
+++ b/sys/boot/efi/boot1/Makefile
@@ -4,16 +4,28 @@ MAN=
.include <bsd.own.mk>
-# In-tree GCC does not support __attribute__((ms_abi)).
-.if ${COMPILER_TYPE} != "gcc"
-
MK_SSP= no
PROG= boot1.sym
INTERNALPROG=
+WARNS?= 6
+
+.if ${MK_ZFS} != "no"
+# Disable warnings that are currently incompatible with the zfs boot code
+CWARNFLAGS.zfs_module.c += -Wno-array-bounds
+CWARNFLAGS.zfs_module.c += -Wno-cast-align
+CWARNFLAGS.zfs_module.c += -Wno-cast-qual
+CWARNFLAGS.zfs_module.c += -Wno-missing-prototypes
+CWARNFLAGS.zfs_module.c += -Wno-sign-compare
+CWARNFLAGS.zfs_module.c += -Wno-unused-parameter
+CWARNFLAGS.zfs_module.c += -Wno-unused-function
+.endif
# architecture-specific loader code
-SRCS= boot1.c reloc.c start.S
+SRCS= boot1.c reloc.c start.S ufs_module.c
+.if ${MK_ZFS} != "no"
+SRCS+= zfs_module.c
+.endif
CFLAGS+= -fPIC
CFLAGS+= -I.
@@ -21,6 +33,16 @@ CFLAGS+= -I${.CURDIR}/../include
CFLAGS+= -I${.CURDIR}/../include/${MACHINE}
CFLAGS+= -I${.CURDIR}/../../../contrib/dev/acpica/include
CFLAGS+= -I${.CURDIR}/../../..
+CFLAGS+= -DEFI_UFS_BOOT
+.ifdef(EFI_DEBUG)
+CFLAGS+= -DEFI_DEBUG
+.endif
+
+.if ${MK_ZFS} != "no"
+CFLAGS+= -I${.CURDIR}/../../zfs/
+CFLAGS+= -I${.CURDIR}/../../../cddl/boot/zfs/
+CFLAGS+= -DEFI_ZFS_BOOT
+.endif
# Always add MI sources and REGULAR efi loader bits
.PATH: ${.CURDIR}/../loader/arch/${MACHINE}
@@ -81,8 +103,6 @@ boot1.efifat: boot1.efi
CLEANFILES= boot1.efi boot1.efifat
-.endif # ${COMPILER_TYPE} != "gcc"
-
.include <bsd.prog.mk>
.if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386"
diff --git a/sys/boot/efi/boot1/boot1.c b/sys/boot/efi/boot1/boot1.c
index 9d5bac3..f046235 100644
--- a/sys/boot/efi/boot1/boot1.c
+++ b/sys/boot/efi/boot1/boot1.c
@@ -5,6 +5,8 @@
* All rights reserved.
* Copyright (c) 2014 Nathan Whitehorn
* All rights reserved.
+ * Copyright (c) 2015 Eric McCorkle
+ * All rights reserved.
*
* Redistribution and use in source and binary forms are freely
* permitted provided that the above copyright notice and this
@@ -21,7 +23,6 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
-#include <sys/dirent.h>
#include <machine/elf.h>
#include <machine/stdarg.h>
#include <stand.h>
@@ -29,46 +30,125 @@ __FBSDID("$FreeBSD$");
#include <efi.h>
#include <eficonsctl.h>
+#include "boot_module.h"
+
#define _PATH_LOADER "/boot/loader.efi"
-#define _PATH_KERNEL "/boot/kernel/kernel"
-#define BSIZEMAX 16384
+static const boot_module_t *boot_modules[] =
+{
+#ifdef EFI_ZFS_BOOT
+ &zfs_module,
+#endif
+#ifdef EFI_UFS_BOOT
+ &ufs_module
+#endif
+};
+
+#define NUM_BOOT_MODULES (sizeof(boot_modules) / sizeof(boot_module_t*))
+/* The initial number of handles used to query EFI for partitions. */
+#define NUM_HANDLES_INIT 24
-void panic(const char *fmt, ...) __dead2;
void putchar(int c);
+EFI_STATUS efi_main(EFI_HANDLE Ximage, EFI_SYSTEM_TABLE* Xsystab);
-static int domount(EFI_DEVICE_PATH *device, EFI_BLOCK_IO *blkio, int quiet);
-static void load(const char *fname);
+static void try_load(const boot_module_t* mod);
+static EFI_STATUS probe_handle(EFI_HANDLE h);
EFI_SYSTEM_TABLE *systab;
-EFI_HANDLE *image;
+EFI_BOOT_SERVICES *bs;
+static EFI_HANDLE *image;
static EFI_GUID BlockIoProtocolGUID = BLOCK_IO_PROTOCOL;
static EFI_GUID DevicePathGUID = DEVICE_PATH_PROTOCOL;
static EFI_GUID LoadedImageGUID = LOADED_IMAGE_PROTOCOL;
static EFI_GUID ConsoleControlGUID = EFI_CONSOLE_CONTROL_PROTOCOL_GUID;
-static EFI_BLOCK_IO *bootdev;
-static EFI_DEVICE_PATH *bootdevpath;
-static EFI_HANDLE *bootdevhandle;
+/*
+ * Provide Malloc / Free backed by EFIs AllocatePool / FreePool which ensures
+ * memory is correctly aligned avoiding EFI_INVALID_PARAMETER returns from
+ * EFI methods.
+ */
+void *
+Malloc(size_t len, const char *file __unused, int line __unused)
+{
+ void *out;
+
+ if (bs->AllocatePool(EfiLoaderData, len, &out) == EFI_SUCCESS)
+ return (out);
-EFI_STATUS efi_main(EFI_HANDLE Ximage, EFI_SYSTEM_TABLE* Xsystab)
+ return (NULL);
+}
+
+void
+Free(void *buf, const char *file __unused, int line __unused)
{
- EFI_HANDLE handles[128];
- EFI_BLOCK_IO *blkio;
- UINTN i, nparts = sizeof(handles), cols, rows, max_dim, best_mode;
+ (void)bs->FreePool(buf);
+}
+
+/*
+ * This function only returns if it fails to load the kernel. If it
+ * succeeds, it simply boots the kernel.
+ */
+void
+try_load(const boot_module_t *mod)
+{
+ size_t bufsize;
+ void *buf;
+ dev_info_t *dev;
+ EFI_HANDLE loaderhandle;
+ EFI_LOADED_IMAGE *loaded_image;
+ EFI_STATUS status;
+
+ status = mod->load(_PATH_LOADER, &dev, &buf, &bufsize);
+ if (status == EFI_NOT_FOUND)
+ return;
+
+ if (status != EFI_SUCCESS) {
+ printf("%s failed to load %s (%lu)\n", mod->name, _PATH_LOADER,
+ EFI_ERROR_CODE(status));
+ return;
+ }
+
+ if ((status = bs->LoadImage(TRUE, image, dev->devpath, buf, bufsize,
+ &loaderhandle)) != EFI_SUCCESS) {
+ printf("Failed to load image provided by %s, size: %zu, (%lu)\n",
+ mod->name, bufsize, EFI_ERROR_CODE(status));
+ return;
+ }
+
+ if ((status = bs->HandleProtocol(loaderhandle, &LoadedImageGUID,
+ (VOID**)&loaded_image)) != EFI_SUCCESS) {
+ printf("Failed to query LoadedImage provided by %s (%lu)\n",
+ mod->name, EFI_ERROR_CODE(status));
+ return;
+ }
+
+ loaded_image->DeviceHandle = dev->devhandle;
+
+ if ((status = bs->StartImage(loaderhandle, NULL, NULL)) !=
+ EFI_SUCCESS) {
+ printf("Failed to start image provided by %s (%lu)\n",
+ mod->name, EFI_ERROR_CODE(status));
+ return;
+ }
+}
+
+EFI_STATUS
+efi_main(EFI_HANDLE Ximage, EFI_SYSTEM_TABLE *Xsystab)
+{
+ EFI_HANDLE *handles;
EFI_STATUS status;
- EFI_DEVICE_PATH *devpath;
- EFI_BOOT_SERVICES *BS;
EFI_CONSOLE_CONTROL_PROTOCOL *ConsoleControl = NULL;
SIMPLE_TEXT_OUTPUT_INTERFACE *conout = NULL;
- char *path = _PATH_LOADER;
+ UINTN i, max_dim, best_mode, cols, rows, hsize, nhandles;
+ /* Basic initialization*/
systab = Xsystab;
image = Ximage;
+ bs = Xsystab->BootServices;
- BS = systab->BootServices;
- status = BS->LocateProtocol(&ConsoleControlGUID, NULL,
+ /* Set up the console, so printf works. */
+ status = bs->LocateProtocol(&ConsoleControlGUID, NULL,
(VOID **)&ConsoleControl);
if (status == EFI_SUCCESS)
(void)ConsoleControl->SetMode(ConsoleControl,
@@ -93,203 +173,162 @@ EFI_STATUS efi_main(EFI_HANDLE Ximage, EFI_SYSTEM_TABLE* Xsystab)
conout->EnableCursor(conout, TRUE);
conout->ClearScreen(conout);
- printf(" \n>> FreeBSD EFI boot block\n");
- printf(" Loader path: %s\n", path);
-
- status = systab->BootServices->LocateHandle(ByProtocol,
- &BlockIoProtocolGUID, NULL, &nparts, handles);
- nparts /= sizeof(handles[0]);
-
- for (i = 0; i < nparts; i++) {
- status = systab->BootServices->HandleProtocol(handles[i],
- &DevicePathGUID, (void **)&devpath);
- if (EFI_ERROR(status))
+ printf("\n>> FreeBSD EFI boot block\n");
+ printf(" Loader path: %s\n\n", _PATH_LOADER);
+ printf(" Initializing modules:");
+ for (i = 0; i < NUM_BOOT_MODULES; i++) {
+ if (boot_modules[i] == NULL)
continue;
- while (!IsDevicePathEnd(NextDevicePathNode(devpath)))
- devpath = NextDevicePathNode(devpath);
+ printf(" %s", boot_modules[i]->name);
+ if (boot_modules[i]->init != NULL)
+ boot_modules[i]->init();
+ }
+ putchar('\n');
- status = systab->BootServices->HandleProtocol(handles[i],
- &BlockIoProtocolGUID, (void **)&blkio);
- if (EFI_ERROR(status))
- continue;
+ /* Get all the device handles */
+ hsize = (UINTN)NUM_HANDLES_INIT * sizeof(EFI_HANDLE);
+ if ((status = bs->AllocatePool(EfiLoaderData, hsize, (void **)&handles))
+ != EFI_SUCCESS)
+ panic("Failed to allocate %d handles (%lu)", NUM_HANDLES_INIT,
+ EFI_ERROR_CODE(status));
- if (!blkio->Media->LogicalPartition)
- continue;
+ status = bs->LocateHandle(ByProtocol, &BlockIoProtocolGUID, NULL,
+ &hsize, handles);
+ switch (status) {
+ case EFI_SUCCESS:
+ break;
+ case EFI_BUFFER_TOO_SMALL:
+ (void)bs->FreePool(handles);
+ if ((status = bs->AllocatePool(EfiLoaderData, hsize,
+ (void **)&handles) != EFI_SUCCESS)) {
+ panic("Failed to allocate %zu handles (%lu)", hsize /
+ sizeof(*handles), EFI_ERROR_CODE(status));
+ }
+ status = bs->LocateHandle(ByProtocol, &BlockIoProtocolGUID,
+ NULL, &hsize, handles);
+ if (status != EFI_SUCCESS)
+ panic("Failed to get device handles (%lu)\n",
+ EFI_ERROR_CODE(status));
+ break;
+ default:
+ panic("Failed to get device handles (%lu)",
+ EFI_ERROR_CODE(status));
+ }
- if (domount(devpath, blkio, 1) >= 0)
+ /* Scan all partitions, probing with all modules. */
+ nhandles = hsize / sizeof(*handles);
+ printf(" Probing %zu block devices...", nhandles);
+ for (i = 0; i < nhandles; i++) {
+ status = probe_handle(handles[i]);
+ switch (status) {
+ case EFI_UNSUPPORTED:
+ printf(".");
+ break;
+ case EFI_SUCCESS:
+ printf("+");
break;
+ default:
+ printf("x");
+ break;
+ }
}
+ printf(" done\n");
- if (i == nparts)
- panic("No bootable partition found");
-
- bootdevhandle = handles[i];
- load(path);
+ /* Status summary. */
+ for (i = 0; i < NUM_BOOT_MODULES; i++) {
+ if (boot_modules[i] != NULL) {
+ printf(" ");
+ boot_modules[i]->status();
+ }
+ }
- panic("Load failed");
+ /* Select a partition to boot by trying each module in order. */
+ for (i = 0; i < NUM_BOOT_MODULES; i++)
+ if (boot_modules[i] != NULL)
+ try_load(boot_modules[i]);
- return EFI_SUCCESS;
+ /* If we get here, we're out of luck... */
+ panic("No bootable partitions found!");
}
-static int
-dskread(void *buf, u_int64_t lba, int nblk)
+static EFI_STATUS
+probe_handle(EFI_HANDLE h)
{
+ dev_info_t *devinfo;
+ EFI_BLOCK_IO *blkio;
+ EFI_DEVICE_PATH *devpath;
EFI_STATUS status;
- int size;
+ UINTN i;
- lba = lba / (bootdev->Media->BlockSize / DEV_BSIZE);
- size = nblk * DEV_BSIZE;
- status = bootdev->ReadBlocks(bootdev, bootdev->Media->MediaId, lba,
- size, buf);
+ /* Figure out if we're dealing with an actual partition. */
+ status = bs->HandleProtocol(h, &DevicePathGUID, (void **)&devpath);
+ if (status == EFI_UNSUPPORTED)
+ return (status);
- if (EFI_ERROR(status))
- return (-1);
+ if (status != EFI_SUCCESS) {
+ DPRINTF("\nFailed to query DevicePath (%lu)\n",
+ EFI_ERROR_CODE(status));
+ return (status);
+ }
- return (0);
-}
+ while (!IsDevicePathEnd(NextDevicePathNode(devpath)))
+ devpath = NextDevicePathNode(devpath);
-#include "ufsread.c"
+ status = bs->HandleProtocol(h, &BlockIoProtocolGUID, (void **)&blkio);
+ if (status == EFI_UNSUPPORTED)
+ return (status);
-static ssize_t
-fsstat(ufs_ino_t inode)
-{
-#ifndef UFS2_ONLY
- static struct ufs1_dinode dp1;
- ufs1_daddr_t addr1;
-#endif
-#ifndef UFS1_ONLY
- static struct ufs2_dinode dp2;
-#endif
- static struct fs fs;
- static ufs_ino_t inomap;
- char *blkbuf;
- void *indbuf;
- size_t n, nb, size, off, vboff;
- ufs_lbn_t lbn;
- ufs2_daddr_t addr2, vbaddr;
- static ufs2_daddr_t blkmap, indmap;
- u_int u;
-
- blkbuf = dmadat->blkbuf;
- indbuf = dmadat->indbuf;
- if (!dsk_meta) {
- inomap = 0;
- for (n = 0; sblock_try[n] != -1; n++) {
- if (dskread(dmadat->sbbuf, sblock_try[n] / DEV_BSIZE,
- SBLOCKSIZE / DEV_BSIZE))
- return -1;
- memcpy(&fs, dmadat->sbbuf, sizeof(struct fs));
- if ((
-#if defined(UFS1_ONLY)
- fs.fs_magic == FS_UFS1_MAGIC
-#elif defined(UFS2_ONLY)
- (fs.fs_magic == FS_UFS2_MAGIC &&
- fs.fs_sblockloc == sblock_try[n])
-#else
- fs.fs_magic == FS_UFS1_MAGIC ||
- (fs.fs_magic == FS_UFS2_MAGIC &&
- fs.fs_sblockloc == sblock_try[n])
-#endif
- ) &&
- fs.fs_bsize <= MAXBSIZE &&
- fs.fs_bsize >= sizeof(struct fs))
- break;
- }
- if (sblock_try[n] == -1) {
- return -1;
- }
- dsk_meta++;
- } else
- memcpy(&fs, dmadat->sbbuf, sizeof(struct fs));
- if (!inode)
- return 0;
- if (inomap != inode) {
- n = IPERVBLK(&fs);
- if (dskread(blkbuf, INO_TO_VBA(&fs, n, inode), DBPERVBLK))
- return -1;
- n = INO_TO_VBO(n, inode);
-#if defined(UFS1_ONLY)
- memcpy(&dp1, (struct ufs1_dinode *)blkbuf + n,
- sizeof(struct ufs1_dinode));
-#elif defined(UFS2_ONLY)
- memcpy(&dp2, (struct ufs2_dinode *)blkbuf + n,
- sizeof(struct ufs2_dinode));
-#else
- if (fs.fs_magic == FS_UFS1_MAGIC)
- memcpy(&dp1, (struct ufs1_dinode *)blkbuf + n,
- sizeof(struct ufs1_dinode));
- else
- memcpy(&dp2, (struct ufs2_dinode *)blkbuf + n,
- sizeof(struct ufs2_dinode));
-#endif
- inomap = inode;
- fs_off = 0;
- blkmap = indmap = 0;
+ if (status != EFI_SUCCESS) {
+ DPRINTF("\nFailed to query BlockIoProtocol (%lu)\n",
+ EFI_ERROR_CODE(status));
+ return (status);
}
- size = DIP(di_size);
- n = size - fs_off;
- return (n);
-}
-static struct dmadat __dmadat;
+ if (!blkio->Media->LogicalPartition)
+ return (EFI_UNSUPPORTED);
-static int
-domount(EFI_DEVICE_PATH *device, EFI_BLOCK_IO *blkio, int quiet)
-{
+ /* Run through each module, see if it can load this partition */
+ for (i = 0; i < NUM_BOOT_MODULES; i++) {
+ if (boot_modules[i] == NULL)
+ continue;
- dmadat = &__dmadat;
- bootdev = blkio;
- bootdevpath = device;
- if (fsread(0, NULL, 0)) {
- if (!quiet)
- printf("domount: can't read superblock\n");
- return (-1);
+ if ((status = bs->AllocatePool(EfiLoaderData,
+ sizeof(*devinfo), (void **)&devinfo)) !=
+ EFI_SUCCESS) {
+ DPRINTF("\nFailed to allocate devinfo (%lu)\n",
+ EFI_ERROR_CODE(status));
+ continue;
+ }
+ devinfo->dev = blkio;
+ devinfo->devpath = devpath;
+ devinfo->devhandle = h;
+ devinfo->devdata = NULL;
+ devinfo->next = NULL;
+
+ status = boot_modules[i]->probe(devinfo);
+ if (status == EFI_SUCCESS)
+ return (EFI_SUCCESS);
+ (void)bs->FreePool(devinfo);
}
- if (!quiet)
- printf("Succesfully mounted UFS filesystem\n");
- return (0);
+
+ return (EFI_UNSUPPORTED);
}
-static void
-load(const char *fname)
+void
+add_device(dev_info_t **devinfop, dev_info_t *devinfo)
{
- ufs_ino_t ino;
- EFI_STATUS status;
- EFI_HANDLE loaderhandle;
- EFI_LOADED_IMAGE *loaded_image;
- void *buffer;
- size_t bufsize;
+ dev_info_t *dev;
- if ((ino = lookup(fname)) == 0) {
- printf("File %s not found\n", fname);
+ if (*devinfop == NULL) {
+ *devinfop = devinfo;
return;
}
- bufsize = fsstat(ino);
- status = systab->BootServices->AllocatePool(EfiLoaderData,
- bufsize, &buffer);
- fsread(ino, buffer, bufsize);
+ for (dev = *devinfop; dev->next != NULL; dev = dev->next)
+ ;
- /* XXX: For secure boot, we need our own loader here */
- status = systab->BootServices->LoadImage(TRUE, image, bootdevpath,
- buffer, bufsize, &loaderhandle);
- if (EFI_ERROR(status))
- printf("LoadImage failed with error %lu\n",
- EFI_ERROR_CODE(status));
-
- status = systab->BootServices->HandleProtocol(loaderhandle,
- &LoadedImageGUID, (VOID**)&loaded_image);
- if (EFI_ERROR(status))
- printf("HandleProtocol failed with error %lu\n",
- EFI_ERROR_CODE(status));
-
- loaded_image->DeviceHandle = bootdevhandle;
-
- status = systab->BootServices->StartImage(loaderhandle, NULL, NULL);
- if (EFI_ERROR(status))
- printf("StartImage failed with error %lu\n",
- EFI_ERROR_CODE(status));
+ dev->next = devinfo;
}
void
diff --git a/sys/boot/efi/boot1/boot_module.h b/sys/boot/efi/boot1/boot_module.h
new file mode 100644
index 0000000..2c158f6
--- /dev/null
+++ b/sys/boot/efi/boot1/boot_module.h
@@ -0,0 +1,110 @@
+/*-
+ * Copyright (c) 2015 Eric McCorkle
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _BOOT_MODULE_H_
+#define _BOOT_MODULE_H_
+
+#include <stdbool.h>
+
+#include <efi.h>
+#include <efilib.h>
+#include <eficonsctl.h>
+
+#ifdef EFI_DEBUG
+#define DPRINTF(fmt, ...) printf(fmt, __VA_ARGS__)
+#else
+#define DPRINTF(fmt, ...) {}
+#endif
+
+/* EFI device info */
+typedef struct dev_info
+{
+ EFI_BLOCK_IO *dev;
+ EFI_DEVICE_PATH *devpath;
+ EFI_HANDLE *devhandle;
+ void *devdata;
+ struct dev_info *next;
+} dev_info_t;
+
+/*
+ * A boot loader module.
+ *
+ * This is a standard interface for filesystem modules in the EFI system.
+ */
+typedef struct boot_module_t
+{
+ const char *name;
+
+ /* init is the optional initialiser for the module. */
+ void (*init)();
+
+ /*
+ * probe checks to see if the module can handle dev.
+ *
+ * Return codes:
+ * EFI_SUCCESS = The module can handle the device.
+ * EFI_NOT_FOUND = The module can not handle the device.
+ * Other = The module encountered an error.
+ */
+ EFI_STATUS (*probe)(dev_info_t* dev);
+
+ /*
+ * load should select the best out of a set of devices that probe
+ * indicated were loadable and load it.
+ *
+ * Return codes:
+ * EFI_SUCCESS = The module can handle the device.
+ * EFI_NOT_FOUND = The module can not handle the device.
+ * Other = The module encountered an error.
+ */
+ EFI_STATUS (*load)(const char *loader_path, dev_info_t **devinfo,
+ void **buf, size_t *bufsize);
+
+ /* status outputs information about the probed devices. */
+ void (*status)();
+
+} boot_module_t;
+
+/* Standard boot modules. */
+#ifdef EFI_UFS_BOOT
+extern const boot_module_t ufs_module;
+#endif
+#ifdef EFI_ZFS_BOOT
+extern const boot_module_t zfs_module;
+#endif
+
+/* Functions available to modules. */
+extern void add_device(dev_info_t **devinfop, dev_info_t *devinfo);
+extern void panic(const char *fmt, ...) __dead2;
+extern int printf(const char *fmt, ...);
+extern int vsnprintf(char *str, size_t sz, const char *fmt, va_list ap);
+
+extern EFI_SYSTEM_TABLE *systab;
+extern EFI_BOOT_SERVICES *bs;
+
+#endif
diff --git a/sys/boot/efi/boot1/ufs_module.c b/sys/boot/efi/boot1/ufs_module.c
new file mode 100644
index 0000000..07c7152
--- /dev/null
+++ b/sys/boot/efi/boot1/ufs_module.c
@@ -0,0 +1,185 @@
+/*-
+ * Copyright (c) 1998 Robert Nordier
+ * All rights reserved.
+ * Copyright (c) 2001 Robert Drehmel
+ * All rights reserved.
+ * Copyright (c) 2014 Nathan Whitehorn
+ * All rights reserved.
+ * Copyright (c) 2015 Eric McCorkle
+ * All rights reverved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <efi.h>
+
+#include "boot_module.h"
+
+static dev_info_t *devinfo;
+static dev_info_t *devices;
+
+static int
+dskread(void *buf, u_int64_t lba, int nblk)
+{
+ int size;
+ EFI_STATUS status;
+
+ lba = lba / (devinfo->dev->Media->BlockSize / DEV_BSIZE);
+ size = nblk * DEV_BSIZE;
+
+ status = devinfo->dev->ReadBlocks(devinfo->dev,
+ devinfo->dev->Media->MediaId, lba, size, buf);
+
+ if (status != EFI_SUCCESS) {
+ DPRINTF("dskread: failed dev: %p, id: %u, lba: %lu, size: %d, "
+ "status: %lu\n", devinfo->dev,
+ devinfo->dev->Media->MediaId, lba, size,
+ EFI_ERROR_CODE(status));
+ return (-1);
+ }
+
+ return (0);
+}
+
+#include "ufsread.c"
+
+static struct dmadat __dmadat;
+
+static int
+init_dev(dev_info_t* dev)
+{
+
+ devinfo = dev;
+ dmadat = &__dmadat;
+
+ return fsread(0, NULL, 0);
+}
+
+static EFI_STATUS
+probe(dev_info_t* dev)
+{
+
+ if (init_dev(dev) < 0)
+ return (EFI_UNSUPPORTED);
+
+ add_device(&devices, dev);
+
+ return (EFI_SUCCESS);
+}
+
+static EFI_STATUS
+try_load(dev_info_t *dev, const char *loader_path, void **bufp, size_t *bufsize)
+{
+ ufs_ino_t ino;
+ EFI_STATUS status;
+ size_t size;
+ ssize_t read;
+ void *buf;
+
+ if (init_dev(dev) < 0)
+ return (EFI_UNSUPPORTED);
+
+ if ((ino = lookup(loader_path)) == 0)
+ return (EFI_NOT_FOUND);
+
+ if (fsread_size(ino, NULL, 0, &size) < 0 || size <= 0) {
+ printf("Failed to read size of '%s' ino: %d\n", loader_path,
+ ino);
+ return (EFI_INVALID_PARAMETER);
+ }
+
+ if ((status = bs->AllocatePool(EfiLoaderData, size, &buf)) !=
+ EFI_SUCCESS) {
+ printf("Failed to allocate read buffer (%lu)\n",
+ EFI_ERROR_CODE(status));
+ return (status);
+ }
+
+ read = fsread(ino, buf, size);
+ if ((size_t)read != size) {
+ printf("Failed to read '%s' (%zd != %zu)\n", loader_path, read,
+ size);
+ (void)bs->FreePool(buf);
+ return (EFI_INVALID_PARAMETER);
+ }
+
+ *bufp = buf;
+ *bufsize = size;
+
+ return (EFI_SUCCESS);
+}
+
+static EFI_STATUS
+load(const char *loader_path, dev_info_t **devinfop, void **buf,
+ size_t *bufsize)
+{
+ dev_info_t *dev;
+ EFI_STATUS status;
+
+ for (dev = devices; dev != NULL; dev = dev->next) {
+ status = try_load(dev, loader_path, buf, bufsize);
+ if (status == EFI_SUCCESS) {
+ *devinfop = dev;
+ return (EFI_SUCCESS);
+ } else if (status != EFI_NOT_FOUND) {
+ return (status);
+ }
+ }
+
+ return (EFI_NOT_FOUND);
+}
+
+static void
+status()
+{
+ int i;
+ dev_info_t *dev;
+
+ for (dev = devices, i = 0; dev != NULL; dev = dev->next, i++)
+ ;
+
+ printf("%s found ", ufs_module.name);
+ switch (i) {
+ case 0:
+ printf("no partitions\n");
+ break;
+ case 1:
+ printf("%d partition\n", i);
+ break;
+ default:
+ printf("%d partitions\n", i);
+ }
+}
+
+const boot_module_t ufs_module =
+{
+ .name = "UFS",
+ .probe = probe,
+ .load = load,
+ .status = status
+};
diff --git a/sys/boot/efi/boot1/zfs_module.c b/sys/boot/efi/boot1/zfs_module.c
new file mode 100644
index 0000000..96eec33
--- /dev/null
+++ b/sys/boot/efi/boot1/zfs_module.c
@@ -0,0 +1,199 @@
+/*-
+ * Copyright (c) 2015 Eric McCorkle
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#include <stddef.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <efi.h>
+
+#include "boot_module.h"
+
+#include "libzfs.h"
+#include "zfsimpl.c"
+
+static dev_info_t *devices;
+
+static int
+vdev_read(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes)
+{
+ dev_info_t *devinfo;
+ off_t lba;
+ EFI_STATUS status;
+
+ devinfo = (dev_info_t *)priv;
+ lba = off / devinfo->dev->Media->BlockSize;
+
+ status = devinfo->dev->ReadBlocks(devinfo->dev,
+ devinfo->dev->Media->MediaId, lba, bytes, buf);
+ if (status != EFI_SUCCESS) {
+ DPRINTF("vdev_read: failed dev: %p, id: %u, lba: %zu, size: %zu,"
+ " status: %lu\n", devinfo->dev,
+ devinfo->dev->Media->MediaId, lba, bytes,
+ EFI_ERROR_CODE(status));
+ return (-1);
+ }
+
+ return (0);
+}
+
+static EFI_STATUS
+probe(dev_info_t *dev)
+{
+ spa_t *spa;
+ dev_info_t *tdev;
+ EFI_STATUS status;
+
+ /* ZFS consumes the dev on success so we need a copy. */
+ if ((status = bs->AllocatePool(EfiLoaderData, sizeof(*dev),
+ (void**)&tdev)) != EFI_SUCCESS) {
+ DPRINTF("Failed to allocate tdev (%lu)\n",
+ EFI_ERROR_CODE(status));
+ return (status);
+ }
+ memcpy(tdev, dev, sizeof(*dev));
+
+ if (vdev_probe(vdev_read, tdev, &spa) != 0) {
+ (void)bs->FreePool(tdev);
+ return (EFI_UNSUPPORTED);
+ }
+
+ dev->devdata = spa;
+ add_device(&devices, dev);
+
+ return (EFI_SUCCESS);
+}
+
+static EFI_STATUS
+try_load(dev_info_t *devinfo, const char *loader_path, void **bufp, size_t *bufsize)
+{
+ spa_t *spa;
+ struct zfsmount zfsmount;
+ dnode_phys_t dn;
+ struct stat st;
+ int err;
+ void *buf;
+ EFI_STATUS status;
+
+ spa = devinfo->devdata;
+ if (zfs_spa_init(spa) != 0) {
+ /* Init failed, don't report this loudly. */
+ return (EFI_NOT_FOUND);
+ }
+
+ if (zfs_mount(spa, 0, &zfsmount) != 0) {
+ /* Mount failed, don't report this loudly. */
+ return (EFI_NOT_FOUND);
+ }
+
+ if ((err = zfs_lookup(&zfsmount, loader_path, &dn)) != 0) {
+ printf("Failed to lookup %s on pool %s (%d)\n", loader_path,
+ spa->spa_name, err);
+ return (EFI_INVALID_PARAMETER);
+ }
+
+ if ((err = zfs_dnode_stat(spa, &dn, &st)) != 0) {
+ printf("Failed to lookup %s on pool %s (%d)\n", loader_path,
+ spa->spa_name, err);
+ return (EFI_INVALID_PARAMETER);
+ }
+
+ if ((status = bs->AllocatePool(EfiLoaderData, (UINTN)st.st_size, &buf))
+ != EFI_SUCCESS) {
+ printf("Failed to allocate load buffer for pool %s (%lu)\n",
+ spa->spa_name, EFI_ERROR_CODE(status));
+ return (EFI_INVALID_PARAMETER);
+ }
+
+ if ((err = dnode_read(spa, &dn, 0, buf, st.st_size)) != 0) {
+ printf("Failed to read node from %s (%d)\n", spa->spa_name,
+ err);
+ (void)bs->FreePool(buf);
+ return (EFI_INVALID_PARAMETER);
+ }
+
+ *bufsize = st.st_size;
+ *bufp = buf;
+
+ return (EFI_SUCCESS);
+}
+
+static EFI_STATUS
+load(const char *loader_path, dev_info_t **devinfop, void **bufp,
+ size_t *bufsize)
+{
+ dev_info_t *devinfo;
+ EFI_STATUS status;
+
+ for (devinfo = devices; devinfo != NULL; devinfo = devinfo->next) {
+ status = try_load(devinfo, loader_path, bufp, bufsize);
+ if (status == EFI_SUCCESS) {
+ *devinfop = devinfo;
+ return (EFI_SUCCESS);
+ } else if (status != EFI_NOT_FOUND) {
+ return (status);
+ }
+ }
+
+ return (EFI_NOT_FOUND);
+}
+
+static void
+status()
+{
+ spa_t *spa;
+
+ spa = STAILQ_FIRST(&zfs_pools);
+ if (spa == NULL) {
+ printf("%s found no pools\n", zfs_module.name);
+ return;
+ }
+
+ printf("%s found the following pools:", zfs_module.name);
+ STAILQ_FOREACH(spa, &zfs_pools, spa_link)
+ printf(" %s", spa->spa_name);
+
+ printf("\n");
+}
+
+static void
+init()
+{
+
+ zfs_init();
+}
+
+const boot_module_t zfs_module =
+{
+ .name = "ZFS",
+ .init = init,
+ .probe = probe,
+ .load = load,
+ .status = status
+};
diff --git a/sys/boot/efi/include/efi_nii.h b/sys/boot/efi/include/efi_nii.h
index 5222232..561cbd4 100644
--- a/sys/boot/efi/include/efi_nii.h
+++ b/sys/boot/efi/include/efi_nii.h
@@ -26,9 +26,9 @@ Revision history:
--*/
#define EFI_NETWORK_INTERFACE_IDENTIFIER_PROTOCOL \
- { 0xE18541CD, 0xF755, 0x4f73, 0x92, 0x8D, 0x64, 0x3C, 0x8A, 0x79, 0xB2, 0x29 }
+ { 0xE18541CD, 0xF755, 0x4f73, {0x92, 0x8D, 0x64, 0x3C, 0x8A, 0x79, 0xB2, 0x29} }
#define EFI_NETWORK_INTERFACE_IDENTIFIER_PROTOCOL_31 \
- { 0x1ACED566, 0x76ED, 0x4218, 0xBC, 0x81, 0x76, 0x7F, 0x1F, 0x97, 0x7A, 0x89 }
+ { 0x1ACED566, 0x76ED, 0x4218, {0xBC, 0x81, 0x76, 0x7F, 0x1F, 0x97, 0x7A, 0x89} }
#define EFI_NETWORK_INTERFACE_IDENTIFIER_INTERFACE_REVISION 0x00010000
#define EFI_NETWORK_INTERFACE_IDENTIFIER_INTERFACE_REVISION_31 0x00010001
diff --git a/sys/boot/efi/include/efiapi.h b/sys/boot/efi/include/efiapi.h
index c8dbf9e..25e40a2 100644
--- a/sys/boot/efi/include/efiapi.h
+++ b/sys/boot/efi/include/efiapi.h
@@ -214,8 +214,8 @@ VOID
// EFI platform varibles
//
-#define EFI_GLOBAL_VARIABLE \
- { 0x8BE4DF61, 0x93CA, 0x11d2, 0xAA, 0x0D, 0x00, 0xE0, 0x98, 0x03, 0x2B, 0x8C }
+#define EFI_GLOBAL_VARIABLE \
+ { 0x8BE4DF61, 0x93CA, 0x11d2, {0xAA, 0x0D, 0x00, 0xE0, 0x98, 0x03, 0x2B, 0x8C} }
// Variable attributes
#define EFI_VARIABLE_NON_VOLATILE 0x00000001
@@ -367,8 +367,8 @@ EFI_STATUS
// Image handle
-#define LOADED_IMAGE_PROTOCOL \
- { 0x5B1B31A1, 0x9562, 0x11d2, 0x8E, 0x3F, 0x00, 0xA0, 0xC9, 0x69, 0x72, 0x3B }
+#define LOADED_IMAGE_PROTOCOL \
+ { 0x5B1B31A1, 0x9562, 0x11d2, {0x8E, 0x3F, 0x00, 0xA0, 0xC9, 0x69, 0x72, 0x3B} }
#define EFI_LOADED_IMAGE_INFORMATION_REVISION 0x1000
typedef struct {
@@ -831,35 +831,35 @@ typedef struct {
// EFI Configuration Table and GUID definitions
//
-#define MPS_TABLE_GUID \
- { 0xeb9d2d2f, 0x2d88, 0x11d3, 0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d }
+#define MPS_TABLE_GUID \
+ { 0xeb9d2d2f, 0x2d88, 0x11d3, {0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d} }
-#define ACPI_TABLE_GUID \
- { 0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d }
+#define ACPI_TABLE_GUID \
+ { 0xeb9d2d30, 0x2d88, 0x11d3, {0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d} }
-#define ACPI_20_TABLE_GUID \
- { 0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81 }
+#define ACPI_20_TABLE_GUID \
+ { 0x8868e871, 0xe4f1, 0x11d3, {0xbc, 0x22, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81} }
-#define SMBIOS_TABLE_GUID \
- { 0xeb9d2d31, 0x2d88, 0x11d3, 0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d }
+#define SMBIOS_TABLE_GUID \
+ { 0xeb9d2d31, 0x2d88, 0x11d3, {0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d} }
-#define SAL_SYSTEM_TABLE_GUID \
- { 0xeb9d2d32, 0x2d88, 0x11d3, 0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d }
+#define SAL_SYSTEM_TABLE_GUID \
+ { 0xeb9d2d32, 0x2d88, 0x11d3, {0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d} }
-#define FDT_TABLE_GUID \
- { 0xb1b621d5, 0xf19c, 0x41a5, 0x83, 0x0b, 0xd9, 0x15, 0x2c, 0x69, 0xaa, 0xe0 }
+#define FDT_TABLE_GUID \
+ { 0xb1b621d5, 0xf19c, 0x41a5, {0x83, 0x0b, 0xd9, 0x15, 0x2c, 0x69, 0xaa, 0xe0} }
-#define DXE_SERVICES_TABLE_GUID \
- { 0x5ad34ba, 0x6f02, 0x4214, 0x95, 0x2e, 0x4d, 0xa0, 0x39, 0x8e, 0x2b, 0xb9 }
+#define DXE_SERVICES_TABLE_GUID \
+ { 0x5ad34ba, 0x6f02, 0x4214, {0x95, 0x2e, 0x4d, 0xa0, 0x39, 0x8e, 0x2b, 0xb9} }
-#define HOB_LIST_TABLE_GUID \
- { 0x7739f24c, 0x93d7, 0x11d4, 0x9a, 0x3a, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d }
+#define HOB_LIST_TABLE_GUID \
+ { 0x7739f24c, 0x93d7, 0x11d4, {0x9a, 0x3a, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d} }
#define MEMORY_TYPE_INFORMATION_TABLE_GUID \
- { 0x4c19049f, 0x4137, 0x4dd3, 0x9c, 0x10, 0x8b, 0x97, 0xa8, 0x3f, 0xfd, 0xfa }
+ { 0x4c19049f, 0x4137, 0x4dd3, {0x9c, 0x10, 0x8b, 0x97, 0xa8, 0x3f, 0xfd, 0xfa} }
#define DEBUG_IMAGE_INFO_TABLE_GUID \
- { 0x49152e77, 0x1ada, 0x4764, 0xb7, 0xa2, 0x7a, 0xfe, 0xfe, 0xd9, 0x5e, 0x8b }
+ { 0x49152e77, 0x1ada, 0x4764, {0xb7, 0xa2, 0x7a, 0xfe, 0xfe, 0xd9, 0x5e, 0x8b} }
typedef struct _EFI_CONFIGURATION_TABLE {
EFI_GUID VendorGuid;
diff --git a/sys/boot/efi/include/eficon.h b/sys/boot/efi/include/eficon.h
index ef4af81..2f719e7 100644
--- a/sys/boot/efi/include/eficon.h
+++ b/sys/boot/efi/include/eficon.h
@@ -32,7 +32,7 @@ Revision History
//
#define SIMPLE_TEXT_OUTPUT_PROTOCOL \
- { 0x387477c2, 0x69c7, 0x11d2, 0x8e, 0x39, 0x0, 0xa0, 0xc9, 0x69, 0x72, 0x3b }
+ { 0x387477c2, 0x69c7, 0x11d2, {0x8e, 0x39, 0x0, 0xa0, 0xc9, 0x69, 0x72, 0x3b} }
INTERFACE_DECL(_SIMPLE_TEXT_OUTPUT_INTERFACE);
@@ -239,8 +239,8 @@ typedef struct _SIMPLE_TEXT_OUTPUT_INTERFACE {
// Text input protocol
//
-#define SIMPLE_TEXT_INPUT_PROTOCOL \
- { 0x387477c1, 0x69c7, 0x11d2, 0x8e, 0x39, 0x0, 0xa0, 0xc9, 0x69, 0x72, 0x3b }
+#define SIMPLE_TEXT_INPUT_PROTOCOL \
+ { 0x387477c1, 0x69c7, 0x11d2, {0x8e, 0x39, 0x0, 0xa0, 0xc9, 0x69, 0x72, 0x3b} }
INTERFACE_DECL(_SIMPLE_INPUT_INTERFACE);
diff --git a/sys/boot/efi/include/eficonsctl.h b/sys/boot/efi/include/eficonsctl.h
index 36dd2c1..68be3d6 100644
--- a/sys/boot/efi/include/eficonsctl.h
+++ b/sys/boot/efi/include/eficonsctl.h
@@ -35,7 +35,7 @@
#define _EFI_CONS_CTL_H
#define EFI_CONSOLE_CONTROL_PROTOCOL_GUID \
- { 0xf42f7782, 0x12e, 0x4c12, {0x99, 0x56, 0x49, 0xf9, 0x43, 0x4, 0xf7, 0x21} }
+ { 0xf42f7782, 0x12e, 0x4c12, {0x99, 0x56, 0x49, 0xf9, 0x43, 0x4, 0xf7, 0x21} }
typedef struct _EFI_CONSOLE_CONTROL_PROTOCOL EFI_CONSOLE_CONTROL_PROTOCOL;
diff --git a/sys/boot/efi/include/efidevp.h b/sys/boot/efi/include/efidevp.h
index a332af5..f0f49ef 100644
--- a/sys/boot/efi/include/efidevp.h
+++ b/sys/boot/efi/include/efidevp.h
@@ -110,7 +110,7 @@ typedef struct _VENDOR_DEVICE_PATH {
} VENDOR_DEVICE_PATH;
#define UNKNOWN_DEVICE_GUID \
- { 0xcf31fac5, 0xc24e, 0x11d2, 0x85, 0xf3, 0x0, 0xa0, 0xc9, 0x3e, 0xc9, 0x3b }
+ { 0xcf31fac5, 0xc24e, 0x11d2, {0x85, 0xf3, 0x0, 0xa0, 0xc9, 0x3e, 0xc9, 0x3b} }
typedef struct _UKNOWN_DEVICE_VENDOR_DP {
VENDOR_DEVICE_PATH DevicePath;
@@ -274,16 +274,16 @@ typedef struct _UART_DEVICE_PATH {
/* Use VENDOR_DEVICE_PATH struct */
#define DEVICE_PATH_MESSAGING_PC_ANSI \
- { 0xe0c14753, 0xf9be, 0x11d2, 0x9a, 0x0c, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d }
+ { 0xe0c14753, 0xf9be, 0x11d2, {0x9a, 0x0c, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d} }
#define DEVICE_PATH_MESSAGING_VT_100 \
- { 0xdfa66065, 0xb419, 0x11d3, 0x9a, 0x2d, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d }
+ { 0xdfa66065, 0xb419, 0x11d3, {0x9a, 0x2d, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d} }
#define DEVICE_PATH_MESSAGING_VT_100_PLUS \
- { 0x7baec70b, 0x57e0, 0x4c76, 0x8e, 0x87, 0x2f, 0x9e, 0x28, 0x08, 0x83, 0x43 }
+ { 0x7baec70b, 0x57e0, 0x4c76, {0x8e, 0x87, 0x2f, 0x9e, 0x28, 0x08, 0x83, 0x43} }
#define DEVICE_PATH_MESSAGING_VT_UTF8 \
- { 0xad15a0d6, 0x8bec, 0x4acf, 0xa0, 0x73, 0xd0, 0x1d, 0xe7, 0x7e, 0x2d, 0x88 }
+ { 0xad15a0d6, 0x8bec, 0x4acf, {0xa0, 0x73, 0xd0, 0x1d, 0xe7, 0x7e, 0x2d, 0x88} }
#define MEDIA_DEVICE_PATH 0x04
diff --git a/sys/boot/efi/include/efierr.h b/sys/boot/efi/include/efierr.h
index 921b297..a8b6557 100644
--- a/sys/boot/efi/include/efierr.h
+++ b/sys/boot/efi/include/efierr.h
@@ -31,7 +31,7 @@ Revision History
#define EFIWARN(a) (a)
#define EFI_ERROR(a) (((INTN) a) < 0)
-#define EFI_ERROR_CODE(a) (a & ~EFI_ERROR_MASK)
+#define EFI_ERROR_CODE(a) (unsigned long)(a & ~EFI_ERROR_MASK)
#define EFI_SUCCESS 0
diff --git a/sys/boot/efi/include/efifpswa.h b/sys/boot/efi/include/efifpswa.h
index 3e039ef..21823c5 100644
--- a/sys/boot/efi/include/efifpswa.h
+++ b/sys/boot/efi/include/efifpswa.h
@@ -7,7 +7,7 @@
*/
#define EFI_INTEL_FPSWA \
- { 0xc41b6531, 0x97b9, 0x11d3, 0x9a, 0x29, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d }
+ { 0xc41b6531, 0x97b9, 0x11d3, {0x9a, 0x29, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d} }
INTERFACE_DECL(_FPSWA_INTERFACE);
diff --git a/sys/boot/efi/include/efigop.h b/sys/boot/efi/include/efigop.h
index dfc4bba..104fa6e 100644
--- a/sys/boot/efi/include/efigop.h
+++ b/sys/boot/efi/include/efigop.h
@@ -27,9 +27,8 @@ Revision History
#ifndef _EFIGOP_H
#define _EFIGOP_H
-#define EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID \
- { 0x9042a9de, 0x23dc, 0x4a38, 0x96, 0xfb, 0x7a, 0xde, 0xd0, 0x80, \
- 0x51, 0x6a }
+#define EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID \
+ { 0x9042a9de, 0x23dc, 0x4a38, {0x96, 0xfb, 0x7a, 0xde, 0xd0, 0x80, 0x51, 0x6a} }
INTERFACE_DECL(_EFI_GRAPHICS_OUTPUT);
diff --git a/sys/boot/efi/include/efilib.h b/sys/boot/efi/include/efilib.h
index ef03028..ba5b663 100644
--- a/sys/boot/efi/include/efilib.h
+++ b/sys/boot/efi/include/efilib.h
@@ -39,14 +39,15 @@ extern struct devsw efinet_dev;
extern struct netif_driver efinetif;
void *efi_get_table(EFI_GUID *tbl);
-void efi_main(EFI_HANDLE image_handle, EFI_SYSTEM_TABLE *system_table);
int efi_register_handles(struct devsw *, EFI_HANDLE *, EFI_HANDLE *, int);
EFI_HANDLE efi_find_handle(struct devsw *, int);
-int efi_handle_lookup(EFI_HANDLE, struct devsw **, int *);
+int efi_handle_lookup(EFI_HANDLE, struct devsw **, int *, uint64_t *);
+int efi_handle_update_dev(EFI_HANDLE, struct devsw *, int, uint64_t);
int efi_status_to_errno(EFI_STATUS);
time_t efi_time(EFI_TIME *);
EFI_STATUS main(int argc, CHAR16 *argv[]);
void exit(EFI_STATUS status);
+void delay(int usecs);
diff --git a/sys/boot/efi/include/efinet.h b/sys/boot/efi/include/efinet.h
index b4996d9..3ac58b2 100644
--- a/sys/boot/efi/include/efinet.h
+++ b/sys/boot/efi/include/efinet.h
@@ -29,7 +29,7 @@ Revision History
//
#define EFI_SIMPLE_NETWORK_PROTOCOL \
- { 0xA19832B9, 0xAC25, 0x11D3, 0x9A, 0x2D, 0x00, 0x90, 0x27, 0x3F, 0xC1, 0x4D }
+ { 0xA19832B9, 0xAC25, 0x11D3, {0x9A, 0x2D, 0x00, 0x90, 0x27, 0x3F, 0xC1, 0x4D} }
INTERFACE_DECL(_EFI_SIMPLE_NETWORK);
diff --git a/sys/boot/efi/include/efiprot.h b/sys/boot/efi/include/efiprot.h
index fac4568..28cec59 100644
--- a/sys/boot/efi/include/efiprot.h
+++ b/sys/boot/efi/include/efiprot.h
@@ -31,8 +31,8 @@ Revision History
// Device Path protocol
//
-#define DEVICE_PATH_PROTOCOL \
- { 0x9576e91, 0x6d3f, 0x11d2, 0x8e, 0x39, 0x0, 0xa0, 0xc9, 0x69, 0x72, 0x3b }
+#define DEVICE_PATH_PROTOCOL \
+ { 0x9576e91, 0x6d3f, 0x11d2, {0x8e, 0x39, 0x0, 0xa0, 0xc9, 0x69, 0x72, 0x3b} }
//
@@ -40,7 +40,7 @@ Revision History
//
#define BLOCK_IO_PROTOCOL \
- { 0x964e5b21, 0x6459, 0x11d2, 0x8e, 0x39, 0x0, 0xa0, 0xc9, 0x69, 0x72, 0x3b }
+ { 0x964e5b21, 0x6459, 0x11d2, {0x8e, 0x39, 0x0, 0xa0, 0xc9, 0x69, 0x72, 0x3b} }
#define EFI_BLOCK_IO_INTERFACE_REVISION 0x00010000
INTERFACE_DECL(_EFI_BLOCK_IO);
@@ -116,7 +116,7 @@ typedef struct _EFI_BLOCK_IO {
//
#define DISK_IO_PROTOCOL \
- { 0xce345171, 0xba0b, 0x11d2, 0x8e, 0x4f, 0x0, 0xa0, 0xc9, 0x69, 0x72, 0x3b }
+ { 0xce345171, 0xba0b, 0x11d2, {0x8e, 0x4f, 0x0, 0xa0, 0xc9, 0x69, 0x72, 0x3b} }
#define EFI_DISK_IO_INTERFACE_REVISION 0x00010000
INTERFACE_DECL(_EFI_DISK_IO);
@@ -155,7 +155,7 @@ typedef struct _EFI_DISK_IO {
//
#define SIMPLE_FILE_SYSTEM_PROTOCOL \
- { 0x964e5b22, 0x6459, 0x11d2, 0x8e, 0x39, 0x0, 0xa0, 0xc9, 0x69, 0x72, 0x3b }
+ { 0x964e5b22, 0x6459, 0x11d2, {0x8e, 0x39, 0x0, 0xa0, 0xc9, 0x69, 0x72, 0x3b} }
INTERFACE_DECL(_EFI_FILE_IO_INTERFACE);
INTERFACE_DECL(_EFI_FILE_HANDLE);
@@ -290,8 +290,8 @@ typedef struct _EFI_FILE_HANDLE {
// File information types
//
-#define EFI_FILE_INFO_ID \
- { 0x9576e92, 0x6d3f, 0x11d2, 0x8e, 0x39, 0x0, 0xa0, 0xc9, 0x69, 0x72, 0x3b }
+#define EFI_FILE_INFO_ID \
+ { 0x9576e92, 0x6d3f, 0x11d2, {0x8e, 0x39, 0x0, 0xa0, 0xc9, 0x69, 0x72, 0x3b} }
typedef struct {
UINT64 Size;
@@ -314,8 +314,8 @@ typedef struct {
#define SIZE_OF_EFI_FILE_INFO EFI_FIELD_OFFSET(EFI_FILE_INFO,FileName)
-#define EFI_FILE_SYSTEM_INFO_ID \
- { 0x9576e93, 0x6d3f, 0x11d2, 0x8e, 0x39, 0x0, 0xa0, 0xc9, 0x69, 0x72, 0x3b }
+#define EFI_FILE_SYSTEM_INFO_ID \
+ { 0x9576e93, 0x6d3f, 0x11d2, {0x8e, 0x39, 0x0, 0xa0, 0xc9, 0x69, 0x72, 0x3b} }
typedef struct {
UINT64 Size;
@@ -336,8 +336,8 @@ typedef struct {
#define SIZE_OF_EFI_FILE_SYSTEM_INFO EFI_FIELD_OFFSET(EFI_FILE_SYSTEM_INFO,VolumeLabel)
-#define EFI_FILE_SYSTEM_VOLUME_LABEL_INFO_ID \
- { 0xDB47D7D3,0xFE81, 0x11d3, 0x9A, 0x35, 0x00, 0x90, 0x27, 0x3F, 0xC1, 0x4D }
+#define EFI_FILE_SYSTEM_VOLUME_LABEL_INFO_ID \
+ { 0xDB47D7D3,0xFE81, 0x11d3, {0x9A, 0x35, 0x00, 0x90, 0x27, 0x3F, 0xC1, 0x4D} }
typedef struct {
CHAR16 VolumeLabel[1];
@@ -351,7 +351,7 @@ typedef struct {
#define LOAD_FILE_PROTOCOL \
- { 0x56EC3091, 0x954C, 0x11d2, 0x8E, 0x3F, 0x00, 0xA0, 0xC9, 0x69, 0x72, 0x3B }
+ { 0x56EC3091, 0x954C, 0x11d2, {0x8E, 0x3F, 0x00, 0xA0, 0xC9, 0x69, 0x72, 0x3B} }
INTERFACE_DECL(_EFI_LOAD_FILE_INTERFACE);
@@ -375,7 +375,7 @@ typedef struct _EFI_LOAD_FILE_INTERFACE {
//
#define DEVICE_IO_PROTOCOL \
- { 0xaf6ac311, 0x84c3, 0x11d2, 0x8e, 0x3c, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b }
+ { 0xaf6ac311, 0x84c3, 0x11d2, {0x8e, 0x3c, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b} }
INTERFACE_DECL(_EFI_DEVICE_IO_INTERFACE);
@@ -485,7 +485,7 @@ typedef struct _EFI_DEVICE_IO_INTERFACE {
//
#define UNICODE_COLLATION_PROTOCOL \
- { 0x1d85cd7f, 0xf43d, 0x11d2, 0x9a, 0xc, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d }
+ { 0x1d85cd7f, 0xf43d, 0x11d2, {0x9a, 0xc, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d} }
#define UNICODE_BYTE_ORDER_MARK (CHAR16)(0xfeff)
diff --git a/sys/boot/efi/include/efipxebc.h b/sys/boot/efi/include/efipxebc.h
index 3781a67..ba0b2e9 100644
--- a/sys/boot/efi/include/efipxebc.h
+++ b/sys/boot/efi/include/efipxebc.h
@@ -32,7 +32,7 @@ Revision History
//
#define EFI_PXE_BASE_CODE_PROTOCOL \
- { 0x03c4e603, 0xac28, 0x11d3, 0x9a, 0x2d, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d }
+ { 0x03c4e603, 0xac28, 0x11d3, {0x9a, 0x2d, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d} }
INTERFACE_DECL(_EFI_PXE_BASE_CODE);
@@ -425,7 +425,7 @@ typedef struct _EFI_PXE_BASE_CODE {
//
#define EFI_PXE_BASE_CODE_CALLBACK_PROTOCOL \
- { 0x245dca21, 0xfb7b, 0x11d3, 0x8f, 0x01, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b }
+ { 0x245dca21, 0xfb7b, 0x11d3, {0x8f, 0x01, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b} }
//
// Revision Number
diff --git a/sys/boot/efi/include/efiser.h b/sys/boot/efi/include/efiser.h
index 1f3fe4a..e3d66e2 100644
--- a/sys/boot/efi/include/efiser.h
+++ b/sys/boot/efi/include/efiser.h
@@ -30,7 +30,7 @@ Revision History
//
#define SERIAL_IO_PROTOCOL \
- { 0xBB25CF6F, 0xF1D4, 0x11D2, 0x9A, 0x0C, 0x00, 0x90, 0x27, 0x3F, 0xC1, 0xFD }
+ { 0xBB25CF6F, 0xF1D4, 0x11D2, {0x9A, 0x0C, 0x00, 0x90, 0x27, 0x3F, 0xC1, 0xFD} }
INTERFACE_DECL(_SERIAL_IO_INTERFACE);
diff --git a/sys/boot/efi/libefi/Makefile b/sys/boot/efi/libefi/Makefile
index 900b230..f87fbe9 100644
--- a/sys/boot/efi/libefi/Makefile
+++ b/sys/boot/efi/libefi/Makefile
@@ -2,6 +2,7 @@
LIB= efi
INTERNALLIB=
+WARNS?= 2
SRCS= delay.c efi_console.c efinet.c efipart.c errno.c handles.c \
libefi.c time.c
diff --git a/sys/boot/efi/libefi/efi_console.c b/sys/boot/efi/libefi/efi_console.c
index 52a3725..daf1338 100644
--- a/sys/boot/efi/libefi/efi_console.c
+++ b/sys/boot/efi/libefi/efi_console.c
@@ -47,6 +47,8 @@ static int esc;
void get_pos(int *x, int *y);
void curs_move(int *_x, int *_y, int x, int y);
static void CL(int);
+void HO(void);
+void end_term(void);
#endif
static void efi_cons_probe(struct console *);
diff --git a/sys/boot/efi/libefi/efipart.c b/sys/boot/efi/libefi/efipart.c
index 0831a98..757d64f 100644
--- a/sys/boot/efi/libefi/efipart.c
+++ b/sys/boot/efi/libefi/efipart.c
@@ -67,7 +67,6 @@ efipart_init(void)
EFI_HANDLE *hin, *hout, *aliases, handle;
EFI_STATUS status;
UINTN sz;
- CHAR16 *path;
u_int n, nin, nout;
int err;
size_t devpathlen;
diff --git a/sys/boot/efi/libefi/handles.c b/sys/boot/efi/libefi/handles.c
index b15c0a5..1e4ef6f 100644
--- a/sys/boot/efi/libefi/handles.c
+++ b/sys/boot/efi/libefi/handles.c
@@ -35,6 +35,7 @@ struct entry {
EFI_HANDLE alias;
struct devsw *dev;
int unit;
+ uint64_t extra;
};
struct entry *entry;
@@ -79,7 +80,7 @@ efi_find_handle(struct devsw *dev, int unit)
}
int
-efi_handle_lookup(EFI_HANDLE h, struct devsw **dev, int *unit)
+efi_handle_lookup(EFI_HANDLE h, struct devsw **dev, int *unit, uint64_t *extra)
{
int idx;
@@ -90,7 +91,28 @@ efi_handle_lookup(EFI_HANDLE h, struct devsw **dev, int *unit)
*dev = entry[idx].dev;
if (unit != NULL)
*unit = entry[idx].unit;
+ if (extra != NULL)
+ *extra = entry[idx].extra;
return (0);
}
return (ENOENT);
}
+
+int
+efi_handle_update_dev(EFI_HANDLE h, struct devsw *dev, int unit,
+ uint64_t guid)
+{
+ int idx;
+
+ for (idx = 0; idx < nentries; idx++) {
+ if (entry[idx].handle != h)
+ continue;
+ entry[idx].dev = dev;
+ entry[idx].unit = unit;
+ entry[idx].alias = NULL;
+ entry[idx].extra = guid;
+ return (0);
+ }
+
+ return (ENOENT);
+}
diff --git a/sys/boot/efi/libefi/libefi.c b/sys/boot/efi/libefi/libefi.c
index 3c66b04..f87b89a 100644
--- a/sys/boot/efi/libefi/libefi.c
+++ b/sys/boot/efi/libefi/libefi.c
@@ -179,7 +179,7 @@ efi_main(EFI_HANDLE image_handle, EFI_SYSTEM_TABLE *system_table)
argv = malloc((argc + 1) * sizeof(CHAR16*));
argc = 0;
if (addprog)
- argv[argc++] = L"loader.efi";
+ argv[argc++] = (CHAR16 *)L"loader.efi";
argp = args;
while (argp != NULL && *argp != 0) {
argp = arg_skipsep(argp);
diff --git a/sys/boot/efi/loader/Makefile b/sys/boot/efi/loader/Makefile
index 268edce..010425b 100644
--- a/sys/boot/efi/loader/Makefile
+++ b/sys/boot/efi/loader/Makefile
@@ -4,13 +4,11 @@ MAN=
.include <bsd.own.mk>
-# In-tree GCC does not support __attribute__((ms_abi)).
-.if ${COMPILER_TYPE} != "gcc"
-
MK_SSP= no
PROG= loader.sym
INTERNALPROG=
+WARNS?= 3
# architecture-specific loader code
SRCS= autoload.c \
@@ -22,6 +20,16 @@ SRCS= autoload.c \
smbios.c \
vers.c
+.if ${MK_ZFS} != "no"
+SRCS+= zfs.c
+.PATH: ${.CURDIR}/../../zfs
+
+# Disable warnings that are currently incompatible with the zfs boot code
+CWARNFLAGS.zfs.c+= -Wno-sign-compare
+CWARNFLAGS.zfs.c+= -Wno-array-bounds
+CWARNFLAGS.zfs.c+= -Wno-missing-prototypes
+.endif
+
.PATH: ${.CURDIR}/arch/${MACHINE}
# For smbios.c
.PATH: ${.CURDIR}/../../i386/libi386
@@ -35,6 +43,11 @@ CFLAGS+= -I${.CURDIR}/../include/${MACHINE}
CFLAGS+= -I${.CURDIR}/../../../contrib/dev/acpica/include
CFLAGS+= -I${.CURDIR}/../../..
CFLAGS+= -I${.CURDIR}/../../i386/libi386
+.if ${MK_ZFS} != "no"
+CFLAGS+= -I${.CURDIR}/../../zfs
+CFLAGS+= -I${.CURDIR}/../../../cddl/boot/zfs
+CFLAGS+= -DEFI_ZFS_BOOT
+.endif
CFLAGS+= -DNO_PCI -DEFI
.if ${MK_FORTH} != "no"
@@ -94,8 +107,6 @@ LIBEFI= ${.OBJDIR}/../libefi/libefi.a
DPADD= ${LIBFICL} ${LIBEFI} ${LIBSTAND} ${LDSCRIPT}
LDADD= ${LIBFICL} ${LIBEFI} ${LIBSTAND}
-.endif # ${COMPILER_TYPE} != "gcc"
-
.include <bsd.prog.mk>
beforedepend ${OBJS}: machine x86
diff --git a/sys/boot/efi/loader/arch/amd64/elf64_freebsd.c b/sys/boot/efi/loader/arch/amd64/elf64_freebsd.c
index c1dbec2..c0c9ce7 100644
--- a/sys/boot/efi/loader/arch/amd64/elf64_freebsd.c
+++ b/sys/boot/efi/loader/arch/amd64/elf64_freebsd.c
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include "platform/acfreebsd.h"
#include "acconfig.h"
#define ACPI_SYSTEM_XFACE
+#define ACPI_USE_SYSTEM_INTTYPES
#include "actypes.h"
#include "actbl.h"
@@ -100,7 +101,6 @@ elf64_exec(struct preloaded_file *fp)
ACPI_TABLE_RSDP *rsdp;
char buf[24];
int revision;
- EFI_STATUS status;
rsdp = efi_get_table(&acpi20_guid);
if (rsdp == NULL) {
diff --git a/sys/boot/efi/loader/arch/amd64/framebuffer.c b/sys/boot/efi/loader/arch/amd64/framebuffer.c
index 90bf992..2adb8b5 100644
--- a/sys/boot/efi/loader/arch/amd64/framebuffer.c
+++ b/sys/boot/efi/loader/arch/amd64/framebuffer.c
@@ -35,6 +35,8 @@ __FBSDID("$FreeBSD$");
#include <efilib.h>
#include <machine/metadata.h>
+#include "framebuffer.h"
+
static EFI_GUID gop_guid = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
int
diff --git a/sys/boot/efi/loader/arch/amd64/reloc.c b/sys/boot/efi/loader/arch/amd64/reloc.c
index 98bcf8e..9a70281 100644
--- a/sys/boot/efi/loader/arch/amd64/reloc.c
+++ b/sys/boot/efi/loader/arch/amd64/reloc.c
@@ -42,12 +42,15 @@ __FBSDID("$FreeBSD$");
#define ELFW_R_TYPE ELF64_R_TYPE
#endif
+EFI_STATUS _reloc(unsigned long ImageBase, ElfW_Dyn *dynamic, EFI_HANDLE image_handle __unused,
+ EFI_SYSTEM_TABLE *system_table __unused);
+
/*
* A simple relocator for IA32/AMD64 EFI binaries.
*/
EFI_STATUS
-_reloc(unsigned long ImageBase, ElfW_Dyn *dynamic, EFI_HANDLE image_handle,
- EFI_SYSTEM_TABLE *system_table)
+_reloc(unsigned long ImageBase, ElfW_Dyn *dynamic, EFI_HANDLE image_handle __unused,
+ EFI_SYSTEM_TABLE *system_table __unused)
{
unsigned long relsz, relent;
unsigned long *newaddr;
@@ -100,7 +103,7 @@ _reloc(unsigned long ImageBase, ElfW_Dyn *dynamic, EFI_HANDLE image_handle,
/* XXX: do we need other relocations ? */
break;
}
- rel = (ElfW_Rel *) ((caddr_t) rel + relent);
+ rel = (ElfW_Rel *)(void *) ((caddr_t) rel + relent);
}
return (EFI_SUCCESS);
diff --git a/sys/boot/efi/loader/autoload.c b/sys/boot/efi/loader/autoload.c
index 694a6da..c1eb849 100644
--- a/sys/boot/efi/loader/autoload.c
+++ b/sys/boot/efi/loader/autoload.c
@@ -27,6 +27,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "loader_efi.h"
+
int
efi_autoload(void)
{
diff --git a/sys/boot/efi/loader/bootinfo.c b/sys/boot/efi/loader/bootinfo.c
index 18837f8..3bc6e42 100644
--- a/sys/boot/efi/loader/bootinfo.c
+++ b/sys/boot/efi/loader/bootinfo.c
@@ -48,6 +48,8 @@ __FBSDID("$FreeBSD$");
#include "framebuffer.h"
#include "loader_efi.h"
+int bi_load(char *args, vm_offset_t *modulep, vm_offset_t *kernendp);
+
static const char howto_switches[] = "aCdrgDmphsv";
static int howto_masks[] = {
RB_ASKNAME, RB_CDROM, RB_KDB, RB_DFLTROOT, RB_GDB, RB_MULTIPLE,
@@ -113,7 +115,7 @@ bi_copyenv(vm_offset_t start)
/* Traverse the environment. */
for (ep = environ; ep != NULL; ep = ep->ev_next) {
len = strlen(ep->ev_name);
- if (archsw.arch_copyin(ep->ev_name, addr, len) != len)
+ if ((size_t)archsw.arch_copyin(ep->ev_name, addr, len) != len)
break;
addr += len;
if (archsw.arch_copyin("=", addr, 1) != 1)
@@ -121,7 +123,7 @@ bi_copyenv(vm_offset_t start)
addr++;
if (ep->ev_value != NULL) {
len = strlen(ep->ev_value);
- if (archsw.arch_copyin(ep->ev_value, addr, len) != len)
+ if ((size_t)archsw.arch_copyin(ep->ev_value, addr, len) != len)
break;
addr += len;
}
diff --git a/sys/boot/efi/loader/conf.c b/sys/boot/efi/loader/conf.c
index 8c063fd..523714d 100644
--- a/sys/boot/efi/loader/conf.c
+++ b/sys/boot/efi/loader/conf.c
@@ -31,14 +31,23 @@ __FBSDID("$FreeBSD$");
#include <bootstrap.h>
#include <efi.h>
#include <efilib.h>
+#ifdef EFI_ZFS_BOOT
+#include <libzfs.h>
+#endif
struct devsw *devsw[] = {
&efipart_dev,
&efinet_dev,
+#ifdef EFI_ZFS_BOOT
+ &zfs_dev,
+#endif
NULL
};
struct fs_ops *file_system[] = {
+#ifdef EFI_ZFS_BOOT
+ &zfs_fsops,
+#endif
&dosfs_fsops,
&ufs_fsops,
&cd9660_fsops,
diff --git a/sys/boot/efi/loader/copy.c b/sys/boot/efi/loader/copy.c
index 363d95d..6521e3c 100644
--- a/sys/boot/efi/loader/copy.c
+++ b/sys/boot/efi/loader/copy.c
@@ -37,6 +37,8 @@ __FBSDID("$FreeBSD$");
#include <efi.h>
#include <efilib.h>
+#include "loader_efi.h"
+
#ifndef EFI_STAGING_SIZE
#define EFI_STAGING_SIZE 48
#endif
diff --git a/sys/boot/efi/loader/devicename.c b/sys/boot/efi/loader/devicename.c
index 4e9764c..63c9293 100644
--- a/sys/boot/efi/loader/devicename.c
+++ b/sys/boot/efi/loader/devicename.c
@@ -31,11 +31,17 @@ __FBSDID("$FreeBSD$");
#include <stand.h>
#include <string.h>
#include <sys/disklabel.h>
-#include "bootstrap.h"
+#include <sys/param.h>
+#include <bootstrap.h>
+#ifdef EFI_ZFS_BOOT
+#include <libzfs.h>
+#endif
#include <efi.h>
#include <efilib.h>
+#include "loader_efi.h"
+
static int efi_parsedev(struct devdesc **, const char *, const char **);
/*
@@ -84,7 +90,7 @@ efi_parsedev(struct devdesc **dev, const char *devspec, const char **path)
struct devsw *dv;
char *cp;
const char *np;
- int i, err;
+ int i;
/* minimum length check */
if (strlen(devspec) < 2)
@@ -99,24 +105,43 @@ efi_parsedev(struct devdesc **dev, const char *devspec, const char **path)
if (devsw[i] == NULL)
return (ENOENT);
- idev = malloc(sizeof(struct devdesc));
- if (idev == NULL)
- return (ENOMEM);
+ np = devspec + strlen(dv->dv_name);
+
+#ifdef EFI_ZFS_BOOT
+ if (dv->dv_type == DEVT_ZFS) {
+ int err;
- idev->d_dev = dv;
- idev->d_type = dv->dv_type;
- idev->d_unit = -1;
+ idev = malloc(sizeof(struct zfs_devdesc));
+ if (idev == NULL)
+ return (ENOMEM);
- err = 0;
- np = devspec + strlen(dv->dv_name);
- if (*np != '\0' && *np != ':') {
- idev->d_unit = strtol(np, &cp, 0);
- if (cp == np) {
- idev->d_unit = -1;
+ err = zfs_parsedev((struct zfs_devdesc*)idev, np, path);
+ if (err != 0) {
free(idev);
- return (EUNIT);
+ return (err);
+ }
+ *dev = idev;
+ cp = strchr(np + 1, ':');
+ } else
+#endif
+ {
+ idev = malloc(sizeof(struct devdesc));
+ if (idev == NULL)
+ return (ENOMEM);
+
+ idev->d_dev = dv;
+ idev->d_type = dv->dv_type;
+ idev->d_unit = -1;
+ if (*np != '\0' && *np != ':') {
+ idev->d_unit = strtol(np, &cp, 0);
+ if (cp == np) {
+ idev->d_unit = -1;
+ free(idev);
+ return (EUNIT);
+ }
}
}
+
if (*cp != '\0' && *cp != ':') {
free(idev);
return (EINVAL);
@@ -135,9 +160,13 @@ char *
efi_fmtdev(void *vdev)
{
struct devdesc *dev = (struct devdesc *)vdev;
- static char buf[32]; /* XXX device length constant? */
+ static char buf[SPECNAMELEN + 1];
switch(dev->d_type) {
+#ifdef EFI_ZFS_BOOT
+ case DEVT_ZFS:
+ return (zfs_fmtdev(dev));
+#endif
case DEVT_NONE:
strcpy(buf, "(no device)");
break;
diff --git a/sys/boot/efi/loader/loader_efi.h b/sys/boot/efi/loader/loader_efi.h
index 5819d78..ee7c4bb 100644
--- a/sys/boot/efi/loader/loader_efi.h
+++ b/sys/boot/efi/loader/loader_efi.h
@@ -31,6 +31,8 @@
#ifndef _LOADER_EFI_COPY_H_
#define _LOADER_EFI_COPY_H_
+#include <stand.h>
+
int efi_autoload(void);
int efi_getdev(void **vdev, const char *devspec, const char **path);
diff --git a/sys/boot/efi/loader/main.c b/sys/boot/efi/loader/main.c
index ec9f9af..a90a87e 100644
--- a/sys/boot/efi/loader/main.c
+++ b/sys/boot/efi/loader/main.c
@@ -28,6 +28,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <sys/param.h>
#include <stand.h>
#include <string.h>
#include <setjmp.h>
@@ -38,6 +39,10 @@ __FBSDID("$FreeBSD$");
#include <bootstrap.h>
#include <smbios.h>
+#ifdef EFI_ZFS_BOOT
+#include <libzfs.h>
+#endif
+
#include "loader_efi.h"
extern char bootprog_name[];
@@ -45,7 +50,6 @@ extern char bootprog_rev[];
extern char bootprog_date[];
extern char bootprog_maker[];
-struct devdesc currdev; /* our current device */
struct arch_switch archsw; /* MI/MD interface boundary */
EFI_GUID acpi = ACPI_TABLE_GUID;
@@ -60,13 +64,44 @@ EFI_GUID hoblist = HOB_LIST_TABLE_GUID;
EFI_GUID memtype = MEMORY_TYPE_INFORMATION_TABLE_GUID;
EFI_GUID debugimg = DEBUG_IMAGE_INFO_TABLE_GUID;
+#ifdef EFI_ZFS_BOOT
+static void efi_zfs_probe(void);
+#endif
+
+/*
+ * Need this because EFI uses UTF-16 unicode string constants, but we
+ * use UTF-8. We can't use printf due to the possiblity of \0 and we
+ * don't support support wide characters either.
+ */
+static void
+print_str16(const CHAR16 *str)
+{
+ int i;
+
+ for (i = 0; str[i]; i++)
+ printf("%c", (char)str[i]);
+}
+
EFI_STATUS
main(int argc, CHAR16 *argv[])
{
char var[128];
EFI_LOADED_IMAGE *img;
EFI_GUID *guid;
- int i, j, vargood;
+ int i, j, vargood, unit;
+ struct devsw *dev;
+ uint64_t pool_guid;
+ UINTN k;
+
+ archsw.arch_autoload = efi_autoload;
+ archsw.arch_getdev = efi_getdev;
+ archsw.arch_copyin = efi_copyin;
+ archsw.arch_copyout = efi_copyout;
+ archsw.arch_readin = efi_readin;
+#ifdef EFI_ZFS_BOOT
+ /* Note this needs to be set before ZFS init. */
+ archsw.arch_zfs_probe = efi_zfs_probe;
+#endif
/*
* XXX Chicken-and-egg problem; we want to have console output
@@ -114,6 +149,13 @@ main(int argc, CHAR16 *argv[])
/* Get our loaded image protocol interface structure. */
BS->HandleProtocol(IH, &imgid, (VOID**)&img);
+ printf("Command line arguments:");
+ for (i = 0; i < argc; i++) {
+ printf(" ");
+ print_str16(argv[i]);
+ }
+ printf("\n");
+
printf("Image base: 0x%lx\n", (u_long)img->ImageBase);
printf("EFI version: %d.%02d\n", ST->Hdr.Revision >> 16,
ST->Hdr.Revision & 0xffff);
@@ -127,9 +169,6 @@ main(int argc, CHAR16 *argv[])
printf("%s, Revision %s\n", bootprog_name, bootprog_rev);
printf("(%s, %s)\n", bootprog_maker, bootprog_date);
- efi_handle_lookup(img->DeviceHandle, &currdev.d_dev, &currdev.d_unit);
- currdev.d_type = currdev.d_dev->dv_type;
-
/*
* Disable the watchdog timer. By default the boot manager sets
* the timer to 5 minutes before invoking a boot option. If we
@@ -141,23 +180,48 @@ main(int argc, CHAR16 *argv[])
*/
BS->SetWatchdogTimer(0, 0, 0, NULL);
- env_setenv("currdev", EV_VOLATILE, efi_fmtdev(&currdev),
- efi_setcurrdev, env_nounset);
- env_setenv("loaddev", EV_VOLATILE, efi_fmtdev(&currdev), env_noset,
- env_nounset);
+ if (efi_handle_lookup(img->DeviceHandle, &dev, &unit, &pool_guid) != 0)
+ return (EFI_NOT_FOUND);
+
+ switch (dev->dv_type) {
+#ifdef EFI_ZFS_BOOT
+ case DEVT_ZFS: {
+ struct zfs_devdesc currdev;
+
+ currdev.d_dev = dev;
+ currdev.d_unit = unit;
+ currdev.d_type = currdev.d_dev->dv_type;
+ currdev.d_opendata = NULL;
+ currdev.pool_guid = pool_guid;
+ currdev.root_guid = 0;
+ env_setenv("currdev", EV_VOLATILE, efi_fmtdev(&currdev),
+ efi_setcurrdev, env_nounset);
+ env_setenv("loaddev", EV_VOLATILE, efi_fmtdev(&currdev), env_noset,
+ env_nounset);
+ break;
+ }
+#endif
+ default: {
+ struct devdesc currdev;
+
+ currdev.d_dev = dev;
+ currdev.d_unit = unit;
+ currdev.d_opendata = NULL;
+ currdev.d_type = currdev.d_dev->dv_type;
+ env_setenv("currdev", EV_VOLATILE, efi_fmtdev(&currdev),
+ efi_setcurrdev, env_nounset);
+ env_setenv("loaddev", EV_VOLATILE, efi_fmtdev(&currdev), env_noset,
+ env_nounset);
+ break;
+ }
+ }
setenv("LINES", "24", 1); /* optional */
- archsw.arch_autoload = efi_autoload;
- archsw.arch_getdev = efi_getdev;
- archsw.arch_copyin = efi_copyin;
- archsw.arch_copyout = efi_copyout;
- archsw.arch_readin = efi_readin;
-
- for (i = 0; i < ST->NumberOfTableEntries; i++) {
- guid = &ST->ConfigurationTable[i].VendorGuid;
+ for (k = 0; k < ST->NumberOfTableEntries; k++) {
+ guid = &ST->ConfigurationTable[k].VendorGuid;
if (!memcmp(guid, &smbios, sizeof(EFI_GUID))) {
- smbios_detect(ST->ConfigurationTable[i].VendorTable);
+ smbios_detect(ST->ConfigurationTable[k].VendorTable);
break;
}
}
@@ -241,8 +305,9 @@ command_memmap(int argc, char *argv[])
for (i = 0, p = map; i < ndesc;
i++, p = NextMemoryDescriptor(p, dsz)) {
- printf("%23s %012lx %012lx %08lx ", types[p->Type],
- p->PhysicalStart, p->VirtualStart, p->NumberOfPages);
+ printf("%23s %012jx %012jx %08jx ", types[p->Type],
+ (uintmax_t)p->PhysicalStart, (uintmax_t)p->VirtualStart,
+ (uintmax_t)p->NumberOfPages);
if (p->Attribute & EFI_MEMORY_UC)
printf("UC ");
if (p->Attribute & EFI_MEMORY_WC)
@@ -283,9 +348,10 @@ guid_to_string(EFI_GUID *guid)
static int
command_configuration(int argc, char *argv[])
{
- int i;
+ UINTN i;
- printf("NumberOfTableEntries=%ld\n", ST->NumberOfTableEntries);
+ printf("NumberOfTableEntries=%lu\n",
+ (unsigned long)ST->NumberOfTableEntries);
for (i = 0; i < ST->NumberOfTableEntries; i++) {
EFI_GUID *guid;
@@ -379,9 +445,8 @@ command_nvram(int argc, char *argv[])
CHAR16 *data;
EFI_STATUS status;
EFI_GUID varguid = { 0,0,0,{0,0,0,0,0,0,0,0} };
- UINTN varsz, datasz;
+ UINTN varsz, datasz, i;
SIMPLE_TEXT_OUTPUT_INTERFACE *conout;
- int i;
conout = ST->ConOut;
@@ -417,3 +482,46 @@ command_nvram(int argc, char *argv[])
return (CMD_OK);
}
+
+#ifdef EFI_ZFS_BOOT
+COMMAND_SET(lszfs, "lszfs", "list child datasets of a zfs dataset",
+ command_lszfs);
+
+static int
+command_lszfs(int argc, char *argv[])
+{
+ int err;
+
+ if (argc != 2) {
+ command_errmsg = "wrong number of arguments";
+ return (CMD_ERROR);
+ }
+
+ err = zfs_list(argv[1]);
+ if (err != 0) {
+ command_errmsg = strerror(err);
+ return (CMD_ERROR);
+ }
+ return (CMD_OK);
+}
+#endif
+
+#ifdef EFI_ZFS_BOOT
+static void
+efi_zfs_probe(void)
+{
+ EFI_HANDLE h;
+ u_int unit;
+ int i;
+ char dname[SPECNAMELEN + 1];
+ uint64_t guid;
+
+ unit = 0;
+ h = efi_find_handle(&efipart_dev, 0);
+ for (i = 0; h != NULL; h = efi_find_handle(&efipart_dev, ++i)) {
+ snprintf(dname, sizeof(dname), "%s%d:", efipart_dev.dv_name, i);
+ if (zfs_probe_dev(dname, &guid) == 0)
+ (void)efi_handle_update_dev(h, &zfs_dev, unit++, guid);
+ }
+}
+#endif
diff --git a/sys/boot/fdt/fdt_loader_cmd.c b/sys/boot/fdt/fdt_loader_cmd.c
index 98a0388..ccfe2dd 100644
--- a/sys/boot/fdt/fdt_loader_cmd.c
+++ b/sys/boot/fdt/fdt_loader_cmd.c
@@ -464,7 +464,7 @@ fdt_fixup_memory(struct fdt_mem_region *region, size_t num)
{
struct fdt_mem_region *curmr;
uint32_t addr_cells, size_cells;
- uint32_t *addr_cellsp, *reg, *size_cellsp;
+ uint32_t *addr_cellsp, *size_cellsp;
int err, i, len, memory, root;
size_t realmrno;
uint8_t *buf, *sb;
diff --git a/sys/boot/ficl/i386/sysdep.c b/sys/boot/ficl/i386/sysdep.c
index 2a5346a..a87f643 100644
--- a/sys/boot/ficl/i386/sysdep.c
+++ b/sys/boot/ficl/i386/sysdep.c
@@ -58,7 +58,7 @@ void ficlTextOut(FICL_VM *pVM, char *msg, int fNewline)
IGNORE(pVM);
while(*msg != 0)
- putchar(*(msg++));
+ putchar((unsigned char)*(msg++));
if (fNewline)
putchar('\n');
diff --git a/sys/boot/i386/libi386/smbios.c b/sys/boot/i386/libi386/smbios.c
index 6e4fb84..7a7ce4b 100644
--- a/sys/boot/i386/libi386/smbios.c
+++ b/sys/boot/i386/libi386/smbios.c
@@ -332,7 +332,7 @@ static caddr_t
smbios_find_struct(int type)
{
caddr_t dmi;
- int i;
+ size_t i;
if (smbios.addr == NULL)
return (NULL);
@@ -402,7 +402,7 @@ smbios_detect(const caddr_t addr)
{
char buf[16];
caddr_t dmi;
- int i;
+ size_t i;
smbios_probe(addr);
if (smbios.addr == NULL)
diff --git a/sys/boot/zfs/zfsimpl.c b/sys/boot/zfs/zfsimpl.c
index 927fbad..aa1a789 100644
--- a/sys/boot/zfs/zfsimpl.c
+++ b/sys/boot/zfs/zfsimpl.c
@@ -2165,7 +2165,13 @@ zfs_lookup(const struct zfsmount *mount, const char *upath, dnode_phys_t *dnode)
strcpy(&path[sb.st_size], p);
else
path[sb.st_size] = 0;
- if (sb.st_size + sizeof(znode_phys_t) <= dn.dn_bonuslen) {
+ /*
+ * Second test is purely to silence bogus compiler
+ * warning about accessing past the end of dn_bonus.
+ */
+ if (sb.st_size + sizeof(znode_phys_t) <=
+ dn.dn_bonuslen && sizeof(znode_phys_t) <=
+ sizeof(dn.dn_bonus)) {
memcpy(path, &dn.dn_bonus[sizeof(znode_phys_t)],
sb.st_size);
} else {
diff --git a/sys/cam/ctl/ctl.c b/sys/cam/ctl/ctl.c
index fddc4eb..68f7881 100644
--- a/sys/cam/ctl/ctl.c
+++ b/sys/cam/ctl/ctl.c
@@ -1780,6 +1780,7 @@ ctl_ha_role_sysctl(SYSCTL_HANDLER_ARGS)
static int
ctl_init(void)
{
+ struct make_dev_args args;
struct ctl_softc *softc;
void *other_pool;
int i, error;
@@ -1787,9 +1788,17 @@ ctl_init(void)
softc = control_softc = malloc(sizeof(*control_softc), M_DEVBUF,
M_WAITOK | M_ZERO);
- softc->dev = make_dev(&ctl_cdevsw, 0, UID_ROOT, GID_OPERATOR, 0600,
- "cam/ctl");
- softc->dev->si_drv1 = softc;
+ make_dev_args_init(&args);
+ args.mda_devsw = &ctl_cdevsw;
+ args.mda_uid = UID_ROOT;
+ args.mda_gid = GID_OPERATOR;
+ args.mda_mode = 0600;
+ args.mda_si_drv1 = softc;
+ error = make_dev_s(&args, &softc->dev, "cam/ctl");
+ if (error != 0) {
+ free(control_softc, M_DEVBUF);
+ return (error);
+ }
sysctl_ctx_init(&softc->sysctl_ctx);
softc->sysctl_tree = SYSCTL_ADD_NODE(&softc->sysctl_ctx,
diff --git a/sys/cam/scsi/scsi_ch.c b/sys/cam/scsi/scsi_ch.c
index bc398e7..137128d 100644
--- a/sys/cam/scsi/scsi_ch.c
+++ b/sys/cam/scsi/scsi_ch.c
@@ -372,6 +372,8 @@ chregister(struct cam_periph *periph, void *arg)
struct ch_softc *softc;
struct ccb_getdev *cgd;
struct ccb_pathinq cpi;
+ struct make_dev_args args;
+ int error;
cgd = (struct ccb_getdev *)arg;
if (cgd == NULL) {
@@ -431,11 +433,20 @@ chregister(struct cam_periph *periph, void *arg)
/* Register the device */
- softc->dev = make_dev(&ch_cdevsw, periph->unit_number, UID_ROOT,
- GID_OPERATOR, 0600, "%s%d", periph->periph_name,
- periph->unit_number);
+ make_dev_args_init(&args);
+ args.mda_devsw = &ch_cdevsw;
+ args.mda_unit = periph->unit_number;
+ args.mda_uid = UID_ROOT;
+ args.mda_gid = GID_OPERATOR;
+ args.mda_mode = 0600;
+ args.mda_si_drv1 = periph;
+ error = make_dev_s(&args, &softc->dev, "%s%d", periph->periph_name,
+ periph->unit_number);
cam_periph_lock(periph);
- softc->dev->si_drv1 = periph;
+ if (error != 0) {
+ cam_periph_release_locked(periph);
+ return (CAM_REQ_CMP_ERR);
+ }
/*
* Add an async callback so that we get
@@ -507,8 +518,6 @@ chclose(struct cdev *dev, int flag, int fmt, struct thread *td)
struct mtx *mtx;
periph = (struct cam_periph *)dev->si_drv1;
- if (periph == NULL)
- return(ENXIO);
mtx = cam_periph_mtx(periph);
mtx_lock(mtx);
@@ -754,9 +763,6 @@ chioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td)
int error;
periph = (struct cam_periph *)dev->si_drv1;
- if (periph == NULL)
- return(ENXIO);
-
cam_periph_lock(periph);
CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("entering chioctl\n"));
diff --git a/sys/cam/scsi/scsi_enc.c b/sys/cam/scsi/scsi_enc.c
index 844f25a..a450cd0 100644
--- a/sys/cam/scsi/scsi_enc.c
+++ b/sys/cam/scsi/scsi_enc.c
@@ -264,10 +264,6 @@ enc_open(struct cdev *dev, int flags, int fmt, struct thread *td)
int error = 0;
periph = (struct cam_periph *)dev->si_drv1;
- if (periph == NULL) {
- return (ENXIO);
- }
-
if (cam_periph_acquire(periph) != CAM_REQ_CMP)
return (ENXIO);
@@ -302,8 +298,6 @@ enc_close(struct cdev *dev, int flag, int fmt, struct thread *td)
struct mtx *mtx;
periph = (struct cam_periph *)dev->si_drv1;
- if (periph == NULL)
- return (ENXIO);
mtx = cam_periph_mtx(periph);
mtx_lock(mtx);
@@ -364,9 +358,6 @@ enc_ioctl(struct cdev *dev, u_long cmd, caddr_t arg_addr, int flag,
addr = NULL;
periph = (struct cam_periph *)dev->si_drv1;
- if (periph == NULL)
- return (ENXIO);
-
CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("entering encioctl\n"));
cam_periph_lock(periph);
@@ -905,6 +896,7 @@ enc_ctor(struct cam_periph *periph, void *arg)
enc_softc_t *enc;
struct ccb_getdev *cgd;
char *tname;
+ struct make_dev_args args;
cgd = (struct ccb_getdev *)arg;
if (cgd == NULL) {
@@ -987,12 +979,20 @@ enc_ctor(struct cam_periph *periph, void *arg)
return (CAM_REQ_CMP_ERR);
}
- enc->enc_dev = make_dev(&enc_cdevsw, periph->unit_number,
- UID_ROOT, GID_OPERATOR, 0600, "%s%d",
- periph->periph_name, periph->unit_number);
-
+ make_dev_args_init(&args);
+ args.mda_devsw = &enc_cdevsw;
+ args.mda_unit = periph->unit_number;
+ args.mda_uid = UID_ROOT;
+ args.mda_gid = GID_OPERATOR;
+ args.mda_mode = 0600;
+ args.mda_si_drv1 = periph;
+ err = make_dev_s(&args, &enc->enc_dev, "%s%d", periph->periph_name,
+ periph->unit_number);
cam_periph_lock(periph);
- enc->enc_dev->si_drv1 = periph;
+ if (err != 0) {
+ cam_periph_release_locked(periph);
+ return (CAM_REQ_CMP_ERR);
+ }
enc->enc_flags |= ENC_FLAG_INITIALIZED;
diff --git a/sys/cam/scsi/scsi_pass.c b/sys/cam/scsi/scsi_pass.c
index 09cda5b..c0c313e 100644
--- a/sys/cam/scsi/scsi_pass.c
+++ b/sys/cam/scsi/scsi_pass.c
@@ -548,7 +548,8 @@ passregister(struct cam_periph *periph, void *arg)
struct pass_softc *softc;
struct ccb_getdev *cgd;
struct ccb_pathinq cpi;
- int no_tags;
+ struct make_dev_args args;
+ int error, no_tags;
cgd = (struct ccb_getdev *)arg;
if (cgd == NULL) {
@@ -648,9 +649,20 @@ passregister(struct cam_periph *periph, void *arg)
}
/* Register the device */
- softc->dev = make_dev(&pass_cdevsw, periph->unit_number,
- UID_ROOT, GID_OPERATOR, 0600, "%s%d",
- periph->periph_name, periph->unit_number);
+ make_dev_args_init(&args);
+ args.mda_devsw = &pass_cdevsw;
+ args.mda_unit = periph->unit_number;
+ args.mda_uid = UID_ROOT;
+ args.mda_gid = GID_OPERATOR;
+ args.mda_mode = 0600;
+ args.mda_si_drv1 = periph;
+ error = make_dev_s(&args, &softc->dev, "%s%d", periph->periph_name,
+ periph->unit_number);
+ if (error != 0) {
+ cam_periph_lock(periph);
+ cam_periph_release_locked(periph);
+ return (CAM_REQ_CMP_ERR);
+ }
/*
* Hold a reference to the periph before we create the physical
@@ -664,7 +676,6 @@ passregister(struct cam_periph *periph, void *arg)
}
cam_periph_lock(periph);
- softc->dev->si_drv1 = periph;
TASK_INIT(&softc->add_physpath_task, /*priority*/0,
pass_add_physpath, periph);
@@ -754,8 +765,6 @@ passclose(struct cdev *dev, int flag, int fmt, struct thread *td)
struct mtx *mtx;
periph = (struct cam_periph *)dev->si_drv1;
- if (periph == NULL)
- return (ENXIO);
mtx = cam_periph_mtx(periph);
mtx_lock(mtx);
@@ -1759,9 +1768,6 @@ passdoioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread
uint32_t priority;
periph = (struct cam_periph *)dev->si_drv1;
- if (periph == NULL)
- return(ENXIO);
-
cam_periph_lock(periph);
softc = (struct pass_softc *)periph->softc;
@@ -2068,9 +2074,6 @@ passpoll(struct cdev *dev, int poll_events, struct thread *td)
int revents;
periph = (struct cam_periph *)dev->si_drv1;
- if (periph == NULL)
- return (ENXIO);
-
softc = (struct pass_softc *)periph->softc;
revents = poll_events & (POLLOUT | POLLWRNORM);
@@ -2095,9 +2098,6 @@ passkqfilter(struct cdev *dev, struct knote *kn)
struct pass_softc *softc;
periph = (struct cam_periph *)dev->si_drv1;
- if (periph == NULL)
- return (ENXIO);
-
softc = (struct pass_softc *)periph->softc;
kn->kn_hook = (caddr_t)periph;
diff --git a/sys/cam/scsi/scsi_pt.c b/sys/cam/scsi/scsi_pt.c
index 15240da..e9d2437 100644
--- a/sys/cam/scsi/scsi_pt.c
+++ b/sys/cam/scsi/scsi_pt.c
@@ -173,9 +173,6 @@ ptclose(struct cdev *dev, int flag, int fmt, struct thread *td)
struct pt_softc *softc;
periph = (struct cam_periph *)dev->si_drv1;
- if (periph == NULL)
- return (ENXIO);
-
softc = (struct pt_softc *)periph->softc;
cam_periph_lock(periph);
@@ -252,6 +249,8 @@ ptctor(struct cam_periph *periph, void *arg)
struct pt_softc *softc;
struct ccb_getdev *cgd;
struct ccb_pathinq cpi;
+ struct make_dev_args args;
+ int error;
cgd = (struct ccb_getdev *)arg;
if (cgd == NULL) {
@@ -282,6 +281,21 @@ ptctor(struct cam_periph *periph, void *arg)
xpt_action((union ccb *)&cpi);
cam_periph_unlock(periph);
+
+ make_dev_args_init(&args);
+ args.mda_devsw = &pt_cdevsw;
+ args.mda_unit = periph->unit_number;
+ args.mda_uid = UID_ROOT;
+ args.mda_gid = GID_OPERATOR;
+ args.mda_mode = 0600;
+ args.mda_si_drv1 = periph;
+ error = make_dev_s(&args, &softc->dev, "%s%d", periph->periph_name,
+ periph->unit_number);
+ if (error != 0) {
+ cam_periph_lock(periph);
+ return (CAM_REQ_CMP_ERR);
+ }
+
softc->device_stats = devstat_new_entry("pt",
periph->unit_number, 0,
DEVSTAT_NO_BLOCKSIZE,
@@ -289,11 +303,7 @@ ptctor(struct cam_periph *periph, void *arg)
XPORT_DEVSTAT_TYPE(cpi.transport),
DEVSTAT_PRIORITY_OTHER);
- softc->dev = make_dev(&pt_cdevsw, periph->unit_number, UID_ROOT,
- GID_OPERATOR, 0600, "%s%d", periph->periph_name,
- periph->unit_number);
cam_periph_lock(periph);
- softc->dev->si_drv1 = periph;
/*
* Add async callbacks for bus reset and
@@ -571,9 +581,6 @@ ptioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td)
int error = 0;
periph = (struct cam_periph *)dev->si_drv1;
- if (periph == NULL)
- return(ENXIO);
-
softc = (struct pt_softc *)periph->softc;
cam_periph_lock(periph);
diff --git a/sys/cam/scsi/scsi_sa.c b/sys/cam/scsi/scsi_sa.c
index d0a2811..2779544 100644
--- a/sys/cam/scsi/scsi_sa.c
+++ b/sys/cam/scsi/scsi_sa.c
@@ -730,9 +730,6 @@ saclose(struct cdev *dev, int flag, int fmt, struct thread *td)
mode = SAMODE(dev);
periph = (struct cam_periph *)dev->si_drv1;
- if (periph == NULL)
- return (ENXIO);
-
cam_periph_lock(periph);
softc = (struct sa_softc *)periph->softc;
@@ -905,10 +902,6 @@ sastrategy(struct bio *bp)
return;
}
periph = (struct cam_periph *)bp->bio_dev->si_drv1;
- if (periph == NULL) {
- biofinish(bp, NULL, ENXIO);
- return;
- }
cam_periph_lock(periph);
softc = (struct sa_softc *)periph->softc;
@@ -1516,9 +1509,6 @@ saioctl(struct cdev *dev, u_long cmd, caddr_t arg, int flag, struct thread *td)
spaceop = 0; /* shut up gcc */
periph = (struct cam_periph *)dev->si_drv1;
- if (periph == NULL)
- return (ENXIO);
-
cam_periph_lock(periph);
softc = (struct sa_softc *)periph->softc;
@@ -2284,7 +2274,7 @@ saasync(void *callback_arg, u_int32_t code,
static void
sasetupdev(struct sa_softc *softc, struct cdev *dev)
{
- dev->si_drv1 = softc->periph;
+
dev->si_iosize_max = softc->maxio;
dev->si_flags |= softc->si_flags;
/*
@@ -2346,8 +2336,10 @@ saregister(struct cam_periph *periph, void *arg)
struct sa_softc *softc;
struct ccb_getdev *cgd;
struct ccb_pathinq cpi;
+ struct make_dev_args args;
caddr_t match;
char tmpstr[80];
+ int error;
cgd = (struct ccb_getdev *)arg;
if (cgd == NULL) {
@@ -2505,25 +2497,48 @@ saregister(struct cam_periph *periph, void *arg)
return (CAM_REQ_CMP_ERR);
}
- softc->devs.ctl_dev = make_dev(&sa_cdevsw, SAMINOR(SA_CTLDEV,
- SA_ATYPE_R), UID_ROOT, GID_OPERATOR,
- 0660, "%s%d.ctl", periph->periph_name, periph->unit_number);
+ make_dev_args_init(&args);
+ args.mda_devsw = &sa_cdevsw;
+ args.mda_si_drv1 = softc->periph;
+ args.mda_uid = UID_ROOT;
+ args.mda_gid = GID_OPERATOR;
+ args.mda_mode = 0660;
+
+ args.mda_unit = SAMINOR(SA_CTLDEV, SA_ATYPE_R);
+ error = make_dev_s(&args, &softc->devs.ctl_dev, "%s%d.ctl",
+ periph->periph_name, periph->unit_number);
+ if (error != 0) {
+ cam_periph_lock(periph);
+ return (CAM_REQ_CMP_ERR);
+ }
sasetupdev(softc, softc->devs.ctl_dev);
- softc->devs.r_dev = make_dev(&sa_cdevsw, SAMINOR(SA_NOT_CTLDEV,
- SA_ATYPE_R), UID_ROOT, GID_OPERATOR,
- 0660, "%s%d", periph->periph_name, periph->unit_number);
+ args.mda_unit = SAMINOR(SA_NOT_CTLDEV, SA_ATYPE_R);
+ error = make_dev_s(&args, &softc->devs.r_dev, "%s%d",
+ periph->periph_name, periph->unit_number);
+ if (error != 0) {
+ cam_periph_lock(periph);
+ return (CAM_REQ_CMP_ERR);
+ }
sasetupdev(softc, softc->devs.r_dev);
- softc->devs.nr_dev = make_dev(&sa_cdevsw, SAMINOR(SA_NOT_CTLDEV,
- SA_ATYPE_NR), UID_ROOT, GID_OPERATOR,
- 0660, "n%s%d", periph->periph_name, periph->unit_number);
+ args.mda_unit = SAMINOR(SA_NOT_CTLDEV, SA_ATYPE_NR);
+ error = make_dev_s(&args, &softc->devs.nr_dev, "n%s%d",
+ periph->periph_name, periph->unit_number);
+ if (error != 0) {
+ cam_periph_lock(periph);
+ return (CAM_REQ_CMP_ERR);
+ }
sasetupdev(softc, softc->devs.nr_dev);
- softc->devs.er_dev = make_dev(&sa_cdevsw, SAMINOR(SA_NOT_CTLDEV,
- SA_ATYPE_ER), UID_ROOT, GID_OPERATOR,
- 0660, "e%s%d", periph->periph_name, periph->unit_number);
- sasetupdev(softc, softc->devs.er_dev);
+ args.mda_unit = SAMINOR(SA_NOT_CTLDEV, SA_ATYPE_ER);
+ error = make_dev_s(&args, &softc->devs.er_dev, "e%s%d",
+ periph->periph_name, periph->unit_number);
+ if (error != 0) {
+ cam_periph_lock(periph);
+ return (CAM_REQ_CMP_ERR);
+ }
+ sasetupdev(softc, softc->devs.er_dev);
cam_periph_lock(periph);
diff --git a/sys/cam/scsi/scsi_sg.c b/sys/cam/scsi/scsi_sg.c
index 3e13003..ef83fc7 100644
--- a/sys/cam/scsi/scsi_sg.c
+++ b/sys/cam/scsi/scsi_sg.c
@@ -300,7 +300,8 @@ sgregister(struct cam_periph *periph, void *arg)
struct sg_softc *softc;
struct ccb_getdev *cgd;
struct ccb_pathinq cpi;
- int no_tags;
+ struct make_dev_args args;
+ int no_tags, error;
cgd = (struct ccb_getdev *)arg;
if (cgd == NULL) {
@@ -361,9 +362,20 @@ sgregister(struct cam_periph *periph, void *arg)
}
/* Register the device */
- softc->dev = make_dev(&sg_cdevsw, periph->unit_number,
- UID_ROOT, GID_OPERATOR, 0600, "%s%d",
- periph->periph_name, periph->unit_number);
+ make_dev_args_init(&args);
+ args.mda_devsw = &sg_cdevsw;
+ args.mda_unit = periph->unit_number;
+ args.mda_uid = UID_ROOT;
+ args.mda_gid = GID_OPERATOR;
+ args.mda_mode = 0600;
+ args.mda_si_drv1 = periph;
+ error = make_dev_s(&args, &softc->dev, "%s%d",
+ periph->periph_name, periph->unit_number);
+ if (error != 0) {
+ cam_periph_lock(periph);
+ cam_periph_release_locked(periph);
+ return (CAM_REQ_CMP_ERR);
+ }
if (periph->unit_number < 26) {
(void)make_dev_alias(softc->dev, "sg%c",
periph->unit_number + 'a');
@@ -373,7 +385,6 @@ sgregister(struct cam_periph *periph, void *arg)
(periph->unit_number % 26) + 'a');
}
cam_periph_lock(periph);
- softc->dev->si_drv1 = periph;
/*
* Add as async callback so that we get
@@ -429,9 +440,6 @@ sgopen(struct cdev *dev, int flags, int fmt, struct thread *td)
int error = 0;
periph = (struct cam_periph *)dev->si_drv1;
- if (periph == NULL)
- return (ENXIO);
-
if (cam_periph_acquire(periph) != CAM_REQ_CMP)
return (ENXIO);
@@ -468,8 +476,6 @@ sgclose(struct cdev *dev, int flag, int fmt, struct thread *td)
struct mtx *mtx;
periph = (struct cam_periph *)dev->si_drv1;
- if (periph == NULL)
- return (ENXIO);
mtx = cam_periph_mtx(periph);
mtx_lock(mtx);
@@ -506,9 +512,6 @@ sgioctl(struct cdev *dev, u_long cmd, caddr_t arg, int flag, struct thread *td)
int dir, error;
periph = (struct cam_periph *)dev->si_drv1;
- if (periph == NULL)
- return (ENXIO);
-
cam_periph_lock(periph);
softc = (struct sg_softc *)periph->softc;
diff --git a/sys/conf/files b/sys/conf/files
index 957b034..5ff5e3c 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3286,7 +3286,7 @@ libkern/jenkins_hash.c standard
libkern/murmur3_32.c standard
libkern/mcount.c optional profiling-routine
libkern/memcchr.c standard
-libkern/memchr.c optional fdt
+libkern/memchr.c standard
libkern/memcmp.c standard
libkern/qsort.c standard
libkern/qsort_r.c standard
diff --git a/sys/dev/acpica/acpi.c b/sys/dev/acpica/acpi.c
index 55b0dd2..08a967f 100644
--- a/sys/dev/acpica/acpi.c
+++ b/sys/dev/acpica/acpi.c
@@ -100,6 +100,7 @@ int acpi_quirks;
/* Supported sleep states. */
static BOOLEAN acpi_sleep_states[ACPI_S_STATE_COUNT];
+static void acpi_lookup(void *arg, const char *name, device_t *dev);
static int acpi_modevent(struct module *mod, int event, void *junk);
static int acpi_probe(device_t dev);
static int acpi_attach(device_t dev);
@@ -670,8 +671,10 @@ acpi_attach(device_t dev)
/* Register ACPI again to pass the correct argument of pm_func. */
power_pm_register(POWER_PM_TYPE_ACPI, acpi_pm_func, sc);
- if (!acpi_disabled("bus"))
+ if (!acpi_disabled("bus")) {
+ EVENTHANDLER_REGISTER(dev_lookup, acpi_lookup, NULL, 1000);
acpi_probe_children(dev);
+ }
/* Update all GPEs and enable runtime GPEs. */
status = AcpiUpdateAllGpes();
@@ -3404,6 +3407,31 @@ acpi_disabled(char *subsys)
return (0);
}
+static void
+acpi_lookup(void *arg, const char *name, device_t *dev)
+{
+ ACPI_HANDLE handle;
+
+ if (*dev != NULL)
+ return;
+
+ /*
+ * Allow any handle name that is specified as an absolute path and
+ * starts with '\'. We could restrict this to \_SB and friends,
+ * but see acpi_probe_children() for notes on why we scan the entire
+ * namespace for devices.
+ *
+ * XXX: The pathname argument to AcpiGetHandle() should be fixed to
+ * be const.
+ */
+ if (name[0] != '\\')
+ return;
+ if (ACPI_FAILURE(AcpiGetHandle(ACPI_ROOT_OBJECT, __DECONST(char *, name),
+ &handle)))
+ return;
+ *dev = acpi_get_device(handle);
+}
+
/*
* Control interface.
*
diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c
index 0bc3e16..e85c066 100644
--- a/sys/dev/ixgbe/if_ix.c
+++ b/sys/dev/ixgbe/if_ix.c
@@ -40,6 +40,11 @@
#include "ixgbe.h"
+#ifdef RSS
+#include <net/rss_config.h>
+#include <netinet/in_rss.h>
+#endif
+
/*********************************************************************
* Set this to one to display debug statistics
*********************************************************************/
@@ -48,7 +53,7 @@ int ixgbe_display_debug_stats = 0;
/*********************************************************************
* Driver version
*********************************************************************/
-char ixgbe_driver_version[] = "2.8.3";
+char ixgbe_driver_version[] = "3.1.0";
/*********************************************************************
* PCI Device ID Table
@@ -132,6 +137,7 @@ static int ixgbe_setup_msix(struct adapter *);
static void ixgbe_free_pci_resources(struct adapter *);
static void ixgbe_local_timer(void *);
static int ixgbe_setup_interface(device_t, struct adapter *);
+static void ixgbe_config_gpie(struct adapter *);
static void ixgbe_config_dmac(struct adapter *);
static void ixgbe_config_delay_values(struct adapter *);
static void ixgbe_config_link(struct adapter *);
@@ -200,6 +206,18 @@ static void ixgbe_handle_phy(void *, int);
static void ixgbe_reinit_fdir(void *, int);
#endif
+#ifdef PCI_IOV
+static void ixgbe_ping_all_vfs(struct adapter *);
+static void ixgbe_handle_mbx(void *, int);
+static int ixgbe_init_iov(device_t, u16, const nvlist_t *);
+static void ixgbe_uninit_iov(device_t);
+static int ixgbe_add_vf(device_t, u16, const nvlist_t *);
+static void ixgbe_initialize_iov(struct adapter *);
+static void ixgbe_recalculate_max_frame(struct adapter *);
+static void ixgbe_init_vf(struct adapter *, struct ixgbe_vf *);
+#endif /* PCI_IOV */
+
+
/*********************************************************************
* FreeBSD Device Interface Entry Points
*********************************************************************/
@@ -212,6 +230,11 @@ static device_method_t ix_methods[] = {
DEVMETHOD(device_shutdown, ixgbe_shutdown),
DEVMETHOD(device_suspend, ixgbe_suspend),
DEVMETHOD(device_resume, ixgbe_resume),
+#ifdef PCI_IOV
+ DEVMETHOD(pci_iov_init, ixgbe_init_iov),
+ DEVMETHOD(pci_iov_uninit, ixgbe_uninit_iov),
+ DEVMETHOD(pci_iov_add_vf, ixgbe_add_vf),
+#endif /* PCI_IOV */
DEVMETHOD_END
};
@@ -224,6 +247,9 @@ DRIVER_MODULE(ix, pci, ix_driver, ix_devclass, 0, 0);
MODULE_DEPEND(ix, pci, 1, 1, 1);
MODULE_DEPEND(ix, ether, 1, 1, 1);
+#ifdef DEV_NETMAP
+MODULE_DEPEND(ix, netmap, 1, 1, 1);
+#endif /* DEV_NETMAP */
/*
** TUNEABLE PARAMETERS:
@@ -291,8 +317,7 @@ SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0,
static int ixgbe_num_queues = 0;
TUNABLE_INT("hw.ix.num_queues", &ixgbe_num_queues);
SYSCTL_INT(_hw_ix, OID_AUTO, num_queues, CTLFLAG_RDTUN, &ixgbe_num_queues, 0,
- "Number of queues to configure up to a maximum of 8; "
- "0 indicates autoconfigure");
+ "Number of queues to configure, 0 indicates autoconfigure");
/*
** Number of TX descriptors per ring,
@@ -344,6 +369,8 @@ static int fdir_pballoc = 1;
#include <dev/netmap/ixgbe_netmap.h>
#endif /* DEV_NETMAP */
+static MALLOC_DEFINE(M_IXGBE, "ix", "ix driver allocations");
+
/*********************************************************************
* Device identification routine
*
@@ -484,7 +511,7 @@ ixgbe_attach(device_t dev)
}
/* Allocate multicast array memory. */
- adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
+ adapter->mta = malloc(sizeof(*adapter->mta) *
MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
if (adapter->mta == NULL) {
device_printf(dev, "Can not allocate multicast setup array\n");
@@ -566,9 +593,32 @@ ixgbe_attach(device_t dev)
/* Check PCIE slot type/speed/width */
ixgbe_get_slot_info(hw);
+
/* Set an initial default flow control value */
adapter->fc = ixgbe_fc_full;
+#ifdef PCI_IOV
+ if ((hw->mac.type != ixgbe_mac_82598EB) && (adapter->msix > 1)) {
+ nvlist_t *pf_schema, *vf_schema;
+
+ hw->mbx.ops.init_params(hw);
+ pf_schema = pci_iov_schema_alloc_node();
+ vf_schema = pci_iov_schema_alloc_node();
+ pci_iov_schema_add_unicast_mac(vf_schema, "mac-addr", 0, NULL);
+ pci_iov_schema_add_bool(vf_schema, "mac-anti-spoof",
+ IOV_SCHEMA_HASDEFAULT, TRUE);
+ pci_iov_schema_add_bool(vf_schema, "allow-set-mac",
+ IOV_SCHEMA_HASDEFAULT, FALSE);
+ pci_iov_schema_add_bool(vf_schema, "allow-promisc",
+ IOV_SCHEMA_HASDEFAULT, FALSE);
+ error = pci_iov_attach(dev, pf_schema, vf_schema);
+ if (error != 0) {
+ device_printf(dev,
+ "Error %d setting up SR-IOV\n", error);
+ }
+ }
+#endif /* PCI_IOV */
+
/* Check for certain supported features */
ixgbe_check_wol_support(adapter);
ixgbe_check_eee_support(adapter);
@@ -625,6 +675,13 @@ ixgbe_detach(device_t dev)
return (EBUSY);
}
+#ifdef PCI_IOV
+ if (pci_iov_detach(dev) != 0) {
+ device_printf(dev, "SR-IOV in use; detach first.\n");
+ return (EBUSY);
+ }
+#endif /* PCI_IOV */
+
/* Stop the adapter */
IXGBE_CORE_LOCK(adapter);
ixgbe_setup_low_power_mode(adapter);
@@ -643,6 +700,9 @@ ixgbe_detach(device_t dev)
taskqueue_drain(adapter->tq, &adapter->link_task);
taskqueue_drain(adapter->tq, &adapter->mod_task);
taskqueue_drain(adapter->tq, &adapter->msf_task);
+#ifdef PCI_IOV
+ taskqueue_drain(adapter->tq, &adapter->mbx_task);
+#endif
taskqueue_drain(adapter->tq, &adapter->phy_task);
#ifdef IXGBE_FDIR
taskqueue_drain(adapter->tq, &adapter->fdir_task);
@@ -819,6 +879,9 @@ ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data)
adapter->max_frame_size =
ifp->if_mtu + IXGBE_MTU_HDR;
ixgbe_init_locked(adapter);
+#ifdef PCI_IOV
+ ixgbe_recalculate_max_frame(adapter);
+#endif
IXGBE_CORE_UNLOCK(adapter);
}
break;
@@ -934,22 +997,36 @@ ixgbe_init_locked(struct adapter *adapter)
struct ifnet *ifp = adapter->ifp;
device_t dev = adapter->dev;
struct ixgbe_hw *hw = &adapter->hw;
- u32 k, txdctl, mhadd, gpie;
+ struct tx_ring *txr;
+ struct rx_ring *rxr;
+ u32 txdctl, mhadd;
u32 rxdctl, rxctrl;
+#ifdef PCI_IOV
+ enum ixgbe_iov_mode mode;
+#endif
mtx_assert(&adapter->core_mtx, MA_OWNED);
INIT_DEBUGOUT("ixgbe_init_locked: begin");
+
hw->adapter_stopped = FALSE;
ixgbe_stop_adapter(hw);
callout_stop(&adapter->timer);
+#ifdef PCI_IOV
+ mode = ixgbe_get_iov_mode(adapter);
+ adapter->pool = ixgbe_max_vfs(mode);
+ /* Queue indices may change with IOV mode */
+ for (int i = 0; i < adapter->num_queues; i++) {
+ adapter->rx_rings[i].me = ixgbe_pf_que_index(mode, i);
+ adapter->tx_rings[i].me = ixgbe_pf_que_index(mode, i);
+ }
+#endif
/* reprogram the RAR[0] in case user changed it. */
- ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
+ ixgbe_set_rar(hw, 0, hw->mac.addr, adapter->pool, IXGBE_RAH_AV);
/* Get the latest mac address, User can use a LAA */
- bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr,
- IXGBE_ETH_LENGTH_OF_ADDRESS);
- ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
+ bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
+ ixgbe_set_rar(hw, 0, hw->mac.addr, adapter->pool, 1);
hw->addr_ctrl.rar_used_count = 1;
/* Set the various hardware offload abilities */
@@ -972,6 +1049,9 @@ ixgbe_init_locked(struct adapter *adapter)
}
ixgbe_init_hw(hw);
+#ifdef PCI_IOV
+ ixgbe_initialize_iov(adapter);
+#endif
ixgbe_initialize_transmit_units(adapter);
/* Setup Multicast table */
@@ -981,14 +1061,10 @@ ixgbe_init_locked(struct adapter *adapter)
** Determine the correct mbuf pool
** for doing jumbo frames
*/
- if (adapter->max_frame_size <= 2048)
+ if (adapter->max_frame_size <= MCLBYTES)
adapter->rx_mbuf_sz = MCLBYTES;
- else if (adapter->max_frame_size <= 4096)
- adapter->rx_mbuf_sz = MJUMPAGESIZE;
- else if (adapter->max_frame_size <= 9216)
- adapter->rx_mbuf_sz = MJUM9BYTES;
else
- adapter->rx_mbuf_sz = MJUM16BYTES;
+ adapter->rx_mbuf_sz = MJUMPAGESIZE;
/* Prepare receive descriptors and buffers */
if (ixgbe_setup_receive_structures(adapter)) {
@@ -1000,31 +1076,8 @@ ixgbe_init_locked(struct adapter *adapter)
/* Configure RX settings */
ixgbe_initialize_receive_units(adapter);
- gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
-
- /* Enable Fan Failure Interrupt */
- gpie |= IXGBE_SDP1_GPIEN_BY_MAC(hw);
-
- /* Add for Module detection */
- if (hw->mac.type == ixgbe_mac_82599EB)
- gpie |= IXGBE_SDP2_GPIEN;
-
- /*
- * Thermal Failure Detection (X540)
- * Link Detection (X552)
- */
- if (hw->mac.type == ixgbe_mac_X540 ||
- hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
- hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
- gpie |= IXGBE_SDP0_GPIEN_X540;
-
- if (adapter->msix > 1) {
- /* Enable Enhanced MSIX mode */
- gpie |= IXGBE_GPIE_MSIX_MODE;
- gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
- IXGBE_GPIE_OCD;
- }
- IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
+ /* Enable SDP & MSIX interrupts based on adapter */
+ ixgbe_config_gpie(adapter);
/* Set MTU size */
if (ifp->if_mtu > ETHERMTU) {
@@ -1037,7 +1090,8 @@ ixgbe_init_locked(struct adapter *adapter)
/* Now enable all the queues */
for (int i = 0; i < adapter->num_queues; i++) {
- txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
+ txr = &adapter->tx_rings[i];
+ txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txr->me));
txdctl |= IXGBE_TXDCTL_ENABLE;
/* Set WTHRESH to 8, burst writeback */
txdctl |= (8 << 16);
@@ -1049,11 +1103,12 @@ ixgbe_init_locked(struct adapter *adapter)
* Prefetching enables tx line rate even with 1 queue.
*/
txdctl |= (32 << 0) | (1 << 8);
- IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
+ IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txr->me), txdctl);
}
- for (int i = 0; i < adapter->num_queues; i++) {
- rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
+ for (int i = 0, j = 0; i < adapter->num_queues; i++) {
+ rxr = &adapter->rx_rings[i];
+ rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me));
if (hw->mac.type == ixgbe_mac_82598EB) {
/*
** PTHRESH = 21
@@ -1064,9 +1119,9 @@ ixgbe_init_locked(struct adapter *adapter)
rxdctl |= 0x080420;
}
rxdctl |= IXGBE_RXDCTL_ENABLE;
- IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
- for (k = 0; k < 10; k++) {
- if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
+ IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxr->me), rxdctl);
+ for (; j < 10; j++) {
+ if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)) &
IXGBE_RXDCTL_ENABLE)
break;
else
@@ -1095,10 +1150,10 @@ ixgbe_init_locked(struct adapter *adapter)
struct netmap_kring *kring = &na->rx_rings[i];
int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);
- IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
+ IXGBE_WRITE_REG(hw, IXGBE_RDT(rxr->me), t);
} else
#endif /* DEV_NETMAP */
- IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
+ IXGBE_WRITE_REG(hw, IXGBE_RDT(rxr->me), adapter->num_rx_desc - 1);
}
/* Enable Receive engine */
@@ -1137,9 +1192,9 @@ ixgbe_init_locked(struct adapter *adapter)
#endif
/*
- ** Check on any SFP devices that
- ** need to be kick-started
- */
+ * Check on any SFP devices that
+ * need to be kick-started
+ */
if (hw->phy.type == ixgbe_phy_none) {
int err = hw->phy.ops.identify(hw);
if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
@@ -1153,8 +1208,7 @@ ixgbe_init_locked(struct adapter *adapter)
IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->vector), IXGBE_LINK_ITR);
/* Configure Energy Efficient Ethernet for supported devices */
- if (adapter->eee_support)
- ixgbe_setup_eee(hw, adapter->eee_enabled);
+ ixgbe_setup_eee(hw, adapter->eee_enabled);
/* Config/Enable Link */
ixgbe_config_link(adapter);
@@ -1174,6 +1228,15 @@ ixgbe_init_locked(struct adapter *adapter)
/* And now turn on interrupts */
ixgbe_enable_intr(adapter);
+#ifdef PCI_IOV
+ /* Enable the use of the MBX by the VF's */
+ {
+ u32 reg = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
+ reg |= IXGBE_CTRL_EXT_PFRSTD;
+ IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, reg);
+ }
+#endif
+
/* Now inform the stack we're ready */
ifp->if_drv_flags |= IFF_DRV_RUNNING;
@@ -1192,6 +1255,51 @@ ixgbe_init(void *arg)
}
static void
+ixgbe_config_gpie(struct adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 gpie;
+
+ gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
+
+ /* Fan Failure Interrupt */
+ if (hw->device_id == IXGBE_DEV_ID_82598AT)
+ gpie |= IXGBE_SDP1_GPIEN;
+
+ /*
+ * Module detection (SDP2)
+ * Media ready (SDP1)
+ */
+ if (hw->mac.type == ixgbe_mac_82599EB) {
+ gpie |= IXGBE_SDP2_GPIEN;
+ if (hw->device_id != IXGBE_DEV_ID_82599_QSFP_SF_QP)
+ gpie |= IXGBE_SDP1_GPIEN;
+ }
+
+ /*
+ * Thermal Failure Detection (X540)
+ * Link Detection (X557)
+ */
+ if (hw->mac.type == ixgbe_mac_X540 ||
+ hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
+ hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
+ gpie |= IXGBE_SDP0_GPIEN_X540;
+
+ if (adapter->msix > 1) {
+ /* Enable Enhanced MSIX mode */
+ gpie |= IXGBE_GPIE_MSIX_MODE;
+ gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
+ IXGBE_GPIE_OCD;
+ }
+
+ IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
+ return;
+}
+
+/*
+ * Requires adapter->max_frame_size to be set.
+ */
+static void
ixgbe_config_delay_values(struct adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
@@ -1285,10 +1393,9 @@ ixgbe_handle_que(void *context, int pending)
struct adapter *adapter = que->adapter;
struct tx_ring *txr = que->txr;
struct ifnet *ifp = adapter->ifp;
- bool more;
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- more = ixgbe_rxeof(que);
+ ixgbe_rxeof(que);
IXGBE_TX_LOCK(txr);
ixgbe_txeof(txr);
if (!drbr_empty(ifp, txr->br))
@@ -1344,8 +1451,8 @@ ixgbe_legacy_irq(void *arg)
IXGBE_TX_UNLOCK(txr);
/* Check for fan failure */
- if ((hw->phy.media_type == ixgbe_media_type_copper) &&
- (reg_eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))) {
+ if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
+ (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
"REPLACE IMMEDIATELY!!\n");
IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1_BY_MAC(hw));
@@ -1384,6 +1491,7 @@ ixgbe_msix_que(void *arg)
bool more;
u32 newitr = 0;
+
/* Protect against spurious interrupts */
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
return;
@@ -1504,6 +1612,10 @@ ixgbe_msix_link(void *arg)
device_printf(adapter->dev, "System shutdown required!\n");
IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
}
+#ifdef PCI_IOV
+ if (reg_eicr & IXGBE_EICR_MAILBOX)
+ taskqueue_enqueue(adapter->tq, &adapter->mbx_task);
+#endif
}
/* Pluggable optics-related interrupt */
@@ -1569,7 +1681,7 @@ ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
}
ifmr->ifm_status |= IFM_ACTIVE;
- layer = ixgbe_get_supported_physical_layer(hw);
+ layer = adapter->phy_layer;
if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T ||
layer & IXGBE_PHYSICAL_LAYER_1000BASE_T ||
@@ -1732,10 +1844,16 @@ ixgbe_media_change(struct ifnet * ifp)
hw->mac.autotry_restart = TRUE;
hw->mac.ops.setup_link(hw, speed, TRUE);
- adapter->advertise =
- ((speed & IXGBE_LINK_SPEED_10GB_FULL) << 2) |
- ((speed & IXGBE_LINK_SPEED_1GB_FULL) << 1) |
- ((speed & IXGBE_LINK_SPEED_100_FULL) << 0);
+ if (IFM_SUBTYPE(ifm->ifm_media) == IFM_AUTO) {
+ adapter->advertise = 0;
+ } else {
+ if ((speed & IXGBE_LINK_SPEED_10GB_FULL) != 0)
+ adapter->advertise |= 1 << 2;
+ if ((speed & IXGBE_LINK_SPEED_1GB_FULL) != 0)
+ adapter->advertise |= 1 << 1;
+ if ((speed & IXGBE_LINK_SPEED_100_FULL) != 0)
+ adapter->advertise |= 1 << 0;
+ }
return (0);
@@ -1802,18 +1920,17 @@ ixgbe_set_promisc(struct adapter *adapter)
static void
ixgbe_set_multi(struct adapter *adapter)
{
- u32 fctrl;
- u8 *mta;
- u8 *update_ptr;
- struct ifmultiaddr *ifma;
- int mcnt = 0;
- struct ifnet *ifp = adapter->ifp;
+ u32 fctrl;
+ u8 *update_ptr;
+ struct ifmultiaddr *ifma;
+ struct ixgbe_mc_addr *mta;
+ int mcnt = 0;
+ struct ifnet *ifp = adapter->ifp;
IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
mta = adapter->mta;
- bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
- MAX_NUM_MULTICAST_ADDRESSES);
+ bzero(mta, sizeof(*mta) * MAX_NUM_MULTICAST_ADDRESSES);
#if __FreeBSD_version < 800000
IF_ADDR_LOCK(ifp);
@@ -1826,8 +1943,8 @@ ixgbe_set_multi(struct adapter *adapter)
if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
break;
bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
- &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
- IXGBE_ETH_LENGTH_OF_ADDRESS);
+ mta[mcnt].addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
+ mta[mcnt].vmdq = adapter->pool;
mcnt++;
}
#if __FreeBSD_version < 800000
@@ -1850,7 +1967,7 @@ ixgbe_set_multi(struct adapter *adapter)
IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
- update_ptr = mta;
+ update_ptr = (u8 *)mta;
ixgbe_update_mc_addr_list(&adapter->hw,
update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
}
@@ -1866,13 +1983,13 @@ ixgbe_set_multi(struct adapter *adapter)
static u8 *
ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
{
- u8 *addr = *update_ptr;
- u8 *newptr;
- *vmdq = 0;
+ struct ixgbe_mc_addr *mta;
- newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
- *update_ptr = newptr;
- return addr;
+ mta = (struct ixgbe_mc_addr *)*update_ptr;
+ *vmdq = mta->vmdq;
+
+ *update_ptr = (u8*)(mta + 1);;
+ return (mta->addr);
}
@@ -1954,6 +2071,7 @@ watchdog:
ixgbe_init_locked(adapter);
}
+
/*
** Note: this routine updates the OS on the link state
** the real check of the hardware only happens with
@@ -1977,6 +2095,9 @@ ixgbe_update_link_status(struct adapter *adapter)
/* Update DMA coalescing config */
ixgbe_config_dmac(adapter);
if_link_state_change(ifp, LINK_STATE_UP);
+#ifdef PCI_IOV
+ ixgbe_ping_all_vfs(adapter);
+#endif
}
} else { /* Link down */
if (adapter->link_active == TRUE) {
@@ -1984,6 +2105,9 @@ ixgbe_update_link_status(struct adapter *adapter)
device_printf(dev,"Link is Down\n");
if_link_state_change(ifp, LINK_STATE_DOWN);
adapter->link_active = FALSE;
+#ifdef PCI_IOV
+ ixgbe_ping_all_vfs(adapter);
+#endif
}
}
@@ -2083,7 +2207,7 @@ ixgbe_setup_optics(struct adapter *adapter)
struct ixgbe_hw *hw = &adapter->hw;
int layer;
- layer = ixgbe_get_supported_physical_layer(hw);
+ layer = adapter->phy_layer = ixgbe_get_supported_physical_layer(hw);
if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
adapter->optics = IFM_10G_T;
@@ -2208,6 +2332,31 @@ ixgbe_allocate_msix(struct adapter *adapter)
struct tx_ring *txr = adapter->tx_rings;
int error, rid, vector = 0;
int cpu_id = 0;
+#ifdef RSS
+ cpuset_t cpu_mask;
+#endif
+
+#ifdef RSS
+ /*
+ * If we're doing RSS, the number of queues needs to
+ * match the number of RSS buckets that are configured.
+ *
+ * + If there's more queues than RSS buckets, we'll end
+ * up with queues that get no traffic.
+ *
+ * + If there's more RSS buckets than queues, we'll end
+ * up having multiple RSS buckets map to the same queue,
+ * so there'll be some contention.
+ */
+ if (adapter->num_queues != rss_getnumbuckets()) {
+ device_printf(dev,
+ "%s: number of queues (%d) != number of RSS buckets (%d)"
+ "; performance will be impacted.\n",
+ __func__,
+ adapter->num_queues,
+ rss_getnumbuckets());
+ }
+#endif
for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
rid = vector + 1;
@@ -2232,6 +2381,14 @@ ixgbe_allocate_msix(struct adapter *adapter)
#endif
que->msix = vector;
adapter->active_queues |= (u64)(1 << que->msix);
+#ifdef RSS
+ /*
+ * The queue ID is used as the RSS layer bucket ID.
+ * We look up the queue ID -> RSS CPU ID and select
+ * that.
+ */
+ cpu_id = rss_getcpu(i % rss_getnumbuckets());
+#else
/*
* Bind the msix vector, and thus the
* rings to the corresponding cpu.
@@ -2241,16 +2398,37 @@ ixgbe_allocate_msix(struct adapter *adapter)
*/
if (adapter->num_queues > 1)
cpu_id = i;
-
+#endif
if (adapter->num_queues > 1)
bus_bind_intr(dev, que->res, cpu_id);
+#ifdef IXGBE_DEBUG
+#ifdef RSS
+ device_printf(dev,
+ "Bound RSS bucket %d to CPU %d\n",
+ i, cpu_id);
+#else
+ device_printf(dev,
+ "Bound queue %d to cpu %d\n",
+ i, cpu_id);
+#endif
+#endif /* IXGBE_DEBUG */
+
TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
taskqueue_thread_enqueue, &que->tq);
+#ifdef RSS
+ CPU_SETOF(cpu_id, &cpu_mask);
+ taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET,
+ &cpu_mask,
+ "%s (bucket %d)",
+ device_get_nameunit(adapter->dev),
+ cpu_id);
+#else
taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
device_get_nameunit(adapter->dev));
+#endif
}
/* and Link */
@@ -2279,6 +2457,9 @@ ixgbe_allocate_msix(struct adapter *adapter)
TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
+#ifdef PCI_IOV
+ TASK_INIT(&adapter->mbx_task, 0, ixgbe_handle_mbx, adapter);
+#endif
TASK_INIT(&adapter->phy_task, 0, ixgbe_handle_phy, adapter);
#ifdef IXGBE_FDIR
TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
@@ -2326,11 +2507,14 @@ ixgbe_setup_msix(struct adapter *adapter)
/* Figure out a reasonable auto config value */
queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
+#ifdef RSS
+ /* If we're doing RSS, clamp at the number of RSS buckets */
+ if (queues > rss_getnumbuckets())
+ queues = rss_getnumbuckets();
+#endif
+
if (ixgbe_num_queues != 0)
queues = ixgbe_num_queues;
- /* Set max queues to 8 when autoconfiguring */
- else if ((ixgbe_num_queues == 0) && (queues > 8))
- queues = 8;
/* reflect correct sysctl value */
ixgbe_num_queues = queues;
@@ -2499,10 +2683,15 @@ ixgbe_setup_interface(device_t dev, struct adapter *adapter)
ifp->if_softc = adapter;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_ioctl = ixgbe_ioctl;
+#if __FreeBSD_version >= 1100036
+ if_setgetcounterfn(ifp, ixgbe_get_counter);
+#endif
+#if __FreeBSD_version >= 1100045
/* TSO parameters */
ifp->if_hw_tsomax = 65518;
ifp->if_hw_tsomaxsegcount = IXGBE_82599_SCATTER;
ifp->if_hw_tsomaxsegsize = 2048;
+#endif
ifp->if_transmit = ixgbe_mq_start;
ifp->if_qflush = ixgbe_qflush;
@@ -2564,7 +2753,7 @@ ixgbe_add_media_types(struct adapter *adapter)
device_t dev = adapter->dev;
int layer;
- layer = ixgbe_get_supported_physical_layer(hw);
+ layer = adapter->phy_layer = ixgbe_get_supported_physical_layer(hw);
/* Media types with matching FreeBSD media defines */
if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T)
@@ -2675,40 +2864,41 @@ ixgbe_initialize_transmit_units(struct adapter *adapter)
for (int i = 0; i < adapter->num_queues; i++, txr++) {
u64 tdba = txr->txdma.dma_paddr;
u32 txctrl = 0;
+ int j = txr->me;
- IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
+ IXGBE_WRITE_REG(hw, IXGBE_TDBAL(j),
(tdba & 0x00000000ffffffffULL));
- IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
- IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
+ IXGBE_WRITE_REG(hw, IXGBE_TDBAH(j), (tdba >> 32));
+ IXGBE_WRITE_REG(hw, IXGBE_TDLEN(j),
adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
/* Setup the HW Tx Head and Tail descriptor pointers */
- IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
- IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_TDH(j), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_TDT(j), 0);
/* Cache the tail address */
- txr->tail = IXGBE_TDT(txr->me);
+ txr->tail = IXGBE_TDT(j);
/* Disable Head Writeback */
switch (hw->mac.type) {
case ixgbe_mac_82598EB:
- txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
+ txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(j));
break;
case ixgbe_mac_82599EB:
case ixgbe_mac_X540:
default:
- txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
+ txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(j));
break;
}
txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
switch (hw->mac.type) {
case ixgbe_mac_82598EB:
- IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
+ IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(j), txctrl);
break;
case ixgbe_mac_82599EB:
case ixgbe_mac_X540:
default:
- IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
+ IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(j), txctrl);
break;
}
@@ -2716,6 +2906,9 @@ ixgbe_initialize_transmit_units(struct adapter *adapter)
if (hw->mac.type != ixgbe_mac_82598EB) {
u32 dmatxctl, rttdcs;
+#ifdef PCI_IOV
+ enum ixgbe_iov_mode mode = ixgbe_get_iov_mode(adapter);
+#endif
dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
dmatxctl |= IXGBE_DMATXCTL_TE;
IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
@@ -2723,7 +2916,11 @@ ixgbe_initialize_transmit_units(struct adapter *adapter)
rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
rttdcs |= IXGBE_RTTDCS_ARBDIS;
IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
+#ifdef PCI_IOV
+ IXGBE_WRITE_REG(hw, IXGBE_MTQC, ixgbe_get_mtqc(mode));
+#else
IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
+#endif
rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
}
@@ -2735,17 +2932,22 @@ static void
ixgbe_initialise_rss_mapping(struct adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
- uint32_t reta;
- int i, j, queue_id, table_size;
- int index_mult;
- uint32_t rss_key[10];
- uint32_t mrqc;
-
- /* Setup RSS */
- reta = 0;
+ u32 reta = 0, mrqc, rss_key[10];
+ int queue_id, table_size, index_mult;
+#ifdef RSS
+ u32 rss_hash_config;
+#endif
+#ifdef PCI_IOV
+ enum ixgbe_iov_mode mode;
+#endif
+#ifdef RSS
+ /* Fetch the configured RSS key */
+ rss_getkey((uint8_t *) &rss_key);
+#else
/* set up random bits */
arc4rand(&rss_key, sizeof(rss_key), 0);
+#endif
/* Set multiplier for RETA setup and table size based on MAC */
index_mult = 0x1;
@@ -2763,9 +2965,19 @@ ixgbe_initialise_rss_mapping(struct adapter *adapter)
}
/* Set up the redirection table */
- for (i = 0, j = 0; i < table_size; i++, j++) {
+ for (int i = 0, j = 0; i < table_size; i++, j++) {
if (j == adapter->num_queues) j = 0;
+#ifdef RSS
+ /*
+ * Fetch the RSS bucket id for the given indirection entry.
+ * Cap it at the number of configured buckets (which is
+ * num_queues.)
+ */
+ queue_id = rss_get_indirection_to_bucket(i);
+ queue_id = queue_id % adapter->num_queues;
+#else
queue_id = (j * index_mult);
+#endif
/*
* The low 8 bits are for hash value (n+0);
* The next 8 bits are for hash value (n+1), etc.
@@ -2786,6 +2998,32 @@ ixgbe_initialise_rss_mapping(struct adapter *adapter)
IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]);
/* Perform hash on these packet types */
+#ifdef RSS
+ mrqc = IXGBE_MRQC_RSSEN;
+ rss_hash_config = rss_gethashconfig();
+ if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4)
+ mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
+ if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4)
+ mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
+ if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6)
+ mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
+ if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6)
+ mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
+ if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX)
+ mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
+ if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX)
+ mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
+ if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4)
+ mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
+ if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4_EX)
+ device_printf(adapter->dev,
+ "%s: RSS_HASHTYPE_RSS_UDP_IPV4_EX defined, "
+ "but not supported\n", __func__);
+ if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6)
+ mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
+ if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX)
+ mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
+#else
/*
* Disable UDP - IP fragments aren't currently being handled
* and so we end up with a mix of 2-tuple and 4-tuple
@@ -2794,18 +3032,16 @@ ixgbe_initialise_rss_mapping(struct adapter *adapter)
mrqc = IXGBE_MRQC_RSSEN
| IXGBE_MRQC_RSS_FIELD_IPV4
| IXGBE_MRQC_RSS_FIELD_IPV4_TCP
-#if 0
- | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
-#endif
| IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
| IXGBE_MRQC_RSS_FIELD_IPV6_EX
| IXGBE_MRQC_RSS_FIELD_IPV6
| IXGBE_MRQC_RSS_FIELD_IPV6_TCP
-#if 0
- | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
- | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP
-#endif
;
+#endif /* RSS */
+#ifdef PCI_IOV
+ mode = ixgbe_get_iov_mode(adapter);
+ mrqc |= ixgbe_get_mrqc(mode);
+#endif
IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
}
@@ -2864,16 +3100,17 @@ ixgbe_initialize_receive_units(struct adapter *adapter)
for (int i = 0; i < adapter->num_queues; i++, rxr++) {
u64 rdba = rxr->rxdma.dma_paddr;
+ int j = rxr->me;
/* Setup the Base and Length of the Rx Descriptor Ring */
- IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
+ IXGBE_WRITE_REG(hw, IXGBE_RDBAL(j),
(rdba & 0x00000000ffffffffULL));
- IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
- IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
+ IXGBE_WRITE_REG(hw, IXGBE_RDBAH(j), (rdba >> 32));
+ IXGBE_WRITE_REG(hw, IXGBE_RDLEN(j),
adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
/* Set up the SRRCTL register */
- srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
+ srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(j));
srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
srrctl |= bufsz;
@@ -3009,9 +3246,9 @@ ixgbe_setup_vlan_hw_support(struct adapter *adapter)
rxr = &adapter->rx_rings[i];
/* On 82599 the VLAN enable is per/queue in RXDCTL */
if (hw->mac.type != ixgbe_mac_82598EB) {
- ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
+ ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me));
ctrl |= IXGBE_RXDCTL_VME;
- IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
+ IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxr->me), ctrl);
}
rxr->vtag_strip = TRUE;
}
@@ -3061,6 +3298,9 @@ ixgbe_enable_intr(struct adapter *adapter)
#ifdef IXGBE_FDIR
mask |= IXGBE_EIMS_FLOW_DIR;
#endif
+#ifdef PCI_IOV
+ mask |= IXGBE_EIMS_MAILBOX;
+#endif
break;
case ixgbe_mac_X540:
/* Detect if Thermal Sensor is enabled */
@@ -3084,6 +3324,9 @@ ixgbe_enable_intr(struct adapter *adapter)
#ifdef IXGBE_FDIR
mask |= IXGBE_EIMS_FLOW_DIR;
#endif
+#ifdef PCI_IOV
+ mask |= IXGBE_EIMS_MAILBOX;
+#endif
/* falls through */
default:
break;
@@ -3097,6 +3340,9 @@ ixgbe_enable_intr(struct adapter *adapter)
/* Don't autoclear Link */
mask &= ~IXGBE_EIMS_OTHER;
mask &= ~IXGBE_EIMS_LSC;
+#ifdef PCI_IOV
+ mask &= ~IXGBE_EIMS_MAILBOX;
+#endif
IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
}
@@ -3295,8 +3541,8 @@ ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
static void
ixgbe_configure_ivars(struct adapter *adapter)
{
- struct ix_queue *que = adapter->queues;
- u32 newitr;
+ struct ix_queue *que = adapter->queues;
+ u32 newitr;
if (ixgbe_max_interrupt_rate > 0)
newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
@@ -3310,10 +3556,12 @@ ixgbe_configure_ivars(struct adapter *adapter)
}
for (int i = 0; i < adapter->num_queues; i++, que++) {
+ struct rx_ring *rxr = &adapter->rx_rings[i];
+ struct tx_ring *txr = &adapter->tx_rings[i];
/* First the RX queue entry */
- ixgbe_set_ivar(adapter, i, que->msix, 0);
+ ixgbe_set_ivar(adapter, rxr->me, que->msix, 0);
/* ... and the TX */
- ixgbe_set_ivar(adapter, i, que->msix, 1);
+ ixgbe_set_ivar(adapter, txr->me, que->msix, 1);
/* Set an Initial EITR value */
IXGBE_WRITE_REG(&adapter->hw,
IXGBE_EITR(que->msix), newitr);
@@ -3327,7 +3575,8 @@ ixgbe_configure_ivars(struct adapter *adapter)
** ixgbe_sfp_probe - called in the local timer to
** determine if a port had optics inserted.
*/
-static bool ixgbe_sfp_probe(struct adapter *adapter)
+static bool
+ixgbe_sfp_probe(struct adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
device_t dev = adapter->dev;
@@ -3387,6 +3636,7 @@ ixgbe_handle_mod(void *context, int pending)
"Unsupported SFP+ module type was detected.\n");
return;
}
+
err = hw->mac.ops.setup_sfp(hw);
if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
device_printf(dev,
@@ -3509,9 +3759,7 @@ ixgbe_check_eee_support(struct adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
- adapter->eee_support = adapter->eee_enabled =
- (hw->device_id == IXGBE_DEV_ID_X550T ||
- hw->device_id == IXGBE_DEV_ID_X550EM_X_KR);
+ adapter->eee_enabled = !!(hw->mac.ops.setup_eee);
}
/*
@@ -3887,8 +4135,7 @@ ixgbe_add_device_sysctls(struct adapter *adapter)
ixgbe_sysctl_dmac, "I", "DMA Coalesce");
/* for X550T and X550EM backplane devices */
- if (hw->device_id == IXGBE_DEV_ID_X550T ||
- hw->device_id == IXGBE_DEV_ID_X550EM_X_KR) {
+ if (hw->mac.ops.setup_eee) {
struct sysctl_oid *eee_node;
struct sysctl_oid_list *eee_list;
@@ -4518,6 +4765,7 @@ static int
ixgbe_sysctl_eee_enable(SYSCTL_HANDLER_ARGS)
{
struct adapter *adapter = (struct adapter *) arg1;
+ struct ixgbe_hw *hw = &adapter->hw;
struct ifnet *ifp = adapter->ifp;
int new_eee_enabled, error = 0;
@@ -4528,7 +4776,7 @@ ixgbe_sysctl_eee_enable(SYSCTL_HANDLER_ARGS)
if (new_eee_enabled == adapter->eee_enabled)
return (0);
- if (new_eee_enabled > 0 && !adapter->eee_support)
+ if (new_eee_enabled > 0 && !hw->mac.ops.setup_eee)
return (ENODEV);
else
adapter->eee_enabled = !!(new_eee_enabled);
@@ -4644,10 +4892,19 @@ ixgbe_enable_rx_drop(struct adapter *adapter)
struct ixgbe_hw *hw = &adapter->hw;
for (int i = 0; i < adapter->num_queues; i++) {
- u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
+ struct rx_ring *rxr = &adapter->rx_rings[i];
+ u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me));
srrctl |= IXGBE_SRRCTL_DROP_EN;
- IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
+ IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl);
+ }
+#ifdef PCI_IOV
+ /* enable drop for each vf */
+ for (int i = 0; i < adapter->num_vfs; i++) {
+ IXGBE_WRITE_REG(hw, IXGBE_QDE,
+ (IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT) |
+ IXGBE_QDE_ENABLE));
}
+#endif
}
static void
@@ -4656,10 +4913,18 @@ ixgbe_disable_rx_drop(struct adapter *adapter)
struct ixgbe_hw *hw = &adapter->hw;
for (int i = 0; i < adapter->num_queues; i++) {
- u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
+ struct rx_ring *rxr = &adapter->rx_rings[i];
+ u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me));
srrctl &= ~IXGBE_SRRCTL_DROP_EN;
- IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
+ IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl);
+ }
+#ifdef PCI_IOV
+ /* disable drop for each vf */
+ for (int i = 0; i < adapter->num_vfs; i++) {
+ IXGBE_WRITE_REG(hw, IXGBE_QDE,
+ (IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT)));
}
+#endif
}
static void
@@ -4686,4 +4951,722 @@ ixgbe_rearm_queues(struct adapter *adapter, u64 queues)
}
}
+#ifdef PCI_IOV
+
+/*
+** Support functions for SRIOV/VF management
+*/
+
+static void
+ixgbe_ping_all_vfs(struct adapter *adapter)
+{
+ struct ixgbe_vf *vf;
+
+ for (int i = 0; i < adapter->num_vfs; i++) {
+ vf = &adapter->vfs[i];
+ if (vf->flags & IXGBE_VF_ACTIVE)
+ ixgbe_send_vf_msg(adapter, vf, IXGBE_PF_CONTROL_MSG);
+ }
+}
+
+
+static void
+ixgbe_vf_set_default_vlan(struct adapter *adapter, struct ixgbe_vf *vf,
+ uint16_t tag)
+{
+ struct ixgbe_hw *hw;
+ uint32_t vmolr, vmvir;
+
+ hw = &adapter->hw;
+
+ vf->vlan_tag = tag;
+
+ vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf->pool));
+
+ /* Do not receive packets that pass inexact filters. */
+ vmolr &= ~(IXGBE_VMOLR_ROMPE | IXGBE_VMOLR_ROPE);
+
+ /* Disable Multicast Promicuous Mode. */
+ vmolr &= ~IXGBE_VMOLR_MPE;
+
+ /* Accept broadcasts. */
+ vmolr |= IXGBE_VMOLR_BAM;
+
+ if (tag == 0) {
+ /* Accept non-vlan tagged traffic. */
+ //vmolr |= IXGBE_VMOLR_AUPE;
+
+ /* Allow VM to tag outgoing traffic; no default tag. */
+ vmvir = 0;
+ } else {
+ /* Require vlan-tagged traffic. */
+ vmolr &= ~IXGBE_VMOLR_AUPE;
+
+ /* Tag all traffic with provided vlan tag. */
+ vmvir = (tag | IXGBE_VMVIR_VLANA_DEFAULT);
+ }
+ IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf->pool), vmolr);
+ IXGBE_WRITE_REG(hw, IXGBE_VMVIR(vf->pool), vmvir);
+}
+
+
+static boolean_t
+ixgbe_vf_frame_size_compatible(struct adapter *adapter, struct ixgbe_vf *vf)
+{
+
+ /*
+ * Frame size compatibility between PF and VF is only a problem on
+ * 82599-based cards. X540 and later support any combination of jumbo
+ * frames on PFs and VFs.
+ */
+ if (adapter->hw.mac.type != ixgbe_mac_82599EB)
+ return (TRUE);
+
+ switch (vf->api_ver) {
+ case IXGBE_API_VER_1_0:
+ case IXGBE_API_VER_UNKNOWN:
+ /*
+ * On legacy (1.0 and older) VF versions, we don't support jumbo
+ * frames on either the PF or the VF.
+ */
+ if (adapter->max_frame_size > ETHER_MAX_LEN ||
+ vf->max_frame_size > ETHER_MAX_LEN)
+ return (FALSE);
+
+ return (TRUE);
+
+ break;
+ case IXGBE_API_VER_1_1:
+ default:
+ /*
+ * 1.1 or later VF versions always work if they aren't using
+ * jumbo frames.
+ */
+ if (vf->max_frame_size <= ETHER_MAX_LEN)
+ return (TRUE);
+
+ /*
+ * Jumbo frames only work with VFs if the PF is also using jumbo
+ * frames.
+ */
+ if (adapter->max_frame_size <= ETHER_MAX_LEN)
+ return (TRUE);
+
+ return (FALSE);
+
+ }
+}
+
+
+static void
+ixgbe_process_vf_reset(struct adapter *adapter, struct ixgbe_vf *vf)
+{
+ ixgbe_vf_set_default_vlan(adapter, vf, vf->default_vlan);
+
+ // XXX clear multicast addresses
+
+ ixgbe_clear_rar(&adapter->hw, vf->rar_index);
+
+ vf->api_ver = IXGBE_API_VER_UNKNOWN;
+}
+
+
+static void
+ixgbe_vf_enable_transmit(struct adapter *adapter, struct ixgbe_vf *vf)
+{
+ struct ixgbe_hw *hw;
+ uint32_t vf_index, vfte;
+
+ hw = &adapter->hw;
+
+ vf_index = IXGBE_VF_INDEX(vf->pool);
+ vfte = IXGBE_READ_REG(hw, IXGBE_VFTE(vf_index));
+ vfte |= IXGBE_VF_BIT(vf->pool);
+ IXGBE_WRITE_REG(hw, IXGBE_VFTE(vf_index), vfte);
+}
+
+
+static void
+ixgbe_vf_enable_receive(struct adapter *adapter, struct ixgbe_vf *vf)
+{
+ struct ixgbe_hw *hw;
+ uint32_t vf_index, vfre;
+
+ hw = &adapter->hw;
+
+ vf_index = IXGBE_VF_INDEX(vf->pool);
+ vfre = IXGBE_READ_REG(hw, IXGBE_VFRE(vf_index));
+ if (ixgbe_vf_frame_size_compatible(adapter, vf))
+ vfre |= IXGBE_VF_BIT(vf->pool);
+ else
+ vfre &= ~IXGBE_VF_BIT(vf->pool);
+ IXGBE_WRITE_REG(hw, IXGBE_VFRE(vf_index), vfre);
+}
+
+
+static void
+ixgbe_vf_reset_msg(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg)
+{
+ struct ixgbe_hw *hw;
+ uint32_t ack;
+ uint32_t resp[IXGBE_VF_PERMADDR_MSG_LEN];
+
+ hw = &adapter->hw;
+
+ ixgbe_process_vf_reset(adapter, vf);
+
+ if (ixgbe_validate_mac_addr(vf->ether_addr) == 0) {
+ ixgbe_set_rar(&adapter->hw, vf->rar_index,
+ vf->ether_addr, vf->pool, TRUE);
+ ack = IXGBE_VT_MSGTYPE_ACK;
+ } else
+ ack = IXGBE_VT_MSGTYPE_NACK;
+
+ ixgbe_vf_enable_transmit(adapter, vf);
+ ixgbe_vf_enable_receive(adapter, vf);
+
+ vf->flags |= IXGBE_VF_CTS;
+
+ resp[0] = IXGBE_VF_RESET | ack | IXGBE_VT_MSGTYPE_CTS;
+ bcopy(vf->ether_addr, &resp[1], ETHER_ADDR_LEN);
+ resp[3] = hw->mac.mc_filter_type;
+ ixgbe_write_mbx(hw, resp, IXGBE_VF_PERMADDR_MSG_LEN, vf->pool);
+}
+
+
+static void
+ixgbe_vf_set_mac(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg)
+{
+ uint8_t *mac;
+
+ mac = (uint8_t*)&msg[1];
+
+ /* Check that the VF has permission to change the MAC address. */
+ if (!(vf->flags & IXGBE_VF_CAP_MAC) && ixgbe_vf_mac_changed(vf, mac)) {
+ ixgbe_send_vf_nack(adapter, vf, msg[0]);
+ return;
+ }
+
+ if (ixgbe_validate_mac_addr(mac) != 0) {
+ ixgbe_send_vf_nack(adapter, vf, msg[0]);
+ return;
+ }
+
+ bcopy(mac, vf->ether_addr, ETHER_ADDR_LEN);
+
+ ixgbe_set_rar(&adapter->hw, vf->rar_index, vf->ether_addr,
+ vf->pool, TRUE);
+
+ ixgbe_send_vf_ack(adapter, vf, msg[0]);
+}
+
+
+/*
+** VF multicast addresses are set by using the appropriate bit in
+** 1 of 128 32 bit addresses (4096 possible).
+*/
+static void
+ixgbe_vf_set_mc_addr(struct adapter *adapter, struct ixgbe_vf *vf, u32 *msg)
+{
+ u16 *list = (u16*)&msg[1];
+ int entries;
+ u32 vmolr, vec_bit, vec_reg, mta_reg;
+
+ entries = (msg[0] & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT;
+ entries = min(entries, IXGBE_MAX_VF_MC);
+
+ vmolr = IXGBE_READ_REG(&adapter->hw, IXGBE_VMOLR(vf->pool));
+
+ vf->num_mc_hashes = entries;
+
+ /* Set the appropriate MTA bit */
+ for (int i = 0; i < entries; i++) {
+ vf->mc_hash[i] = list[i];
+ vec_reg = (vf->mc_hash[i] >> 5) & 0x7F;
+ vec_bit = vf->mc_hash[i] & 0x1F;
+ mta_reg = IXGBE_READ_REG(&adapter->hw, IXGBE_MTA(vec_reg));
+ mta_reg |= (1 << vec_bit);
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_MTA(vec_reg), mta_reg);
+ }
+
+ vmolr |= IXGBE_VMOLR_ROMPE;
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_VMOLR(vf->pool), vmolr);
+ ixgbe_send_vf_ack(adapter, vf, msg[0]);
+ return;
+}
+
+
+static void
+ixgbe_vf_set_vlan(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg)
+{
+ struct ixgbe_hw *hw;
+ int enable;
+ uint16_t tag;
+
+ hw = &adapter->hw;
+ enable = IXGBE_VT_MSGINFO(msg[0]);
+ tag = msg[1] & IXGBE_VLVF_VLANID_MASK;
+
+ if (!(vf->flags & IXGBE_VF_CAP_VLAN)) {
+ ixgbe_send_vf_nack(adapter, vf, msg[0]);
+ return;
+ }
+
+ /* It is illegal to enable vlan tag 0. */
+ if (tag == 0 && enable != 0){
+ ixgbe_send_vf_nack(adapter, vf, msg[0]);
+ return;
+ }
+
+ ixgbe_set_vfta(hw, tag, vf->pool, enable);
+ ixgbe_send_vf_ack(adapter, vf, msg[0]);
+}
+
+
+static void
+ixgbe_vf_set_lpe(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg)
+{
+ struct ixgbe_hw *hw;
+ uint32_t vf_max_size, pf_max_size, mhadd;
+
+ hw = &adapter->hw;
+ vf_max_size = msg[1];
+
+ if (vf_max_size < ETHER_CRC_LEN) {
+ /* We intentionally ACK invalid LPE requests. */
+ ixgbe_send_vf_ack(adapter, vf, msg[0]);
+ return;
+ }
+
+ vf_max_size -= ETHER_CRC_LEN;
+
+ if (vf_max_size > IXGBE_MAX_FRAME_SIZE) {
+ /* We intentionally ACK invalid LPE requests. */
+ ixgbe_send_vf_ack(adapter, vf, msg[0]);
+ return;
+ }
+
+ vf->max_frame_size = vf_max_size;
+ ixgbe_update_max_frame(adapter, vf->max_frame_size);
+
+ /*
+ * We might have to disable reception to this VF if the frame size is
+ * not compatible with the config on the PF.
+ */
+ ixgbe_vf_enable_receive(adapter, vf);
+
+ mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
+ pf_max_size = (mhadd & IXGBE_MHADD_MFS_MASK) >> IXGBE_MHADD_MFS_SHIFT;
+
+ if (pf_max_size < adapter->max_frame_size) {
+ mhadd &= ~IXGBE_MHADD_MFS_MASK;
+ mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
+ IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
+ }
+
+ ixgbe_send_vf_ack(adapter, vf, msg[0]);
+}
+
+
+static void
+ixgbe_vf_set_macvlan(struct adapter *adapter, struct ixgbe_vf *vf,
+ uint32_t *msg)
+{
+ //XXX implement this
+ ixgbe_send_vf_nack(adapter, vf, msg[0]);
+}
+
+
+static void
+ixgbe_vf_api_negotiate(struct adapter *adapter, struct ixgbe_vf *vf,
+ uint32_t *msg)
+{
+
+ switch (msg[1]) {
+ case IXGBE_API_VER_1_0:
+ case IXGBE_API_VER_1_1:
+ vf->api_ver = msg[1];
+ ixgbe_send_vf_ack(adapter, vf, msg[0]);
+ break;
+ default:
+ vf->api_ver = IXGBE_API_VER_UNKNOWN;
+ ixgbe_send_vf_nack(adapter, vf, msg[0]);
+ break;
+ }
+}
+
+
+static void
+ixgbe_vf_get_queues(struct adapter *adapter, struct ixgbe_vf *vf,
+ uint32_t *msg)
+{
+ struct ixgbe_hw *hw;
+ uint32_t resp[IXGBE_VF_GET_QUEUES_RESP_LEN];
+ int num_queues;
+
+ hw = &adapter->hw;
+
+ /* GET_QUEUES is not supported on pre-1.1 APIs. */
+ switch (msg[0]) {
+ case IXGBE_API_VER_1_0:
+ case IXGBE_API_VER_UNKNOWN:
+ ixgbe_send_vf_nack(adapter, vf, msg[0]);
+ return;
+ }
+
+ resp[0] = IXGBE_VF_GET_QUEUES | IXGBE_VT_MSGTYPE_ACK |
+ IXGBE_VT_MSGTYPE_CTS;
+
+ num_queues = ixgbe_vf_queues(ixgbe_get_iov_mode(adapter));
+ resp[IXGBE_VF_TX_QUEUES] = num_queues;
+ resp[IXGBE_VF_RX_QUEUES] = num_queues;
+ resp[IXGBE_VF_TRANS_VLAN] = (vf->default_vlan != 0);
+ resp[IXGBE_VF_DEF_QUEUE] = 0;
+
+ ixgbe_write_mbx(hw, resp, IXGBE_VF_GET_QUEUES_RESP_LEN, vf->pool);
+}
+
+
+static void
+ixgbe_process_vf_msg(struct adapter *adapter, struct ixgbe_vf *vf)
+{
+ struct ixgbe_hw *hw;
+ uint32_t msg[IXGBE_VFMAILBOX_SIZE];
+ int error;
+
+ hw = &adapter->hw;
+
+ error = ixgbe_read_mbx(hw, msg, IXGBE_VFMAILBOX_SIZE, vf->pool);
+
+ if (error != 0)
+ return;
+
+ CTR3(KTR_MALLOC, "%s: received msg %x from %d",
+ adapter->ifp->if_xname, msg[0], vf->pool);
+ if (msg[0] == IXGBE_VF_RESET) {
+ ixgbe_vf_reset_msg(adapter, vf, msg);
+ return;
+ }
+
+ if (!(vf->flags & IXGBE_VF_CTS)) {
+ ixgbe_send_vf_nack(adapter, vf, msg[0]);
+ return;
+ }
+
+ switch (msg[0] & IXGBE_VT_MSG_MASK) {
+ case IXGBE_VF_SET_MAC_ADDR:
+ ixgbe_vf_set_mac(adapter, vf, msg);
+ break;
+ case IXGBE_VF_SET_MULTICAST:
+ ixgbe_vf_set_mc_addr(adapter, vf, msg);
+ break;
+ case IXGBE_VF_SET_VLAN:
+ ixgbe_vf_set_vlan(adapter, vf, msg);
+ break;
+ case IXGBE_VF_SET_LPE:
+ ixgbe_vf_set_lpe(adapter, vf, msg);
+ break;
+ case IXGBE_VF_SET_MACVLAN:
+ ixgbe_vf_set_macvlan(adapter, vf, msg);
+ break;
+ case IXGBE_VF_API_NEGOTIATE:
+ ixgbe_vf_api_negotiate(adapter, vf, msg);
+ break;
+ case IXGBE_VF_GET_QUEUES:
+ ixgbe_vf_get_queues(adapter, vf, msg);
+ break;
+ default:
+ ixgbe_send_vf_nack(adapter, vf, msg[0]);
+ }
+}
+
+
+/*
+ * Tasklet for handling VF -> PF mailbox messages.
+ */
+static void
+ixgbe_handle_mbx(void *context, int pending)
+{
+ struct adapter *adapter;
+ struct ixgbe_hw *hw;
+ struct ixgbe_vf *vf;
+ int i;
+
+ adapter = context;
+ hw = &adapter->hw;
+
+ IXGBE_CORE_LOCK(adapter);
+ for (i = 0; i < adapter->num_vfs; i++) {
+ vf = &adapter->vfs[i];
+
+ if (vf->flags & IXGBE_VF_ACTIVE) {
+ if (ixgbe_check_for_rst(hw, vf->pool) == 0)
+ ixgbe_process_vf_reset(adapter, vf);
+
+ if (ixgbe_check_for_msg(hw, vf->pool) == 0)
+ ixgbe_process_vf_msg(adapter, vf);
+
+ if (ixgbe_check_for_ack(hw, vf->pool) == 0)
+ ixgbe_process_vf_ack(adapter, vf);
+ }
+ }
+ IXGBE_CORE_UNLOCK(adapter);
+}
+
+
+static int
+ixgbe_init_iov(device_t dev, u16 num_vfs, const nvlist_t *config)
+{
+ struct adapter *adapter;
+ enum ixgbe_iov_mode mode;
+
+ adapter = device_get_softc(dev);
+ adapter->num_vfs = num_vfs;
+ mode = ixgbe_get_iov_mode(adapter);
+
+ if (num_vfs > ixgbe_max_vfs(mode)) {
+ adapter->num_vfs = 0;
+ return (ENOSPC);
+ }
+
+ IXGBE_CORE_LOCK(adapter);
+
+ adapter->vfs = malloc(sizeof(*adapter->vfs) * num_vfs, M_IXGBE,
+ M_NOWAIT | M_ZERO);
+
+ if (adapter->vfs == NULL) {
+ adapter->num_vfs = 0;
+ IXGBE_CORE_UNLOCK(adapter);
+ return (ENOMEM);
+ }
+
+ ixgbe_init_locked(adapter);
+
+ IXGBE_CORE_UNLOCK(adapter);
+
+ return (0);
+}
+
+
+static void
+ixgbe_uninit_iov(device_t dev)
+{
+ struct ixgbe_hw *hw;
+ struct adapter *adapter;
+ uint32_t pf_reg, vf_reg;
+
+ adapter = device_get_softc(dev);
+ hw = &adapter->hw;
+
+ IXGBE_CORE_LOCK(adapter);
+
+ /* Enable rx/tx for the PF and disable it for all VFs. */
+ pf_reg = IXGBE_VF_INDEX(adapter->pool);
+ IXGBE_WRITE_REG(hw, IXGBE_VFRE(pf_reg),
+ IXGBE_VF_BIT(adapter->pool));
+ IXGBE_WRITE_REG(hw, IXGBE_VFTE(pf_reg),
+ IXGBE_VF_BIT(adapter->pool));
+
+ if (pf_reg == 0)
+ vf_reg = 1;
+ else
+ vf_reg = 0;
+ IXGBE_WRITE_REG(hw, IXGBE_VFRE(vf_reg), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_VFTE(vf_reg), 0);
+
+ IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
+
+ free(adapter->vfs, M_IXGBE);
+ adapter->vfs = NULL;
+ adapter->num_vfs = 0;
+
+ IXGBE_CORE_UNLOCK(adapter);
+}
+
+
+static void
+ixgbe_initialize_iov(struct adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ uint32_t mrqc, mtqc, vt_ctl, vf_reg, gcr_ext, gpie;
+ enum ixgbe_iov_mode mode;
+ int i;
+
+ mode = ixgbe_get_iov_mode(adapter);
+ if (mode == IXGBE_NO_VM)
+ return;
+
+ IXGBE_CORE_LOCK_ASSERT(adapter);
+
+ mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
+ mrqc &= ~IXGBE_MRQC_MRQE_MASK;
+
+ switch (mode) {
+ case IXGBE_64_VM:
+ mrqc |= IXGBE_MRQC_VMDQRSS64EN;
+ break;
+ case IXGBE_32_VM:
+ mrqc |= IXGBE_MRQC_VMDQRSS32EN;
+ break;
+ default:
+ panic("Unexpected SR-IOV mode %d", mode);
+ }
+ IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+
+ mtqc = IXGBE_MTQC_VT_ENA;
+ switch (mode) {
+ case IXGBE_64_VM:
+ mtqc |= IXGBE_MTQC_64VF;
+ break;
+ case IXGBE_32_VM:
+ mtqc |= IXGBE_MTQC_32VF;
+ break;
+ default:
+ panic("Unexpected SR-IOV mode %d", mode);
+ }
+ IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
+
+
+ gcr_ext = IXGBE_READ_REG(hw, IXGBE_GCR_EXT);
+ gcr_ext |= IXGBE_GCR_EXT_MSIX_EN;
+ gcr_ext &= ~IXGBE_GCR_EXT_VT_MODE_MASK;
+ switch (mode) {
+ case IXGBE_64_VM:
+ gcr_ext |= IXGBE_GCR_EXT_VT_MODE_64;
+ break;
+ case IXGBE_32_VM:
+ gcr_ext |= IXGBE_GCR_EXT_VT_MODE_32;
+ break;
+ default:
+ panic("Unexpected SR-IOV mode %d", mode);
+ }
+ IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext);
+
+
+ gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
+ gcr_ext &= ~IXGBE_GPIE_VTMODE_MASK;
+ switch (mode) {
+ case IXGBE_64_VM:
+ gpie |= IXGBE_GPIE_VTMODE_64;
+ break;
+ case IXGBE_32_VM:
+ gpie |= IXGBE_GPIE_VTMODE_32;
+ break;
+ default:
+ panic("Unexpected SR-IOV mode %d", mode);
+ }
+ IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
+
+ /* Enable rx/tx for the PF. */
+ vf_reg = IXGBE_VF_INDEX(adapter->pool);
+ IXGBE_WRITE_REG(hw, IXGBE_VFRE(vf_reg),
+ IXGBE_VF_BIT(adapter->pool));
+ IXGBE_WRITE_REG(hw, IXGBE_VFTE(vf_reg),
+ IXGBE_VF_BIT(adapter->pool));
+
+ /* Allow VM-to-VM communication. */
+ IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
+
+ vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
+ vt_ctl |= (adapter->pool << IXGBE_VT_CTL_POOL_SHIFT);
+ IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
+
+ for (i = 0; i < adapter->num_vfs; i++)
+ ixgbe_init_vf(adapter, &adapter->vfs[i]);
+}
+
+
+/*
+** Check the max frame setting of all active VF's
+*/
+static void
+ixgbe_recalculate_max_frame(struct adapter *adapter)
+{
+ struct ixgbe_vf *vf;
+
+ IXGBE_CORE_LOCK_ASSERT(adapter);
+
+ for (int i = 0; i < adapter->num_vfs; i++) {
+ vf = &adapter->vfs[i];
+ if (vf->flags & IXGBE_VF_ACTIVE)
+ ixgbe_update_max_frame(adapter, vf->max_frame_size);
+ }
+}
+
+
+static void
+ixgbe_init_vf(struct adapter *adapter, struct ixgbe_vf *vf)
+{
+ struct ixgbe_hw *hw;
+ uint32_t vf_index, pfmbimr;
+
+ IXGBE_CORE_LOCK_ASSERT(adapter);
+
+ hw = &adapter->hw;
+
+ if (!(vf->flags & IXGBE_VF_ACTIVE))
+ return;
+
+ vf_index = IXGBE_VF_INDEX(vf->pool);
+ pfmbimr = IXGBE_READ_REG(hw, IXGBE_PFMBIMR(vf_index));
+ pfmbimr |= IXGBE_VF_BIT(vf->pool);
+ IXGBE_WRITE_REG(hw, IXGBE_PFMBIMR(vf_index), pfmbimr);
+
+ ixgbe_vf_set_default_vlan(adapter, vf, vf->vlan_tag);
+
+ // XXX multicast addresses
+
+ if (ixgbe_validate_mac_addr(vf->ether_addr) == 0) {
+ ixgbe_set_rar(&adapter->hw, vf->rar_index,
+ vf->ether_addr, vf->pool, TRUE);
+ }
+
+ ixgbe_vf_enable_transmit(adapter, vf);
+ ixgbe_vf_enable_receive(adapter, vf);
+
+ ixgbe_send_vf_msg(adapter, vf, IXGBE_PF_CONTROL_MSG);
+}
+
+static int
+ixgbe_add_vf(device_t dev, u16 vfnum, const nvlist_t *config)
+{
+ struct adapter *adapter;
+ struct ixgbe_vf *vf;
+ const void *mac;
+
+ adapter = device_get_softc(dev);
+
+ KASSERT(vfnum < adapter->num_vfs, ("VF index %d is out of range %d",
+ vfnum, adapter->num_vfs));
+
+ IXGBE_CORE_LOCK(adapter);
+ vf = &adapter->vfs[vfnum];
+ vf->pool= vfnum;
+
+ /* RAR[0] is used by the PF so use vfnum + 1 for VF RAR. */
+ vf->rar_index = vfnum + 1;
+ vf->default_vlan = 0;
+ vf->max_frame_size = ETHER_MAX_LEN;
+ ixgbe_update_max_frame(adapter, vf->max_frame_size);
+
+ if (nvlist_exists_binary(config, "mac-addr")) {
+ mac = nvlist_get_binary(config, "mac-addr", NULL);
+ bcopy(mac, vf->ether_addr, ETHER_ADDR_LEN);
+ if (nvlist_get_bool(config, "allow-set-mac"))
+ vf->flags |= IXGBE_VF_CAP_MAC;
+ } else
+ /*
+ * If the administrator has not specified a MAC address then
+ * we must allow the VF to choose one.
+ */
+ vf->flags |= IXGBE_VF_CAP_MAC;
+
+ vf->flags = IXGBE_VF_ACTIVE;
+
+ ixgbe_init_vf(adapter, vf);
+ IXGBE_CORE_UNLOCK(adapter);
+
+ return (0);
+}
+#endif /* PCI_IOV */
diff --git a/sys/dev/ixgbe/if_ixv.c b/sys/dev/ixgbe/if_ixv.c
index 0d0393c..a3971cc 100644
--- a/sys/dev/ixgbe/if_ixv.c
+++ b/sys/dev/ixgbe/if_ixv.c
@@ -43,7 +43,7 @@
/*********************************************************************
* Driver version
*********************************************************************/
-char ixv_driver_version[] = "1.2.5";
+char ixv_driver_version[] = "1.4.0";
/*********************************************************************
* PCI Device ID Table
@@ -126,6 +126,18 @@ static void ixv_msix_mbx(void *);
static void ixv_handle_que(void *, int);
static void ixv_handle_mbx(void *, int);
+#ifdef DEV_NETMAP
+/*
+ * This is defined in <dev/netmap/ixgbe_netmap.h>, which is included by
+ * if_ix.c.
+ */
+extern void ixgbe_netmap_attach(struct adapter *adapter);
+
+#include <net/netmap.h>
+#include <sys/selinfo.h>
+#include <dev/netmap/netmap_kern.h>
+#endif /* DEV_NETMAP */
+
/*********************************************************************
* FreeBSD Device Interface Entry Points
*********************************************************************/
@@ -147,12 +159,19 @@ devclass_t ixv_devclass;
DRIVER_MODULE(ixv, pci, ixv_driver, ixv_devclass, 0, 0);
MODULE_DEPEND(ixv, pci, 1, 1, 1);
MODULE_DEPEND(ixv, ether, 1, 1, 1);
+#ifdef DEV_NETMAP
+MODULE_DEPEND(ix, netmap, 1, 1, 1);
+#endif /* DEV_NETMAP */
/* XXX depend on 'ix' ? */
/*
** TUNEABLE PARAMETERS:
*/
+/* Number of Queues - do not exceed MSIX vectors - 1 */
+static int ixv_num_queues = 1;
+TUNABLE_INT("hw.ixv.num_queues", &ixv_num_queues);
+
/*
** AIM: Adaptive Interrupt Moderation
** which means that the interrupt rate
@@ -312,6 +331,15 @@ ixv_attach(device_t dev)
"max number of tx packets to process",
&adapter->tx_process_limit, ixv_tx_process_limit);
+ /* Sysctls for limiting the amount of work done in the taskqueues */
+ ixv_set_sysctl_value(adapter, "rx_processing_limit",
+ "max number of rx packets to process",
+ &adapter->rx_process_limit, ixv_rx_process_limit);
+
+ ixv_set_sysctl_value(adapter, "tx_processing_limit",
+ "max number of tx packets to process",
+ &adapter->tx_process_limit, ixv_tx_process_limit);
+
/* Do descriptor calc and sanity checks */
if (((ixv_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
ixv_txd < MIN_TXD || ixv_txd > MAX_TXD) {
@@ -349,6 +377,11 @@ ixv_attach(device_t dev)
ixgbe_reset_hw(hw);
+ /* Get the Mailbox API version */
+ device_printf(dev,"MBX API %d negotiation: %d\n",
+ ixgbe_mbox_api_11,
+ ixgbevf_negotiate_api_version(hw, ixgbe_mbox_api_11));
+
error = ixgbe_init_hw(hw);
if (error) {
device_printf(dev,"Hardware Initialization Failure\n");
@@ -383,6 +416,9 @@ ixv_attach(device_t dev)
adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
ixv_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
+#ifdef DEV_NETMAP
+ ixgbe_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
INIT_DEBUGOUT("ixv_attach: end");
return (0);
@@ -446,6 +482,9 @@ ixv_detach(device_t dev)
ether_ifdetach(adapter->ifp);
callout_drain(&adapter->timer);
+#ifdef DEV_NETMAP
+ netmap_detach(adapter->ifp);
+#endif /* DEV_NETMAP */
ixv_free_pci_resources(adapter);
bus_generic_detach(dev);
if_free(adapter->ifp);
@@ -1321,10 +1360,13 @@ static int
ixv_setup_msix(struct adapter *adapter)
{
device_t dev = adapter->dev;
- int rid, want;
+ int rid, want, msgs;
- /* First try MSI/X */
+ /* Must have at least 2 MSIX vectors */
+ msgs = pci_msix_count(dev);
+ if (msgs < 2)
+ goto out;
rid = PCIR_BAR(3);
adapter->msix_mem = bus_alloc_resource_any(dev,
SYS_RES_MEMORY, &rid, RF_ACTIVE);
@@ -1335,11 +1377,16 @@ ixv_setup_msix(struct adapter *adapter)
}
/*
- ** Want two vectors: one for a queue,
+ ** Want vectors for the queues,
** plus an additional for mailbox.
*/
- want = 2;
- if ((pci_alloc_msix(dev, &want) == 0) && (want == 2)) {
+ want = adapter->num_queues + 1;
+ if (want > msgs) {
+ want = msgs;
+ adapter->num_queues = msgs - 1;
+ } else
+ msgs = want;
+ if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
device_printf(adapter->dev,
"Using MSIX interrupts with %d vectors\n", want);
return (want);
@@ -1378,7 +1425,9 @@ ixv_allocate_pci_resources(struct adapter *adapter)
rman_get_bushandle(adapter->pci_mem);
adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle;
- adapter->num_queues = 1;
+ /* Pick up the tuneable queues */
+ adapter->num_queues = ixv_num_queues;
+
adapter->hw.back = &adapter->osdep;
/*
@@ -1596,32 +1645,41 @@ ixv_initialize_receive_units(struct adapter *adapter)
{
struct rx_ring *rxr = adapter->rx_rings;
struct ixgbe_hw *hw = &adapter->hw;
- struct ifnet *ifp = adapter->ifp;
- u32 bufsz, fctrl, rxcsum, hlreg;
-
-
- /* Enable broadcasts */
- fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
- fctrl |= IXGBE_FCTRL_BAM;
- fctrl |= IXGBE_FCTRL_DPF;
- fctrl |= IXGBE_FCTRL_PMCF;
- IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
+ struct ifnet *ifp = adapter->ifp;
+ u32 bufsz, rxcsum, psrtype;
+ int max_frame;
- /* Set for Jumbo Frames? */
- hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
- if (ifp->if_mtu > ETHERMTU) {
- hlreg |= IXGBE_HLREG0_JUMBOEN;
+ if (ifp->if_mtu > ETHERMTU)
bufsz = 4096 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
- } else {
- hlreg &= ~IXGBE_HLREG0_JUMBOEN;
+ else
bufsz = 2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
- }
- IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
+
+ psrtype = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR |
+ IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR |
+ IXGBE_PSRTYPE_L2HDR;
+
+ IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
+
+ /* Tell PF our expected packet-size */
+ max_frame = ifp->if_mtu + IXGBE_MTU_HDR;
+ ixgbevf_rlpml_set_vf(hw, max_frame);
for (int i = 0; i < adapter->num_queues; i++, rxr++) {
u64 rdba = rxr->rxdma.dma_paddr;
u32 reg, rxdctl;
+ /* Disable the queue */
+ rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
+ rxdctl &= ~(IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME);
+ IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
+ for (int j = 0; j < 10; j++) {
+ if (IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)) &
+ IXGBE_RXDCTL_ENABLE)
+ msec_delay(1);
+ else
+ break;
+ }
+ wmb();
/* Setup the Base and Length of the Rx Descriptor Ring */
IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
(rdba & 0x00000000ffffffffULL));
@@ -1630,6 +1688,10 @@ ixv_initialize_receive_units(struct adapter *adapter)
IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
+ /* Reset the ring indices */
+ IXGBE_WRITE_REG(hw, IXGBE_VFRDH(rxr->me), 0);
+ IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), 0);
+
/* Set up the SRRCTL register */
reg = IXGBE_READ_REG(hw, IXGBE_VFSRRCTL(i));
reg &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
@@ -1658,6 +1720,35 @@ ixv_initialize_receive_units(struct adapter *adapter)
msec_delay(1);
}
wmb();
+
+ /* Set the Tail Pointer */
+#ifdef DEV_NETMAP
+ /*
+ * In netmap mode, we must preserve the buffers made
+ * available to userspace before the if_init()
+ * (this is true by default on the TX side, because
+ * init makes all buffers available to userspace).
+ *
+ * netmap_reset() and the device specific routines
+ * (e.g. ixgbe_setup_receive_rings()) map these
+ * buffers at the end of the NIC ring, so here we
+ * must set the RDT (tail) register to make sure
+ * they are not overwritten.
+ *
+ * In this driver the NIC ring starts at RDH = 0,
+ * RDT points to the last slot available for reception (?),
+ * so RDT = num_rx_desc - 1 means the whole ring is available.
+ */
+ if (ifp->if_capenable & IFCAP_NETMAP) {
+ struct netmap_adapter *na = NA(adapter->ifp);
+ struct netmap_kring *kring = &na->rx_rings[i];
+ int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);
+
+ IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), t);
+ } else
+#endif /* DEV_NETMAP */
+ IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me),
+ adapter->num_rx_desc - 1);
}
rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
diff --git a/sys/dev/ixgbe/ix_txrx.c b/sys/dev/ixgbe/ix_txrx.c
index 0d4fd68..068048f 100644
--- a/sys/dev/ixgbe/ix_txrx.c
+++ b/sys/dev/ixgbe/ix_txrx.c
@@ -40,6 +40,11 @@
#include "ixgbe.h"
+#ifdef RSS
+#include <net/rss_config.h>
+#include <netinet/in_rss.h>
+#endif
+
#ifdef DEV_NETMAP
#include <net/netmap.h>
#include <sys/selinfo.h>
@@ -191,6 +196,9 @@ ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
struct ix_queue *que;
struct tx_ring *txr;
int i, err = 0;
+#ifdef RSS
+ uint32_t bucket_id;
+#endif
/*
* When doing RSS, map it to the same outbound queue
@@ -199,9 +207,16 @@ ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
* If everything is setup correctly, it should be the
* same bucket that the current CPU we're on is.
*/
- if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
- i = m->m_pkthdr.flowid % adapter->num_queues;
- else
+ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
+#ifdef RSS
+ if (rss_hash2bucket(m->m_pkthdr.flowid,
+ M_HASHTYPE_GET(m), &bucket_id) == 0)
+ /* TODO: spit out something if bucket_id > num_queues? */
+ i = bucket_id % adapter->num_queues;
+ else
+#endif
+ i = m->m_pkthdr.flowid % adapter->num_queues;
+ } else
i = curcpu % adapter->num_queues;
/* Check for a hung queue and pick alternative */
@@ -555,7 +570,6 @@ ixgbe_setup_transmit_ring(struct tx_ring *txr)
{
struct adapter *adapter = txr->adapter;
struct ixgbe_tx_buf *txbuf;
- int i;
#ifdef DEV_NETMAP
struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_slot *slot;
@@ -578,7 +592,7 @@ ixgbe_setup_transmit_ring(struct tx_ring *txr)
/* Free any existing tx buffers. */
txbuf = txr->tx_buffers;
- for (i = 0; i < txr->num_desc; i++, txbuf++) {
+ for (int i = 0; i < txr->num_desc; i++, txbuf++) {
if (txbuf->m_head != NULL) {
bus_dmamap_sync(txr->txtag, txbuf->map,
BUS_DMASYNC_POSTWRITE);
@@ -599,7 +613,8 @@ ixgbe_setup_transmit_ring(struct tx_ring *txr)
*/
if (slot) {
int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
- netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
+ netmap_load_map(na, txr->txtag,
+ txbuf->map, NMB(na, slot + si));
}
#endif /* DEV_NETMAP */
/* Clear the EOP descriptor pointer */
@@ -752,8 +767,7 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
if (mp->m_flags & M_VLANTAG) {
vtag = htole16(mp->m_pkthdr.ether_vtag);
vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
- }
- else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
+ } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
return (0);
/*
@@ -1354,7 +1368,7 @@ ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
struct adapter *adapter = rxr->adapter;
device_t dev = adapter->dev;
struct ixgbe_rx_buf *rxbuf;
- int i, bsize, error;
+ int bsize, error;
bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
if (!(rxr->rx_buffers =
@@ -1381,7 +1395,7 @@ ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
goto fail;
}
- for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
+ for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
rxbuf = &rxr->rx_buffers[i];
error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
if (error) {
@@ -1403,9 +1417,8 @@ static void
ixgbe_free_receive_ring(struct rx_ring *rxr)
{
struct ixgbe_rx_buf *rxbuf;
- int i;
- for (i = 0; i < rxr->num_desc; i++) {
+ for (int i = 0; i < rxr->num_desc; i++) {
rxbuf = &rxr->rx_buffers[i];
if (rxbuf->buf != NULL) {
bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
@@ -1888,25 +1901,23 @@ ixgbe_rxeof(struct ix_queue *que)
if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
ixgbe_rx_checksum(staterr, sendmp, ptype);
- /*
- * In case of multiqueue, we have RXCSUM.PCSD bit set
- * and never cleared. This means we have RSS hash
- * available to be used.
- */
- if (adapter->num_queues > 1) {
- sendmp->m_pkthdr.flowid =
- le32toh(cur->wb.lower.hi_dword.rss);
+ /*
+ * In case of multiqueue, we have RXCSUM.PCSD bit set
+ * and never cleared. This means we have RSS hash
+ * available to be used.
+ */
+ if (adapter->num_queues > 1) {
+ sendmp->m_pkthdr.flowid =
+ le32toh(cur->wb.lower.hi_dword.rss);
/*
* Full RSS support is not avilable in
* FreeBSD 10 so setting the hash type to
* OPAQUE.
*/
M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
- } else {
-#if __FreeBSD_version >= 800000
- sendmp->m_pkthdr.flowid = que->msix;
+ } else {
+ sendmp->m_pkthdr.flowid = que->msix;
M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
-#endif /* FreeBSD_version */
}
}
next_desc:
@@ -2089,6 +2100,9 @@ ixgbe_allocate_queues(struct adapter *adapter)
struct rx_ring *rxr;
int rsize, tsize, error = IXGBE_SUCCESS;
int txconf = 0, rxconf = 0;
+#ifdef PCI_IOV
+ enum ixgbe_iov_mode iov_mode;
+#endif
/* First allocate the top level queue structs */
if (!(adapter->queues =
@@ -2121,6 +2135,12 @@ ixgbe_allocate_queues(struct adapter *adapter)
tsize = roundup2(adapter->num_tx_desc *
sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
+#ifdef PCI_IOV
+ iov_mode = ixgbe_get_iov_mode(adapter);
+ adapter->pool = ixgbe_max_vfs(iov_mode);
+#else
+ adapter->pool = 0;
+#endif
/*
* Now set up the TX queues, txconf is needed to handle the
* possibility that things fail midcourse and we need to
@@ -2130,7 +2150,11 @@ ixgbe_allocate_queues(struct adapter *adapter)
/* Set up some basics */
txr = &adapter->tx_rings[i];
txr->adapter = adapter;
+#ifdef PCI_IOV
+ txr->me = ixgbe_pf_que_index(iov_mode, i);
+#else
txr->me = i;
+#endif
txr->num_desc = adapter->num_tx_desc;
/* Initialize the TX side lock */
@@ -2175,7 +2199,11 @@ ixgbe_allocate_queues(struct adapter *adapter)
rxr = &adapter->rx_rings[i];
/* Set up some basics */
rxr->adapter = adapter;
+#ifdef PCI_IOV
+ rxr->me = ixgbe_pf_que_index(iov_mode, i);
+#else
rxr->me = i;
+#endif
rxr->num_desc = adapter->num_rx_desc;
/* Initialize the RX side lock */
diff --git a/sys/dev/ixgbe/ixgbe.h b/sys/dev/ixgbe/ixgbe.h
index df1da13..f7721f6 100644
--- a/sys/dev/ixgbe/ixgbe.h
+++ b/sys/dev/ixgbe/ixgbe.h
@@ -49,8 +49,10 @@
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/sockio.h>
+#include <sys/eventhandler.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/bpf.h>
#include <net/ethernet.h>
@@ -90,11 +92,22 @@
#include <machine/smp.h>
#include <sys/sbuf.h>
+#ifdef PCI_IOV
+#include <sys/nv.h>
+#include <sys/iov_schema.h>
+#include <dev/pci/pci_iov.h>
+#endif
+
#include "ixgbe_api.h"
#include "ixgbe_common.h"
#include "ixgbe_phy.h"
#include "ixgbe_vf.h"
+#ifdef PCI_IOV
+#include "ixgbe_common.h"
+#include "ixgbe_mbx.h"
+#endif
+
/* Tunables */
/*
@@ -242,6 +255,29 @@
(_adapter->hw.mac.type == ixgbe_mac_X540_vf) || \
(_adapter->hw.mac.type == ixgbe_mac_82599_vf))
+#ifdef PCI_IOV
+#define IXGBE_VF_INDEX(vmdq) ((vmdq) / 32)
+#define IXGBE_VF_BIT(vmdq) (1 << ((vmdq) % 32))
+
+#define IXGBE_VT_MSG_MASK 0xFFFF
+
+#define IXGBE_VT_MSGINFO(msg) \
+ (((msg) & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT)
+
+#define IXGBE_VF_GET_QUEUES_RESP_LEN 5
+
+#define IXGBE_API_VER_1_0 0
+#define IXGBE_API_VER_2_0 1 /* Solaris API. Not supported. */
+#define IXGBE_API_VER_1_1 2
+#define IXGBE_API_VER_UNKNOWN UINT16_MAX
+
+enum ixgbe_iov_mode {
+ IXGBE_64_VM,
+ IXGBE_32_VM,
+ IXGBE_NO_VM
+};
+#endif /* PCI_IOV */
+
/*
*****************************************************************************
@@ -260,6 +296,7 @@ typedef struct _ixgbe_vendor_info_t {
unsigned int index;
} ixgbe_vendor_info_t;
+
struct ixgbe_tx_buf {
union ixgbe_adv_tx_desc *eop;
struct mbuf *m_head;
@@ -288,6 +325,11 @@ struct ixgbe_dma_alloc {
int dma_nseg;
};
+struct ixgbe_mc_addr {
+ u8 addr[IXGBE_ETH_LENGTH_OF_ADDRESS];
+ u32 vmdq;
+};
+
/*
** Driver queue struct: this is the interrupt container
** for the associated tx and rx ring.
@@ -383,6 +425,28 @@ struct rx_ring {
#endif
};
+#ifdef PCI_IOV
+#define IXGBE_VF_CTS (1 << 0) /* VF is clear to send. */
+#define IXGBE_VF_CAP_MAC (1 << 1) /* VF is permitted to change MAC. */
+#define IXGBE_VF_CAP_VLAN (1 << 2) /* VF is permitted to join vlans. */
+#define IXGBE_VF_ACTIVE (1 << 3) /* VF is active. */
+
+#define IXGBE_MAX_VF_MC 30 /* Max number of multicast entries */
+
+struct ixgbe_vf {
+ u_int pool;
+ u_int rar_index;
+ u_int max_frame_size;
+ uint32_t flags;
+ uint8_t ether_addr[ETHER_ADDR_LEN];
+ uint16_t mc_hash[IXGBE_MAX_VF_MC];
+ uint16_t num_mc_hashes;
+ uint16_t default_vlan;
+ uint16_t vlan_tag;
+ uint16_t api_ver;
+};
+#endif /* PCI_IOV */
+
/* Our adapter structure */
struct adapter {
struct ifnet *ifp;
@@ -434,8 +498,8 @@ struct adapter {
bool link_up;
u32 vector;
u16 dmac;
- bool eee_support;
bool eee_enabled;
+ u32 phy_layer;
/* Power management-related */
bool wol_support;
@@ -449,6 +513,9 @@ struct adapter {
struct task link_task; /* Link tasklet */
struct task mod_task; /* SFP tasklet */
struct task msf_task; /* Multispeed Fiber */
+#ifdef PCI_IOV
+ struct task mbx_task; /* VF -> PF mailbox interrupt */
+#endif /* PCI_IOV */
#ifdef IXGBE_FDIR
int fdir_reinit;
struct task fdir_task;
@@ -482,8 +549,12 @@ struct adapter {
u32 rx_process_limit;
/* Multicast array memory */
- u8 *mta;
-
+ struct ixgbe_mc_addr *mta;
+ int num_vfs;
+ int pool;
+#ifdef PCI_IOV
+ struct ixgbe_vf *vfs;
+#endif
/* Misc stats maintained by the driver */
unsigned long dropped_pkts;
@@ -666,4 +737,150 @@ bool ixgbe_rxeof(struct ix_queue *);
int ixgbe_dma_malloc(struct adapter *,
bus_size_t, struct ixgbe_dma_alloc *, int);
void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
+
+#ifdef PCI_IOV
+
+static inline boolean_t
+ixgbe_vf_mac_changed(struct ixgbe_vf *vf, const uint8_t *mac)
+{
+ return (bcmp(mac, vf->ether_addr, ETHER_ADDR_LEN) != 0);
+}
+
+static inline void
+ixgbe_send_vf_msg(struct adapter *adapter, struct ixgbe_vf *vf, u32 msg)
+{
+
+ if (vf->flags & IXGBE_VF_CTS)
+ msg |= IXGBE_VT_MSGTYPE_CTS;
+
+ ixgbe_write_mbx(&adapter->hw, &msg, 1, vf->pool);
+}
+
+static inline void
+ixgbe_send_vf_ack(struct adapter *adapter, struct ixgbe_vf *vf, u32 msg)
+{
+ msg &= IXGBE_VT_MSG_MASK;
+ ixgbe_send_vf_msg(adapter, vf, msg | IXGBE_VT_MSGTYPE_ACK);
+}
+
+static inline void
+ixgbe_send_vf_nack(struct adapter *adapter, struct ixgbe_vf *vf, u32 msg)
+{
+ msg &= IXGBE_VT_MSG_MASK;
+ ixgbe_send_vf_msg(adapter, vf, msg | IXGBE_VT_MSGTYPE_NACK);
+}
+
+static inline void
+ixgbe_process_vf_ack(struct adapter *adapter, struct ixgbe_vf *vf)
+{
+ if (!(vf->flags & IXGBE_VF_CTS))
+ ixgbe_send_vf_nack(adapter, vf, 0);
+}
+
+static inline enum ixgbe_iov_mode
+ixgbe_get_iov_mode(struct adapter *adapter)
+{
+ if (adapter->num_vfs == 0)
+ return (IXGBE_NO_VM);
+ if (adapter->num_queues <= 2)
+ return (IXGBE_64_VM);
+ else if (adapter->num_queues <= 4)
+ return (IXGBE_32_VM);
+ else
+ return (IXGBE_NO_VM);
+}
+
+static inline u16
+ixgbe_max_vfs(enum ixgbe_iov_mode mode)
+{
+ /*
+ * We return odd numbers below because we
+ * reserve 1 VM's worth of queues for the PF.
+ */
+ switch (mode) {
+ case IXGBE_64_VM:
+ return (63);
+ case IXGBE_32_VM:
+ return (31);
+ case IXGBE_NO_VM:
+ default:
+ return (0);
+ }
+}
+
+static inline int
+ixgbe_vf_queues(enum ixgbe_iov_mode mode)
+{
+ switch (mode) {
+ case IXGBE_64_VM:
+ return (2);
+ case IXGBE_32_VM:
+ return (4);
+ case IXGBE_NO_VM:
+ default:
+ return (0);
+ }
+}
+
+static inline int
+ixgbe_vf_que_index(enum ixgbe_iov_mode mode, u32 vfnum, int num)
+{
+ return ((vfnum * ixgbe_vf_queues(mode)) + num);
+}
+
+static inline int
+ixgbe_pf_que_index(enum ixgbe_iov_mode mode, int num)
+{
+ return (ixgbe_vf_que_index(mode, ixgbe_max_vfs(mode), num));
+}
+
+static inline void
+ixgbe_update_max_frame(struct adapter * adapter, int max_frame)
+{
+ if (adapter->max_frame_size < max_frame)
+ adapter->max_frame_size = max_frame;
+}
+
+static inline u32
+ixgbe_get_mrqc(enum ixgbe_iov_mode mode)
+{
+ u32 mrqc = 0;
+ switch (mode) {
+ case IXGBE_64_VM:
+ mrqc = IXGBE_MRQC_VMDQRSS64EN;
+ break;
+ case IXGBE_32_VM:
+ mrqc = IXGBE_MRQC_VMDQRSS32EN;
+ break;
+ case IXGBE_NO_VM:
+ mrqc = 0;
+ break;
+ default:
+ panic("Unexpected SR-IOV mode %d", mode);
+ }
+ return(mrqc);
+}
+
+
+static inline u32
+ixgbe_get_mtqc(enum ixgbe_iov_mode mode)
+{
+ uint32_t mtqc = 0;
+ switch (mode) {
+ case IXGBE_64_VM:
+ mtqc |= IXGBE_MTQC_64VF | IXGBE_MTQC_VT_ENA;
+ break;
+ case IXGBE_32_VM:
+ mtqc |= IXGBE_MTQC_32VF | IXGBE_MTQC_VT_ENA;
+ break;
+ case IXGBE_NO_VM:
+ mtqc = IXGBE_MTQC_64Q_1PB;
+ break;
+ default:
+ panic("Unexpected SR-IOV mode %d", mode);
+ }
+ return(mtqc);
+}
+#endif /* PCI_IOV */
+
#endif /* _IXGBE_H_ */
diff --git a/sys/dev/ixgbe/ixgbe_mbx.h b/sys/dev/ixgbe/ixgbe_mbx.h
index ea75cbe..a4a78eb 100644
--- a/sys/dev/ixgbe/ixgbe_mbx.h
+++ b/sys/dev/ixgbe/ixgbe_mbx.h
@@ -80,6 +80,21 @@
/* bits 23:16 are used for extra info for certain messages */
#define IXGBE_VT_MSGINFO_MASK (0xFF << IXGBE_VT_MSGINFO_SHIFT)
+/* definitions to support mailbox API version negotiation */
+
+/*
+ * each element denotes a version of the API; existing numbers may not
+ * change; any additions must go at the end
+ */
+enum ixgbe_pfvf_api_rev {
+ ixgbe_mbox_api_10, /* API version 1.0, linux/freebsd VF driver */
+ ixgbe_mbox_api_20, /* API version 2.0, solaris Phase1 VF driver */
+ ixgbe_mbox_api_11, /* API version 1.1, linux/freebsd VF driver */
+ /* This value should always be last */
+ ixgbe_mbox_api_unknown, /* indicates that API version is not known */
+};
+
+/* mailbox API, legacy requests */
#define IXGBE_VF_RESET 0x01 /* VF requests reset */
#define IXGBE_VF_SET_MAC_ADDR 0x02 /* VF requests PF to set MAC addr */
#define IXGBE_VF_SET_MULTICAST 0x03 /* VF requests PF to set MC addr */
@@ -106,6 +121,18 @@
#define IXGBE_PF_CONTROL_MSG 0x0100 /* PF control message */
+/* mailbox API, version 2.0 VF requests */
+#define IXGBE_VF_API_NEGOTIATE 0x08 /* negotiate API version */
+#define IXGBE_VF_GET_QUEUES 0x09 /* get queue configuration */
+#define IXGBE_VF_ENABLE_MACADDR 0x0A /* enable MAC address */
+#define IXGBE_VF_DISABLE_MACADDR 0x0B /* disable MAC address */
+#define IXGBE_VF_GET_MACADDRS 0x0C /* get all configured MAC addrs */
+#define IXGBE_VF_SET_MCAST_PROMISC 0x0D /* enable multicast promiscuous */
+#define IXGBE_VF_GET_MTU 0x0E /* get bounds on MTU */
+#define IXGBE_VF_SET_MTU 0x0F /* set a specific MTU */
+
+/* mailbox API, version 2.0 PF requests */
+#define IXGBE_PF_TRANSPARENT_VLAN 0x0101 /* enable transparent vlan */
#define IXGBE_VF_MBX_INIT_TIMEOUT 2000 /* number of retries on mailbox */
#define IXGBE_VF_MBX_INIT_DELAY 500 /* microseconds between retries */
diff --git a/sys/dev/ixgbe/ixgbe_vf.c b/sys/dev/ixgbe/ixgbe_vf.c
index c010cf4..a00b8be 100644
--- a/sys/dev/ixgbe/ixgbe_vf.c
+++ b/sys/dev/ixgbe/ixgbe_vf.c
@@ -185,6 +185,8 @@ s32 ixgbe_reset_hw_vf(struct ixgbe_hw *hw)
/* Call adapter stop to disable tx/rx and clear interrupts */
hw->mac.ops.stop_adapter(hw);
+ /* reset the api version */
+ hw->api_version = ixgbe_mbox_api_10;
DEBUGOUT("Issuing a function level reset to MAC\n");
@@ -223,6 +225,8 @@ s32 ixgbe_reset_hw_vf(struct ixgbe_hw *hw)
if (ret_val)
return ret_val;
+ msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
+
if (msgbuf[0] != (IXGBE_VF_RESET | IXGBE_VT_MSGTYPE_ACK) &&
msgbuf[0] != (IXGBE_VF_RESET | IXGBE_VT_MSGTYPE_NACK))
return IXGBE_ERR_INVALID_MAC_ADDR;
@@ -666,6 +670,57 @@ int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api)
int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs,
unsigned int *default_tc)
{
- UNREFERENCED_3PARAMETER(hw, num_tcs, default_tc);
- return IXGBE_SUCCESS;
+ int err;
+ u32 msg[5];
+
+ /* do nothing if API doesn't support ixgbevf_get_queues */
+ switch (hw->api_version) {
+ case ixgbe_mbox_api_11:
+ break;
+ default:
+ return 0;
+ }
+
+ /* Fetch queue configuration from the PF */
+ msg[0] = IXGBE_VF_GET_QUEUES;
+ msg[1] = msg[2] = msg[3] = msg[4] = 0;
+ err = hw->mbx.ops.write_posted(hw, msg, 5, 0);
+
+ if (!err)
+ err = hw->mbx.ops.read_posted(hw, msg, 5, 0);
+
+ if (!err) {
+ msg[0] &= ~IXGBE_VT_MSGTYPE_CTS;
+
+ /*
+ * if we we didn't get an ACK there must have been
+ * some sort of mailbox error so we should treat it
+ * as such
+ */
+ if (msg[0] != (IXGBE_VF_GET_QUEUES | IXGBE_VT_MSGTYPE_ACK))
+ return IXGBE_ERR_MBX;
+
+ /* record and validate values from message */
+ hw->mac.max_tx_queues = msg[IXGBE_VF_TX_QUEUES];
+ if (hw->mac.max_tx_queues == 0 ||
+ hw->mac.max_tx_queues > IXGBE_VF_MAX_TX_QUEUES)
+ hw->mac.max_tx_queues = IXGBE_VF_MAX_TX_QUEUES;
+
+ hw->mac.max_rx_queues = msg[IXGBE_VF_RX_QUEUES];
+ if (hw->mac.max_rx_queues == 0 ||
+ hw->mac.max_rx_queues > IXGBE_VF_MAX_RX_QUEUES)
+ hw->mac.max_rx_queues = IXGBE_VF_MAX_RX_QUEUES;
+
+ *num_tcs = msg[IXGBE_VF_TRANS_VLAN];
+ /* in case of unknown state assume we cannot tag frames */
+ if (*num_tcs > hw->mac.max_rx_queues)
+ *num_tcs = 1;
+
+ *default_tc = msg[IXGBE_VF_DEF_QUEUE];
+ /* default to queue 0 on out-of-bounds queue number */
+ if (*default_tc >= hw->mac.max_tx_queues)
+ *default_tc = 0;
+ }
+
+ return err;
}
diff --git a/sys/dev/ixl/if_ixl.c b/sys/dev/ixl/if_ixl.c
index 14d0aeb..b89c64c 100644
--- a/sys/dev/ixl/if_ixl.c
+++ b/sys/dev/ixl/if_ixl.c
@@ -673,9 +673,9 @@ ixl_attach(device_t dev)
}
/* Limit phy interrupts to link and modules failure */
- error = i40e_aq_set_phy_int_mask(hw,
- I40E_AQ_EVENT_LINK_UPDOWN | I40E_AQ_EVENT_MODULE_QUAL_FAIL, NULL);
- if (error)
+ error = i40e_aq_set_phy_int_mask(hw, ~(I40E_AQ_EVENT_LINK_UPDOWN |
+ I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL);
+ if (error)
device_printf(dev, "set phy mask failed: %d\n", error);
/* Get the bus configuration and set the shared code */
diff --git a/sys/dev/netmap/ixgbe_netmap.h b/sys/dev/netmap/ixgbe_netmap.h
index f1f03cb..c055598 100644
--- a/sys/dev/netmap/ixgbe_netmap.h
+++ b/sys/dev/netmap/ixgbe_netmap.h
@@ -26,7 +26,7 @@
/*
* $FreeBSD$
*
- * netmap support for: ixgbe
+ * netmap support for: ixgbe (both ix and ixv)
*
* This file is meant to be a reference on how to implement
* netmap support for a network driver.
@@ -48,6 +48,7 @@
*/
#include <dev/netmap/netmap_kern.h>
+void ixgbe_netmap_attach(struct adapter *adapter);
/*
* device-specific sysctl variables:
@@ -122,10 +123,8 @@ ixgbe_netmap_reg(struct netmap_adapter *na, int onoff)
IXGBE_CORE_LOCK(adapter);
ixgbe_disable_intr(adapter); // XXX maybe ixgbe_stop ?
- /* Tell the stack that the interface is no longer active */
- ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
-
- set_crcstrip(&adapter->hw, onoff);
+ if (!IXGBE_IS_VF(adapter))
+ set_crcstrip(&adapter->hw, onoff);
/* enable or disable flags and callbacks in na and ifp */
if (onoff) {
nm_set_native_flags(na);
@@ -133,7 +132,8 @@ ixgbe_netmap_reg(struct netmap_adapter *na, int onoff)
nm_clear_native_flags(na);
}
ixgbe_init_locked(adapter); /* also enables intr */
- set_crcstrip(&adapter->hw, onoff); // XXX why twice ?
+ if (!IXGBE_IS_VF(adapter))
+ set_crcstrip(&adapter->hw, onoff); // XXX why twice ?
IXGBE_CORE_UNLOCK(adapter);
return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}
@@ -266,7 +266,7 @@ ixgbe_netmap_txsync(struct netmap_kring *kring, int flags)
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/* (re)start the tx unit up to slot nic_i (excluded) */
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), nic_i);
+ IXGBE_WRITE_REG(&adapter->hw, txr->tail, nic_i);
}
/*
@@ -310,7 +310,8 @@ ixgbe_netmap_txsync(struct netmap_kring *kring, int flags)
* REPORT_STATUS in a few slots so TDH is the only
* good way.
*/
- nic_i = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(kring->ring_id));
+ nic_i = IXGBE_READ_REG(&adapter->hw, IXGBE_IS_VF(adapter) ?
+ IXGBE_VFTDH(kring->ring_id) : IXGBE_TDH(kring->ring_id));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
@@ -381,7 +382,7 @@ ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
* rxr->next_to_check is set to 0 on a ring reinit
*/
if (netmap_no_pendintr || force_update) {
- int crclen = ix_crcstrip ? 0 : 4;
+ int crclen = (ix_crcstrip || IXGBE_IS_VF(adapter) ) ? 0 : 4;
uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = rxr->next_to_check; // or also k2n(kring->nr_hwtail)
@@ -455,7 +456,7 @@ ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
* so move nic_i back by one unit
*/
nic_i = nm_prev(nic_i, lim);
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), nic_i);
+ IXGBE_WRITE_REG(&adapter->hw, rxr->tail, nic_i);
}
/* tell userspace that there might be new packets */
@@ -475,7 +476,7 @@ ring_reset:
* netmap mode will be disabled and the driver will only
* operate in standard mode.
*/
-static void
+void
ixgbe_netmap_attach(struct adapter *adapter)
{
struct netmap_adapter na;
diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c
index cef2db4..bf41b8e 100644
--- a/sys/dev/pci/pci.c
+++ b/sys/dev/pci/pci.c
@@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/module.h>
+#include <sys/limits.h>
#include <sys/linker.h>
#include <sys/fcntl.h>
#include <sys/conf.h>
@@ -4924,8 +4925,8 @@ pci_child_location_str_method(device_t dev, device_t child, char *buf,
size_t buflen)
{
- snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
- pci_get_function(child));
+ snprintf(buf, buflen, "pci%d:%d:%d:%d", pci_get_domain(child),
+ pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
return (0);
}
@@ -4955,10 +4956,60 @@ pci_assign_interrupt_method(device_t dev, device_t child)
cfg->intpin));
}
+static void
+pci_lookup(void *arg, const char *name, device_t *dev)
+{
+ long val;
+ char *end;
+ int domain, bus, slot, func;
+
+ if (*dev != NULL)
+ return;
+
+ /*
+ * Accept pciconf-style selectors of either pciD:B:S:F or
+ * pciB:S:F. In the latter case, the domain is assumed to
+ * be zero.
+ */
+ if (strncmp(name, "pci", 3) != 0)
+ return;
+ val = strtol(name + 3, &end, 10);
+ if (val < 0 || val > INT_MAX || *end != ':')
+ return;
+ domain = val;
+ val = strtol(end + 1, &end, 10);
+ if (val < 0 || val > INT_MAX || *end != ':')
+ return;
+ bus = val;
+ val = strtol(end + 1, &end, 10);
+ if (val < 0 || val > INT_MAX)
+ return;
+ slot = val;
+ if (*end == ':') {
+ val = strtol(end + 1, &end, 10);
+ if (val < 0 || val > INT_MAX || *end != '\0')
+ return;
+ func = val;
+ } else if (*end == '\0') {
+ func = slot;
+ slot = bus;
+ bus = domain;
+ domain = 0;
+ } else
+ return;
+
+ if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
+ func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
+ return;
+
+ *dev = pci_find_dbsf(domain, bus, slot, func);
+}
+
static int
pci_modevent(module_t mod, int what, void *arg)
{
static struct cdev *pci_cdev;
+ static eventhandler_tag tag;
switch (what) {
case MOD_LOAD:
@@ -4967,9 +5018,13 @@ pci_modevent(module_t mod, int what, void *arg)
pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
"pci");
pci_load_vendor_data();
+ tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
+ 1000);
break;
case MOD_UNLOAD:
+ if (tag != NULL)
+ EVENTHANDLER_DEREGISTER(dev_lookup, tag);
destroy_dev(pci_cdev);
break;
}
diff --git a/sys/dev/pty/pty.c b/sys/dev/pty/pty.c
index 5e2d822..e38ed69 100644
--- a/sys/dev/pty/pty.c
+++ b/sys/dev/pty/pty.c
@@ -97,6 +97,8 @@ static void
pty_clone(void *arg, struct ucred *cr, char *name, int namelen,
struct cdev **dev)
{
+ struct make_dev_args mda;
+ int error;
/* Cloning is already satisfied. */
if (*dev != NULL)
@@ -117,8 +119,15 @@ pty_clone(void *arg, struct ucred *cr, char *name, int namelen,
return;
/* Create the controller device node. */
- *dev = make_dev_credf(MAKEDEV_REF, &ptydev_cdevsw, 0,
- NULL, UID_ROOT, GID_WHEEL, 0666, "%s", name);
+ make_dev_args_init(&mda);
+ mda.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_REF;
+ mda.mda_devsw = &ptydev_cdevsw;
+ mda.mda_uid = UID_ROOT;
+ mda.mda_gid = GID_WHEEL;
+ mda.mda_mode = 0666;
+ error = make_dev_s(&mda, dev, "%s", name);
+ if (error != 0)
+ *dev = NULL;
}
static int
diff --git a/sys/fs/ext2fs/ext2_inode_cnv.c b/sys/fs/ext2fs/ext2_inode_cnv.c
index b69d4e5..9b1b0ad 100644
--- a/sys/fs/ext2fs/ext2_inode_cnv.c
+++ b/sys/fs/ext2fs/ext2_inode_cnv.c
@@ -149,13 +149,11 @@ ext2_i2ei(struct inode *ip, struct ext2fs_dinode *ei)
ei->e2di_atime = ip->i_atime;
ei->e2di_mtime = ip->i_mtime;
ei->e2di_ctime = ip->i_ctime;
- if (E2DI_HAS_XTIME(ip)) {
- ei->e2di_ctime_extra = NSEC_TO_XTIME(ip->i_ctimensec);
- ei->e2di_mtime_extra = NSEC_TO_XTIME(ip->i_mtimensec);
- ei->e2di_atime_extra = NSEC_TO_XTIME(ip->i_atimensec);
- ei->e2di_crtime = ip->i_birthtime;
- ei->e2di_crtime_extra = NSEC_TO_XTIME(ip->i_birthnsec);
- }
+ ei->e2di_ctime_extra = NSEC_TO_XTIME(ip->i_ctimensec);
+ ei->e2di_mtime_extra = NSEC_TO_XTIME(ip->i_mtimensec);
+ ei->e2di_atime_extra = NSEC_TO_XTIME(ip->i_atimensec);
+ ei->e2di_crtime = ip->i_birthtime;
+ ei->e2di_crtime_extra = NSEC_TO_XTIME(ip->i_birthnsec);
ei->e2di_flags = 0;
ei->e2di_flags |= (ip->i_flags & SF_APPEND) ? EXT2_APPEND: 0;
ei->e2di_flags |= (ip->i_flags & SF_IMMUTABLE) ? EXT2_IMMUTABLE: 0;
diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c
index b0ea4e7..dff0f5c 100644
--- a/sys/i386/isa/npx.c
+++ b/sys/i386/isa/npx.c
@@ -503,11 +503,12 @@ npxinitstate(void *arg __unused)
/*
* The fninit instruction does not modify XMM
- * registers. The fpusave call dumped the garbage
- * contained in the registers after reset to the
- * initial state saved. Clear XMM registers file
- * image to make the startup program state and signal
- * handler XMM register content predictable.
+ * registers or x87 registers (MM/ST). The fpusave
+ * call dumped the garbage contained in the registers
+ * after reset to the initial state saved. Clear XMM
+ * and x87 registers file image to make the startup
+ * program state and signal handler XMM/x87 register
+ * content predictable.
*/
bzero(npx_initialstate->sv_xmm.sv_fp,
sizeof(npx_initialstate->sv_xmm.sv_fp));
diff --git a/sys/kern/kern_conf.c b/sys/kern/kern_conf.c
index d352bda..db0366c 100644
--- a/sys/kern/kern_conf.c
+++ b/sys/kern/kern_conf.c
@@ -566,22 +566,26 @@ notify_destroy(struct cdev *dev)
}
static struct cdev *
-newdev(struct cdevsw *csw, int unit, struct cdev *si)
+newdev(struct make_dev_args *args, struct cdev *si)
{
struct cdev *si2;
+ struct cdevsw *csw;
mtx_assert(&devmtx, MA_OWNED);
+ csw = args->mda_devsw;
if (csw->d_flags & D_NEEDMINOR) {
/* We may want to return an existing device */
LIST_FOREACH(si2, &csw->d_devs, si_list) {
- if (dev2unit(si2) == unit) {
+ if (dev2unit(si2) == args->mda_unit) {
dev_free_devlocked(si);
return (si2);
}
}
}
- si->si_drv0 = unit;
+ si->si_drv0 = args->mda_unit;
si->si_devsw = csw;
+ si->si_drv1 = args->mda_si_drv1;
+ si->si_drv2 = args->mda_si_drv2;
LIST_INSERT_HEAD(&csw->d_devs, si, si_list);
return (si);
}
@@ -737,33 +741,46 @@ prep_devname(struct cdev *dev, const char *fmt, va_list ap)
return (0);
}
+void
+make_dev_args_init_impl(struct make_dev_args *args, size_t sz)
+{
+
+ bzero(args, sz);
+ args->mda_size = sz;
+}
+
static int
-make_dev_credv(int flags, struct cdev **dres, struct cdevsw *devsw, int unit,
- struct ucred *cr, uid_t uid, gid_t gid, int mode, const char *fmt,
- va_list ap)
+make_dev_sv(struct make_dev_args *args1, struct cdev **dres,
+ const char *fmt, va_list ap)
{
struct cdev *dev, *dev_new;
+ struct make_dev_args args;
int res;
- KASSERT((flags & MAKEDEV_WAITOK) == 0 || (flags & MAKEDEV_NOWAIT) == 0,
- ("make_dev_credv: both WAITOK and NOWAIT specified"));
- dev_new = devfs_alloc(flags);
+ bzero(&args, sizeof(args));
+ if (sizeof(args) < args1->mda_size)
+ return (EINVAL);
+ bcopy(args1, &args, args1->mda_size);
+ KASSERT((args.mda_flags & MAKEDEV_WAITOK) == 0 ||
+ (args.mda_flags & MAKEDEV_NOWAIT) == 0,
+ ("make_dev_sv: both WAITOK and NOWAIT specified"));
+ dev_new = devfs_alloc(args.mda_flags);
if (dev_new == NULL)
return (ENOMEM);
dev_lock();
- res = prep_cdevsw(devsw, flags);
+ res = prep_cdevsw(args.mda_devsw, args.mda_flags);
if (res != 0) {
dev_unlock();
devfs_free(dev_new);
return (res);
}
- dev = newdev(devsw, unit, dev_new);
+ dev = newdev(&args, dev_new);
if ((dev->si_flags & SI_NAMED) == 0) {
res = prep_devname(dev, fmt, ap);
if (res != 0) {
- if ((flags & MAKEDEV_CHECKNAME) == 0) {
+ if ((args.mda_flags & MAKEDEV_CHECKNAME) == 0) {
panic(
- "make_dev_credv: bad si_name (error=%d, si_name=%s)",
+ "make_dev_sv: bad si_name (error=%d, si_name=%s)",
res, dev->si_name);
}
if (dev == dev_new) {
@@ -775,9 +792,9 @@ make_dev_credv(int flags, struct cdev **dres, struct cdevsw *devsw, int unit,
return (res);
}
}
- if (flags & MAKEDEV_REF)
+ if ((args.mda_flags & MAKEDEV_REF) != 0)
dev_refl(dev);
- if (flags & MAKEDEV_ETERNAL)
+ if ((args.mda_flags & MAKEDEV_ETERNAL) != 0)
dev->si_flags |= SI_ETERNAL;
if (dev->si_flags & SI_CHEAPCLONE &&
dev->si_flags & SI_NAMED) {
@@ -792,24 +809,55 @@ make_dev_credv(int flags, struct cdev **dres, struct cdevsw *devsw, int unit,
}
KASSERT(!(dev->si_flags & SI_NAMED),
("make_dev() by driver %s on pre-existing device (min=%x, name=%s)",
- devsw->d_name, dev2unit(dev), devtoname(dev)));
+ args.mda_devsw->d_name, dev2unit(dev), devtoname(dev)));
dev->si_flags |= SI_NAMED;
- if (cr != NULL)
- dev->si_cred = crhold(cr);
- dev->si_uid = uid;
- dev->si_gid = gid;
- dev->si_mode = mode;
+ if (args.mda_cr != NULL)
+ dev->si_cred = crhold(args.mda_cr);
+ dev->si_uid = args.mda_uid;
+ dev->si_gid = args.mda_gid;
+ dev->si_mode = args.mda_mode;
devfs_create(dev);
clean_unrhdrl(devfs_inos);
dev_unlock_and_free();
- notify_create(dev, flags);
+ notify_create(dev, args.mda_flags);
*dres = dev;
return (0);
}
+int
+make_dev_s(struct make_dev_args *args, struct cdev **dres,
+ const char *fmt, ...)
+{
+ va_list ap;
+ int res;
+
+ va_start(ap, fmt);
+ res = make_dev_sv(args, dres, fmt, ap);
+ va_end(ap);
+ return (res);
+}
+
+static int
+make_dev_credv(int flags, struct cdev **dres, struct cdevsw *devsw, int unit,
+ struct ucred *cr, uid_t uid, gid_t gid, int mode, const char *fmt,
+ va_list ap)
+{
+ struct make_dev_args args;
+
+ make_dev_args_init(&args);
+ args.mda_flags = flags;
+ args.mda_devsw = devsw;
+ args.mda_cr = cr;
+ args.mda_uid = uid;
+ args.mda_gid = gid;
+ args.mda_mode = mode;
+ args.mda_unit = unit;
+ return (make_dev_sv(&args, dres, fmt, ap));
+}
+
struct cdev *
make_dev(struct cdevsw *devsw, int unit, uid_t uid, gid_t gid, int mode,
const char *fmt, ...)
@@ -1247,6 +1295,7 @@ clone_create(struct clonedevs **cdp, struct cdevsw *csw, int *up,
{
struct clonedevs *cd;
struct cdev *dev, *ndev, *dl, *de;
+ struct make_dev_args args;
int unit, low, u;
KASSERT(*cdp != NULL,
@@ -1298,7 +1347,10 @@ clone_create(struct clonedevs **cdp, struct cdevsw *csw, int *up,
}
if (unit == -1)
unit = low & CLONE_UNITMASK;
- dev = newdev(csw, unit | extra, ndev);
+ make_dev_args_init(&args);
+ args.mda_unit = unit | extra;
+ args.mda_devsw = csw;
+ dev = newdev(&args, ndev);
if (dev->si_flags & SI_CLONELIST) {
printf("dev %p (%s) is on clonelist\n", dev, dev->si_name);
printf("unit=%d, low=%d, extra=0x%x\n", unit, low, extra);
diff --git a/sys/kern/subr_bus.c b/sys/kern/subr_bus.c
index bb6806c..faf4aa0 100644
--- a/sys/kern/subr_bus.c
+++ b/sys/kern/subr_bus.c
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/poll.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/condvar.h>
#include <sys/queue.h>
@@ -147,6 +148,8 @@ struct device {
static MALLOC_DEFINE(M_BUS, "bus", "Bus data structures");
static MALLOC_DEFINE(M_BUS_SC, "bus-sc", "Bus data structures, softc");
+static void devctl2_init(void);
+
#ifdef BUS_DEBUG
static int bus_debug = 1;
@@ -434,6 +437,7 @@ devinit(void)
cv_init(&devsoftc.cv, "dev cv");
TAILQ_INIT(&devsoftc.devq);
knlist_init_mtx(&devsoftc.sel.si_note, &devsoftc.mtx);
+ devctl2_init();
}
static int
@@ -4998,3 +5002,229 @@ bus_free_resource(device_t dev, int type, struct resource *r)
return (0);
return (bus_release_resource(dev, type, rman_get_rid(r), r));
}
+
+/*
+ * /dev/devctl2 implementation. The existing /dev/devctl device has
+ * implicit semantics on open, so it could not be reused for this.
+ * Another option would be to call this /dev/bus?
+ */
+static int
+find_device(struct devreq *req, device_t *devp)
+{
+ device_t dev;
+
+ /*
+ * First, ensure that the name is nul terminated.
+ */
+ if (memchr(req->dr_name, '\0', sizeof(req->dr_name)) == NULL)
+ return (EINVAL);
+
+ /*
+ * Second, try to find an attached device whose name matches
+ * 'name'.
+ */
+ TAILQ_FOREACH(dev, &bus_data_devices, devlink) {
+ if (dev->nameunit != NULL &&
+ strcmp(dev->nameunit, req->dr_name) == 0) {
+ *devp = dev;
+ return (0);
+ }
+ }
+
+ /* Finally, give device enumerators a chance. */
+ dev = NULL;
+ EVENTHANDLER_INVOKE(dev_lookup, req->dr_name, &dev);
+ if (dev == NULL)
+ return (ENOENT);
+ *devp = dev;
+ return (0);
+}
+
+static bool
+driver_exists(struct device *bus, const char *driver)
+{
+ devclass_t dc;
+
+ for (dc = bus->devclass; dc != NULL; dc = dc->parent) {
+ if (devclass_find_driver_internal(dc, driver) != NULL)
+ return (true);
+ }
+ return (false);
+}
+
+static int
+devctl2_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
+ struct thread *td)
+{
+ struct devreq *req;
+ device_t dev;
+ int error, old;
+
+ /* Locate the device to control. */
+ mtx_lock(&Giant);
+ req = (struct devreq *)data;
+ switch (cmd) {
+ case DEV_ATTACH:
+ case DEV_DETACH:
+ case DEV_ENABLE:
+ case DEV_DISABLE:
+ case DEV_SET_DRIVER:
+ error = priv_check(td, PRIV_DRIVER);
+ if (error == 0)
+ error = find_device(req, &dev);
+ break;
+ default:
+ error = ENOTTY;
+ break;
+ }
+ if (error) {
+ mtx_unlock(&Giant);
+ return (error);
+ }
+
+ /* Perform the requested operation. */
+ switch (cmd) {
+ case DEV_ATTACH:
+ if (device_is_attached(dev) && (dev->flags & DF_REBID) == 0)
+ error = EBUSY;
+ else if (!device_is_enabled(dev))
+ error = ENXIO;
+ else
+ error = device_probe_and_attach(dev);
+ break;
+ case DEV_DETACH:
+ if (!device_is_attached(dev)) {
+ error = ENXIO;
+ break;
+ }
+ if (!(req->dr_flags & DEVF_FORCE_DETACH)) {
+ error = device_quiesce(dev);
+ if (error)
+ break;
+ }
+ error = device_detach(dev);
+ break;
+ case DEV_ENABLE:
+ if (device_is_enabled(dev)) {
+ error = EBUSY;
+ break;
+ }
+
+ /*
+ * If the device has been probed but not attached (e.g.
+ * when it has been disabled by a loader hint), just
+ * attach the device rather than doing a full probe.
+ */
+ device_enable(dev);
+ if (device_is_alive(dev)) {
+ /*
+ * If the device was disabled via a hint, clear
+ * the hint.
+ */
+ if (resource_disabled(dev->driver->name, dev->unit))
+ resource_unset_value(dev->driver->name,
+ dev->unit, "disabled");
+ error = device_attach(dev);
+ } else
+ error = device_probe_and_attach(dev);
+ break;
+ case DEV_DISABLE:
+ if (!device_is_enabled(dev)) {
+ error = ENXIO;
+ break;
+ }
+
+ if (!(req->dr_flags & DEVF_FORCE_DETACH)) {
+ error = device_quiesce(dev);
+ if (error)
+ break;
+ }
+
+ /*
+ * Force DF_FIXEDCLASS on around detach to preserve
+ * the existing name.
+ */
+ old = dev->flags;
+ dev->flags |= DF_FIXEDCLASS;
+ error = device_detach(dev);
+ if (!(old & DF_FIXEDCLASS))
+ dev->flags &= ~DF_FIXEDCLASS;
+ if (error == 0)
+ device_disable(dev);
+ break;
+ case DEV_SET_DRIVER: {
+ devclass_t dc;
+ char driver[128];
+
+ error = copyinstr(req->dr_data, driver, sizeof(driver), NULL);
+ if (error)
+ break;
+ if (driver[0] == '\0') {
+ error = EINVAL;
+ break;
+ }
+ if (dev->devclass != NULL &&
+ strcmp(driver, dev->devclass->name) == 0)
+ /* XXX: Could possibly force DF_FIXEDCLASS on? */
+ break;
+
+ /*
+ * Scan drivers for this device's bus looking for at
+ * least one matching driver.
+ */
+ if (dev->parent == NULL) {
+ error = EINVAL;
+ break;
+ }
+ if (!driver_exists(dev->parent, driver)) {
+ error = ENOENT;
+ break;
+ }
+ dc = devclass_create(driver);
+ if (dc == NULL) {
+ error = ENOMEM;
+ break;
+ }
+
+ /* Detach device if necessary. */
+ if (device_is_attached(dev)) {
+ if (req->dr_flags & DEVF_SET_DRIVER_DETACH)
+ error = device_detach(dev);
+ else
+ error = EBUSY;
+ if (error)
+ break;
+ }
+
+ /* Clear any previously-fixed device class and unit. */
+ if (dev->flags & DF_FIXEDCLASS)
+ devclass_delete_device(dev->devclass, dev);
+ dev->flags |= DF_WILDCARD;
+ dev->unit = -1;
+
+ /* Force the new device class. */
+ error = devclass_add_device(dc, dev);
+ if (error)
+ break;
+ dev->flags |= DF_FIXEDCLASS;
+ error = device_probe_and_attach(dev);
+ break;
+ }
+ }
+ mtx_unlock(&Giant);
+ return (error);
+}
+
+static struct cdevsw devctl2_cdevsw = {
+ .d_version = D_VERSION,
+ .d_ioctl = devctl2_ioctl,
+ .d_name = "devctl2",
+};
+
+static void
+devctl2_init(void)
+{
+
+ make_dev_credf(MAKEDEV_ETERNAL, &devctl2_cdevsw, 0, NULL,
+ UID_ROOT, GID_WHEEL, 0600, "devctl2");
+}
diff --git a/sys/kern/subr_hints.c b/sys/kern/subr_hints.c
index f9085b2..08f8e8c 100644
--- a/sys/kern/subr_hints.c
+++ b/sys/kern/subr_hints.c
@@ -461,3 +461,31 @@ resource_disabled(const char *name, int unit)
return (0);
return (value);
}
+
+/*
+ * Clear a value associated with a device by removing it from
+ * the kernel environment. This only removes a hint for an
+ * exact unit.
+ */
+int
+resource_unset_value(const char *name, int unit, const char *resname)
+{
+ char varname[128];
+ const char *retname, *retvalue;
+ int error, line;
+ size_t len;
+
+ line = 0;
+ error = resource_find(&line, NULL, name, &unit, resname, NULL,
+ &retname, NULL, NULL, NULL, NULL, &retvalue);
+ if (error)
+ return (error);
+
+ retname -= strlen("hint.");
+ len = retvalue - retname - 1;
+ if (len > sizeof(varname) - 1)
+ return (ENAMETOOLONG);
+ memcpy(varname, retname, len);
+ varname[len] = '\0';
+ return (unsetenv(varname));
+}
diff --git a/sys/kern/tty.c b/sys/kern/tty.c
index 86e3a3e..a5a6690 100644
--- a/sys/kern/tty.c
+++ b/sys/kern/tty.c
@@ -240,14 +240,10 @@ ttydev_open(struct cdev *dev, int oflags, int devtype __unused,
struct thread *td)
{
struct tty *tp;
- int error = 0;
-
- while ((tp = dev->si_drv1) == NULL) {
- error = tsleep(&dev->si_drv1, PCATCH, "ttdrv1", 1);
- if (error != EWOULDBLOCK)
- return (error);
- }
+ int error;
+ tp = dev->si_drv1;
+ error = 0;
tty_lock(tp);
if (tty_gone(tp)) {
/* Device is already gone. */
@@ -762,13 +758,10 @@ ttyil_open(struct cdev *dev, int oflags __unused, int devtype __unused,
struct thread *td)
{
struct tty *tp;
- int error = 0;
+ int error;
- while ((tp = dev->si_drv1) == NULL) {
- error = tsleep(&dev->si_drv1, PCATCH, "ttdrv1", 1);
- if (error != EWOULDBLOCK)
- return (error);
- }
+ tp = dev->si_drv1;
+ error = 0;
tty_lock(tp);
if (tty_gone(tp))
error = ENODEV;
@@ -1218,6 +1211,7 @@ static int
tty_vmakedevf(struct tty *tp, struct ucred *cred, int flags,
const char *fmt, va_list ap)
{
+ struct make_dev_args args;
struct cdev *dev, *init, *lock, *cua, *cinit, *clock;
const char *prefix = "tty";
char name[SPECNAMELEN - 3]; /* for "tty" and "cua". */
@@ -1248,71 +1242,72 @@ tty_vmakedevf(struct tty *tp, struct ucred *cred, int flags,
flags |= MAKEDEV_CHECKNAME;
/* Master call-in device. */
- error = make_dev_p(flags, &dev, &ttydev_cdevsw, cred, uid, gid, mode,
- "%s%s", prefix, name);
- if (error)
+ make_dev_args_init(&args);
+ args.mda_flags = flags;
+ args.mda_devsw = &ttydev_cdevsw;
+ args.mda_cr = cred;
+ args.mda_uid = uid;
+ args.mda_gid = gid;
+ args.mda_mode = mode;
+ args.mda_si_drv1 = tp;
+ error = make_dev_s(&args, &dev, "%s%s", prefix, name);
+ if (error != 0)
return (error);
- dev->si_drv1 = tp;
- wakeup(&dev->si_drv1);
tp->t_dev = dev;
init = lock = cua = cinit = clock = NULL;
/* Slave call-in devices. */
if (tp->t_flags & TF_INITLOCK) {
- error = make_dev_p(flags, &init, &ttyil_cdevsw, cred, uid,
- gid, mode, "%s%s.init", prefix, name);
- if (error)
+ args.mda_devsw = &ttyil_cdevsw;
+ args.mda_unit = TTYUNIT_INIT;
+ args.mda_si_drv1 = tp;
+ args.mda_si_drv2 = &tp->t_termios_init_in;
+ error = make_dev_s(&args, &init, "%s%s.init", prefix, name);
+ if (error != 0)
goto fail;
dev_depends(dev, init);
- dev2unit(init) = TTYUNIT_INIT;
- init->si_drv1 = tp;
- wakeup(&init->si_drv1);
- init->si_drv2 = &tp->t_termios_init_in;
- error = make_dev_p(flags, &lock, &ttyil_cdevsw, cred, uid,
- gid, mode, "%s%s.lock", prefix, name);
- if (error)
+ args.mda_unit = TTYUNIT_LOCK;
+ args.mda_si_drv2 = &tp->t_termios_lock_in;
+ error = make_dev_s(&args, &lock, "%s%s.lock", prefix, name);
+ if (error != 0)
goto fail;
dev_depends(dev, lock);
- dev2unit(lock) = TTYUNIT_LOCK;
- lock->si_drv1 = tp;
- wakeup(&lock->si_drv1);
- lock->si_drv2 = &tp->t_termios_lock_in;
}
/* Call-out devices. */
if (tp->t_flags & TF_CALLOUT) {
- error = make_dev_p(flags, &cua, &ttydev_cdevsw, cred,
- UID_UUCP, GID_DIALER, 0660, "cua%s", name);
- if (error)
+ make_dev_args_init(&args);
+ args.mda_flags = flags;
+ args.mda_devsw = &ttydev_cdevsw;
+ args.mda_cr = cred;
+ args.mda_uid = UID_UUCP;
+ args.mda_gid = GID_DIALER;
+ args.mda_mode = 0660;
+ args.mda_unit = TTYUNIT_CALLOUT;
+ args.mda_si_drv1 = tp;
+ error = make_dev_s(&args, &cua, "cua%s", name);
+ if (error != 0)
goto fail;
dev_depends(dev, cua);
- dev2unit(cua) = TTYUNIT_CALLOUT;
- cua->si_drv1 = tp;
- wakeup(&cua->si_drv1);
/* Slave call-out devices. */
if (tp->t_flags & TF_INITLOCK) {
- error = make_dev_p(flags, &cinit, &ttyil_cdevsw, cred,
- UID_UUCP, GID_DIALER, 0660, "cua%s.init", name);
- if (error)
+ args.mda_devsw = &ttyil_cdevsw;
+ args.mda_unit = TTYUNIT_CALLOUT | TTYUNIT_INIT;
+ args.mda_si_drv2 = &tp->t_termios_init_out;
+ error = make_dev_s(&args, &cinit, "cua%s.init", name);
+ if (error != 0)
goto fail;
dev_depends(dev, cinit);
- dev2unit(cinit) = TTYUNIT_CALLOUT | TTYUNIT_INIT;
- cinit->si_drv1 = tp;
- wakeup(&cinit->si_drv1);
- cinit->si_drv2 = &tp->t_termios_init_out;
- error = make_dev_p(flags, &clock, &ttyil_cdevsw, cred,
- UID_UUCP, GID_DIALER, 0660, "cua%s.lock", name);
- if (error)
+ args.mda_unit = TTYUNIT_CALLOUT | TTYUNIT_LOCK;
+ args.mda_si_drv2 = &tp->t_termios_lock_out;
+ error = make_dev_s(&args, &clock, "cua%s.lock", name);
+ if (error != 0)
goto fail;
dev_depends(dev, clock);
- dev2unit(clock) = TTYUNIT_CALLOUT | TTYUNIT_LOCK;
- clock->si_drv1 = tp;
- wakeup(&clock->si_drv1);
- clock->si_drv2 = &tp->t_termios_lock_out;
}
}
diff --git a/sys/mips/include/asm.h b/sys/mips/include/asm.h
index 3b0c49a..39ec60c 100644
--- a/sys/mips/include/asm.h
+++ b/sys/mips/include/asm.h
@@ -706,20 +706,21 @@ _C_LABEL(x):
/* Only valid with the _JB_MAGIC_SETJMP magic */
#define _JB_SIGMASK 13
-
-#define _JB_FPREG_F20 14
-#define _JB_FPREG_F21 15
-#define _JB_FPREG_F22 16
-#define _JB_FPREG_F23 17
-#define _JB_FPREG_F24 18
-#define _JB_FPREG_F25 19
-#define _JB_FPREG_F26 20
-#define _JB_FPREG_F27 21
-#define _JB_FPREG_F28 22
-#define _JB_FPREG_F29 23
-#define _JB_FPREG_F30 24
-#define _JB_FPREG_F31 25
-#define _JB_FPREG_FCSR 26
+#define __JB_SIGMASK_REMAINDER 14 /* sigmask_t is 128-bits */
+
+#define _JB_FPREG_F20 15
+#define _JB_FPREG_F21 16
+#define _JB_FPREG_F22 17
+#define _JB_FPREG_F23 18
+#define _JB_FPREG_F24 19
+#define _JB_FPREG_F25 20
+#define _JB_FPREG_F26 21
+#define _JB_FPREG_F27 22
+#define _JB_FPREG_F28 23
+#define _JB_FPREG_F29 24
+#define _JB_FPREG_F30 25
+#define _JB_FPREG_F31 26
+#define _JB_FPREG_FCSR 27
/*
* Various macros for dealing with TLB hazards
diff --git a/sys/netinet/sctp_input.c b/sys/netinet/sctp_input.c
index 3b50590..4c1a98c 100644
--- a/sys/netinet/sctp_input.c
+++ b/sys/netinet/sctp_input.c
@@ -85,7 +85,7 @@ static void
sctp_handle_init(struct mbuf *m, int iphlen, int offset,
struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh,
struct sctp_init_chunk *cp, struct sctp_inpcb *inp,
- struct sctp_tcb *stcb, int *abort_no_unlock,
+ struct sctp_tcb *stcb, struct sctp_nets *net, int *abort_no_unlock,
uint8_t mflowtype, uint32_t mflowid,
uint32_t vrf_id, uint16_t port)
{
@@ -198,8 +198,8 @@ sctp_handle_init(struct mbuf *m, int iphlen, int offset,
sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED);
} else {
SCTPDBG(SCTP_DEBUG_INPUT3, "sctp_handle_init: sending INIT-ACK\n");
- sctp_send_initiate_ack(inp, stcb, m, iphlen, offset, src, dst,
- sh, cp,
+ sctp_send_initiate_ack(inp, stcb, net, m, iphlen, offset,
+ src, dst, sh, cp,
mflowtype, mflowid,
vrf_id, port,
((stcb == NULL) ? SCTP_HOLDS_LOCK : SCTP_NOT_LOCKED));
@@ -4840,7 +4840,7 @@ process_control_chunks:
}
sctp_handle_init(m, iphlen, *offset, src, dst, sh,
(struct sctp_init_chunk *)ch, inp,
- stcb, &abort_no_unlock,
+ stcb, *netp, &abort_no_unlock,
mflowtype, mflowid,
vrf_id, port);
*offset = length;
@@ -5684,9 +5684,18 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
stcb = sctp_findassociation_addr(m, offset, src, dst,
sh, ch, &inp, &net, vrf_id);
#if defined(INET) || defined(INET6)
- if ((net != NULL) && (port != 0)) {
+ if ((ch->chunk_type != SCTP_INITIATION) &&
+ (net != NULL) && (net->port != port)) {
if (net->port == 0) {
- sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr));
+ /* UDP encapsulation turned on. */
+ net->mtu -= sizeof(struct udphdr);
+ if (stcb->asoc.smallest_mtu > net->mtu) {
+ sctp_pathmtu_adjustment(stcb, net->mtu);
+ }
+ } else if (port == 0) {
+ /* UDP encapsulation turned off. */
+ net->mtu += sizeof(struct udphdr);
+ /* XXX Update smallest_mtu */
}
net->port = port;
}
@@ -5715,9 +5724,18 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
stcb = sctp_findassociation_addr(m, offset, src, dst,
sh, ch, &inp, &net, vrf_id);
#if defined(INET) || defined(INET6)
- if ((net != NULL) && (port != 0)) {
+ if ((ch->chunk_type != SCTP_INITIATION) &&
+ (net != NULL) && (net->port != port)) {
if (net->port == 0) {
- sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr));
+ /* UDP encapsulation turned on. */
+ net->mtu -= sizeof(struct udphdr);
+ if (stcb->asoc.smallest_mtu > net->mtu) {
+ sctp_pathmtu_adjustment(stcb, net->mtu);
+ }
+ } else if (port == 0) {
+ /* UDP encapsulation turned off. */
+ net->mtu += sizeof(struct udphdr);
+ /* XXX Update smallest_mtu */
}
net->port = port;
}
@@ -5827,9 +5845,18 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt
*/
inp = stcb->sctp_ep;
#if defined(INET) || defined(INET6)
- if ((net != NULL) && (port != 0)) {
+ if ((ch->chunk_type != SCTP_INITIATION) &&
+ (net != NULL) && (net->port != port)) {
if (net->port == 0) {
- sctp_pathmtu_adjustment(stcb, net->mtu - sizeof(struct udphdr));
+ /* UDP encapsulation turned on. */
+ net->mtu -= sizeof(struct udphdr);
+ if (stcb->asoc.smallest_mtu > net->mtu) {
+ sctp_pathmtu_adjustment(stcb, net->mtu);
+ }
+ } else if (port == 0) {
+ /* UDP encapsulation turned off. */
+ net->mtu += sizeof(struct udphdr);
+ /* XXX Update smallest_mtu */
}
net->port = port;
}
diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c
index 5328f49..1c16aaa 100644
--- a/sys/netinet/sctp_output.c
+++ b/sys/netinet/sctp_output.c
@@ -5307,6 +5307,7 @@ sctp_are_there_new_addresses(struct sctp_association *asoc,
uint16_t ptype, plen;
uint8_t fnd;
struct sctp_nets *net;
+ int check_src;
#ifdef INET
struct sockaddr_in sin4, *sa4;
@@ -5328,39 +5329,61 @@ sctp_are_there_new_addresses(struct sctp_association *asoc,
sin6.sin6_len = sizeof(sin6);
#endif
/* First what about the src address of the pkt ? */
- fnd = 0;
- TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
- sa = (struct sockaddr *)&net->ro._l_addr;
- if (sa->sa_family == src->sa_family) {
+ check_src = 0;
+ switch (src->sa_family) {
#ifdef INET
- if (sa->sa_family == AF_INET) {
- struct sockaddr_in *src4;
+ case AF_INET:
+ if (asoc->scope.ipv4_addr_legal) {
+ check_src = 1;
+ }
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ if (asoc->scope.ipv6_addr_legal) {
+ check_src = 1;
+ }
+ break;
+#endif
+ default:
+ /* TSNH */
+ break;
+ }
+ if (check_src) {
+ fnd = 0;
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ sa = (struct sockaddr *)&net->ro._l_addr;
+ if (sa->sa_family == src->sa_family) {
+#ifdef INET
+ if (sa->sa_family == AF_INET) {
+ struct sockaddr_in *src4;
- sa4 = (struct sockaddr_in *)sa;
- src4 = (struct sockaddr_in *)src;
- if (sa4->sin_addr.s_addr == src4->sin_addr.s_addr) {
- fnd = 1;
- break;
+ sa4 = (struct sockaddr_in *)sa;
+ src4 = (struct sockaddr_in *)src;
+ if (sa4->sin_addr.s_addr == src4->sin_addr.s_addr) {
+ fnd = 1;
+ break;
+ }
}
- }
#endif
#ifdef INET6
- if (sa->sa_family == AF_INET6) {
- struct sockaddr_in6 *src6;
+ if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *src6;
- sa6 = (struct sockaddr_in6 *)sa;
- src6 = (struct sockaddr_in6 *)src;
- if (SCTP6_ARE_ADDR_EQUAL(sa6, src6)) {
- fnd = 1;
- break;
+ sa6 = (struct sockaddr_in6 *)sa;
+ src6 = (struct sockaddr_in6 *)src;
+ if (SCTP6_ARE_ADDR_EQUAL(sa6, src6)) {
+ fnd = 1;
+ break;
+ }
}
- }
#endif
+ }
+ }
+ if (fnd == 0) {
+ /* New address added! no need to look futher. */
+ return (1);
}
- }
- if (fnd == 0) {
- /* New address added! no need to look futher. */
- return (1);
}
/* Ok so far lets munge through the rest of the packet */
offset += sizeof(struct sctp_init_chunk);
@@ -5381,9 +5404,11 @@ sctp_are_there_new_addresses(struct sctp_association *asoc,
phdr == NULL) {
return (1);
}
- p4 = (struct sctp_ipv4addr_param *)phdr;
- sin4.sin_addr.s_addr = p4->addr;
- sa_touse = (struct sockaddr *)&sin4;
+ if (asoc->scope.ipv4_addr_legal) {
+ p4 = (struct sctp_ipv4addr_param *)phdr;
+ sin4.sin_addr.s_addr = p4->addr;
+ sa_touse = (struct sockaddr *)&sin4;
+ }
break;
}
#endif
@@ -5398,10 +5423,12 @@ sctp_are_there_new_addresses(struct sctp_association *asoc,
phdr == NULL) {
return (1);
}
- p6 = (struct sctp_ipv6addr_param *)phdr;
- memcpy((caddr_t)&sin6.sin6_addr, p6->addr,
- sizeof(p6->addr));
- sa_touse = (struct sockaddr *)&sin6;
+ if (asoc->scope.ipv6_addr_legal) {
+ p6 = (struct sctp_ipv6addr_param *)phdr;
+ memcpy((caddr_t)&sin6.sin6_addr, p6->addr,
+ sizeof(p6->addr));
+ sa_touse = (struct sockaddr *)&sin6;
+ }
break;
}
#endif
@@ -5457,7 +5484,8 @@ sctp_are_there_new_addresses(struct sctp_association *asoc,
*/
void
sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
- struct mbuf *init_pkt, int iphlen, int offset,
+ struct sctp_nets *src_net, struct mbuf *init_pkt,
+ int iphlen, int offset,
struct sockaddr *src, struct sockaddr *dst,
struct sctphdr *sh, struct sctp_init_chunk *init_chk,
uint8_t mflowtype, uint32_t mflowid,
@@ -5501,20 +5529,39 @@ sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
asoc = NULL;
}
if ((asoc != NULL) &&
- (SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT) &&
- (sctp_are_there_new_addresses(asoc, init_pkt, offset, src))) {
- /* new addresses, out of here in non-cookie-wait states */
- /*
- * Send a ABORT, we don't add the new address error clause
- * though we even set the T bit and copy in the 0 tag.. this
- * looks no different than if no listener was present.
- */
- op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
- "Address added");
- sctp_send_abort(init_pkt, iphlen, src, dst, sh, 0, op_err,
- mflowtype, mflowid, inp->fibnum,
- vrf_id, port);
- return;
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT)) {
+ if (sctp_are_there_new_addresses(asoc, init_pkt, offset, src)) {
+ /*
+ * new addresses, out of here in non-cookie-wait
+ * states
+ *
+ * Send an ABORT, without the new address error cause.
+ * This looks no different than if no listener was
+ * present.
+ */
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ "Address added");
+ sctp_send_abort(init_pkt, iphlen, src, dst, sh, 0, op_err,
+ mflowtype, mflowid, inp->fibnum,
+ vrf_id, port);
+ return;
+ }
+ if (src_net != NULL && (src_net->port != port)) {
+ /*
+ * change of remote encapsulation port, out of here
+ * in non-cookie-wait states
+ *
+ * Send an ABORT, without an specific error cause. This
+ * looks no different than if no listener was
+ * present.
+ */
+ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
+ "Remote encapsulation port changed");
+ sctp_send_abort(init_pkt, iphlen, src, dst, sh, 0, op_err,
+ mflowtype, mflowid, inp->fibnum,
+ vrf_id, port);
+ return;
+ }
}
abort_flag = 0;
op_err = sctp_arethere_unrecognized_parameters(init_pkt,
diff --git a/sys/netinet/sctp_output.h b/sys/netinet/sctp_output.h
index d7222c4..b2441a6 100644
--- a/sys/netinet/sctp_output.h
+++ b/sys/netinet/sctp_output.h
@@ -80,7 +80,8 @@ sctp_send_initiate(struct sctp_inpcb *, struct sctp_tcb *, int
);
void
-sctp_send_initiate_ack(struct sctp_inpcb *, struct sctp_tcb *, struct mbuf *,
+sctp_send_initiate_ack(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *, struct mbuf *,
int, int,
struct sockaddr *, struct sockaddr *,
struct sctphdr *, struct sctp_init_chunk *,
diff --git a/sys/netinet/sctp_pcb.c b/sys/netinet/sctp_pcb.c
index 39872e4..385bdf0 100644
--- a/sys/netinet/sctp_pcb.c
+++ b/sys/netinet/sctp_pcb.c
@@ -2256,7 +2256,6 @@ sctp_findassociation_addr(struct mbuf *m, int offset,
struct sctphdr *sh, struct sctp_chunkhdr *ch,
struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id)
{
- int find_tcp_pool;
struct sctp_tcb *stcb;
struct sctp_inpcb *inp;
@@ -2268,25 +2267,13 @@ sctp_findassociation_addr(struct mbuf *m, int offset,
return (stcb);
}
}
- find_tcp_pool = 0;
- /*
- * Don't consider INIT chunks since that breaks 1-to-1 sockets: When
- * a server closes the listener, incoming INIT chunks are not
- * responsed by an INIT-ACK chunk.
- */
- if ((ch->chunk_type != SCTP_INITIATION_ACK) &&
- (ch->chunk_type != SCTP_COOKIE_ACK) &&
- (ch->chunk_type != SCTP_COOKIE_ECHO)) {
- /* Other chunk types go to the tcp pool. */
- find_tcp_pool = 1;
- }
if (inp_p) {
stcb = sctp_findassociation_addr_sa(src, dst, inp_p, netp,
- find_tcp_pool, vrf_id);
+ 1, vrf_id);
inp = *inp_p;
} else {
stcb = sctp_findassociation_addr_sa(src, dst, &inp, netp,
- find_tcp_pool, vrf_id);
+ 1, vrf_id);
}
SCTPDBG(SCTP_DEBUG_PCB1, "stcb:%p inp:%p\n", (void *)stcb, (void *)inp);
if (stcb == NULL && inp) {
diff --git a/sys/netinet/sctp_var.h b/sys/netinet/sctp_var.h
index 7f4cabf..86414f2 100644
--- a/sys/netinet/sctp_var.h
+++ b/sys/netinet/sctp_var.h
@@ -86,7 +86,7 @@ extern struct pr_usrreqs sctp_usrreqs;
#define sctp_sbspace_failedmsgs(sb) ((long) ((sctp_maxspace(sb) > (sb)->sb_cc) ? (sctp_maxspace(sb) - (sb)->sb_cc) : 0))
-#define sctp_sbspace_sub(a,b) ((a > b) ? (a - b) : 0)
+#define sctp_sbspace_sub(a,b) (((a) > (b)) ? ((a) - (b)) : 0)
/*
* I tried to cache the readq entries at one point. But the reality
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index 2ca0fb5..4c4a605 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -1608,7 +1608,7 @@ tcp_setpersist(struct tcpcb *tp)
* Start/restart persistance timer.
*/
TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift],
- TCPTV_PERSMIN, TCPTV_PERSMAX);
+ tcp_persmin, tcp_persmax);
tcp_timer_activate(tp, TT_PERSIST, tt);
if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
tp->t_rxtshift++;
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index c0add82..9fe6eac 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -400,6 +400,8 @@ tcp_init(void)
tcp_rexmit_min = TCPTV_MIN;
if (tcp_rexmit_min < 1)
tcp_rexmit_min = 1;
+ tcp_persmin = TCPTV_PERSMIN;
+ tcp_persmax = TCPTV_PERSMAX;
tcp_rexmit_slop = TCPTV_CPU_VAR;
tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT;
tcp_tcbhashsize = hashsize;
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
index c66cb76..d721039 100644
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -71,6 +71,14 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_debug.h>
#endif
+int tcp_persmin;
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmin, CTLTYPE_INT|CTLFLAG_RW,
+ &tcp_persmin, 0, sysctl_msec_to_ticks, "I", "minimum persistence interval");
+
+int tcp_persmax;
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmax, CTLTYPE_INT|CTLFLAG_RW,
+ &tcp_persmax, 0, sysctl_msec_to_ticks, "I", "maximum persistence interval");
+
int tcp_keepinit;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
&tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
diff --git a/sys/netinet/tcp_timer.h b/sys/netinet/tcp_timer.h
index dbb8aee..7e5ee70 100644
--- a/sys/netinet/tcp_timer.h
+++ b/sys/netinet/tcp_timer.h
@@ -78,7 +78,7 @@
#define TCPTV_RTOBASE ( 3*hz) /* assumed RTO if no info */
#define TCPTV_SRTTDFLT ( 3*hz) /* assumed RTT if no info */
-#define TCPTV_PERSMIN ( 5*hz) /* retransmit persistence */
+#define TCPTV_PERSMIN ( 5*hz) /* minimum persist interval */
#define TCPTV_PERSMAX ( 60*hz) /* maximum persist interval */
#define TCPTV_KEEP_INIT ( 75*hz) /* initial connect keepalive */
@@ -174,6 +174,8 @@ struct tcp_timer {
#define TP_KEEPCNT(tp) ((tp)->t_keepcnt ? (tp)->t_keepcnt : tcp_keepcnt)
#define TP_MAXIDLE(tp) (TP_KEEPCNT(tp) * TP_KEEPINTVL(tp))
+extern int tcp_persmin; /* minimum persist interval */
+extern int tcp_persmax; /* maximum persist interval */
extern int tcp_keepinit; /* time to establish connection */
extern int tcp_keepidle; /* time before keepalive probes begin */
extern int tcp_keepintvl; /* time between keepalive probes */
diff --git a/sys/sys/ata.h b/sys/sys/ata.h
index 863f0e8..272b46a 100644
--- a/sys/sys/ata.h
+++ b/sys/sys/ata.h
@@ -399,6 +399,7 @@ struct ata_params {
#define ATA_IDLE_CMD 0xe3 /* idle */
#define ATA_READ_BUFFER 0xe4 /* read buffer */
#define ATA_READ_PM 0xe4 /* read portmultiplier */
+#define ATA_CHECK_POWER_MODE 0xe5 /* device power mode */
#define ATA_SLEEP 0xe6 /* sleep */
#define ATA_FLUSHCACHE 0xe7 /* flush cache to disk */
#define ATA_WRITE_PM 0xe8 /* write portmultiplier */
diff --git a/sys/sys/bus.h b/sys/sys/bus.h
index fd858ca..4ab8249 100644
--- a/sys/sys/bus.h
+++ b/sys/sys/bus.h
@@ -31,6 +31,7 @@
#include <machine/_limits.h>
#include <sys/_bus_dma.h>
+#include <sys/ioccom.h>
/**
* @defgroup NEWBUS newbus - a generic framework for managing devices
@@ -75,9 +76,43 @@ struct u_device {
/* XXX more driver info? */
};
+/**
+ * @brief Device request structure used for ioctl's.
+ *
+ * Used for ioctl's on /dev/devctl2. All device ioctl's
+ * must have parameter definitions which begin with dr_name.
+ */
+struct devreq_buffer {
+ void *buffer;
+ size_t length;
+};
+
+struct devreq {
+ char dr_name[128];
+ int dr_flags; /* request-specific flags */
+ union {
+ struct devreq_buffer dru_buffer;
+ void *dru_data;
+ } dr_dru;
+#define dr_buffer dr_dru.dru_buffer /* variable-sized buffer */
+#define dr_data dr_dru.dru_data /* fixed-size buffer */
+};
+
+#define DEV_ATTACH _IOW('D', 1, struct devreq)
+#define DEV_DETACH _IOW('D', 2, struct devreq)
+#define DEV_ENABLE _IOW('D', 3, struct devreq)
+#define DEV_DISABLE _IOW('D', 4, struct devreq)
+#define DEV_SET_DRIVER _IOW('D', 7, struct devreq)
+
+/* Flags for DEV_DETACH and DEV_DISABLE. */
+#define DEVF_FORCE_DETACH 0x0000001
+
+/* Flags for DEV_SET_DRIVER. */
+#define DEVF_SET_DRIVER_DETACH 0x0000001 /* Detach existing driver. */
+
#ifdef _KERNEL
-#include <sys/queue.h>
+#include <sys/eventhandler.h>
#include <sys/kobj.h>
/**
@@ -94,6 +129,14 @@ void devctl_queue_data_f(char *__data, int __flags);
void devctl_queue_data(char *__data);
/**
+ * Device name parsers. Hook to allow device enumerators to map
+ * scheme-specific names to a device.
+ */
+typedef void (*dev_lookup_fn)(void *arg, const char *name,
+ device_t *result);
+EVENTHANDLER_DECLARE(dev_lookup, dev_lookup_fn);
+
+/**
* @brief A device driver (included mainly for compatibility with
* FreeBSD 4.x).
*/
@@ -454,6 +497,7 @@ struct sysctl_oid *device_get_sysctl_tree(device_t dev);
int device_is_alive(device_t dev); /* did probe succeed? */
int device_is_attached(device_t dev); /* did attach succeed? */
int device_is_enabled(device_t dev);
+int device_is_suspended(device_t dev);
int device_is_quiet(device_t dev);
int device_print_prettyname(device_t dev);
int device_printf(device_t dev, const char *, ...) __printflike(2, 3);
@@ -517,6 +561,8 @@ int resource_set_long(const char *name, int unit, const char *resname,
long value);
int resource_set_string(const char *name, int unit, const char *resname,
const char *value);
+int resource_unset_value(const char *name, int unit, const char *resname);
+
/*
* Functions for maintaining and checking consistency of
* bus information exported to userspace.
diff --git a/sys/sys/conf.h b/sys/sys/conf.h
index 06c38c1..34c65a8 100644
--- a/sys/sys/conf.h
+++ b/sys/sys/conf.h
@@ -244,6 +244,28 @@ void clone_cleanup(struct clonedevs **);
#define CLONE_FLAG0 (CLONE_UNITMASK + 1)
int clone_create(struct clonedevs **, struct cdevsw *, int *unit, struct cdev **dev, int extra);
+#define MAKEDEV_REF 0x01
+#define MAKEDEV_WHTOUT 0x02
+#define MAKEDEV_NOWAIT 0x04
+#define MAKEDEV_WAITOK 0x08
+#define MAKEDEV_ETERNAL 0x10
+#define MAKEDEV_CHECKNAME 0x20
+struct make_dev_args {
+ size_t mda_size;
+ int mda_flags;
+ struct cdevsw *mda_devsw;
+ struct ucred *mda_cr;
+ uid_t mda_uid;
+ gid_t mda_gid;
+ int mda_mode;
+ int mda_unit;
+ void *mda_si_drv1;
+ void *mda_si_drv2;
+};
+void make_dev_args_init_impl(struct make_dev_args *_args, size_t _sz);
+#define make_dev_args_init(a) \
+ make_dev_args_init_impl((a), sizeof(struct make_dev_args))
+
int count_dev(struct cdev *_dev);
void delist_dev(struct cdev *_dev);
void destroy_dev(struct cdev *_dev);
@@ -265,12 +287,6 @@ struct cdev *make_dev(struct cdevsw *_devsw, int _unit, uid_t _uid, gid_t _gid,
struct cdev *make_dev_cred(struct cdevsw *_devsw, int _unit,
struct ucred *_cr, uid_t _uid, gid_t _gid, int _perms,
const char *_fmt, ...) __printflike(7, 8);
-#define MAKEDEV_REF 0x01
-#define MAKEDEV_WHTOUT 0x02
-#define MAKEDEV_NOWAIT 0x04
-#define MAKEDEV_WAITOK 0x08
-#define MAKEDEV_ETERNAL 0x10
-#define MAKEDEV_CHECKNAME 0x20
struct cdev *make_dev_credf(int _flags,
struct cdevsw *_devsw, int _unit,
struct ucred *_cr, uid_t _uid, gid_t _gid, int _mode,
@@ -278,6 +294,8 @@ struct cdev *make_dev_credf(int _flags,
int make_dev_p(int _flags, struct cdev **_cdev, struct cdevsw *_devsw,
struct ucred *_cr, uid_t _uid, gid_t _gid, int _mode,
const char *_fmt, ...) __printflike(8, 9);
+int make_dev_s(struct make_dev_args *_args, struct cdev **_cdev,
+ const char *_fmt, ...) __printflike(3, 4);
struct cdev *make_dev_alias(struct cdev *_pdev, const char *_fmt, ...)
__printflike(2, 3);
int make_dev_alias_p(int _flags, struct cdev **_cdev, struct cdev *_pdev,
diff --git a/sys/sys/elf_common.h b/sys/sys/elf_common.h
index d6ca101..b7e1fb2 100644
--- a/sys/sys/elf_common.h
+++ b/sys/sys/elf_common.h
@@ -748,8 +748,9 @@ typedef struct {
#define STB_LOCAL 0 /* Local symbol */
#define STB_GLOBAL 1 /* Global symbol */
#define STB_WEAK 2 /* like global - lower precedence */
-#define STB_LOOS 10 /* Reserved range for operating system */
-#define STB_HIOS 12 /* specific semantics. */
+#define STB_LOOS 10 /* Start of operating system reserved range. */
+#define STB_GNU_UNIQUE 10 /* Unique symbol (GNU) */
+#define STB_HIOS 12 /* End of operating system reserved range. */
#define STB_LOPROC 13 /* reserved range for processor */
#define STB_HIPROC 15 /* specific semantics. */
diff --git a/sys/sys/param.h b/sys/sys/param.h
index 1fd1313..f1786f4 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -58,7 +58,7 @@
* in the range 5 to 9.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 1002508 /* Master, propagated to newvers */
+#define __FreeBSD_version 1002509 /* Master, propagated to newvers */
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
OpenPOWER on IntegriCloud