From 552b404fb268fbef96316a878c5e8b00892d1dfd Mon Sep 17 00:00:00 2001 From: neel Date: Thu, 28 Feb 2013 01:00:32 +0000 Subject: Remove the quirk to allow use of MSI when the guest is running inside bhyve. This became redundant after the hostbridge presented to the guest started advertising the PCI-E capability (r246846). Obtained from: NetApp --- sys/dev/pci/pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c index 7632e73..649a494 100644 --- a/sys/dev/pci/pci.c +++ b/sys/dev/pci/pci.c @@ -244,7 +244,6 @@ static const struct pci_quirk pci_quirks[] = { * but support MSI just fine. QEMU uses the Intel 82440. */ { 0x12378086, PCI_QUIRK_ENABLE_MSI_VM, 0, 0 }, - { 0x12751275, PCI_QUIRK_ENABLE_MSI_VM, 0, 0 }, /* bhyve */ /* * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus -- cgit v1.1 From 9d861dbb5610d58af957b37050bea1b9e6cacf2b Mon Sep 17 00:00:00 2001 From: gjb Date: Thu, 28 Feb 2013 01:22:14 +0000 Subject: Properly handle '-h' argument. PR: 176332 Reviewed by: scottl MFC after: 3 days --- usr.bin/dc/dc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usr.bin/dc/dc.c b/usr.bin/dc/dc.c index d5edadb..546b247 100644 --- a/usr.bin/dc/dc.c +++ b/usr.bin/dc/dc.c @@ -84,7 +84,7 @@ main(int argc, char *argv[]) bool extended_regs = false, preproc_done = false; /* accept and ignore a single dash to be 4.4BSD dc(1) compatible */ - while ((ch = getopt_long(argc, argv, "e:f:Vx", long_options, NULL)) != -1) { + while ((ch = getopt_long(argc, argv, "e:f:hVx", long_options, NULL)) != -1) { switch (ch) { case 'e': if (!preproc_done) -- cgit v1.1 From a7aacdcc59ec6c6ad46cda7cc6e549cf8ad57427 Mon Sep 17 00:00:00 2001 From: pjd Date: Thu, 28 Feb 2013 01:24:24 +0000 Subject: When we are waiting for new trail files we may have been disconnected and reconnected in the meantime. 
Check if reset is set before opening next trail file, as not doing so will result in sending OPEN message with the same file name twice and this is illegal - the second OPEN is sent without first closing previous trail file. --- contrib/openbsm/bin/auditdistd/sender.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/contrib/openbsm/bin/auditdistd/sender.c b/contrib/openbsm/bin/auditdistd/sender.c index 256fbb1..ab90e6c 100644 --- a/contrib/openbsm/bin/auditdistd/sender.c +++ b/contrib/openbsm/bin/auditdistd/sender.c @@ -394,6 +394,7 @@ read_thread_wait(void) mtx_lock(&adist_remote_mtx); if (adhost->adh_reset) { +reset: adhost->adh_reset = false; if (trail_filefd(adist_trail) != -1) trail_close(adist_trail); @@ -408,6 +409,14 @@ read_thread_wait(void) while (trail_filefd(adist_trail) == -1) { newfile = true; wait_for_dir(); + /* + * We may have been disconnected and reconnected in the + * meantime, check if reset is set. + */ + mtx_lock(&adist_remote_mtx); + if (adhost->adh_reset) + goto reset; + mtx_unlock(&adist_remote_mtx); if (trail_filefd(adist_trail) == -1) trail_next(adist_trail); } -- cgit v1.1 From 625ec702fe2d634ad8f2145031474520fe561460 Mon Sep 17 00:00:00 2001 From: delphij Date: Thu, 28 Feb 2013 04:16:47 +0000 Subject: Refresh vendor driver version which adds ARC-1224 support. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many thanks to Areca for continuing to support FreeBSD. 
Submitted by: 黃清隆 MFC after: 3 days --- share/man/man4/arcmsr.4 | 4 +- sys/dev/arcmsr/arcmsr.c | 135 ++++++++++++++++++++++++++++++++++++++++++------ sys/dev/arcmsr/arcmsr.h | 17 +++--- 3 files changed, 129 insertions(+), 27 deletions(-) diff --git a/share/man/man4/arcmsr.4 b/share/man/man4/arcmsr.4 index 71b5faa..87c7050 100644 --- a/share/man/man4/arcmsr.4 +++ b/share/man/man4/arcmsr.4 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd December 18, 2012 +.Dd February 27, 2013 .Dt ARCMSR 4 .Os .Sh NAME @@ -108,6 +108,8 @@ ARC-1222 .It ARC-1223 .It +ARC-1224 +.It ARC-1230 .It ARC-1231 diff --git a/sys/dev/arcmsr/arcmsr.c b/sys/dev/arcmsr/arcmsr.c index c815d02..d80fcf2 100644 --- a/sys/dev/arcmsr/arcmsr.c +++ b/sys/dev/arcmsr/arcmsr.c @@ -1,16 +1,15 @@ /* -***************************************************************************************** -** O.S : FreeBSD +******************************************************************************** +** OS : FreeBSD ** FILE NAME : arcmsr.c ** BY : Erich Chen, Ching Huang ** Description: SCSI RAID Device Driver for -** ARECA (ARC11XX/ARC12XX/ARC13XX/ARC16XX/ARC188x) SATA/SAS RAID HOST Adapter -** ARCMSR RAID Host adapter -** [RAID controller:INTEL 331(PCI-X) 341(PCI-EXPRESS) chip set] -****************************************************************************************** -************************************************************************ +** ARECA (ARC11XX/ARC12XX/ARC13XX/ARC16XX/ARC188x) +** SATA/SAS RAID HOST Adapter +******************************************************************************** +******************************************************************************** ** -** Copyright (C) 2002 - 2010, Areca Technology Corporation All rights reserved. +** Copyright (C) 2002 - 2012, Areca Technology Corporation All rights reserved. 
** ** Redistribution and use in source and binary forms, with or without ** modification, are permitted provided that the following conditions @@ -33,7 +32,7 @@ ** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT **(INCLUDING NEGLIGENCE OR OTHERWISE)ARISING IN ANY WAY OUT OF THE USE OF ** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -************************************************************************** +******************************************************************************** ** History ** ** REV# DATE NAME DESCRIPTION @@ -73,7 +72,7 @@ ** 1.20.00.23 01/30/2012 Ching Huang Fixed Request requeued and Retrying command ** 1.20.00.24 06/11/2012 Ching Huang Fixed return sense data condition ** 1.20.00.25 08/17/2012 Ching Huang Fixed hotplug device no function on type A adapter -** 1.20.00.26 12/14/2012 Ching Huang Added support ARC1214 +** 1.20.00.26 12/14/2012 Ching Huang Added support ARC1214,1224 ****************************************************************************************** */ @@ -145,7 +144,7 @@ __FBSDID("$FreeBSD$"); #define arcmsr_callout_init(a) callout_init(a); #endif -#define ARCMSR_DRIVER_VERSION "Driver Version 1.20.00.26 2012-12-14" +#define ARCMSR_DRIVER_VERSION "Driver Version 1.20.00.26 2013-01-08" #include /* ************************************************************************** @@ -168,7 +167,7 @@ static void arcmsr_stop_adapter_bgrb(struct AdapterControlBlock *acb); static void arcmsr_start_adapter_bgrb(struct AdapterControlBlock *acb); static void arcmsr_iop_init(struct AdapterControlBlock *acb); static void arcmsr_flush_adapter_cache(struct AdapterControlBlock *acb); -static void arcmsr_Read_iop_rqbuffer_data(struct AdapterControlBlock *acb, struct QBUFFER *prbuffer); +static u_int32_t arcmsr_Read_iop_rqbuffer_data(struct AdapterControlBlock *acb, struct QBUFFER *prbuffer); static void arcmsr_Write_data_2iop_wqbuffer(struct AdapterControlBlock *acb); static void 
arcmsr_abort_allcmd(struct AdapterControlBlock *acb); static void arcmsr_srb_complete(struct CommandControlBlock *srb, int stand_flag); @@ -212,7 +211,11 @@ static device_method_t arcmsr_methods[]={ DEVMETHOD(device_suspend, arcmsr_suspend), DEVMETHOD(device_resume, arcmsr_resume), +#if __FreeBSD_version >= 803000 DEVMETHOD_END +#else + { 0, 0 } +#endif }; static driver_t arcmsr_driver={ @@ -1381,13 +1384,61 @@ static void arcmsr_poll(struct cam_sim *psim) ************************************************************************** ************************************************************************** */ -static void arcmsr_Read_iop_rqbuffer_data(struct AdapterControlBlock *acb, +static u_int32_t arcmsr_Read_iop_rqbuffer_data_D(struct AdapterControlBlock *acb, + struct QBUFFER *prbuffer) { + + u_int8_t *pQbuffer; + u_int8_t *buf1 = 0; + u_int32_t *iop_data, *buf2 = 0; + u_int32_t iop_len, data_len; + + iop_data = (u_int32_t *)prbuffer->data; + iop_len = (u_int32_t)prbuffer->data_len; + if ( iop_len > 0 ) + { + buf1 = malloc(128, M_DEVBUF, M_NOWAIT | M_ZERO); + buf2 = (u_int32_t *)buf1; + if( buf1 == NULL) + return (0); + data_len = iop_len; + while(data_len >= 4) + { + *buf2++ = *iop_data++; + data_len -= 4; + } + if(data_len) + *buf2 = *iop_data; + buf2 = (u_int32_t *)buf1; + } + while (iop_len > 0) { + pQbuffer = &acb->rqbuffer[acb->rqbuf_lastindex]; + *pQbuffer = *buf1; + acb->rqbuf_lastindex++; + /* if last, index number set it to 0 */ + acb->rqbuf_lastindex %= ARCMSR_MAX_QBUFFER; + buf1++; + iop_len--; + } + if(buf2) + free( (u_int8_t *)buf2, M_DEVBUF); + /* let IOP know data has been read */ + arcmsr_iop_message_read(acb); + return (1); +} +/* +************************************************************************** +************************************************************************** +*/ +static u_int32_t arcmsr_Read_iop_rqbuffer_data(struct AdapterControlBlock *acb, struct QBUFFER *prbuffer) { u_int8_t *pQbuffer; u_int8_t *iop_data; u_int32_t 
iop_len; + if(acb->adapter_type == ACB_ADAPTER_TYPE_D) { + return(arcmsr_Read_iop_rqbuffer_data_D(acb, prbuffer)); + } iop_data = (u_int8_t *)prbuffer->data; iop_len = (u_int32_t)prbuffer->data_len; while (iop_len > 0) { @@ -1401,6 +1452,7 @@ static void arcmsr_Read_iop_rqbuffer_data(struct AdapterControlBlock *acb, } /* let IOP know data has been read */ arcmsr_iop_message_read(acb); + return (1); } /* ************************************************************************** @@ -1417,7 +1469,8 @@ static void arcmsr_iop2drv_data_wrote_handle(struct AdapterControlBlock *acb) my_empty_len = (acb->rqbuf_lastindex - acb->rqbuf_firstindex - 1) & (ARCMSR_MAX_QBUFFER-1); if(my_empty_len >= prbuffer->data_len) { - arcmsr_Read_iop_rqbuffer_data(acb, prbuffer); + if(arcmsr_Read_iop_rqbuffer_data(acb, prbuffer) == 0) + acb->acb_flags |= ACB_F_IOPDATA_OVERFLOW; } else { acb->acb_flags |= ACB_F_IOPDATA_OVERFLOW; } @@ -1427,6 +1480,50 @@ static void arcmsr_iop2drv_data_wrote_handle(struct AdapterControlBlock *acb) ********************************************************************** ********************************************************************** */ +static void arcmsr_Write_data_2iop_wqbuffer_D(struct AdapterControlBlock *acb) +{ + u_int8_t *pQbuffer; + struct QBUFFER *pwbuffer; + u_int8_t *buf1 = 0; + u_int32_t *iop_data, *buf2 = 0; + u_int32_t allxfer_len = 0, data_len; + + if(acb->acb_flags & ACB_F_MESSAGE_WQBUFFER_READ) { + buf1 = malloc(128, M_DEVBUF, M_NOWAIT | M_ZERO); + buf2 = (u_int32_t *)buf1; + if( buf1 == NULL) + return; + + acb->acb_flags &= (~ACB_F_MESSAGE_WQBUFFER_READ); + pwbuffer = arcmsr_get_iop_wqbuffer(acb); + iop_data = (u_int32_t *)pwbuffer->data; + while((acb->wqbuf_firstindex != acb->wqbuf_lastindex) + && (allxfer_len < 124)) { + pQbuffer = &acb->wqbuffer[acb->wqbuf_firstindex]; + *buf1 = *pQbuffer; + acb->wqbuf_firstindex++; + acb->wqbuf_firstindex %= ARCMSR_MAX_QBUFFER; + buf1++; + allxfer_len++; + } + pwbuffer->data_len = allxfer_len; + 
data_len = allxfer_len; + buf1 = (u_int8_t *)buf2; + while(data_len >= 4) + { + *iop_data++ = *buf2++; + data_len -= 4; + } + if(data_len) + *iop_data = *buf2; + free( buf1, M_DEVBUF); + arcmsr_iop_message_wrote(acb); + } +} +/* +********************************************************************** +********************************************************************** +*/ static void arcmsr_Write_data_2iop_wqbuffer(struct AdapterControlBlock *acb) { u_int8_t *pQbuffer; @@ -1434,6 +1531,10 @@ static void arcmsr_Write_data_2iop_wqbuffer(struct AdapterControlBlock *acb) u_int8_t *iop_data; int32_t allxfer_len=0; + if(acb->adapter_type == ACB_ADAPTER_TYPE_D) { + arcmsr_Write_data_2iop_wqbuffer_D(acb); + return; + } if(acb->acb_flags & ACB_F_MESSAGE_WQBUFFER_READ) { acb->acb_flags &= (~ACB_F_MESSAGE_WQBUFFER_READ); pwbuffer = arcmsr_get_iop_wqbuffer(acb); @@ -2153,7 +2254,8 @@ u_int32_t arcmsr_iop_ioctlcmd(struct AdapterControlBlock *acb, u_int32_t ioctl_c acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW; prbuffer = arcmsr_get_iop_rqbuffer(acb); - arcmsr_Read_iop_rqbuffer_data(acb, prbuffer); + if(arcmsr_Read_iop_rqbuffer_data(acb, prbuffer) == 0) + acb->acb_flags |= ACB_F_IOPDATA_OVERFLOW; } pcmdmessagefld->cmdmessage.Length = allxfer_len; pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_OK; @@ -2374,7 +2476,8 @@ static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb, union ccb *p acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW; prbuffer = arcmsr_get_iop_rqbuffer(acb); - arcmsr_Read_iop_rqbuffer_data(acb, prbuffer); + if(arcmsr_Read_iop_rqbuffer_data(acb, prbuffer) == 0) + acb->acb_flags |= ACB_F_IOPDATA_OVERFLOW; } pcmdmessagefld->cmdmessage.Length = allxfer_len; pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_OK; diff --git a/sys/dev/arcmsr/arcmsr.h b/sys/dev/arcmsr/arcmsr.h index 219372e..95bb6a1 100644 --- a/sys/dev/arcmsr/arcmsr.h +++ b/sys/dev/arcmsr/arcmsr.h @@ -1,17 +1,14 @@ /* 
-*********************************************************************************************** -** O.S : FreeBSD +******************************************************************************** +** OS : FreeBSD ** FILE NAME : arcmsr.h ** BY : Erich Chen, Ching Huang ** Description: SCSI RAID Device Driver for -** ARECA SATA/SAS RAID HOST Adapter -** [RAID controller:INTEL 331(PCI-X) 341(PCI-EXPRESS) chip set] -*********************************************************************************************** -************************************************************************ -** Copyright (C) 2002 - 2010, Areca Technology Corporation All rights reserved. -** -** Web site: www.areca.com.tw -** E-mail: erich@areca.com.tw; ching2048@areca.com.tw +** ARECA (ARC11XX/ARC12XX/ARC13XX/ARC16XX/ARC188x) +** SATA/SAS RAID HOST Adapter +******************************************************************************** +******************************************************************************** +** Copyright (C) 2002 - 2012, Areca Technology Corporation All rights reserved. ** ** Redistribution and use in source and binary forms,with or without ** modification,are permitted provided that the following conditions -- cgit v1.1 From f3c985cbe184c9d4c31d0e7140c146768a82833c Mon Sep 17 00:00:00 2001 From: mav Date: Thu, 28 Feb 2013 10:21:04 +0000 Subject: Introduce sbintime_t type -- the simplified version of struct bintime, using 32.32 fixed point in form of single int64_t. It is much easier to use in cases where additional precision and range of struct bintime is not required. 
Reviewed by: bde (previous version), davide --- sys/sys/time.h | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/sys/sys/time.h b/sys/sys/time.h index 80878c0..927cecc 100644 --- a/sys/sys/time.h +++ b/sys/sys/time.h @@ -109,6 +109,37 @@ bintime_mul(struct bintime *bt, u_int x) ((a)->frac cmp (b)->frac) : \ ((a)->sec cmp (b)->sec)) +typedef int64_t sbintime_t; +#define SBT_1S ((sbintime_t)1 << 32) +#define SBT_1M (SBT_1S * 60) +#define SBT_1MS (SBT_1S / 1000) +#define SBT_1US (SBT_1S / 1000000) +#define SBT_1NS (SBT_1S / 1000000000) + +static __inline int +sbintime_getsec(sbintime_t sbt) +{ + + return (sbt >> 32); +} + +static __inline sbintime_t +bttosbt(const struct bintime bt) +{ + + return (((sbintime_t)bt.sec << 32) + (bt.frac >> 32)); +} + +static __inline struct bintime +sbttobt(sbintime_t sbt) +{ + struct bintime bt; + + bt.sec = sbt >> 32; + bt.frac = sbt << 32; + return (bt); +} + /*- * Background information: * @@ -156,6 +187,42 @@ timeval2bintime(const struct timeval *tv, struct bintime *bt) /* 18446744073709 = int(2^64 / 1000000) */ bt->frac = tv->tv_usec * (uint64_t)18446744073709LL; } + +static __inline struct timespec +sbttots(sbintime_t sbt) +{ + struct timespec ts; + + ts.tv_sec = sbt >> 32; + ts.tv_nsec = ((uint64_t)1000000000 * (uint32_t)sbt) >> 32; + return (ts); +} + +static __inline sbintime_t +tstosbt(struct timespec ts) +{ + + return (((sbintime_t)ts.tv_sec << 32) + + (ts.tv_nsec * (((uint64_t)1 << 63) / 500000000) >> 32)); +} + +static __inline struct timeval +sbttotv(sbintime_t sbt) +{ + struct timeval tv; + + tv.tv_sec = sbt >> 32; + tv.tv_usec = ((uint64_t)1000000 * (uint32_t)sbt) >> 32; + return (tv); +} + +static __inline sbintime_t +tvtosbt(struct timeval tv) +{ + + return (((sbintime_t)tv.tv_sec << 32) + + (tv.tv_usec * (((uint64_t)1 << 63) / 500000) >> 32)); +} #endif /* __BSD_VISIBLE */ #ifdef _KERNEL @@ -317,6 +384,15 @@ void binuptime(struct bintime *bt); void 
nanouptime(struct timespec *tsp); void microuptime(struct timeval *tvp); +static __inline sbintime_t +sbinuptime(void) +{ + struct bintime bt; + + binuptime(&bt); + return (bttosbt(bt)); +} + void bintime(struct bintime *bt); void nanotime(struct timespec *tsp); void microtime(struct timeval *tvp); @@ -325,6 +401,15 @@ void getbinuptime(struct bintime *bt); void getnanouptime(struct timespec *tsp); void getmicrouptime(struct timeval *tvp); +static __inline sbintime_t +getsbinuptime(void) +{ + struct bintime bt; + + getbinuptime(&bt); + return (bttosbt(bt)); +} + void getbintime(struct bintime *bt); void getnanotime(struct timespec *tsp); void getmicrotime(struct timeval *tvp); -- cgit v1.1 From 2bf12d0c7ceced74a813d6940dd07310a11d6a2a Mon Sep 17 00:00:00 2001 From: davide Date: Thu, 28 Feb 2013 10:46:54 +0000 Subject: MFcalloutng: When CPU becomes idle, cpu_idleclock() calculates time to the next timer event in order to reprogram hw timer. Return that time in sbintime_t to the caller and pass it to acpi_cpu_idle(), where it can be used as one more factor (quite precise) to estimate further sleep time and choose optimal sleep state. This is a preparatory change for further callout improvements that will be committed in the next days. The commit is not targeted for MFC. 
--- sys/amd64/amd64/machdep.c | 19 ++++++++++--------- sys/dev/acpica/acpi_cpu.c | 11 +++++++---- sys/i386/i386/machdep.c | 23 ++++++++++++----------- sys/ia64/ia64/machdep.c | 7 ++++--- sys/kern/kern_clocksource.c | 6 ++++-- sys/pc98/pc98/machdep.c | 13 +++++++------ sys/powerpc/powerpc/cpu.c | 15 ++++++++------- sys/powerpc/ps3/platform_ps3.c | 4 ++-- sys/powerpc/wii/platform_wii.c | 4 ++-- sys/sys/proc.h | 2 +- sys/sys/systm.h | 3 ++- 11 files changed, 59 insertions(+), 48 deletions(-) diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index abce826..1d7178f 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -658,7 +658,7 @@ cpu_halt(void) halt(); } -void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */ +void (*cpu_idle_hook)(sbintime_t) = NULL; /* ACPI idle hook. */ static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */ static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */ TUNABLE_INT("machdep.idle_mwait", &idle_mwait); @@ -670,7 +670,7 @@ SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait, #define STATE_SLEEPING 0x2 static void -cpu_idle_acpi(int busy) +cpu_idle_acpi(sbintime_t sbt) { int *state; @@ -682,14 +682,14 @@ cpu_idle_acpi(int busy) if (sched_runnable()) enable_intr(); else if (cpu_idle_hook) - cpu_idle_hook(); + cpu_idle_hook(sbt); else __asm __volatile("sti; hlt"); *state = STATE_RUNNING; } static void -cpu_idle_hlt(int busy) +cpu_idle_hlt(sbintime_t sbt) { int *state; @@ -730,7 +730,7 @@ cpu_idle_hlt(int busy) #define MWAIT_C4 0x30 static void -cpu_idle_mwait(int busy) +cpu_idle_mwait(sbintime_t sbt) { int *state; @@ -753,7 +753,7 @@ cpu_idle_mwait(int busy) } static void -cpu_idle_spin(int busy) +cpu_idle_spin(sbintime_t sbt) { int *state; int i; @@ -802,12 +802,13 @@ cpu_probe_amdc1e(void) } } -void (*cpu_idle_fn)(int) = cpu_idle_acpi; +void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi; void cpu_idle(int busy) { uint64_t msr; + sbintime_t sbt = -1; CTR2(KTR_SPARE2, 
"cpu_idle(%d) at %d", busy, curcpu); @@ -825,7 +826,7 @@ cpu_idle(int busy) /* If we have time - switch timers into idle mode. */ if (!busy) { critical_enter(); - cpu_idleclock(); + sbt = cpu_idleclock(); } /* Apply AMD APIC timer C1E workaround. */ @@ -836,7 +837,7 @@ cpu_idle(int busy) } /* Call main idle method. */ - cpu_idle_fn(busy); + cpu_idle_fn(sbt); /* Switch timers mack into active mode. */ if (!busy) { diff --git a/sys/dev/acpica/acpi_cpu.c b/sys/dev/acpica/acpi_cpu.c index 7f008f7..df4c4c2 100644 --- a/sys/dev/acpica/acpi_cpu.c +++ b/sys/dev/acpica/acpi_cpu.c @@ -168,7 +168,7 @@ static int acpi_cpu_cx_cst(struct acpi_cpu_softc *sc); static void acpi_cpu_startup(void *arg); static void acpi_cpu_startup_cx(struct acpi_cpu_softc *sc); static void acpi_cpu_cx_list(struct acpi_cpu_softc *sc); -static void acpi_cpu_idle(void); +static void acpi_cpu_idle(sbintime_t sbt); static void acpi_cpu_notify(ACPI_HANDLE h, UINT32 notify, void *context); static int acpi_cpu_quirks(void); static int acpi_cpu_usage_sysctl(SYSCTL_HANDLER_ARGS); @@ -954,13 +954,13 @@ acpi_cpu_startup_cx(struct acpi_cpu_softc *sc) * interrupts are re-enabled. */ static void -acpi_cpu_idle() +acpi_cpu_idle(sbintime_t sbt) { struct acpi_cpu_softc *sc; struct acpi_cx *cx_next; uint64_t cputicks; uint32_t start_time, end_time; - int bm_active, cx_next_idx, i; + int bm_active, cx_next_idx, i, us; /* * Look up our CPU id to get our softc. If it's NULL, we'll use C1 @@ -980,13 +980,16 @@ acpi_cpu_idle() } /* Find the lowest state that has small enough latency. 
*/ + us = sc->cpu_prev_sleep; + if (sbt >= 0 && us > sbt / SBT_1US) + us = sbt / SBT_1US; cx_next_idx = 0; if (cpu_disable_deep_sleep) i = min(sc->cpu_cx_lowest, sc->cpu_non_c3); else i = sc->cpu_cx_lowest; for (; i >= 0; i--) { - if (sc->cpu_cx_states[i].trans_lat * 3 <= sc->cpu_prev_sleep) { + if (sc->cpu_cx_states[i].trans_lat * 3 <= us) { cx_next_idx = i; break; } diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index 744c3cb..3f15f08 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -1220,7 +1220,7 @@ cpu_halt(void) int scheduler_running; static void -cpu_idle_hlt(int busy) +cpu_idle_hlt(sbintime_t sbt) { scheduler_running = 1; @@ -1241,7 +1241,7 @@ cpu_halt(void) #endif -void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */ +void (*cpu_idle_hook)(sbintime_t) = NULL; /* ACPI idle hook. */ static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */ static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */ TUNABLE_INT("machdep.idle_mwait", &idle_mwait); @@ -1253,7 +1253,7 @@ SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait, #define STATE_SLEEPING 0x2 static void -cpu_idle_acpi(int busy) +cpu_idle_acpi(sbintime_t sbt) { int *state; @@ -1265,7 +1265,7 @@ cpu_idle_acpi(int busy) if (sched_runnable()) enable_intr(); else if (cpu_idle_hook) - cpu_idle_hook(); + cpu_idle_hook(sbt); else __asm __volatile("sti; hlt"); *state = STATE_RUNNING; @@ -1273,7 +1273,7 @@ cpu_idle_acpi(int busy) #ifndef XEN static void -cpu_idle_hlt(int busy) +cpu_idle_hlt(sbintime_t sbt) { int *state; @@ -1315,7 +1315,7 @@ cpu_idle_hlt(int busy) #define MWAIT_C4 0x30 static void -cpu_idle_mwait(int busy) +cpu_idle_mwait(sbintime_t sbt) { int *state; @@ -1338,7 +1338,7 @@ cpu_idle_mwait(int busy) } static void -cpu_idle_spin(int busy) +cpu_idle_spin(sbintime_t sbt) { int *state; int i; @@ -1388,9 +1388,9 @@ cpu_probe_amdc1e(void) } #ifdef XEN -void (*cpu_idle_fn)(int) = cpu_idle_hlt; +void (*cpu_idle_fn)(sbintime_t) = cpu_idle_hlt; 
#else -void (*cpu_idle_fn)(int) = cpu_idle_acpi; +void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi; #endif void @@ -1399,6 +1399,7 @@ cpu_idle(int busy) #ifndef XEN uint64_t msr; #endif + sbintime_t sbt = -1; CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); @@ -1418,7 +1419,7 @@ cpu_idle(int busy) /* If we have time - switch timers into idle mode. */ if (!busy) { critical_enter(); - cpu_idleclock(); + sbt = cpu_idleclock(); } #ifndef XEN @@ -1431,7 +1432,7 @@ cpu_idle(int busy) #endif /* Call main idle method. */ - cpu_idle_fn(busy); + cpu_idle_fn(sbt); /* Switch timers mack into active mode. */ if (!busy) { diff --git a/sys/ia64/ia64/machdep.c b/sys/ia64/ia64/machdep.c index 98bcf44..ac6a829 100644 --- a/sys/ia64/ia64/machdep.c +++ b/sys/ia64/ia64/machdep.c @@ -171,7 +171,7 @@ extern vm_offset_t ksym_start, ksym_end; struct msgbuf *msgbufp = NULL; /* Other subsystems (e.g., ACPI) can hook this later. */ -void (*cpu_idle_hook)(void) = NULL; +void (*cpu_idle_hook)(sbintime_t) = NULL; struct kva_md_info kmi; @@ -408,10 +408,11 @@ void cpu_idle(int busy) { register_t ie; + sbintime_t sbt = -1; if (!busy) { critical_enter(); - cpu_idleclock(); + sbt = cpu_idleclock(); } ie = intr_disable(); @@ -420,7 +421,7 @@ cpu_idle(int busy) if (sched_runnable()) ia64_enable_intr(); else if (cpu_idle_hook != NULL) { - (*cpu_idle_hook)(); + (*cpu_idle_hook)(sbt); /* The hook must enable interrupts! */ } else { ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0); diff --git a/sys/kern/kern_clocksource.c b/sys/kern/kern_clocksource.c index 33f21e9..ac7e2de 100644 --- a/sys/kern/kern_clocksource.c +++ b/sys/kern/kern_clocksource.c @@ -767,7 +767,7 @@ cpu_stopprofclock(void) /* * Switch to idle mode (all ticks handled). 
*/ -void +sbintime_t cpu_idleclock(void) { struct bintime now, t; @@ -779,7 +779,7 @@ cpu_idleclock(void) || curcpu == CPU_FIRST() #endif ) - return; + return (-1); state = DPCPU_PTR(timerstate); if (periodic) now = state->now; @@ -795,6 +795,8 @@ cpu_idleclock(void) if (!periodic) loadtimer(&now, 0); ET_HW_UNLOCK(state); + bintime_sub(&t, &now); + return (MAX(bttosbt(t), 0)); } /* diff --git a/sys/pc98/pc98/machdep.c b/sys/pc98/pc98/machdep.c index 3133d1b..b0d5acd 100644 --- a/sys/pc98/pc98/machdep.c +++ b/sys/pc98/pc98/machdep.c @@ -1145,7 +1145,7 @@ SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait, #define STATE_SLEEPING 0x2 static void -cpu_idle_hlt(int busy) +cpu_idle_hlt(sbintime_t sbt) { int *state; @@ -1186,7 +1186,7 @@ cpu_idle_hlt(int busy) #define MWAIT_C4 0x30 static void -cpu_idle_mwait(int busy) +cpu_idle_mwait(sbintime_t sbt) { int *state; @@ -1209,7 +1209,7 @@ cpu_idle_mwait(int busy) } static void -cpu_idle_spin(int busy) +cpu_idle_spin(sbintime_t sbt) { int *state; int i; @@ -1229,11 +1229,12 @@ cpu_idle_spin(int busy) } } -void (*cpu_idle_fn)(int) = cpu_idle_hlt; +void (*cpu_idle_fn)(sbintime_t) = cpu_idle_hlt; void cpu_idle(int busy) { + sbintime_t sbt = -1; CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); @@ -1251,11 +1252,11 @@ cpu_idle(int busy) /* If we have time - switch timers into idle mode. */ if (!busy) { critical_enter(); - cpu_idleclock(); + sbt = cpu_idleclock(); } /* Call main idle method. */ - cpu_idle_fn(busy); + cpu_idle_fn(sbt); /* Switch timers mack into active mode. 
*/ if (!busy) { diff --git a/sys/powerpc/powerpc/cpu.c b/sys/powerpc/powerpc/cpu.c index d177c8b..d67f359 100644 --- a/sys/powerpc/powerpc/cpu.c +++ b/sys/powerpc/powerpc/cpu.c @@ -79,9 +79,9 @@ static void cpu_970_setup(int cpuid, uint16_t vers); static void cpu_booke_setup(int cpuid, uint16_t vers); int powerpc_pow_enabled; -void (*cpu_idle_hook)(void) = NULL; -static void cpu_idle_60x(void); -static void cpu_idle_booke(void); +void (*cpu_idle_hook)(sbintime_t) = NULL; +static void cpu_idle_60x(sbintime_t); +static void cpu_idle_booke(sbintime_t); struct cputab { const char *name; @@ -516,6 +516,7 @@ cpu_feature_bit(SYSCTL_HANDLER_ARGS) void cpu_idle(int busy) { + sbintime_t sbt = -1; #ifdef INVARIANTS if ((mfmsr() & PSL_EE) != PSL_EE) { @@ -531,9 +532,9 @@ cpu_idle(int busy) if (cpu_idle_hook != NULL) { if (!busy) { critical_enter(); - cpu_idleclock(); + sbt = cpu_idleclock(); } - cpu_idle_hook(); + cpu_idle_hook(sbt); if (!busy) { cpu_activeclock(); critical_exit(); @@ -551,7 +552,7 @@ cpu_idle_wakeup(int cpu) } static void -cpu_idle_60x(void) +cpu_idle_60x(sbintime_t sbt) { register_t msr; uint16_t vers; @@ -586,7 +587,7 @@ cpu_idle_60x(void) } static void -cpu_idle_booke(void) +cpu_idle_booke(sbintime_t sbt) { register_t msr; diff --git a/sys/powerpc/ps3/platform_ps3.c b/sys/powerpc/ps3/platform_ps3.c index 61ce873..207382d 100644 --- a/sys/powerpc/ps3/platform_ps3.c +++ b/sys/powerpc/ps3/platform_ps3.c @@ -70,7 +70,7 @@ static int ps3_smp_start_cpu(platform_t, struct pcpu *cpu); static struct cpu_group *ps3_smp_topo(platform_t); #endif static void ps3_reset(platform_t); -static void ps3_cpu_idle(void); +static void ps3_cpu_idle(sbintime_t); static platform_method_t ps3_methods[] = { PLATFORMMETHOD(platform_probe, ps3_probe), @@ -245,7 +245,7 @@ ps3_real_maxaddr(platform_t plat) } static void -ps3_cpu_idle(void) +ps3_cpu_idle(sbintime_t sbt) { lv1_pause(0); } diff --git a/sys/powerpc/wii/platform_wii.c b/sys/powerpc/wii/platform_wii.c index 2bb6022..bc35105 
100644 --- a/sys/powerpc/wii/platform_wii.c +++ b/sys/powerpc/wii/platform_wii.c @@ -60,7 +60,7 @@ static void wii_mem_regions(platform_t, struct mem_region **, int *, struct mem_region **, int *); static unsigned long wii_timebase_freq(platform_t, struct cpuref *cpuref); static void wii_reset(platform_t); -static void wii_cpu_idle(void); +static void wii_cpu_idle(sbintime_t sbt); static platform_method_t wii_methods[] = { PLATFORMMETHOD(platform_probe, wii_probe), @@ -155,6 +155,6 @@ wii_reset(platform_t plat) } static void -wii_cpu_idle(void) +wii_cpu_idle(sbintime_t sbt) { } diff --git a/sys/sys/proc.h b/sys/sys/proc.h index ade2864..46f5820 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -908,7 +908,7 @@ void tidhash_add(struct thread *); void tidhash_remove(struct thread *); void cpu_idle(int); int cpu_idle_wakeup(int); -extern void (*cpu_idle_hook)(void); /* Hook to machdep CPU idler. */ +extern void (*cpu_idle_hook)(sbintime_t); /* Hook to machdep CPU idler. */ void cpu_switch(struct thread *, struct thread *, struct mtx *); void cpu_throw(struct thread *, struct thread *) __dead2; void unsleep(struct thread *); diff --git a/sys/sys/systm.h b/sys/sys/systm.h index 611d014..2bf8056 100644 --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -44,6 +44,7 @@ #include #include #include /* for people using printf mainly */ +#include extern int cold; /* nonzero if we are doing a cold boot */ extern int rebooting; /* kern_reboot() has been called. */ @@ -267,7 +268,7 @@ void startprofclock(struct proc *); void stopprofclock(struct proc *); void cpu_startprofclock(void); void cpu_stopprofclock(void); -void cpu_idleclock(void); +sbintime_t cpu_idleclock(void); void cpu_activeclock(void); extern int cpu_can_deep_sleep; extern int cpu_disable_deep_sleep; -- cgit v1.1 From 240074414d4919ebd889f27087c72783a7ed9b19 Mon Sep 17 00:00:00 2001 From: davide Date: Thu, 28 Feb 2013 11:27:01 +0000 Subject: MFcalloutng (r247427 by mav): We don't need any precision here. 
Let it be a fast and dirty shift rather than a slow and excessively precise 64-bit division. --- sys/dev/acpica/acpi_cpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/dev/acpica/acpi_cpu.c b/sys/dev/acpica/acpi_cpu.c index df4c4c2..e58d980 100644 --- a/sys/dev/acpica/acpi_cpu.c +++ b/sys/dev/acpica/acpi_cpu.c @@ -981,8 +981,8 @@ acpi_cpu_idle(sbintime_t sbt) /* Find the lowest state that has small enough latency. */ us = sc->cpu_prev_sleep; - if (sbt >= 0 && us > sbt / SBT_1US) - us = sbt / SBT_1US; + if (sbt >= 0 && us > (sbt >> 12)) + us = (sbt >> 12); cx_next_idx = 0; if (cpu_disable_deep_sleep) i = min(sc->cpu_cx_lowest, sc->cpu_non_c3); -- cgit v1.1 From 6cf7cc6e4d8da1cf9aba1c481b914e4ca5e9f38f Mon Sep 17 00:00:00 2001 From: mav Date: Thu, 28 Feb 2013 13:46:03 +0000 Subject: MFcalloutng: Switch eventtimers(9) from using struct bintime to sbintime_t. Even before this not a single driver really supported full dynamic range of struct bintime even in theory, not speaking about practical inexpediency. This change legitimates the status quo and cleans up the code. 
--- share/man/man9/eventtimers.9 | 12 +++---- sys/arm/allwinner/timer.c | 35 ++++++++------------ sys/arm/arm/mpcore_timer.c | 30 +++++------------ sys/arm/broadcom/bcm2835/bcm2835_systimer.c | 19 ++++------- sys/arm/lpc/lpc_timer.c | 32 +++++++----------- sys/arm/mv/timer.c | 31 +++++++----------- sys/arm/ti/am335x/am335x_dmtimer.c | 28 ++++++---------- sys/arm/versatile/sp804.c | 19 ++++------- sys/dev/acpica/acpi_hpet.c | 26 +++++---------- sys/i386/xen/clock.c | 26 +++++---------- sys/ia64/ia64/clock.c | 22 ++++--------- sys/kern/kern_clocksource.c | 16 +++++---- sys/kern/kern_et.c | 51 +++++++++++------------------ sys/mips/mips/tick.c | 24 +++++--------- sys/mips/nlm/tick.c | 26 +++++---------- sys/mips/rmi/tick.c | 26 +++++---------- sys/powerpc/aim/clock.c | 26 +++++---------- sys/powerpc/booke/clock.c | 29 ++++++---------- sys/sparc64/sparc64/tick.c | 29 +++++++--------- sys/sys/timeet.h | 11 +++---- sys/x86/isa/atrtc.c | 11 +++---- sys/x86/isa/clock.c | 17 ++++------ sys/x86/x86/local_apic.c | 31 +++++------------- 23 files changed, 207 insertions(+), 370 deletions(-) diff --git a/share/man/man9/eventtimers.9 b/share/man/man9/eventtimers.9 index 2f528bf..b43bc49 100644 --- a/share/man/man9/eventtimers.9 +++ b/share/man/man9/eventtimers.9 @@ -1,5 +1,5 @@ .\" -.\" Copyright (c) 2011 Alexander Motin +.\" Copyright (c) 2011-2013 Alexander Motin .\" All rights reserved. 
.\" .\" Redistribution and use in source and binary forms, with or without @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd December 14, 2011 +.Dd February 25, 2013 .Dt EVENTTIMERS 9 .Os .Sh NAME @@ -36,7 +36,7 @@ struct eventtimer; typedef int et_start_t(struct eventtimer *et, - struct bintime *first, struct bintime *period); + sbintime_t first, sbintime_t period); typedef int et_stop_t(struct eventtimer *et); typedef void et_event_cb_t(struct eventtimer *et, void *arg); typedef int et_deregister_cb_t(struct eventtimer *et, void *arg); @@ -53,8 +53,8 @@ struct eventtimer { int et_quality; int et_active; uint64_t et_frequency; - struct bintime et_min_period; - struct bintime et_max_period; + sbintime_t et_min_period; + sbintime_t et_max_period; et_start_t *et_start; et_stop_t *et_stop; et_event_cb_t *et_event_cb; @@ -75,7 +75,7 @@ struct eventtimer { .Ft int .Fn et_init "struct eventtimer *et" "et_event_cb_t *event" "et_deregister_cb_t *deregister" "void *arg" .Ft int -.Fn et_start "struct eventtimer *et" "struct bintime *first" "struct bintime *period" +.Fn et_start "struct eventtimer *et" "sbintime_t first" "sbintime_t period" .Ft int .Fn et_stop "struct eventtimer *et" .Ft int diff --git a/sys/arm/allwinner/timer.c b/sys/arm/allwinner/timer.c index 4da3517..49c5f18 100644 --- a/sys/arm/allwinner/timer.c +++ b/sys/arm/allwinner/timer.c @@ -95,7 +95,7 @@ int a10_timer_get_timerfreq(struct a10_timer_softc *); static u_int a10_timer_get_timecount(struct timecounter *); static int a10_timer_timer_start(struct eventtimer *, - struct bintime *, struct bintime *); + sbintime_t first, sbintime_t period); static int a10_timer_timer_stop(struct eventtimer *); static uint64_t timer_read_counter64(void); @@ -193,12 +193,8 @@ a10_timer_attach(device_t dev) sc->et.et_name = "a10_timer Eventtimer"; sc->et.et_flags = ET_FLAGS_ONESHOT | ET_FLAGS_PERIODIC; sc->et.et_quality = 1000; - sc->et.et_min_period.sec = 0; - sc->et.et_min_period.frac = - ((0x00000005LLU << 32) / 
sc->et.et_frequency) << 32; - sc->et.et_max_period.sec = 0xfffffff0U / sc->et.et_frequency; - sc->et.et_max_period.frac = - ((0xfffffffeLLU << 32) / sc->et.et_frequency) << 32; + sc->et.et_min_period = (0x00000005LLU << 32) / sc->et.et_frequency; + sc->et.et_max_period = (0xfffffffeLLU << 32) / sc->et.et_frequency; sc->et.et_start = a10_timer_timer_start; sc->et.et_stop = a10_timer_timer_stop; sc->et.et_priv = sc; @@ -225,8 +221,8 @@ a10_timer_attach(device_t dev) } static int -a10_timer_timer_start(struct eventtimer *et, struct bintime *first, - struct bintime *period) +a10_timer_timer_start(struct eventtimer *et, sbintime_t first, + sbintime_t period) { struct a10_timer_softc *sc; uint32_t count; @@ -234,26 +230,21 @@ a10_timer_timer_start(struct eventtimer *et, struct bintime *first, sc = (struct a10_timer_softc *)et->et_priv; - sc->sc_period = 0; - - if (period != NULL) { - sc->sc_period = (sc->et.et_frequency * (period->frac >> 32)) >> 32; - sc->sc_period += sc->et.et_frequency * period->sec; - } - if (first == NULL) + if (period != 0) + sc->sc_period = ((uint32_t)et->et_frequency * period) >> 32; + else + sc->sc_period = 0; + if (first != 0) + count = ((uint32_t)et->et_frequency * first) >> 32; + else count = sc->sc_period; - else { - count = (sc->et.et_frequency * (first->frac >> 32)) >> 32; - if (first->sec != 0) - count += sc->et.et_frequency * first->sec; - } /* Update timer values */ timer_write_4(sc, SW_TIMER0_INT_VALUE_REG, sc->sc_period); timer_write_4(sc, SW_TIMER0_CUR_VALUE_REG, count); val = timer_read_4(sc, SW_TIMER0_CTRL_REG); - if (first == NULL) { + if (period != 0) { /* periodic */ val |= TIMER_AUTORELOAD; } else { diff --git a/sys/arm/arm/mpcore_timer.c b/sys/arm/arm/mpcore_timer.c index 41a0b27..8445d3d 100644 --- a/sys/arm/arm/mpcore_timer.c +++ b/sys/arm/arm/mpcore_timer.c @@ -167,31 +167,23 @@ arm_tmr_get_timecount(struct timecounter *tc) * Always returns 0 */ static int -arm_tmr_start(struct eventtimer *et, struct bintime *first, - 
struct bintime *period) +arm_tmr_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { - struct arm_tmr_softc *sc = (struct arm_tmr_softc *)et->et_priv; uint32_t load, count; uint32_t ctrl; ctrl = PRV_TIMER_CTRL_IRQ_ENABLE | PRV_TIMER_CTRL_TIMER_ENABLE; - if (period != NULL) { - load = (et->et_frequency * (period->frac >> 32)) >> 32; - if (period->sec > 0) - load += et->et_frequency * period->sec; + if (period != 0) { + load = ((uint32_t)et->et_frequency * period) >> 32; ctrl |= PRV_TIMER_CTRL_AUTO_RELOAD; - } else { + } else load = 0; - } - if (first != NULL) { - count = (sc->et.et_frequency * (first->frac >> 32)) >> 32; - if (first->sec != 0) - count += sc->et.et_frequency * first->sec; - } else { + if (first != 0) + count = ((uint32_t)et->et_frequency * first) >> 32; + else count = load; - } tmr_prv_write_4(PRV_TIMER_LOAD, load); tmr_prv_write_4(PRV_TIMER_COUNT, count); @@ -330,12 +322,8 @@ arm_tmr_attach(device_t dev) sc->et.et_quality = 1000; sc->et.et_frequency = sc->clkfreq; - sc->et.et_min_period.sec = 0; - sc->et.et_min_period.frac = - ((0x00000002LLU << 32) / sc->et.et_frequency) << 32; - sc->et.et_max_period.sec = 0xfffffff0U / sc->et.et_frequency; - sc->et.et_max_period.frac = - ((0xfffffffeLLU << 32) / sc->et.et_frequency) << 32; + sc->et.et_min_period = (0x00000002LLU << 32) / sc->et.et_frequency; + sc->et.et_max_period = (0xfffffffeLLU << 32) / sc->et.et_frequency; sc->et.et_start = arm_tmr_start; sc->et.et_stop = arm_tmr_stop; sc->et.et_priv = sc; diff --git a/sys/arm/broadcom/bcm2835/bcm2835_systimer.c b/sys/arm/broadcom/bcm2835/bcm2835_systimer.c index 97ec43a..1d7fdda 100644 --- a/sys/arm/broadcom/bcm2835/bcm2835_systimer.c +++ b/sys/arm/broadcom/bcm2835/bcm2835_systimer.c @@ -118,19 +118,16 @@ bcm_systimer_tc_get_timecount(struct timecounter *tc) } static int -bcm_systimer_start(struct eventtimer *et, struct bintime *first, - struct bintime *period) +bcm_systimer_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { 
struct systimer *st = et->et_priv; uint32_t clo; uint32_t count; register_t s; - if (first != NULL) { + if (first != 0) { - count = (st->et.et_frequency * (first->frac >> 32)) >> 32; - if (first->sec != 0) - count += st->et.et_frequency * first->sec; + count = ((uint32_t)et->et_frequency * first) >> 32; s = intr_disable(); clo = bcm_systimer_tc_read_4(SYSTIMER_CLO); @@ -238,12 +235,10 @@ bcm_systimer_attach(device_t dev) sc->st[DEFAULT_TIMER].et.et_flags = ET_FLAGS_ONESHOT; sc->st[DEFAULT_TIMER].et.et_quality = 1000; sc->st[DEFAULT_TIMER].et.et_frequency = sc->sysclk_freq; - sc->st[DEFAULT_TIMER].et.et_min_period.sec = 0; - sc->st[DEFAULT_TIMER].et.et_min_period.frac = - ((MIN_PERIOD << 32) / sc->st[DEFAULT_TIMER].et.et_frequency) << 32; - sc->st[DEFAULT_TIMER].et.et_max_period.sec = 0xfffffff0U / sc->st[DEFAULT_TIMER].et.et_frequency; - sc->st[DEFAULT_TIMER].et.et_max_period.frac = - ((0xfffffffeLLU << 32) / sc->st[DEFAULT_TIMER].et.et_frequency) << 32; + sc->st[DEFAULT_TIMER].et.et_min_period = + (MIN_PERIOD << 32) / sc->st[DEFAULT_TIMER].et.et_frequency; + sc->st[DEFAULT_TIMER].et.et_max_period = + (0xfffffffeLLU << 32) / sc->st[DEFAULT_TIMER].et.et_frequency; sc->st[DEFAULT_TIMER].et.et_start = bcm_systimer_start; sc->st[DEFAULT_TIMER].et.et_stop = bcm_systimer_stop; sc->st[DEFAULT_TIMER].et.et_priv = &sc->st[DEFAULT_TIMER]; diff --git a/sys/arm/lpc/lpc_timer.c b/sys/arm/lpc/lpc_timer.c index 87ed104..8572e1a 100644 --- a/sys/arm/lpc/lpc_timer.c +++ b/sys/arm/lpc/lpc_timer.c @@ -72,8 +72,8 @@ static struct lpc_timer_softc *timer_softc = NULL; static int lpc_timer_initialized = 0; static int lpc_timer_probe(device_t); static int lpc_timer_attach(device_t); -static int lpc_timer_start(struct eventtimer *, struct bintime *first, - struct bintime *); +static int lpc_timer_start(struct eventtimer *, + sbintime_t first, sbintime_t period); static int lpc_timer_stop(struct eventtimer *et); static unsigned lpc_get_timecount(struct timecounter *); static int 
lpc_hardclock(void *); @@ -173,12 +173,8 @@ lpc_timer_attach(device_t dev) sc->lt_et.et_name = "LPC32x0 Timer0"; sc->lt_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT; sc->lt_et.et_quality = 1000; - sc->lt_et.et_min_period.sec = 0; - sc->lt_et.et_min_period.frac = - ((0x00000002LLU << 32) / sc->lt_et.et_frequency) << 32; - sc->lt_et.et_max_period.sec = 0xfffffff0U / sc->lt_et.et_frequency; - sc->lt_et.et_max_period.frac = - ((0xfffffffeLLU << 32) / sc->lt_et.et_frequency) << 32; + sc->lt_et.et_min_period = (0x00000002LLU << 32) / sc->lt_et.et_frequency; + sc->lt_et.et_max_period = (0xfffffffeLLU << 32) / sc->lt_et.et_frequency; sc->lt_et.et_start = lpc_timer_start; sc->lt_et.et_stop = lpc_timer_stop; sc->lt_et.et_priv = sc; @@ -199,27 +195,23 @@ lpc_timer_attach(device_t dev) } static int -lpc_timer_start(struct eventtimer *et, struct bintime *first, - struct bintime *period) +lpc_timer_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { struct lpc_timer_softc *sc = (struct lpc_timer_softc *)et->et_priv; uint32_t ticks; - if (period == NULL) + if (period == 0) { sc->lt_oneshot = 1; - else { + sc->lt_period = 0; + } else { sc->lt_oneshot = 0; - sc->lt_period = (sc->lt_et.et_frequency * (first->frac >> 32)) >> 32; - sc->lt_period += sc->lt_et.et_frequency * first->sec; + sc->lt_period = ((uint32_t)et->et_frequency * period) >> 32; } - if (first == NULL) + if (first == 0) ticks = sc->lt_period; - else { - ticks = (sc->lt_et.et_frequency * (first->frac >> 32)) >> 32; - if (first->sec != 0) - ticks += sc->lt_et.et_frequency * first->sec; - } + else + ticks = ((uint32_t)et->et_frequency * first) >> 32; /* Reset timer */ timer0_write_4(sc, LPC_TIMER_TCR, LPC_TIMER_TCR_RESET); diff --git a/sys/arm/mv/timer.c b/sys/arm/mv/timer.c index db6e404..51a6c17 100644 --- a/sys/arm/mv/timer.c +++ b/sys/arm/mv/timer.c @@ -93,7 +93,7 @@ static void mv_watchdog_enable(void); static void mv_watchdog_disable(void); static void mv_watchdog_event(void *, unsigned int, 
int *); static int mv_timer_start(struct eventtimer *et, - struct bintime *first, struct bintime *period); + sbintime_t first, sbintime_t period); static int mv_timer_stop(struct eventtimer *et); static void mv_setup_timers(void); @@ -168,12 +168,8 @@ mv_timer_attach(device_t dev) sc->et.et_quality = 1000; sc->et.et_frequency = MV_CLOCK_SRC; - sc->et.et_min_period.sec = 0; - sc->et.et_min_period.frac = - ((0x00000002LLU << 32) / sc->et.et_frequency) << 32; - sc->et.et_max_period.sec = 0xfffffff0U / sc->et.et_frequency; - sc->et.et_max_period.frac = - ((0xfffffffeLLU << 32) / sc->et.et_frequency) << 32; + sc->et.et_min_period = (0x00000002LLU << 32) / sc->et.et_frequency; + sc->et.et_max_period = (0xfffffffeLLU << 32) / sc->et.et_frequency; sc->et.et_start = mv_timer_start; sc->et.et_stop = mv_timer_stop; sc->et.et_priv = sc; @@ -394,25 +390,20 @@ mv_watchdog_event(void *arg, unsigned int cmd, int *error) } static int -mv_timer_start(struct eventtimer *et, - struct bintime *first, struct bintime *period) +mv_timer_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { struct mv_timer_softc *sc; uint32_t val, val1; /* Calculate dividers. */ sc = (struct mv_timer_softc *)et->et_priv; - if (period != NULL) { - val = (sc->et.et_frequency * (period->frac >> 32)) >> 32; - if (period->sec != 0) - val += sc->et.et_frequency * period->sec; - } else + if (period != 0) + val = ((uint32_t)sc->et.et_frequency * period) >> 32; + else val = 0; - if (first != NULL) { - val1 = (sc->et.et_frequency * (first->frac >> 32)) >> 32; - if (first->sec != 0) - val1 += sc->et.et_frequency * first->sec; - } else + if (first != 0) + val1 = ((uint32_t)sc->et.et_frequency * first) >> 32; + else val1 = val; /* Apply configuration. 
*/ @@ -420,7 +411,7 @@ mv_timer_start(struct eventtimer *et, mv_set_timer(0, val1); val = mv_get_timer_control(); val |= CPU_TIMER0_EN; - if (period != NULL) + if (period != 0) val |= CPU_TIMER0_AUTO; else val &= ~CPU_TIMER0_AUTO; diff --git a/sys/arm/ti/am335x/am335x_dmtimer.c b/sys/arm/ti/am335x/am335x_dmtimer.c index be9832c..93911f2 100644 --- a/sys/arm/ti/am335x/am335x_dmtimer.c +++ b/sys/arm/ti/am335x/am335x_dmtimer.c @@ -143,30 +143,24 @@ am335x_dmtimer_tc_get_timecount(struct timecounter *tc) } static int -am335x_dmtimer_start(struct eventtimer *et, struct bintime *first, - struct bintime *period) +am335x_dmtimer_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { struct am335x_dmtimer *tmr = (struct am335x_dmtimer *)et->et_priv; uint32_t load, count; uint32_t tclr = 0; - if (period != NULL) { - load = (et->et_frequency * (period->frac >> 32)) >> 32; - if (period->sec > 0) - load += et->et_frequency * period->sec; + if (period != 0) { + load = ((uint32_t)et->et_frequency * period) >> 32; tclr |= 2; /* autoreload bit */ panic("periodic timer not implemented\n"); } else { load = 0; } - if (first != NULL) { - count = (tmr->et.et_frequency * (first->frac >> 32)) >> 32; - if (first->sec != 0) - count += tmr->et.et_frequency * first->sec; - } else { + if (first != 0) + count = ((uint32_t)et->et_frequency * first) >> 32; + else count = load; - } /* Reset Timer */ am335x_dmtimer_et_write_4(DMTIMER_TSICR, 2); @@ -316,12 +310,10 @@ am335x_dmtimer_attach(device_t dev) sc->t[3].et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT; sc->t[3].et.et_quality = 1000; sc->t[3].et.et_frequency = sc->sysclk_freq; - sc->t[3].et.et_min_period.sec = 0; - sc->t[3].et.et_min_period.frac = - ((0x00000002LLU << 32) / sc->t[3].et.et_frequency) << 32; - sc->t[3].et.et_max_period.sec = 0xfffffff0U / sc->t[3].et.et_frequency; - sc->t[3].et.et_max_period.frac = - ((0xfffffffeLLU << 32) / sc->t[3].et.et_frequency) << 32; + sc->t[3].et.et_min_period = + (0x00000002LLU << 32) / 
sc->t[3].et.et_frequency; + sc->t[3].et.et_max_period = + (0xfffffffeLLU << 32) / sc->t[3].et.et_frequency; sc->t[3].et.et_start = am335x_dmtimer_start; sc->t[3].et.et_stop = am335x_dmtimer_stop; sc->t[3].et.et_priv = &sc->t[3]; diff --git a/sys/arm/versatile/sp804.c b/sys/arm/versatile/sp804.c index 82a1889..000ccb6 100644 --- a/sys/arm/versatile/sp804.c +++ b/sys/arm/versatile/sp804.c @@ -120,18 +120,15 @@ sp804_timer_tc_get_timecount(struct timecounter *tc) } static int -sp804_timer_start(struct eventtimer *et, struct bintime *first, - struct bintime *period) +sp804_timer_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { struct sp804_timer_softc *sc = et->et_priv; uint32_t count, reg; - if (first != NULL) { + if (first != 0) { sc->et_enabled = 1; - count = (sc->et.et_frequency * (first->frac >> 32)) >> 32; - if (first->sec != 0) - count += sc->et.et_frequency * first->sec; + count = ((uint32_t)et->et_frequency * first) >> 32; sp804_timer_tc_write_4(SP804_TIMER2_LOAD, count); reg = TIMER_CONTROL_32BIT | TIMER_CONTROL_INTREN | @@ -142,7 +139,7 @@ sp804_timer_start(struct eventtimer *et, struct bintime *first, return (0); } - if (period != NULL) { + if (period != 0) { panic("period"); } @@ -264,12 +261,8 @@ sp804_timer_attach(device_t dev) sc->et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT; sc->et.et_quality = 1000; sc->et.et_frequency = sc->sysclk_freq / DEFAULT_DIVISOR; - sc->et.et_min_period.sec = 0; - sc->et.et_min_period.frac = - ((0x00000002LLU << 32) / sc->et.et_frequency) << 32; - sc->et.et_max_period.sec = 0xfffffff0U / sc->et.et_frequency; - sc->et.et_max_period.frac = - ((0xfffffffeLLU << 32) / sc->et.et_frequency) << 32; + sc->et.et_min_period = (0x00000002LLU << 32) / sc->et.et_frequency; + sc->et.et_max_period = (0xfffffffeLLU << 32) / sc->et.et_frequency; sc->et.et_start = sp804_timer_start; sc->et.et_stop = sp804_timer_stop; sc->et.et_priv = sc; diff --git a/sys/dev/acpica/acpi_hpet.c b/sys/dev/acpica/acpi_hpet.c index 
c00bda4..9b9c966 100644 --- a/sys/dev/acpica/acpi_hpet.c +++ b/sys/dev/acpica/acpi_hpet.c @@ -147,8 +147,7 @@ hpet_disable(struct hpet_softc *sc) } static int -hpet_start(struct eventtimer *et, - struct bintime *first, struct bintime *period) +hpet_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { struct hpet_timer *mt = (struct hpet_timer *)et->et_priv; struct hpet_timer *t; @@ -156,20 +155,16 @@ hpet_start(struct eventtimer *et, uint32_t fdiv, now; t = (mt->pcpu_master < 0) ? mt : &sc->t[mt->pcpu_slaves[curcpu]]; - if (period != NULL) { + if (period != 0) { t->mode = 1; - t->div = (sc->freq * (period->frac >> 32)) >> 32; - if (period->sec != 0) - t->div += sc->freq * period->sec; + t->div = (sc->freq * period) >> 32; } else { t->mode = 2; t->div = 0; } - if (first != NULL) { - fdiv = (sc->freq * (first->frac >> 32)) >> 32; - if (first->sec != 0) - fdiv += sc->freq * first->sec; - } else + if (first != 0) + fdiv = (sc->freq * first) >> 32; + else fdiv = t->div; if (t->irq < 0) bus_write_4(sc->mem_res, HPET_ISR, 1 << t->num); @@ -684,12 +679,9 @@ hpet_attach(device_t dev) if ((t->caps & HPET_TCAP_PER_INT) == 0) t->et.et_quality -= 10; t->et.et_frequency = sc->freq; - t->et.et_min_period.sec = 0; - t->et.et_min_period.frac = - (((uint64_t)(HPET_MIN_CYCLES * 2) << 32) / sc->freq) << 32; - t->et.et_max_period.sec = 0xfffffffeLLU / sc->freq; - t->et.et_max_period.frac = - ((0xfffffffeLLU << 32) / sc->freq) << 32; + t->et.et_min_period = + ((uint64_t)(HPET_MIN_CYCLES * 2) << 32) / sc->freq; + t->et.et_max_period = (0xfffffffeLLU << 32) / sc->freq; t->et.et_start = hpet_start; t->et.et_stop = hpet_stop; t->et.et_priv = &sc->t[i]; diff --git a/sys/i386/xen/clock.c b/sys/i386/xen/clock.c index 4e43a12..a10b546 100644 --- a/sys/i386/xen/clock.c +++ b/sys/i386/xen/clock.c @@ -768,8 +768,7 @@ resettodr() #endif static int -xen_et_start(struct eventtimer *et, - struct bintime *first, struct bintime *period) +xen_et_start(struct eventtimer *et, sbintime_t 
first, sbintime_t period) { struct xen_et_state *state = DPCPU_PTR(et_state); struct shadow_time_info *shadow; @@ -777,21 +776,16 @@ xen_et_start(struct eventtimer *et, __get_time_values_from_xen(); - if (period != NULL) { + if (period != 0) { state->mode = MODE_PERIODIC; - state->period = (1000000000LL * - (uint32_t)(period->frac >> 32)) >> 32; - if (period->sec != 0) - state->period += 1000000000LL * period->sec; + state->period = (1000000000LLU * period) >> 32; } else { state->mode = MODE_ONESHOT; state->period = 0; } - if (first != NULL) { - fperiod = (1000000000LL * (uint32_t)(first->frac >> 32)) >> 32; - if (first->sec != 0) - fperiod += 1000000000LL * first->sec; - } else + if (first != 0) + fperiod = (1000000000LLU * first) >> 32; + else fperiod = state->period; shadow = &per_cpu(shadow_time, smp_processor_id()); @@ -832,11 +826,9 @@ cpu_initclocks(void) xen_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU; xen_et.et_quality = 600; - xen_et.et_frequency = 0; - xen_et.et_min_period.sec = 0; - xen_et.et_min_period.frac = 0x00400000LL << 32; - xen_et.et_max_period.sec = 2; - xen_et.et_max_period.frac = 0; + xen_et.et_frequency = 1000000000; + xen_et.et_min_period = 0x00400000LL; + xen_et.et_max_period = (0xfffffffeLLU << 32) / xen_et.et_frequency; xen_et.et_start = xen_et_start; xen_et.et_stop = xen_et_stop; xen_et.et_priv = NULL; diff --git a/sys/ia64/ia64/clock.c b/sys/ia64/ia64/clock.c index 24623c5..37a99a1 100644 --- a/sys/ia64/ia64/clock.c +++ b/sys/ia64/ia64/clock.c @@ -105,17 +105,14 @@ ia64_ih_clock(struct thread *td, u_int xiv, struct trapframe *tf) * Event timer start method. 
*/ static int -ia64_clock_start(struct eventtimer *et, struct bintime *first, - struct bintime *period) +ia64_clock_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { u_long itc, load; register_t is; - if (period != NULL) { + if (period != 0) { PCPU_SET(md.clock_mode, CLOCK_ET_PERIODIC); - load = (et->et_frequency * (period->frac >> 32)) >> 32; - if (period->sec > 0) - load += et->et_frequency * period->sec; + load = (et->et_frequency * period) >> 32; } else { PCPU_SET(md.clock_mode, CLOCK_ET_ONESHOT); load = 0; @@ -123,11 +120,8 @@ ia64_clock_start(struct eventtimer *et, struct bintime *first, PCPU_SET(md.clock_load, load); - if (first != NULL) { - load = (et->et_frequency * (first->frac >> 32)) >> 32; - if (first->sec > 0) - load += et->et_frequency * first->sec; - } + if (first != 0) + load = (et->et_frequency * first) >> 32; is = intr_disable(); itc = ia64_get_itc(); @@ -185,10 +179,8 @@ clock_configure(void *dummy) et->et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU; et->et_quality = 1000; et->et_frequency = itc_freq; - et->et_min_period.sec = 0; - et->et_min_period.frac = (0x8000000000000000ul / (u_long)(10*hz)) << 1; - et->et_max_period.sec = 0xffffffff; - et->et_max_period.frac = ((0xfffffffeul << 32) / itc_freq) << 32; + et->et_min_period = SBT_1S / (10 * hz); + et->et_max_period = (0xfffffffeul << 32) / itc_freq; et->et_start = ia64_clock_start; et->et_stop = ia64_clock_stop; et->et_priv = NULL; diff --git a/sys/kern/kern_clocksource.c b/sys/kern/kern_clocksource.c index ac7e2de..10732d9 100644 --- a/sys/kern/kern_clocksource.c +++ b/sys/kern/kern_clocksource.c @@ -153,6 +153,8 @@ static DPCPU_DEFINE(struct pcpu_state, timerstate); (((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) / \ ((bt)->frac >> 1)) +#define SBT2FREQ(sbt) ((SBT_1S + ((sbt) >> 1)) / (sbt)) + /* * Timer broadcast IPI handler. 
*/ @@ -442,7 +444,7 @@ loadtimer(struct bintime *now, int start) new.sec, (u_int)(new.frac >> 32)); *next = new; bintime_add(next, now); - et_start(timer, &new, &timerperiod); + et_start(timer, bttosbt(new), bttosbt(timerperiod)); } } else { getnextevent(&new); @@ -454,7 +456,7 @@ loadtimer(struct bintime *now, int start) if (!eq) { *next = new; bintime_sub(&new, now); - et_start(timer, &new, NULL); + et_start(timer, bttosbt(new), 0); } } } @@ -603,13 +605,13 @@ round_freq(struct eventtimer *et, int freq) div = 1 << (flsl(div + div / 2) - 1); freq = (et->et_frequency + div / 2) / div; } - if (et->et_min_period.sec > 0) + if (et->et_min_period > SBT_1S) panic("Event timer \"%s\" doesn't support sub-second periods!", et->et_name); - else if (et->et_min_period.frac != 0) - freq = min(freq, BT2FREQ(&et->et_min_period)); - if (et->et_max_period.sec == 0 && et->et_max_period.frac != 0) - freq = max(freq, BT2FREQ(&et->et_max_period)); + else if (et->et_min_period != 0) + freq = min(freq, SBT2FREQ(et->et_min_period)); + if (et->et_max_period < SBT_1S && et->et_max_period != 0) + freq = max(freq, SBT2FREQ(et->et_max_period)); return (freq); } diff --git a/sys/kern/kern_et.c b/sys/kern/kern_et.c index 3156c81..472db79 100644 --- a/sys/kern/kern_et.c +++ b/sys/kern/kern_et.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2010 Alexander Motin + * Copyright (c) 2010-2013 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -62,6 +62,7 @@ et_register(struct eventtimer *et) et->et_quality); } } + KASSERT(et->et_start, ("et_register: timer has no start function")); et->et_sysctl = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_kern_eventtimer_et), OID_AUTO, et->et_name, CTLFLAG_RW, 0, "event timer description"); @@ -159,43 +160,29 @@ et_init(struct eventtimer *et, et_event_cb_t *event, * period - period of subsequent periodic ticks. 
*/ int -et_start(struct eventtimer *et, - struct bintime *first, struct bintime *period) +et_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { if (!et->et_active) return (ENXIO); - if (first == NULL && period == NULL) - return (EINVAL); - if ((et->et_flags & ET_FLAGS_PERIODIC) == 0 && - period != NULL) - return (ENODEV); - if ((et->et_flags & ET_FLAGS_ONESHOT) == 0 && - period == NULL) - return (ENODEV); - if (first != NULL) { - if (first->sec < et->et_min_period.sec || - (first->sec == et->et_min_period.sec && - first->frac < et->et_min_period.frac)) - first = &et->et_min_period; - if (first->sec > et->et_max_period.sec || - (first->sec == et->et_max_period.sec && - first->frac > et->et_max_period.frac)) - first = &et->et_max_period; + KASSERT(period >= 0, ("et_start: negative period")); + KASSERT((et->et_flags & ET_FLAGS_PERIODIC) || period == 0, + ("et_start: period specified for oneshot-only timer")); + KASSERT((et->et_flags & ET_FLAGS_ONESHOT) && period == 0, + ("et_start: period not specified for periodic-only timer")); + if (period != 0) { + if (period < et->et_min_period) + period = et->et_min_period; + else if (period > et->et_max_period) + period = et->et_max_period; } - if (period != NULL) { - if (period->sec < et->et_min_period.sec || - (period->sec == et->et_min_period.sec && - period->frac < et->et_min_period.frac)) - period = &et->et_min_period; - if (period->sec > et->et_max_period.sec || - (period->sec == et->et_max_period.sec && - period->frac > et->et_max_period.frac)) - period = &et->et_max_period; + if (period == 0 || first != 0) { + if (first < et->et_min_period) + first = et->et_min_period; + else if (first > et->et_max_period) + first = et->et_max_period; } - if (et->et_start) - return (et->et_start(et, first, period)); - return (0); + return (et->et_start(et, first, period)); } /* Stop event timer hardware. 
*/ diff --git a/sys/mips/mips/tick.c b/sys/mips/mips/tick.c index 50f4f24..1dc3e7a 100644 --- a/sys/mips/mips/tick.c +++ b/sys/mips/mips/tick.c @@ -217,22 +217,17 @@ DELAY(int n) } static int -clock_start(struct eventtimer *et, - struct bintime *first, struct bintime *period) +clock_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { uint32_t fdiv, div, next; - if (period != NULL) { - div = (et->et_frequency * (period->frac >> 32)) >> 32; - if (period->sec != 0) - div += et->et_frequency * period->sec; + if (period != 0) { + div = (et->et_frequency * period) >> 32; } else div = 0; - if (first != NULL) { - fdiv = (et->et_frequency * (first->frac >> 32)) >> 32; - if (first->sec != 0) - fdiv += et->et_frequency * first->sec; - } else + if (first != 0) + fdiv = (et->et_frequency * first) >> 32; + else fdiv = div; DPCPU_SET(cycles_per_tick, div); next = mips_rd_count() + fdiv; @@ -361,11 +356,8 @@ clock_attach(device_t dev) ET_FLAGS_PERCPU; sc->et.et_quality = 800; sc->et.et_frequency = counter_freq; - sc->et.et_min_period.sec = 0; - sc->et.et_min_period.frac = 0x00004000LLU << 32; /* To be safe. */ - sc->et.et_max_period.sec = 0xfffffffeU / sc->et.et_frequency; - sc->et.et_max_period.frac = - ((0xfffffffeLLU << 32) / sc->et.et_frequency) << 32; + sc->et.et_min_period = 0x00004000LLU; /* To be safe. 
*/ + sc->et.et_max_period = (0xfffffffeLLU << 32) / sc->et.et_frequency; sc->et.et_start = clock_start; sc->et.et_stop = clock_stop; sc->et.et_priv = sc; diff --git a/sys/mips/nlm/tick.c b/sys/mips/nlm/tick.c index 09f3bb6..b5a8c36 100644 --- a/sys/mips/nlm/tick.c +++ b/sys/mips/nlm/tick.c @@ -222,22 +222,17 @@ DELAY(int n) } static int -clock_start(struct eventtimer *et, - struct bintime *first, struct bintime *period) +clock_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { uint32_t fdiv, div, next; - if (period != NULL) { - div = (et->et_frequency * (period->frac >> 32)) >> 32; - if (period->sec != 0) - div += et->et_frequency * period->sec; - } else + if (period != 0) + div = (et->et_frequency * period) >> 32; + else div = 0; - if (first != NULL) { - fdiv = (et->et_frequency * (first->frac >> 32)) >> 32; - if (first->sec != 0) - fdiv += et->et_frequency * first->sec; - } else + if (first != 0) + fdiv = (et->et_frequency * first) >> 32; + else fdiv = div; DPCPU_SET(cycles_per_tick, div); next = mips_rd_count() + fdiv; @@ -357,11 +352,8 @@ clock_attach(device_t dev) sc->et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_PERCPU; sc->et.et_quality = 800; sc->et.et_frequency = counter_freq; - sc->et.et_min_period.sec = 0; - sc->et.et_min_period.frac = 0x00004000LLU << 32; /* To be safe. */ - sc->et.et_max_period.sec = 0xfffffffeU / sc->et.et_frequency; - sc->et.et_max_period.frac = - ((0xfffffffeLLU << 32) / sc->et.et_frequency) << 32; + sc->et.et_min_period = 0x00004000LLU; /* To be safe. 
*/ + sc->et.et_max_period = (0xfffffffeLLU << 32) / sc->et.et_frequency; sc->et.et_start = clock_start; sc->et.et_stop = clock_stop; sc->et.et_priv = sc; diff --git a/sys/mips/rmi/tick.c b/sys/mips/rmi/tick.c index 39f0a04..adc8c89 100644 --- a/sys/mips/rmi/tick.c +++ b/sys/mips/rmi/tick.c @@ -219,22 +219,17 @@ DELAY(int n) } static int -clock_start(struct eventtimer *et, - struct bintime *first, struct bintime *period) +clock_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { uint32_t fdiv, div, next; - if (period != NULL) { - div = (et->et_frequency * (period->frac >> 32)) >> 32; - if (period->sec != 0) - div += et->et_frequency * period->sec; - } else + if (period != 0) + div = (et->et_frequency * period) >> 32; + else div = 0; - if (first != NULL) { - fdiv = (et->et_frequency * (first->frac >> 32)) >> 32; - if (first->sec != 0) - fdiv += et->et_frequency * first->sec; - } else + if (first != 0) + fdiv = (et->et_frequency * first) >> 32; + else fdiv = div; DPCPU_SET(cycles_per_tick, div); next = mips_rd_count() + fdiv; @@ -351,11 +346,8 @@ clock_attach(device_t dev) ET_FLAGS_PERCPU; sc->et.et_quality = 800; sc->et.et_frequency = counter_freq; - sc->et.et_min_period.sec = 0; - sc->et.et_min_period.frac = 0x00004000LLU << 32; /* To be safe. */ - sc->et.et_max_period.sec = 0xfffffffeU / sc->et.et_frequency; - sc->et.et_max_period.frac = - ((0xfffffffeLLU << 32) / sc->et.et_frequency) << 32; + sc->et.et_min_period = 0x00004000LLU; /* To be safe. 
*/ + sc->et.et_max_period = (0xfffffffeLLU << 32) / sc->et.et_frequency; sc->et.et_start = clock_start; sc->et.et_stop = clock_stop; sc->et.et_priv = sc; diff --git a/sys/powerpc/aim/clock.c b/sys/powerpc/aim/clock.c index 0bf7cfa..ae98d26 100644 --- a/sys/powerpc/aim/clock.c +++ b/sys/powerpc/aim/clock.c @@ -85,7 +85,7 @@ static u_long ticks_per_sec = 12500000; static u_long *decr_counts[MAXCPU]; static int decr_et_start(struct eventtimer *et, - struct bintime *first, struct bintime *period); + sbintime_t first, sbintime_t period); static int decr_et_stop(struct eventtimer *et); static timecounter_get_t decr_get_timecount; @@ -195,12 +195,8 @@ decr_tc_init(void) ET_FLAGS_PERCPU; decr_et.et_quality = 1000; decr_et.et_frequency = ticks_per_sec; - decr_et.et_min_period.sec = 0; - decr_et.et_min_period.frac = - ((0x00000002LLU << 32) / ticks_per_sec) << 32; - decr_et.et_max_period.sec = 0x7fffffffLLU / ticks_per_sec; - decr_et.et_max_period.frac = - ((0x7fffffffLLU << 32) / ticks_per_sec) << 32; + decr_et.et_min_period = (0x00000002LLU << 32) / ticks_per_sec; + decr_et.et_max_period = (0x7fffffffLLU << 32) / ticks_per_sec; decr_et.et_start = decr_et_start; decr_et.et_stop = decr_et_stop; decr_et.et_priv = NULL; @@ -212,24 +208,20 @@ decr_tc_init(void) */ static int decr_et_start(struct eventtimer *et, - struct bintime *first, struct bintime *period) + sbintime_t first, sbintime_t period) { struct decr_state *s = DPCPU_PTR(decr_state); uint32_t fdiv; - if (period != NULL) { + if (period != 0) { s->mode = 1; - s->div = (decr_et.et_frequency * (period->frac >> 32)) >> 32; - if (period->sec != 0) - s->div += decr_et.et_frequency * period->sec; + s->div = (decr_et.et_frequency * period) >> 32; } else { s->mode = 2; - s->div = 0x7fffffff; + s->div = 0; } - if (first != NULL) { - fdiv = (decr_et.et_frequency * (first->frac >> 32)) >> 32; - if (first->sec != 0) - fdiv += decr_et.et_frequency * first->sec; + if (first != 0) { + fdiv = (decr_et.et_frequency * first) >> 32; } 
else fdiv = s->div; diff --git a/sys/powerpc/booke/clock.c b/sys/powerpc/booke/clock.c index 9958160..5827e73 100644 --- a/sys/powerpc/booke/clock.c +++ b/sys/powerpc/booke/clock.c @@ -88,7 +88,7 @@ static u_long *decr_counts[MAXCPU]; #define DIFF19041970 2082844800 static int decr_et_start(struct eventtimer *et, - struct bintime *first, struct bintime *period); + sbintime_t first, sbintime_t period); static int decr_et_stop(struct eventtimer *et); static timecounter_get_t decr_get_timecount; @@ -193,12 +193,8 @@ decr_tc_init(void) ET_FLAGS_PERCPU; decr_et.et_quality = 1000; decr_et.et_frequency = ticks_per_sec; - decr_et.et_min_period.sec = 0; - decr_et.et_min_period.frac = - ((0x00000002LLU << 32) / ticks_per_sec) << 32; - decr_et.et_max_period.sec = 0xfffffffeLLU / ticks_per_sec; - decr_et.et_max_period.frac = - ((0xfffffffeLLU << 32) / ticks_per_sec) << 32; + decr_et.et_min_period = (0x00000002LLU << 32) / ticks_per_sec; + decr_et.et_max_period = (0xfffffffeLLU << 32) / ticks_per_sec; decr_et.et_start = decr_et_start; decr_et.et_stop = decr_et_stop; decr_et.et_priv = NULL; @@ -209,26 +205,21 @@ decr_tc_init(void) * Event timer start method. 
*/ static int -decr_et_start(struct eventtimer *et, - struct bintime *first, struct bintime *period) +decr_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { struct decr_state *s = DPCPU_PTR(decr_state); uint32_t fdiv, tcr; - if (period != NULL) { + if (period != 0) { s->mode = 1; - s->div = (decr_et.et_frequency * (period->frac >> 32)) >> 32; - if (period->sec != 0) - s->div += decr_et.et_frequency * period->sec; + s->div = (decr_et.et_frequency * period) >> 32; } else { s->mode = 2; - s->div = 0xffffffff; + s->div = 0; } - if (first != NULL) { - fdiv = (decr_et.et_frequency * (first->frac >> 32)) >> 32; - if (first->sec != 0) - fdiv += decr_et.et_frequency * first->sec; - } else + if (first != 0) + fdiv = (decr_et.et_frequency * first) >> 32; + else fdiv = s->div; tcr = mfspr(SPR_TCR); diff --git a/sys/sparc64/sparc64/tick.c b/sys/sparc64/sparc64/tick.c index cfd5776..130efa6 100644 --- a/sys/sparc64/sparc64/tick.c +++ b/sys/sparc64/sparc64/tick.c @@ -93,8 +93,8 @@ static timecounter_get_t stick_get_timecount_mp; static timecounter_get_t stick_get_timecount_up; static rd_tick_t stick_rd; static wr_tick_cmpr_t stick_wr_cmpr; -static int tick_et_start(struct eventtimer *et, struct bintime *first, - struct bintime *period); +static int tick_et_start(struct eventtimer *et, sbintime_t first, + sbintime_t period); static int tick_et_stop(struct eventtimer *et); #ifdef SMP static timecounter_get_t tick_get_timecount_mp; @@ -227,10 +227,8 @@ cpu_initclocks(void) ET_FLAGS_PERCPU; tick_et.et_quality = 1000; tick_et.et_frequency = tick_et_use_stick ? sclock : clock; - tick_et.et_min_period.sec = 0; - tick_et.et_min_period.frac = 0x00010000LLU << 32; /* To be safe. */ - tick_et.et_max_period.sec = 3600 * 24; /* No practical limit. */ - tick_et.et_max_period.frac = 0; + tick_et.et_min_period = 0x00010000LLU; /* To be safe. 
*/ + tick_et.et_max_period = (0xfffffffeLLU << 32) / tick_et.et_frequency; tick_et.et_start = tick_et_start; tick_et.et_stop = tick_et_stop; tick_et.et_priv = NULL; @@ -355,23 +353,18 @@ tick_get_timecount_mp(struct timecounter *tc) #endif static int -tick_et_start(struct eventtimer *et, struct bintime *first, - struct bintime *period) +tick_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { u_long base, div, fdiv; register_t s; - if (period != NULL) { - div = (tick_et.et_frequency * (period->frac >> 32)) >> 32; - if (period->sec != 0) - div += tick_et.et_frequency * period->sec; - } else + if (period != 0) + div = (tick_et.et_frequency * period) >> 32; + else div = 0; - if (first != NULL) { - fdiv = (tick_et.et_frequency * (first->frac >> 32)) >> 32; - if (first->sec != 0) - fdiv += tick_et.et_frequency * first->sec; - } else + if (first != 0) + fdiv = (tick_et.et_frequency * first) >> 32; + else fdiv = div; PCPU_SET(tickincrement, div); diff --git a/sys/sys/timeet.h b/sys/sys/timeet.h index 87392a2..23a170c 100644 --- a/sys/sys/timeet.h +++ b/sys/sys/timeet.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2010 Alexander Motin + * Copyright (c) 2010-2013 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -45,7 +45,7 @@ struct eventtimer; typedef int et_start_t(struct eventtimer *et, - struct bintime *first, struct bintime *period); + sbintime_t first, sbintime_t period); typedef int et_stop_t(struct eventtimer *et); typedef void et_event_cb_t(struct eventtimer *et, void *arg); typedef int et_deregister_cb_t(struct eventtimer *et, void *arg); @@ -70,8 +70,8 @@ struct eventtimer { int et_active; u_int64_t et_frequency; /* Base frequency in Hz. 
*/ - struct bintime et_min_period; - struct bintime et_max_period; + sbintime_t et_min_period; + sbintime_t et_max_period; et_start_t *et_start; et_stop_t *et_stop; et_event_cb_t *et_event_cb; @@ -93,8 +93,7 @@ int et_deregister(struct eventtimer *et); struct eventtimer *et_find(const char *name, int check, int want); int et_init(struct eventtimer *et, et_event_cb_t *event, et_deregister_cb_t *deregister, void *arg); -int et_start(struct eventtimer *et, - struct bintime *first, struct bintime *period); +int et_start(struct eventtimer *et, sbintime_t first, sbintime_t period); int et_stop(struct eventtimer *et); int et_ban(struct eventtimer *et); int et_free(struct eventtimer *et); diff --git a/sys/x86/isa/atrtc.c b/sys/x86/isa/atrtc.c index c6a6f8c..69c2157 100644 --- a/sys/x86/isa/atrtc.c +++ b/sys/x86/isa/atrtc.c @@ -164,11 +164,10 @@ struct atrtc_softc { }; static int -rtc_start(struct eventtimer *et, - struct bintime *first, struct bintime *period) +rtc_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { - atrtc_rate(max(fls((period->frac + (period->frac >> 1)) >> 32) - 17, 1)); + atrtc_rate(max(fls(period + (period >> 1)) - 17, 1)); atrtc_enable_intr(); return (0); } @@ -277,10 +276,8 @@ atrtc_attach(device_t dev) sc->et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_POW2DIV; sc->et.et_quality = 0; sc->et.et_frequency = 32768; - sc->et.et_min_period.sec = 0; - sc->et.et_min_period.frac = 0x0008LLU << 48; - sc->et.et_max_period.sec = 0; - sc->et.et_max_period.frac = 0x8000LLU << 48; + sc->et.et_min_period = 0x00080000; + sc->et.et_max_period = 0x80000000; sc->et.et_start = rtc_start; sc->et.et_stop = rtc_stop; sc->et.et_priv = dev; diff --git a/sys/x86/isa/clock.c b/sys/x86/isa/clock.c index 232c913..29ec02d 100644 --- a/sys/x86/isa/clock.c +++ b/sys/x86/isa/clock.c @@ -589,18 +589,17 @@ i8254_get_timecount(struct timecounter *tc) } static int -attimer_start(struct eventtimer *et, - struct bintime *first, struct bintime *period) +attimer_start(struct 
eventtimer *et, sbintime_t first, sbintime_t period) { device_t dev = (device_t)et->et_priv; struct attimer_softc *sc = device_get_softc(dev); - if (period != NULL) { + if (period != 0) { sc->mode = MODE_PERIODIC; - sc->period = period->frac >> 32; + sc->period = period; } else { sc->mode = MODE_ONESHOT; - sc->period = first->frac >> 32; + sc->period = first; } if (!sc->intr_en) { i8254_intsrc->is_pic->pic_enable_source(i8254_intsrc); @@ -755,12 +754,8 @@ attimer_attach(device_t dev) sc->et.et_flags |= ET_FLAGS_ONESHOT; sc->et.et_quality = 100; sc->et.et_frequency = i8254_freq; - sc->et.et_min_period.sec = 0; - sc->et.et_min_period.frac = - ((0x0002LLU << 48) / i8254_freq) << 16; - sc->et.et_max_period.sec = 0xffff / i8254_freq; - sc->et.et_max_period.frac = - ((0xfffeLLU << 48) / i8254_freq) << 16; + sc->et.et_min_period = (0x0002LLU << 32) / i8254_freq; + sc->et.et_max_period = (0xfffeLLU << 32) / i8254_freq; sc->et.et_start = attimer_start; sc->et.et_stop = attimer_stop; sc->et.et_priv = dev; diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c index c60db22..ac87ebd 100644 --- a/sys/x86/x86/local_apic.c +++ b/sys/x86/x86/local_apic.c @@ -169,7 +169,7 @@ static void lapic_timer_stop(struct lapic *); static void lapic_timer_set_divisor(u_int divisor); static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value); static int lapic_et_start(struct eventtimer *et, - struct bintime *first, struct bintime *period); + sbintime_t first, sbintime_t period); static int lapic_et_stop(struct eventtimer *et); struct pic lapic_pic = { .pic_resume = lapic_resume }; @@ -268,10 +268,8 @@ lapic_init(vm_paddr_t addr) } lapic_et.et_frequency = 0; /* We don't know frequency yet, so trying to guess. 
*/ - lapic_et.et_min_period.sec = 0; - lapic_et.et_min_period.frac = 0x00001000LL << 32; - lapic_et.et_max_period.sec = 1; - lapic_et.et_max_period.frac = 0; + lapic_et.et_min_period = 0x00001000LL; + lapic_et.et_max_period = SBT_1S; lapic_et.et_start = lapic_et_start; lapic_et.et_stop = lapic_et_stop; lapic_et.et_priv = NULL; @@ -487,8 +485,7 @@ lapic_disable_pmc(void) } static int -lapic_et_start(struct eventtimer *et, - struct bintime *first, struct bintime *period) +lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { struct lapic *la; u_long value; @@ -513,28 +510,18 @@ lapic_et_start(struct eventtimer *et, printf("lapic: Divisor %lu, Frequency %lu Hz\n", lapic_timer_divisor, value); et->et_frequency = value; - et->et_min_period.sec = 0; - et->et_min_period.frac = - ((0x00000002LLU << 32) / et->et_frequency) << 32; - et->et_max_period.sec = 0xfffffffeLLU / et->et_frequency; - et->et_max_period.frac = - ((0xfffffffeLLU << 32) / et->et_frequency) << 32; + et->et_min_period = (0x00000002LLU << 32) / et->et_frequency; + et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency; } if (la->la_timer_mode == 0) lapic_timer_set_divisor(lapic_timer_divisor); - if (period != NULL) { + if (period != 0) { la->la_timer_mode = 1; - la->la_timer_period = - (et->et_frequency * (period->frac >> 32)) >> 32; - if (period->sec != 0) - la->la_timer_period += et->et_frequency * period->sec; + la->la_timer_period = ((uint32_t)et->et_frequency * period) >> 32; lapic_timer_periodic(la, la->la_timer_period, 1); } else { la->la_timer_mode = 2; - la->la_timer_period = - (et->et_frequency * (first->frac >> 32)) >> 32; - if (first->sec != 0) - la->la_timer_period += et->et_frequency * first->sec; + la->la_timer_period = ((uint32_t)et->et_frequency * first) >> 32; lapic_timer_oneshot(la, la->la_timer_period, 1); } return (0); -- cgit v1.1 From 9f70262c678d34ac22ebb61da9b7ae1f7b9dffd4 Mon Sep 17 00:00:00 2001 From: davide Date: Thu, 28 Feb 2013 16:22:49 +0000 
Subject: MFcalloutng: Style fixes. --- sys/kern/kern_timeout.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c index 80933fa..9cdc39b 100644 --- a/sys/kern/kern_timeout.c +++ b/sys/kern/kern_timeout.c @@ -100,7 +100,7 @@ struct cc_mig_ent { int ce_migration_ticks; #endif }; - + /* * There is one struct callout_cpu per cpu, holding all relevant * state for the callout processing thread on the individual CPU. @@ -613,7 +613,7 @@ skip: } /* - * The callout mechanism is based on the work of Adam M. Costello and + * The callout mechanism is based on the work of Adam M. Costello and * George Varghese, published in a technical report entitled "Redesigning * the BSD Callout and Timer Facilities" and modified slightly for inclusion * in FreeBSD by Justin T. Gibbs. The original work on the data structures -- cgit v1.1 From 9ccbe7b6a4a12fa62e63d80ada8c85930e0c3cba Mon Sep 17 00:00:00 2001 From: davide Date: Thu, 28 Feb 2013 17:10:30 +0000 Subject: Move the definition of sbintime_t type from <sys/time.h> to <sys/types.h>. With this change we prevent gross namespace pollution.
Reported by: bde Suggested by: attilio --- sys/sys/time.h | 1 - sys/sys/types.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sys/sys/time.h b/sys/sys/time.h index 927cecc..555c37e 100644 --- a/sys/sys/time.h +++ b/sys/sys/time.h @@ -109,7 +109,6 @@ bintime_mul(struct bintime *bt, u_int x) ((a)->frac cmp (b)->frac) : \ ((a)->sec cmp (b)->sec)) -typedef int64_t sbintime_t; #define SBT_1S ((sbintime_t)1 << 32) #define SBT_1M (SBT_1S * 60) #define SBT_1MS (SBT_1S / 1000) diff --git a/sys/sys/types.h b/sys/sys/types.h index 491e99d..cc0bca8 100644 --- a/sys/sys/types.h +++ b/sys/sys/types.h @@ -188,6 +188,8 @@ typedef __rlim_t rlim_t; /* resource limit */ #define _RLIM_T_DECLARED #endif +typedef __int64_t sbintime_t; + typedef __segsz_t segsz_t; /* segment size (in pages) */ #ifndef _SIZE_T_DECLARED -- cgit v1.1 From 9b1d7b3ae47bf2aa81c07ad2179ef27132327f2a Mon Sep 17 00:00:00 2001 From: gonzo Date: Thu, 28 Feb 2013 19:43:14 +0000 Subject: Add hooks for plugging platform-provided transfer backend. In order to use the platform backend, the hardware driver should implement three methods: - platform_start_transfer and platform_finish_transfer to start and finish transfer - platform_will_handle - check whether transaction is suitable for backend. If not - driver will fall back to PIO mode.
Submitted by: Daisuke Aoyama Approved by: ian@ --- sys/dev/sdhci/sdhci.c | 34 ++++++++++++++++++++++++++++------ sys/dev/sdhci/sdhci.h | 6 +++++- sys/dev/sdhci/sdhci_if.m | 16 ++++++++++++++++ 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/sys/dev/sdhci/sdhci.c b/sys/dev/sdhci/sdhci.c index 1044bb6..15199f0 100644 --- a/sys/dev/sdhci/sdhci.c +++ b/sys/dev/sdhci/sdhci.c @@ -573,6 +573,13 @@ sdhci_init_slot(device_t dev, struct sdhci_slot *slot, int num) if (slot->quirks & SDHCI_QUIRK_FORCE_DMA) slot->opt |= SDHCI_HAVE_DMA; + /* + * Use platform-provided transfer backend + * with PIO as a fallback mechanism + */ + if (slot->opt & SDHCI_PLATFORM_TRANSFER) + slot->opt &= ~SDHCI_HAVE_DMA; + if (bootverbose || sdhci_debug) { slot_printf(slot, "%uMHz%s 4bits%s%s%s %s\n", slot->max_clk / 1000000, @@ -909,7 +916,7 @@ sdhci_start_data(struct sdhci_slot *slot, struct mmc_data *data) WR2(slot, SDHCI_BLOCK_COUNT, (data->len + 511) / 512); } -static void +void sdhci_finish_data(struct sdhci_slot *slot) { struct mmc_data *data = slot->curcmd->data; @@ -1102,13 +1109,23 @@ sdhci_data_irq(struct sdhci_slot *slot, uint32_t intmask) } if (slot->curcmd->error) { /* No need to continue after any error. */ - sdhci_finish_data(slot); + if (slot->flags & PLATFORM_DATA_STARTED) { + slot->flags &= ~PLATFORM_DATA_STARTED; + SDHCI_PLATFORM_FINISH_TRANSFER(slot->bus, slot); + } else + sdhci_finish_data(slot); return; } /* Handle PIO interrupt. */ - if (intmask & (SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL)) - sdhci_transfer_pio(slot); + if (intmask & (SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL)) { + if ((slot->opt & SDHCI_PLATFORM_TRANSFER) && + SDHCI_PLATFORM_WILL_HANDLE(slot->bus, slot)) { + SDHCI_PLATFORM_START_TRANSFER(slot->bus, slot, &intmask); + slot->flags |= PLATFORM_DATA_STARTED; + } else + sdhci_transfer_pio(slot); + } /* Handle DMA border. 
*/ if (intmask & SDHCI_INT_DMA_END) { struct mmc_data *data = slot->curcmd->data; @@ -1147,8 +1164,13 @@ sdhci_data_irq(struct sdhci_slot *slot, uint32_t intmask) WR4(slot, SDHCI_DMA_ADDRESS, slot->paddr); } /* We have got all data. */ - if (intmask & SDHCI_INT_DATA_END) - sdhci_finish_data(slot); + if (intmask & SDHCI_INT_DATA_END) { + if (slot->flags & PLATFORM_DATA_STARTED) { + slot->flags &= ~PLATFORM_DATA_STARTED; + SDHCI_PLATFORM_FINISH_TRANSFER(slot->bus, slot); + } else + sdhci_finish_data(slot); + } } static void diff --git a/sys/dev/sdhci/sdhci.h b/sys/dev/sdhci/sdhci.h index 31fbe68..5c46778 100644 --- a/sys/dev/sdhci/sdhci.h +++ b/sys/dev/sdhci/sdhci.h @@ -224,8 +224,9 @@ struct sdhci_slot { device_t dev; /* Slot device */ u_char num; /* Slot number */ u_char opt; /* Slot options */ +#define SDHCI_HAVE_DMA 1 +#define SDHCI_PLATFORM_TRANSFER 2 u_char version; -#define SDHCI_HAVE_DMA 1 uint32_t max_clk; /* Max possible freq */ uint32_t timeout_clk; /* Timeout freq */ bus_dma_tag_t dmatag; @@ -250,6 +251,7 @@ struct sdhci_slot { #define CMD_STARTED 1 #define STOP_STARTED 2 #define SDHCI_USE_DMA 4 /* Use DMA for this req. 
*/ +#define PLATFORM_DATA_STARTED 8 /* Data transfer is handled by platform */ struct mtx mtx; /* Slot mutex */ }; @@ -257,6 +259,8 @@ int sdhci_generic_read_ivar(device_t bus, device_t child, int which, uintptr_t * int sdhci_generic_write_ivar(device_t bus, device_t child, int which, uintptr_t value); int sdhci_init_slot(device_t dev, struct sdhci_slot *slot, int num); void sdhci_start_slot(struct sdhci_slot *slot); +/* performs generic clean-up for platform transfers */ +void sdhci_finish_data(struct sdhci_slot *slot); int sdhci_cleanup_slot(struct sdhci_slot *slot); int sdhci_generic_suspend(struct sdhci_slot *slot); int sdhci_generic_resume(struct sdhci_slot *slot); diff --git a/sys/dev/sdhci/sdhci_if.m b/sys/dev/sdhci/sdhci_if.m index b434fd2..f0b7567 100644 --- a/sys/dev/sdhci/sdhci_if.m +++ b/sys/dev/sdhci/sdhci_if.m @@ -131,6 +131,22 @@ METHOD void write_multi_4 { bus_size_t count; } +METHOD int platform_will_handle { + device_t brdev; + struct sdhci_slot *slot; +} + +METHOD void platform_start_transfer { + device_t brdev; + struct sdhci_slot *slot; + uint32_t *intmask; +} + +METHOD void platform_finish_transfer { + device_t brdev; + struct sdhci_slot *slot; +} + METHOD uint32_t min_freq { device_t brdev; struct sdhci_slot *slot; -- cgit v1.1 From 80b147459e115635bcd4e433faf50effc6910a03 Mon Sep 17 00:00:00 2001 From: gonzo Date: Thu, 28 Feb 2013 19:48:19 +0000 Subject: Add driver for BCM2835's DMA engine This is a version of code submitted by Daisuke Aoyama with some architectural changes. 
--- sys/arm/broadcom/bcm2835/bcm2835_dma.c | 727 +++++++++++++++++++++++++++++++++ sys/arm/broadcom/bcm2835/bcm2835_dma.h | 62 +++ sys/arm/broadcom/bcm2835/files.bcm2835 | 1 + 3 files changed, 790 insertions(+) create mode 100644 sys/arm/broadcom/bcm2835/bcm2835_dma.c create mode 100644 sys/arm/broadcom/bcm2835/bcm2835_dma.h diff --git a/sys/arm/broadcom/bcm2835/bcm2835_dma.c b/sys/arm/broadcom/bcm2835/bcm2835_dma.c new file mode 100644 index 0000000..5e1c9dc --- /dev/null +++ b/sys/arm/broadcom/bcm2835/bcm2835_dma.c @@ -0,0 +1,727 @@ +/* + * Copyright (c) 2013 Daisuke Aoyama + * Copyright (c) 2013 Oleksandr Tymoshenko + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "bcm2835_dma.h" +#include "bcm2835_vcbus.h" + +#define MAX_REG 9 + +/* private flags */ +#define BCM_DMA_CH_USED 0x00000001 +#define BCM_DMA_CH_FREE 0x40000000 +#define BCM_DMA_CH_UNMAP 0x80000000 + +/* Register Map (4.2.1.2) */ +#define BCM_DMA_CS(n) (0x100*(n) + 0x00) +#define CS_ACTIVE (1 << 0) +#define CS_END (1 << 1) +#define CS_INT (1 << 2) +#define CS_DREQ (1 << 3) +#define CS_ISPAUSED (1 << 4) +#define CS_ISHELD (1 << 5) +#define CS_ISWAIT (1 << 6) +#define CS_ERR (1 << 8) +#define CS_WAITWRT (1 << 28) +#define CS_DISDBG (1 << 29) +#define CS_ABORT (1 << 30) +#define CS_RESET (1 << 31) +#define BCM_DMA_CBADDR(n) (0x100*(n) + 0x04) +#define BCM_DMA_INFO(n) (0x100*(n) + 0x08) +#define INFO_INT_EN (1 << 0) +#define INFO_TDMODE (1 << 1) +#define INFO_WAIT_RESP (1 << 3) +#define INFO_D_INC (1 << 4) +#define INFO_D_WIDTH (1 << 5) +#define INFO_D_DREQ (1 << 6) +#define INFO_S_INC (1 << 8) +#define INFO_S_WIDTH (1 << 9) +#define INFO_S_DREQ (1 << 10) +#define INFO_WAITS_SHIFT (21) +#define INFO_PERMAP_SHIFT (16) +#define INFO_PERMAP_MASK (0x1f << INFO_PERMAP_SHIFT) + +#define BCM_DMA_SRC(n) (0x100*(n) + 0x0C) +#define BCM_DMA_DST(n) (0x100*(n) + 0x10) +#define BCM_DMA_LEN(n) (0x100*(n) + 0x14) +#define BCM_DMA_STRIDE(n) (0x100*(n) + 0x18) +#define BCM_DMA_CBNEXT(n) (0x100*(n) + 0x1C) +#define BCM_DMA_DEBUG(n) (0x100*(n) + 0x20) +#define DEBUG_ERROR_MASK (7) + +#define BCM_DMA_INT_STATUS 0xfe0 +#define BCM_DMA_ENABLE 0xff0 + +/* relative offset from BCM_VC_DMA0_BASE (p.39) */ +#define BCM_DMA_CH(n) (0x100*(n)) + +/* DMA Control Block - 256bit aligned (p.40) */ +struct bcm_dma_cb { + uint32_t info; /* Transfer Information */ + uint32_t src; /* Source Address */ + uint32_t dst; /* Destination Address 
*/ + uint32_t len; /* Transfer Length */ + uint32_t stride; /* 2D Mode Stride */ + uint32_t next; /* Next Control Block Address */ + uint32_t rsvd1; /* Reserved */ + uint32_t rsvd2; /* Reserved */ +}; + +#ifdef DEBUG +static void bcm_dma_cb_dump(struct bcm_dma_cb *cb); +static void bcm_dma_reg_dump(int ch); +#endif + +/* DMA channel private info */ +struct bcm_dma_ch { + int ch; + uint32_t flags; + struct bcm_dma_cb * cb; + uint32_t vc_cb; + bus_dmamap_t dma_map; + void (*intr_func)(int, void *); + void * intr_arg; +}; + +struct bcm_dma_softc { + device_t sc_dev; + struct mtx sc_mtx; + struct resource * sc_mem; + struct resource * sc_irq[BCM_DMA_CH_MAX]; + void * sc_intrhand[BCM_DMA_CH_MAX]; + struct bcm_dma_ch sc_dma_ch[BCM_DMA_CH_MAX]; + bus_dma_tag_t sc_dma_tag; +}; + +static struct bcm_dma_softc *bcm_dma_sc = NULL; + +static void +bcm_dmamap_cb(void *arg, bus_dma_segment_t *segs, + int nseg, int err) +{ + bus_addr_t *addr; + + if (err) + return; + + addr = (bus_addr_t*)arg; + *addr = PHYS_TO_VCBUS(segs[0].ds_addr); +} + +static void +bcm_dma_reset(device_t dev, int ch) +{ + struct bcm_dma_softc *sc = device_get_softc(dev); + struct bcm_dma_cb *cb; + uint32_t cs; + int count; + + if (ch < 0 || ch >= BCM_DMA_CH_MAX) + return; + + cs = bus_read_4(sc->sc_mem, BCM_DMA_CS(ch)); + + if (cs & CS_ACTIVE) { + /* pause current task */ + bus_write_4(sc->sc_mem, BCM_DMA_CS(ch), 0); + + count = 1000; + do { + cs = bus_read_4(sc->sc_mem, BCM_DMA_CS(ch)); + } while (!(cs & CS_ISPAUSED) && (count-- > 0)); + + if (!(cs & CS_ISPAUSED)) { + device_printf(dev, + "Can't abort DMA transfer at channel %d\n", ch); + } + + bus_write_4(sc->sc_mem, BCM_DMA_CBNEXT(ch), 0); + + /* Complete everything, clear interrupt */ + bus_write_4(sc->sc_mem, BCM_DMA_CS(ch), + CS_ABORT | CS_INT | CS_END| CS_ACTIVE); + } + + /* clear control blocks */ + bus_write_4(sc->sc_mem, BCM_DMA_CBADDR(ch), 0); + bus_write_4(sc->sc_mem, BCM_DMA_CBNEXT(ch), 0); + + /* Reset control block */ + cb = 
sc->sc_dma_ch[ch].cb; + bzero(cb, sizeof(cb)); +} + +static int +bcm_dma_init(device_t dev) +{ + struct bcm_dma_softc *sc = device_get_softc(dev); + uint32_t mask; + struct bcm_dma_ch *ch; + void *cb_virt; + vm_paddr_t cb_phys; + int err; + int i; + + /* disable and clear interrupt status */ + bus_write_4(sc->sc_mem, BCM_DMA_ENABLE, 0); + bus_write_4(sc->sc_mem, BCM_DMA_INT_STATUS, 0); + + /* Allocate DMA chunks control blocks */ + /* p.40 of spec - control block should be 32-bit aligned */ + err = bus_dma_tag_create(bus_get_dma_tag(dev), + 1, 0, BUS_SPACE_MAXADDR_32BIT, + BUS_SPACE_MAXADDR, NULL, NULL, + sizeof(struct bcm_dma_cb), 1, + sizeof(struct bcm_dma_cb), + BUS_DMA_ALLOCNOW, NULL, NULL, + &sc->sc_dma_tag); + + if (err) { + device_printf(dev, "failed allocate DMA tag"); + return (err); + } + + /* setup initial settings */ + for (i = 0; i < BCM_DMA_CH_MAX; i++) { + ch = &sc->sc_dma_ch[i]; + + err = bus_dmamem_alloc(sc->sc_dma_tag, &cb_virt, + BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, + &ch->dma_map); + if (err) { + device_printf(dev, "cannot allocate DMA memory\n"); + break; + } + + /* + * Least alignment for busdma-allocated stuff is cache + * line size, so just make sure nothing stupid happend + * and we got properly aligned address + */ + if ((uintptr_t)cb_virt & 0x1f) { + device_printf(dev, + "DMA address is not 32-bytes aligned: %p\n", + (void*)cb_virt); + break; + } + + err = bus_dmamap_load(sc->sc_dma_tag, ch->dma_map, cb_virt, + sizeof(struct bcm_dma_cb), bcm_dmamap_cb, &cb_phys, + BUS_DMA_WAITOK); + if (err) { + device_printf(dev, "cannot load DMA memory\n"); + break; + } + + bzero(ch, sizeof(struct bcm_dma_ch)); + ch->ch = i; + ch->cb = cb_virt; + ch->vc_cb = cb_phys; + ch->intr_func = NULL; + ch->intr_arg = NULL; + ch->flags = BCM_DMA_CH_UNMAP; + + ch->cb->info = INFO_WAIT_RESP; + + /* reset DMA engine */ + bcm_dma_reset(dev, i); + } + + /* now use DMA2/DMA3 only */ + sc->sc_dma_ch[2].flags = BCM_DMA_CH_FREE; + sc->sc_dma_ch[3].flags = 
BCM_DMA_CH_FREE; + + /* enable DMAs */ + mask = 0; + + for (i = 0; i < BCM_DMA_CH_MAX; i++) + if (sc->sc_dma_ch[i].flags & BCM_DMA_CH_FREE) + mask |= (1 << i); + + bus_write_4(sc->sc_mem, BCM_DMA_ENABLE, mask); + + return (0); +} + +/* + * Allocate DMA channel for further use, returns channel # or + * BCM_DMA_CH_INVALID + */ +int +bcm_dma_allocate(int req_ch) +{ + struct bcm_dma_softc *sc = bcm_dma_sc; + int ch = BCM_DMA_CH_INVALID; + int i; + + if (req_ch >= BCM_DMA_CH_MAX) + return (BCM_DMA_CH_INVALID); + + /* Auto(req_ch < 0) or CH specified */ + mtx_lock(&sc->sc_mtx); + + if (req_ch < 0) { + for (i = 0; i < BCM_DMA_CH_MAX; i++) { + if (sc->sc_dma_ch[i].flags & BCM_DMA_CH_FREE) { + ch = i; + sc->sc_dma_ch[ch].flags &= ~BCM_DMA_CH_FREE; + sc->sc_dma_ch[ch].flags |= BCM_DMA_CH_USED; + break; + } + } + } + else { + if (sc->sc_dma_ch[req_ch].flags & BCM_DMA_CH_FREE) { + ch = req_ch; + sc->sc_dma_ch[ch].flags &= ~BCM_DMA_CH_FREE; + sc->sc_dma_ch[ch].flags |= BCM_DMA_CH_USED; + } + } + + mtx_unlock(&sc->sc_mtx); + return (ch); +} + +/* + * Frees allocated channel. 
Returns 0 on success, -1 otherwise + */ +int +bcm_dma_free(int ch) +{ + struct bcm_dma_softc *sc = bcm_dma_sc; + + if (ch < 0 || ch >= BCM_DMA_CH_MAX) + return (-1); + + mtx_lock(&sc->sc_mtx); + if (sc->sc_dma_ch[ch].flags & BCM_DMA_CH_USED) { + sc->sc_dma_ch[ch].flags |= BCM_DMA_CH_FREE; + sc->sc_dma_ch[ch].flags &= ~BCM_DMA_CH_USED; + sc->sc_dma_ch[ch].intr_func = NULL; + sc->sc_dma_ch[ch].intr_arg = NULL; + + /* reset DMA engine */ + bcm_dma_reset(sc->sc_dev, ch); + } + + mtx_unlock(&sc->sc_mtx); + return (0); +} + +/* + * Assign handler function for channel interrupt + * Returns 0 on success, -1 otherwise + */ +int +bcm_dma_setup_intr(int ch, void (*func)(int, void *), void *arg) +{ + struct bcm_dma_softc *sc = bcm_dma_sc; + struct bcm_dma_cb *cb; + + if (ch < 0 || ch >= BCM_DMA_CH_MAX) + return (-1); + + if (!(sc->sc_dma_ch[ch].flags & BCM_DMA_CH_USED)) + return (-1); + + sc->sc_dma_ch[ch].intr_func = func; + sc->sc_dma_ch[ch].intr_arg = arg; + cb = sc->sc_dma_ch[ch].cb; + cb->info |= INFO_INT_EN; + + return (0); +} + +/* + * Setup DMA source parameters + * ch - channel number + * dreq - hardware DREQ # or BCM_DMA_DREQ_NONE if + * source is physical memory + * inc_addr - BCM_DMA_INC_ADDR if source address + * should be increased after each access or + * BCM_DMA_SAME_ADDR if address should remain + * the same + * width - size of read operation, BCM_DMA_32BIT + * for 32bit bursts, BCM_DMA_128BIT for 128 bits + * + * Returns 0 on success, -1 otherwise + */ +int +bcm_dma_setup_src(int ch, int dreq, int inc_addr, int width) +{ + struct bcm_dma_softc *sc = bcm_dma_sc; + uint32_t info; + + if (ch < 0 || ch >= BCM_DMA_CH_MAX) + return (-1); + + if (!(sc->sc_dma_ch[ch].flags & BCM_DMA_CH_USED)) + return (-1); + + info = sc->sc_dma_ch[ch].cb->info; + info &= ~INFO_PERMAP_MASK; + info |= (dreq << INFO_PERMAP_SHIFT) & INFO_PERMAP_MASK; + + if (dreq) + info |= INFO_S_DREQ; + else + info &= ~INFO_S_DREQ; + + if (width == BCM_DMA_128BIT) + info |= INFO_S_WIDTH; + else + info 
&= ~INFO_S_WIDTH; + + if (inc_addr == BCM_DMA_INC_ADDR) + info |= INFO_S_INC; + else + info &= ~INFO_S_INC; + + sc->sc_dma_ch[ch].cb->info = info; + + return (0); +} + +/* + * Setup DMA destination parameters + * ch - channel number + * dreq - hardware DREQ # or BCM_DMA_DREQ_NONE if + * destination is physical memory + * inc_addr - BCM_DMA_INC_ADDR if source address + * should be increased after each access or + * BCM_DMA_SAME_ADDR if address should remain + * the same + * width - size of write operation, BCM_DMA_32BIT + * for 32bit bursts, BCM_DMA_128BIT for 128 bits + * + * Returns 0 on success, -1 otherwise + */ +int +bcm_dma_setup_dst(int ch, int dreq, int inc_addr, int width) +{ + struct bcm_dma_softc *sc = bcm_dma_sc; + uint32_t info; + + if (ch < 0 || ch >= BCM_DMA_CH_MAX) + return (-1); + + if (!(sc->sc_dma_ch[ch].flags & BCM_DMA_CH_USED)) + return (-1); + + info = sc->sc_dma_ch[ch].cb->info; + info &= ~INFO_PERMAP_MASK; + info |= (dreq << INFO_PERMAP_SHIFT) & INFO_PERMAP_MASK; + + if (dreq) + info |= INFO_D_DREQ; + else + info &= ~INFO_D_DREQ; + + if (width == BCM_DMA_128BIT) + info |= INFO_D_WIDTH; + else + info &= ~INFO_D_WIDTH; + + if (inc_addr == BCM_DMA_INC_ADDR) + info |= INFO_D_INC; + else + info &= ~INFO_D_INC; + + sc->sc_dma_ch[ch].cb->info = info; + + return (0); +} + +#ifdef DEBUG +void +bcm_dma_cb_dump(struct bcm_dma_cb *cb) +{ + + printf("DMA CB "); + printf("INFO: %8.8x ", cb->info); + printf("SRC: %8.8x ", cb->src); + printf("DST: %8.8x ", cb->dst); + printf("LEN: %8.8x ", cb->len); + printf("\n"); + printf("STRIDE: %8.8x ", cb->stride); + printf("NEXT: %8.8x ", cb->next); + printf("RSVD1: %8.8x ", cb->rsvd1); + printf("RSVD2: %8.8x ", cb->rsvd2); + printf("\n"); +} + +void +bcm_dma_reg_dump(int ch) +{ + struct bcm_dma_softc *sc = bcm_dma_sc; + int i; + uint32_t reg; + + if (ch < 0 || ch >= BCM_DMA_CH_MAX) + return; + + printf("DMA%d: ", ch); + for (i = 0; i < MAX_REG; i++) { + reg = bus_read_4(sc->sc_mem, BCM_DMA_CH(ch) + i*4); + 
printf("%8.8x ", reg); + } + printf("\n"); +} +#endif + +/* + * Start DMA transaction + * ch - channel number + * src, dst - source and destination address in + * ARM physical memory address space. + * len - amount of bytes to be transfered + * + * Returns 0 on success, -1 otherwise + */ +int +bcm_dma_start(int ch, vm_paddr_t src, vm_paddr_t dst, int len) +{ + struct bcm_dma_softc *sc = bcm_dma_sc; + struct bcm_dma_cb *cb; + + if (ch < 0 || ch >= BCM_DMA_CH_MAX) + return (-1); + + if (!(sc->sc_dma_ch[ch].flags & BCM_DMA_CH_USED)) + return (-1); + + cb = sc->sc_dma_ch[ch].cb; + if (BCM2835_ARM_IS_IO(src)) + cb->src = IO_TO_VCBUS(src); + else + cb->src = PHYS_TO_VCBUS(src); + if (BCM2835_ARM_IS_IO(dst)) + cb->dst = IO_TO_VCBUS(dst); + else + cb->dst = PHYS_TO_VCBUS(dst); + cb->len = len; + + bus_dmamap_sync(sc->sc_dma_tag, + sc->sc_dma_ch[ch].dma_map, BUS_DMASYNC_PREWRITE); + + bus_write_4(sc->sc_mem, BCM_DMA_CBADDR(ch), + sc->sc_dma_ch[ch].vc_cb); + bus_write_4(sc->sc_mem, BCM_DMA_CS(ch), CS_ACTIVE); + +#ifdef DEBUG + bcm_dma_cb_dump(sc->sc_dma_ch[ch].cb); + bcm_dma_reg_dump(ch); +#endif + + return (0); +} + +/* + * Get length requested for DMA transaction + * ch - channel number + * + * Returns size of transaction, 0 if channel is invalid + */ +uint32_t +bcm_dma_length(int ch) +{ + struct bcm_dma_softc *sc = bcm_dma_sc; + struct bcm_dma_cb *cb; + + if (ch < 0 || ch >= BCM_DMA_CH_MAX) + return (0); + + if (!(sc->sc_dma_ch[ch].flags & BCM_DMA_CH_USED)) + return (0); + + cb = sc->sc_dma_ch[ch].cb; + + return (cb->len); +} + +static void +bcm_dma_intr(void *arg) +{ + struct bcm_dma_softc *sc = bcm_dma_sc; + struct bcm_dma_ch *ch = (struct bcm_dma_ch *)arg; + uint32_t cs, debug; + + /* my interrupt? */ + cs = bus_read_4(sc->sc_mem, BCM_DMA_CS(ch->ch)); + + if (!(cs & (CS_INT | CS_ERR))) + return; + + /* running? 
*/ + if (!(ch->flags & BCM_DMA_CH_USED)) { + device_printf(sc->sc_dev, + "unused DMA intr CH=%d, CS=%x\n", ch->ch, cs); + return; + } + + if (cs & CS_ERR) { + debug = bus_read_4(sc->sc_mem, BCM_DMA_DEBUG(ch->ch)); + device_printf(sc->sc_dev, "DMA error %d on CH%d\n", + debug & DEBUG_ERROR_MASK, ch->ch); + bus_write_4(sc->sc_mem, BCM_DMA_DEBUG(ch->ch), + debug & DEBUG_ERROR_MASK); + } + + if (cs & CS_INT) { + /* acknowledge interrupt */ + bus_write_4(sc->sc_mem, BCM_DMA_CS(ch->ch), + CS_INT | CS_END); + + /* Prepare for possible access to len field */ + bus_dmamap_sync(sc->sc_dma_tag, ch->dma_map, + BUS_DMASYNC_POSTWRITE); + + /* save callback function and argument */ + if (ch->intr_func) + ch->intr_func(ch->ch, ch->intr_arg); + } +} + +static int +bcm_dma_probe(device_t dev) +{ + + if (!ofw_bus_is_compatible(dev, "broadcom,bcm2835-dma")) + return (ENXIO); + + device_set_desc(dev, "BCM2835 DMA Controller"); + return (BUS_PROBE_DEFAULT); +} + +static int +bcm_dma_attach(device_t dev) +{ + struct bcm_dma_softc *sc = device_get_softc(dev); + int rid, err = 0; + int i; + + sc->sc_dev = dev; + + if (bcm_dma_sc) + return (ENXIO); + + for (i = 0; i < BCM_DMA_CH_MAX; i++) { + sc->sc_irq[i] = NULL; + sc->sc_intrhand[i] = NULL; + } + + /* DMA0 - DMA14 */ + rid = 0; + sc->sc_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); + if (sc->sc_mem == NULL) { + device_printf(dev, "could not allocate memory resource\n"); + return (ENXIO); + } + + /* IRQ DMA0 - DMA11 XXX NOT USE DMA12(spurious?) 
*/ + for (rid = 0; rid < BCM_DMA_CH_MAX; rid++) { + sc->sc_irq[rid] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, + RF_ACTIVE); + if (sc->sc_irq[rid] == NULL) { + device_printf(dev, "cannot allocate interrupt\n"); + err = ENXIO; + goto fail; + } + if (bus_setup_intr(dev, sc->sc_irq[rid], INTR_TYPE_MISC | INTR_MPSAFE, + NULL, bcm_dma_intr, &sc->sc_dma_ch[rid], + &sc->sc_intrhand[rid])) { + device_printf(dev, "cannot setup interrupt handler\n"); + err = ENXIO; + goto fail; + } + } + + mtx_init(&sc->sc_mtx, "bcmdma", "bcmdma", MTX_DEF); + bcm_dma_sc = sc; + + err = bcm_dma_init(dev); + if (err) + goto fail; + + return (err); + +fail: + if (sc->sc_mem) + bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->sc_mem); + + for (i = 0; i < BCM_DMA_CH_MAX; i++) { + if (sc->sc_intrhand[i]) + bus_teardown_intr(dev, sc->sc_irq[i], sc->sc_intrhand[i]); + if (sc->sc_irq[i]) + bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq[i]); + } + + return (err); +} + +static device_method_t bcm_dma_methods[] = { + DEVMETHOD(device_probe, bcm_dma_probe), + DEVMETHOD(device_attach, bcm_dma_attach), + { 0, 0 } +}; + +static driver_t bcm_dma_driver = { + "bcm_dma", + bcm_dma_methods, + sizeof(struct bcm_dma_softc), +}; + +static devclass_t bcm_dma_devclass; + +DRIVER_MODULE(bcm_dma, simplebus, bcm_dma_driver, bcm_dma_devclass, 0, 0); +MODULE_VERSION(bcm_dma, 1); diff --git a/sys/arm/broadcom/bcm2835/bcm2835_dma.h b/sys/arm/broadcom/bcm2835/bcm2835_dma.h new file mode 100644 index 0000000..785cf2c --- /dev/null +++ b/sys/arm/broadcom/bcm2835/bcm2835_dma.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2013 Daisuke Aoyama + * Copyright (c) 2013 Oleksandr Tymoshenko + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _BCM2835_DMA_H_ +#define _BCM2835_DMA_H_ + +#define BCM_DMA_BLOCK_SIZE 512 + +/* DMA0-DMA15 but DMA15 is special */ +#define BCM_DMA_CH_MAX 12 + +/* request CH for any number */ +#define BCM_DMA_CH_INVALID (-1) +#define BCM_DMA_CH_ANY (-1) +#define BCM_DMA_CH_FAST1 (2) +#define BCM_DMA_CH_FAST2 (3) + +/* Peripheral DREQ Signals (4.2.1.3) */ +#define BCM_DMA_DREQ_NONE 0 +#define BCM_DMA_DREQ_EMMC 11 +#define BCM_DMA_DREQ_SDHOST 13 + +#define BCM_DMA_SAME_ADDR 0 +#define BCM_DMA_INC_ADDR 1 + +#define BCM_DMA_32BIT 0 +#define BCM_DMA_128BIT 1 + +int bcm_dma_allocate(int req_ch); +int bcm_dma_free(int ch); +int bcm_dma_setup_intr(int ch, void (*func)(int, void *), void *arg); +int bcm_dma_setup_src(int ch, int dreq, int inc_addr, int width); +int bcm_dma_setup_dst(int ch, int dreq, int inc_addr, int width); +int bcm_dma_start(int ch, vm_paddr_t src, vm_paddr_t dst, int len); +uint32_t bcm_dma_length(int ch); + +#endif /* _BCM2835_DMA_H_ */ diff --git 
a/sys/arm/broadcom/bcm2835/files.bcm2835 b/sys/arm/broadcom/bcm2835/files.bcm2835 index 9885155..aa1af40 100644 --- a/sys/arm/broadcom/bcm2835/files.bcm2835 +++ b/sys/arm/broadcom/bcm2835/files.bcm2835 @@ -1,5 +1,6 @@ # $FreeBSD$ +arm/broadcom/bcm2835/bcm2835_dma.c standard arm/broadcom/bcm2835/bcm2835_fb.c optional sc arm/broadcom/bcm2835/bcm2835_gpio.c optional gpio arm/broadcom/bcm2835/bcm2835_intr.c standard -- cgit v1.1 From 3349e30b760bb6888afd0152b4b976c656c042ea Mon Sep 17 00:00:00 2001 From: gonzo Date: Thu, 28 Feb 2013 19:51:30 +0000 Subject: Add platform DMA support to SDHCI driver for BCM2835 Submitted by: Daisuke Aoyama Reviewed by: ian@ --- sys/arm/broadcom/bcm2835/bcm2835_sdhci.c | 304 ++++++++++++++++++++++++++++++- 1 file changed, 300 insertions(+), 4 deletions(-) diff --git a/sys/arm/broadcom/bcm2835/bcm2835_sdhci.c b/sys/arm/broadcom/bcm2835/bcm2835_sdhci.c index cfba2cd..3512954 100644 --- a/sys/arm/broadcom/bcm2835/bcm2835_sdhci.c +++ b/sys/arm/broadcom/bcm2835/bcm2835_sdhci.c @@ -67,8 +67,13 @@ __FBSDID("$FreeBSD$"); #include #include "sdhci_if.h" +#include "bcm2835_dma.h" +#include "bcm2835_vcbus.h" + #define BCM2835_DEFAULT_SDHCI_FREQ 50 +#define BCM_SDHCI_BUFFER_SIZE 512 + #define DEBUG #ifdef DEBUG @@ -85,9 +90,11 @@ __FBSDID("$FreeBSD$"); */ static int bcm2835_sdhci_min_freq = 400000; static int bcm2835_sdhci_hs = 1; +static int bcm2835_sdhci_pio_mode = 0; TUNABLE_INT("hw.bcm2835.sdhci.min_freq", &bcm2835_sdhci_min_freq); TUNABLE_INT("hw.bcm2835.sdhci.hs", &bcm2835_sdhci_hs); +TUNABLE_INT("hw.bcm2835.sdhci.pio_mode", &bcm2835_sdhci_pio_mode); struct bcm_sdhci_dmamap_arg { bus_addr_t sc_dma_busaddr; @@ -111,23 +118,41 @@ struct bcm_sdhci_softc { int sc_xfer_done; int sc_bus_busy; struct sdhci_slot sc_slot; + int sc_dma_inuse; + int sc_dma_ch; + bus_dma_tag_t sc_dma_tag; + bus_dmamap_t sc_dma_map; + void *sc_dma_buffer; + vm_paddr_t sc_dma_buffer_phys; + vm_paddr_t sc_sdhci_buffer_phys;; }; -#define SD_MAX_BLOCKSIZE 1024 -/* XXX */ - static 
int bcm_sdhci_probe(device_t); static int bcm_sdhci_attach(device_t); static int bcm_sdhci_detach(device_t); static void bcm_sdhci_intr(void *); static int bcm_sdhci_get_ro(device_t, device_t); +static void bcm_sdhci_dma_intr(int ch, void *arg); #define bcm_sdhci_lock(_sc) \ mtx_lock(&_sc->sc_mtx); #define bcm_sdhci_unlock(_sc) \ mtx_unlock(&_sc->sc_mtx); +static void +bcm_dmamap_cb(void *arg, bus_dma_segment_t *segs, + int nseg, int err) +{ + bus_addr_t *addr; + + if (err) + return; + + addr = (bus_addr_t*)arg; + *addr = segs[0].ds_addr; +} + static int bcm_sdhci_probe(device_t dev) { @@ -146,9 +171,13 @@ bcm_sdhci_attach(device_t dev) phandle_t node; pcell_t cell; int default_freq; + void *buffer; + vm_paddr_t buffer_phys; + void *va; sc->sc_dev = dev; sc->sc_req = NULL; + err = 0; default_freq = BCM2835_DEFAULT_SDHCI_FREQ; node = ofw_bus_get_node(sc->sc_dev); @@ -191,6 +220,9 @@ bcm_sdhci_attach(device_t dev) goto fail; } + if (!bcm2835_sdhci_pio_mode) + sc->sc_slot.opt = SDHCI_PLATFORM_TRANSFER; + sc->sc_slot.caps = SDHCI_CAN_VDD_330 | SDHCI_CAN_VDD_180; if (bcm2835_sdhci_hs) sc->sc_slot.caps |= SDHCI_CAN_DO_HISPD; @@ -201,6 +233,61 @@ bcm_sdhci_attach(device_t dev) sdhci_init_slot(dev, &sc->sc_slot, 0); + sc->sc_dma_ch = bcm_dma_allocate(BCM_DMA_CH_FAST1); + if (sc->sc_dma_ch == BCM_DMA_CH_INVALID) + sc->sc_dma_ch = bcm_dma_allocate(BCM_DMA_CH_FAST2); + if (sc->sc_dma_ch == BCM_DMA_CH_INVALID) + sc->sc_dma_ch = bcm_dma_allocate(BCM_DMA_CH_ANY); + if (sc->sc_dma_ch == BCM_DMA_CH_INVALID) + goto fail; + + bcm_dma_setup_intr(sc->sc_dma_ch, bcm_sdhci_dma_intr, sc); + + /* Allocate DMA buffers */ + err = bus_dma_tag_create(bus_get_dma_tag(dev), + 1, 0, BUS_SPACE_MAXADDR_32BIT, + BUS_SPACE_MAXADDR, NULL, NULL, + BCM_SDHCI_BUFFER_SIZE, 1, BCM_SDHCI_BUFFER_SIZE, + BUS_DMA_ALLOCNOW, NULL, NULL, + &sc->sc_dma_tag); + + if (err) { + device_printf(dev, "failed allocate DMA tag"); + goto fail; + } + + err = bus_dmamem_alloc(sc->sc_dma_tag, &buffer, + BUS_DMA_WAITOK | 
BUS_DMA_COHERENT| BUS_DMA_ZERO, + &sc->sc_dma_map); + + if (err) { + device_printf(dev, "cannot allocate DMA memory\n"); + goto fail; + } + + err = bus_dmamap_load(sc->sc_dma_tag, sc->sc_dma_map, buffer, + BCM_SDHCI_BUFFER_SIZE, bcm_dmamap_cb, &buffer_phys, + BUS_DMA_WAITOK); + if (err) { + device_printf(dev, "cannot load DMA memory\n"); + goto fail; + } + + /* + * Sanity check: two least bits of address should be zero + */ + if ((uintptr_t)buffer & 3) { + device_printf(dev, + "DMA address is not word-aligned\n"); + goto fail; + } + + sc->sc_dma_buffer = buffer; + sc->sc_dma_buffer_phys = buffer_phys; + va = (void*)rman_get_start(sc->sc_mem_res); + sc->sc_sdhci_buffer_phys = + pmap_kextract((vm_offset_t)va) + SDHCI_BUFFER; + bus_generic_probe(dev); bus_generic_attach(dev); @@ -354,6 +441,211 @@ bcm_sdhci_min_freq(device_t dev, struct sdhci_slot *slot) return bcm2835_sdhci_min_freq; } +static void +bcm_sdhci_dma_intr(int ch, void *arg) +{ + struct bcm_sdhci_softc *sc = (struct bcm_sdhci_softc *)arg; + struct sdhci_slot *slot = &sc->sc_slot; + uint32_t reg, mask; + void *buffer; + size_t len; + int left; + + mtx_lock(&slot->mtx); + + /* copy DMA buffer to VA if READ */ + len = bcm_dma_length(sc->sc_dma_ch); + if (slot->curcmd->data->flags & MMC_DATA_READ) { + bus_dmamap_sync(sc->sc_dma_tag, sc->sc_dma_map, + BUS_DMASYNC_POSTREAD); + + mask = SDHCI_INT_DATA_AVAIL; + /* all dma data in single or contiguous page */ + buffer = (uint8_t*)(slot->curcmd->data->data) + slot->offset; + memcpy(buffer, sc->sc_dma_buffer, len); + } else { + bus_dmamap_sync(sc->sc_dma_tag, sc->sc_dma_map, + BUS_DMASYNC_POSTWRITE); + mask = SDHCI_INT_SPACE_AVAIL; + } + + slot->offset += len; + sc->sc_dma_inuse = 0; + + left = min(BCM_SDHCI_BUFFER_SIZE, + slot->curcmd->data->len - slot->offset); + + /* DATA END? 
*/ + reg = bcm_sdhci_read_4(slot->bus, slot, SDHCI_INT_STATUS); + + if (reg & SDHCI_INT_DATA_END) { + /* ACK for all outstanding interrupts */ + bcm_sdhci_write_4(slot->bus, slot, SDHCI_INT_STATUS, reg); + + /* enable INT */ + slot->intmask |= SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL + | SDHCI_INT_DATA_END; + bcm_sdhci_write_4(slot->bus, slot, SDHCI_SIGNAL_ENABLE, + slot->intmask); + + /* finish this data */ + sdhci_finish_data(slot); + } + else { + /* already available? */ + if (reg & mask) { + sc->sc_dma_inuse = 1; + + /* ACK for DATA_AVAIL or SPACE_AVAIL */ + bcm_sdhci_write_4(slot->bus, slot, + SDHCI_INT_STATUS, mask); + + /* continue next DMA transfer */ + if (slot->curcmd->data->flags & MMC_DATA_READ) { + bus_dmamap_sync(sc->sc_dma_tag, sc->sc_dma_map, + BUS_DMASYNC_PREREAD); + + /* DMA start */ + if (bcm_dma_start(sc->sc_dma_ch, + sc->sc_sdhci_buffer_phys, + sc->sc_dma_buffer_phys, left) != 0) + device_printf(sc->sc_dev, "failed DMA start\n"); + } else { + buffer = (char*)slot->curcmd->data->data + slot->offset; + memcpy(sc->sc_dma_buffer, buffer, left); + + bus_dmamap_sync(sc->sc_dma_tag, + sc->sc_dma_map, BUS_DMASYNC_PREWRITE); + + /* DMA start */ + if (bcm_dma_start(sc->sc_dma_ch, + sc->sc_dma_buffer_phys, + sc->sc_sdhci_buffer_phys, left) != 0) + device_printf(sc->sc_dev, "failed DMA start\n"); + } + } else { + /* wait for next data by INT */ + + /* enable INT */ + slot->intmask |= SDHCI_INT_DATA_AVAIL | + SDHCI_INT_SPACE_AVAIL | SDHCI_INT_DATA_END; + bcm_sdhci_write_4(slot->bus, slot, SDHCI_SIGNAL_ENABLE, + slot->intmask); + } + } + + mtx_unlock(&slot->mtx); +} + +static void +bcm_sdhci_read_dma(struct sdhci_slot *slot) +{ + struct bcm_sdhci_softc *sc = device_get_softc(slot->bus); + size_t left; + + if (sc->sc_dma_inuse) { + device_printf(sc->sc_dev, "DMA in use\n"); + return; + } + + sc->sc_dma_inuse = 1; + + left = min(BCM_SDHCI_BUFFER_SIZE, + slot->curcmd->data->len - slot->offset); + + KASSERT((left & 3) == 0, + ("%s: len = %d, not 
word-aligned", __func__, left)); + + bcm_dma_setup_src(sc->sc_dma_ch, BCM_DMA_DREQ_EMMC, + BCM_DMA_SAME_ADDR, BCM_DMA_32BIT); + bcm_dma_setup_dst(sc->sc_dma_ch, BCM_DMA_DREQ_NONE, + BCM_DMA_INC_ADDR, + (left & 0xf) ? BCM_DMA_32BIT : BCM_DMA_128BIT); + + bus_dmamap_sync(sc->sc_dma_tag, sc->sc_dma_map, + BUS_DMASYNC_PREREAD); + + /* DMA start */ + if (bcm_dma_start(sc->sc_dma_ch, sc->sc_sdhci_buffer_phys, + sc->sc_dma_buffer_phys, left) != 0) + device_printf(sc->sc_dev, "failed DMA start\n"); +} + +static void +bcm_sdhci_write_dma(struct sdhci_slot *slot) +{ + struct bcm_sdhci_softc *sc = device_get_softc(slot->bus); + char *buffer; + size_t left; + + if (sc->sc_dma_inuse) { + device_printf(sc->sc_dev, "DMA in use\n"); + return; + } + + sc->sc_dma_inuse = 1; + + left = min(BCM_SDHCI_BUFFER_SIZE, + slot->curcmd->data->len - slot->offset); + + KASSERT((left & 3) == 0, + ("%s: len = %d, not word-aligned", __func__, left)); + + buffer = (char*)slot->curcmd->data->data + slot->offset; + memcpy(sc->sc_dma_buffer, buffer, left); + + bcm_dma_setup_src(sc->sc_dma_ch, BCM_DMA_DREQ_NONE, + BCM_DMA_INC_ADDR, + (left & 0xf) ? 
BCM_DMA_32BIT : BCM_DMA_128BIT); + bcm_dma_setup_dst(sc->sc_dma_ch, BCM_DMA_DREQ_EMMC, + BCM_DMA_SAME_ADDR, BCM_DMA_32BIT); + + bus_dmamap_sync(sc->sc_dma_tag, sc->sc_dma_map, + BUS_DMASYNC_PREWRITE); + + /* DMA start */ + if (bcm_dma_start(sc->sc_dma_ch, sc->sc_dma_buffer_phys, + sc->sc_sdhci_buffer_phys, left) != 0) + device_printf(sc->sc_dev, "failed DMA start\n"); +} + +static int +bcm_sdhci_will_handle_transfer(device_t dev, struct sdhci_slot *slot) +{ + size_t left; + + /* Do not use DMA for transfers less then block size */ + left = min(BCM_DMA_BLOCK_SIZE, + slot->curcmd->data->len - slot->offset); + if (left < BCM_DMA_BLOCK_SIZE) + return (0); + + return (1); +} + +static void +bcm_sdhci_start_transfer(device_t dev, struct sdhci_slot *slot, + uint32_t *intmask) +{ + + /* Disable INT */ + slot->intmask &= ~(SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL | SDHCI_INT_DATA_END); + bcm_sdhci_write_4(dev, slot, SDHCI_SIGNAL_ENABLE, slot->intmask); + + /* DMA transfer FIFO 1KB */ + if (slot->curcmd->data->flags & MMC_DATA_READ) + bcm_sdhci_read_dma(slot); + else + bcm_sdhci_write_dma(slot); +} + +static void +bcm_sdhci_finish_transfer(device_t dev, struct sdhci_slot *slot) +{ + + sdhci_finish_data(slot); +} + static device_method_t bcm_sdhci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, bcm_sdhci_probe), @@ -372,8 +664,12 @@ static device_method_t bcm_sdhci_methods[] = { DEVMETHOD(mmcbr_acquire_host, sdhci_generic_acquire_host), DEVMETHOD(mmcbr_release_host, sdhci_generic_release_host), - /* SDHCI registers accessors */ DEVMETHOD(sdhci_min_freq, bcm_sdhci_min_freq), + /* Platform transfer methods */ + DEVMETHOD(sdhci_platform_will_handle, bcm_sdhci_will_handle_transfer), + DEVMETHOD(sdhci_platform_start_transfer, bcm_sdhci_start_transfer), + DEVMETHOD(sdhci_platform_finish_transfer, bcm_sdhci_finish_transfer), + /* SDHCI registers accessors */ DEVMETHOD(sdhci_read_1, bcm_sdhci_read_1), DEVMETHOD(sdhci_read_2, bcm_sdhci_read_2), DEVMETHOD(sdhci_read_4, 
bcm_sdhci_read_4), -- cgit v1.1 From bc5c536f334d343f0670e47307141285f4d6e45c Mon Sep 17 00:00:00 2001 From: jfv Date: Thu, 28 Feb 2013 22:48:00 +0000 Subject: Change the ixgbe module name to if_ixgbe to conform to the usual naming convention. --- sys/modules/ixgbe/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/modules/ixgbe/Makefile b/sys/modules/ixgbe/Makefile index 1bc64a0..6abdf7a 100644 --- a/sys/modules/ixgbe/Makefile +++ b/sys/modules/ixgbe/Makefile @@ -4,7 +4,7 @@ .PATH: ${.CURDIR}/../../dev/ixgbe -KMOD = ixgbe +KMOD = if_ixgbe SRCS = device_if.h bus_if.h pci_if.h SRCS += opt_inet.h opt_inet6.h SRCS += ixgbe.c ixv.c -- cgit v1.1 From 1a834f6dbf24d717e51cb0a554c24d7833b2f6e4 Mon Sep 17 00:00:00 2001 From: adrian Date: Thu, 28 Feb 2013 23:31:23 +0000 Subject: Don't enable the HT flags for legacy rates. I stumbled across this whilst trying to debug another weird hang reported on the freebsd-wireless list. Whilst here, add in the STBC check to ath_rateseries_setup(). Whilst here, fix the short preamble flag to be set only for legacy rates. Whilst here, comment that we should be using the full set of decisions made by ath_rateseries_setup() rather than recalculating them! --- sys/dev/ath/if_ath_tx_ht.c | 108 +++++++++++++++++++++++++++------------------ 1 file changed, 64 insertions(+), 44 deletions(-) diff --git a/sys/dev/ath/if_ath_tx_ht.c b/sys/dev/ath/if_ath_tx_ht.c index d382f8f..dbd4a84 100644 --- a/sys/dev/ath/if_ath_tx_ht.c +++ b/sys/dev/ath/if_ath_tx_ht.c @@ -236,9 +236,9 @@ ath_tx_rate_fill_rcflags(struct ath_softc *sc, struct ath_buf *bf) rate = rt->info[rc[i].rix].rateCode; /* - * XXX only do this for legacy rates? 
+ * Only enable short preamble for legacy rates */ - if (bf->bf_state.bfs_shpream) + if (IS_HT_RATE(rate) && bf->bf_state.bfs_shpream) rate |= rt->info[rc[i].rix].shortPreamble; /* @@ -267,6 +267,19 @@ ath_tx_rate_fill_rcflags(struct ath_softc *sc, struct ath_buf *bf) ni->ni_htcap & IEEE80211_HTCAP_SHORTGI20) rc[i].flags |= ATH_RC_SGI_FLAG; + /* + * If we have STBC TX enabled and the receiver + * can receive (at least) 1 stream STBC, AND it's + * MCS 0-7, AND we have at least two chains enabled, + * enable STBC. + */ + if (ic->ic_htcaps & IEEE80211_HTCAP_TXSTBC && + ni->ni_htcap & IEEE80211_HTCAP_RXSTBC_1STREAM && + (sc->sc_cur_txchainmask > 1) && + HT_RC_2_STREAMS(rate) == 1) { + rc[i].flags |= ATH_RC_STBC_FLAG; + } + /* XXX dual stream? and 3-stream? */ } @@ -459,6 +472,9 @@ ath_get_aggr_limit(struct ath_softc *sc, struct ath_buf *bf) * * It, along with ath_buf_set_rate, must be called -after- a burst * or aggregate is setup. + * + * XXX TODO: it should use the rate series information from the + * ath_buf, rather than recalculating it here! */ static void ath_rateseries_setup(struct ath_softc *sc, struct ieee80211_node *ni, @@ -507,34 +523,6 @@ ath_rateseries_setup(struct ath_softc *sc, struct ieee80211_node *ni, */ series[i].ChSel = sc->sc_cur_txchainmask; - if (flags & (HAL_TXDESC_RTSENA | HAL_TXDESC_CTSENA)) - series[i].RateFlags |= HAL_RATESERIES_RTS_CTS; - - /* - * Transmit 40MHz frames only if the node has negotiated - * it rather than whether the node is capable of it or not. - * It's subtly different in the hostap case. - */ - if (ni->ni_chw == 40) - series[i].RateFlags |= HAL_RATESERIES_2040; - - /* - * Set short-GI only if the node has advertised it - * the channel width is suitable, and we support it. - * We don't currently have a "negotiated" set of bits - - * ni_htcap is what the remote end sends, not what this - * node is capable of. 
- */ - if (ni->ni_chw == 40 && - ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI40 && - ni->ni_htcap & IEEE80211_HTCAP_SHORTGI40) - series[i].RateFlags |= HAL_RATESERIES_HALFGI; - - if (ni->ni_chw == 20 && - ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI20 && - ni->ni_htcap & IEEE80211_HTCAP_SHORTGI20) - series[i].RateFlags |= HAL_RATESERIES_HALFGI; - /* * Setup rate and TX power cap for this series. */ @@ -542,23 +530,55 @@ ath_rateseries_setup(struct ath_softc *sc, struct ieee80211_node *ni, series[i].RateIndex = rc[i].rix; series[i].tx_power_cap = 0x3f; /* XXX for now */ - /* - * If we have STBC TX enabled and the receiver - * can receive (at least) 1 stream STBC, AND it's - * MCS 0-7, AND we have at least two chains enabled, - * enable STBC. + * Enable RTS/CTS as appropriate. */ - if (ic->ic_htcaps & IEEE80211_HTCAP_TXSTBC && - ni->ni_htcap & IEEE80211_HTCAP_RXSTBC_1STREAM && - (sc->sc_cur_txchainmask > 1) && - HT_RC_2_STREAMS(series[i].Rate) == 1) { - series[i].RateFlags |= HAL_RATESERIES_STBC; - } + if (flags & (HAL_TXDESC_RTSENA | HAL_TXDESC_CTSENA)) + series[i].RateFlags |= HAL_RATESERIES_RTS_CTS; - /* - * XXX TODO: LDPC if it's possible - */ + + if (IS_HT_RATE(rt->info[rc[i].rix].rateCode)) { + /* + * Transmit 40MHz frames only if the node has negotiated + * it rather than whether the node is capable of it or not. + * It's subtly different in the hostap case. + */ + if (ni->ni_chw == 40) + series[i].RateFlags |= HAL_RATESERIES_2040; + + /* + * Set short-GI only if the node has advertised it + * the channel width is suitable, and we support it. + * We don't currently have a "negotiated" set of bits - + * ni_htcap is what the remote end sends, not what this + * node is capable of. 
+ */ + if (ni->ni_chw == 40 && + ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI40 && + ni->ni_htcap & IEEE80211_HTCAP_SHORTGI40) + series[i].RateFlags |= HAL_RATESERIES_HALFGI; + + if (ni->ni_chw == 20 && + ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI20 && + ni->ni_htcap & IEEE80211_HTCAP_SHORTGI20) + series[i].RateFlags |= HAL_RATESERIES_HALFGI; + + /* + * If we have STBC TX enabled and the receiver + * can receive (at least) 1 stream STBC, AND it's + * MCS 0-7, AND we have at least two chains enabled, + * enable STBC. + */ + if (ic->ic_htcaps & IEEE80211_HTCAP_TXSTBC && + ni->ni_htcap & IEEE80211_HTCAP_RXSTBC_1STREAM && + (sc->sc_cur_txchainmask > 1) && + HT_RC_2_STREAMS(series[i].Rate) == 1) { + series[i].RateFlags |= HAL_RATESERIES_STBC; + } + /* + * XXX TODO: LDPC if it's possible + */ + } /* * PktDuration doesn't include slot, ACK, RTS, etc timing - -- cgit v1.1 From 64b36f1a20744f0cf17f2e8c8b4df62c9bb278c4 Mon Sep 17 00:00:00 2001 From: adrian Date: Thu, 28 Feb 2013 23:39:22 +0000 Subject: Oops - fix an incorrect test. --- sys/dev/ath/if_ath_tx_ht.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/dev/ath/if_ath_tx_ht.c b/sys/dev/ath/if_ath_tx_ht.c index dbd4a84..9921ffa 100644 --- a/sys/dev/ath/if_ath_tx_ht.c +++ b/sys/dev/ath/if_ath_tx_ht.c @@ -238,7 +238,7 @@ ath_tx_rate_fill_rcflags(struct ath_softc *sc, struct ath_buf *bf) /* * Only enable short preamble for legacy rates */ - if (IS_HT_RATE(rate) && bf->bf_state.bfs_shpream) + if ((! IS_HT_RATE(rate)) && bf->bf_state.bfs_shpream) rate |= rt->info[rc[i].rix].shortPreamble; /* -- cgit v1.1 From d6b302da7115761480eb1ebd16956233b1afd45f Mon Sep 17 00:00:00 2001 From: adrian Date: Thu, 28 Feb 2013 23:39:38 +0000 Subject: Add missing flags. 
--- sys/dev/ath/if_athrate.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/dev/ath/if_athrate.h b/sys/dev/ath/if_athrate.h index d07c9ca..918823f 100644 --- a/sys/dev/ath/if_athrate.h +++ b/sys/dev/ath/if_athrate.h @@ -84,6 +84,8 @@ void ath_rate_detach(struct ath_ratectrl *); #define ATH_RC_SGI_FLAG 0x04 /* use short-GI */ #define ATH_RC_HT_FLAG 0x08 /* use HT */ #define ATH_RC_RTSCTS_FLAG 0x10 /* enable RTS/CTS protection */ +#define ATH_RC_STBC_FLAG 0x20 /* enable STBC */ +#define ATH_RC_LDPC_FLAG 0x40 /* enable LDPC */ struct ath_rc_series { uint8_t rix; /* ratetable index, not rate code */ -- cgit v1.1 From b54216a509bd8ecbe2d99ecf22ea8c74471ab751 Mon Sep 17 00:00:00 2001 From: gjb Date: Thu, 28 Feb 2013 23:45:41 +0000 Subject: Minor wordsmithing. X-MFC-Needs: r245617 --- UPDATING | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/UPDATING b/UPDATING index 8be2f46..74da04d 100644 --- a/UPDATING +++ b/UPDATING @@ -46,8 +46,8 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 10.x IS SLOW: unlikely event that -M was the last option on the command line and the command line contained at least two files and a target directory the first file will have logs appended to it. The -M - option served little practical purpose in the last decade so it's - used expected to be extremely rare. + option served little practical purpose in the last decade so its + use is expected to be extremely rare. 20121223: After switching to Clang as the default compiler some users of ZFS -- cgit v1.1 From 07edb9e32756de1022f2fa472b214a481ba329be Mon Sep 17 00:00:00 2001 From: ganbold Date: Fri, 1 Mar 2013 01:42:31 +0000 Subject: Add support for A10 uart. A10 uart is derived from Synopsys DesignWare uart and requires to read Uart Status Register when IIR_BUSY has detected. Also this change includes FDT check, where it checks device specific properties defined in dts and sets the busy_detect variable. 
broken_txfifo is also needed to be set in order to make it work for A10 uart case. Reviewed by: marcel@ Approved by: gonzo@ --- sys/dev/ic/ns16550.h | 1 + sys/dev/uart/uart_dev_ns8250.c | 43 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/sys/dev/ic/ns16550.h b/sys/dev/ic/ns16550.h index 635270c..659f591 100644 --- a/sys/dev/ic/ns16550.h +++ b/sys/dev/ic/ns16550.h @@ -182,6 +182,7 @@ #define com_xoff1 6 /* XOFF 1 character (R/W) */ #define com_xoff2 7 /* XOFF 2 character (R/W) */ +#define DW_REG_USR 31 /* DesignWare derived Uart Status Reg */ #define com_usr 39 /* Octeon 16750/16550 Uart Status Reg */ #define REG_USR com_usr #define USR_TXFIFO_NOTFULL 2 /* Uart TX FIFO Not full */ diff --git a/sys/dev/uart/uart_dev_ns8250.c b/sys/dev/uart/uart_dev_ns8250.c index eb36da1..8eca8f0 100644 --- a/sys/dev/uart/uart_dev_ns8250.c +++ b/sys/dev/uart/uart_dev_ns8250.c @@ -24,6 +24,8 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include "opt_platform.h" + #include __FBSDID("$FreeBSD$"); @@ -35,6 +37,12 @@ __FBSDID("$FreeBSD$"); #include #include +#ifdef FDT +#include +#include +#include +#endif + #include #include #include @@ -45,6 +53,11 @@ __FBSDID("$FreeBSD$"); #define DEFAULT_RCLK 1843200 +static int broken_txfifo = 0; +SYSCTL_INT(_hw, OID_AUTO, broken_txfifo, CTLFLAG_RW | CTLFLAG_TUN, + &broken_txfifo, 0, "UART FIFO has QEMU emulation bug"); +TUNABLE_INT("hw.broken_txfifo", &broken_txfifo); + /* * Clear pending interrupts. THRE is cleared by reading IIR. Data * that may have been received gets lost here. 
@@ -350,6 +363,7 @@ struct ns8250_softc { uint8_t ier_mask; uint8_t ier_rxbits; + uint8_t busy_detect; }; static int ns8250_bus_attach(struct uart_softc *); @@ -401,6 +415,24 @@ ns8250_bus_attach(struct uart_softc *sc) struct ns8250_softc *ns8250 = (struct ns8250_softc*)sc; struct uart_bas *bas; unsigned int ivar; +#ifdef FDT + phandle_t node; + pcell_t cell; +#endif + + ns8250->busy_detect = 0; + +#ifdef FDT + /* + * Check whether uart requires to read USR reg when IIR_BUSY and + * has broken txfifo. + */ + node = ofw_bus_get_node(sc->sc_dev); + if ((OF_getprop(node, "busy-detect", &cell, sizeof(cell))) > 0) + ns8250->busy_detect = 1; + if ((OF_getprop(node, "broken-txfifo", &cell, sizeof(cell))) > 0) + broken_txfifo = 1; +#endif bas = &sc->sc_bas; @@ -592,6 +624,12 @@ ns8250_bus_ipend(struct uart_softc *sc) bas = &sc->sc_bas; uart_lock(sc->sc_hwmtx); iir = uart_getreg(bas, REG_IIR); + + if (ns8250->busy_detect && (iir & IIR_BUSY) == IIR_BUSY) { + (void)uart_getreg(bas, DW_REG_USR); + uart_unlock(sc->sc_hwmtx); + return (0); + } if (iir & IIR_NOPEND) { uart_unlock(sc->sc_hwmtx); return (0); @@ -847,11 +885,6 @@ ns8250_bus_setsig(struct uart_softc *sc, int sig) return (0); } -static int broken_txfifo = 0; -SYSCTL_INT(_hw, OID_AUTO, broken_txfifo, CTLFLAG_RW | CTLFLAG_TUN, - &broken_txfifo, 0, "UART FIFO has QEMU emulation bug"); -TUNABLE_INT("hw.broken_txfifo", &broken_txfifo); - static int ns8250_bus_transmit(struct uart_softc *sc) { -- cgit v1.1 From 65c312a935cde98a1292b6337cd3c1ece740b835 Mon Sep 17 00:00:00 2001 From: ganbold Date: Fri, 1 Mar 2013 01:47:11 +0000 Subject: Enable uart driver for A10. 
Approved by: gonzo@ --- sys/arm/allwinner/files.a10 | 2 +- sys/arm/conf/CUBIEBOARD | 4 ++-- sys/boot/fdt/dts/cubieboard.dts | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sys/arm/allwinner/files.a10 b/sys/arm/allwinner/files.a10 index 9e947de..c25682a 100644 --- a/sys/arm/allwinner/files.a10 +++ b/sys/arm/allwinner/files.a10 @@ -17,5 +17,5 @@ arm/allwinner/timer.c standard arm/allwinner/aintc.c standard arm/allwinner/bus_space.c standard arm/allwinner/common.c standard -arm/allwinner/console.c standard +#arm/allwinner/console.c standard arm/allwinner/a10_machdep.c standard diff --git a/sys/arm/conf/CUBIEBOARD b/sys/arm/conf/CUBIEBOARD index fa5bfc8..1c0407b 100644 --- a/sys/arm/conf/CUBIEBOARD +++ b/sys/arm/conf/CUBIEBOARD @@ -87,8 +87,8 @@ options ROOTDEVNAME=\"ufs:/dev/da0s2\" #options ATA_STATIC_ID # Static device numbering # Console and misc -#device uart -#device uart_ns8250 +device uart +device uart_ns8250 device pty device snp device md diff --git a/sys/boot/fdt/dts/cubieboard.dts b/sys/boot/fdt/dts/cubieboard.dts index abb3f45..181d5d4 100644 --- a/sys/boot/fdt/dts/cubieboard.dts +++ b/sys/boot/fdt/dts/cubieboard.dts @@ -121,6 +121,8 @@ interrupt-parent = <&AINTC>; current-speed = <115200>; clock-frequency = < 24000000 >; + busy-detect = <1>; + broken-txfifo = <1>; }; }; -- cgit v1.1 From 9c2aecf6da5fb6aafba1ce1f1488a6b13c8a3174 Mon Sep 17 00:00:00 2001 From: neel Date: Fri, 1 Mar 2013 02:26:28 +0000 Subject: Specify the length of the mapping requested from 'paddr_guest2host()'. This seems prudent to do in its own right but it also opens up the possibility of not having to mmap the entire guest address space in the 'bhyve' process context. 
Discussed with: grehan Obtained from: NetApp --- usr.sbin/bhyve/acpi.c | 9 ++++++--- usr.sbin/bhyve/bhyverun.c | 18 ++++++++++-------- usr.sbin/bhyve/bhyverun.h | 2 +- usr.sbin/bhyve/mptbl.c | 9 ++++++--- usr.sbin/bhyve/pci_virtio_block.c | 19 +++++++------------ usr.sbin/bhyve/pci_virtio_net.c | 14 ++++---------- usr.sbin/bhyve/virtio.h | 15 +++++++++++++++ 7 files changed, 49 insertions(+), 37 deletions(-) diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c index 32effdc..cabe75e 100644 --- a/usr.sbin/bhyve/acpi.c +++ b/usr.sbin/bhyve/acpi.c @@ -683,13 +683,16 @@ static int basl_load(int fd, uint64_t off) { struct stat sb; + void *gaddr; int err; err = 0; - - if (fstat(fd, &sb) < 0 || - read(fd, paddr_guest2host(basl_acpi_base + off), sb.st_size) < 0) + gaddr = paddr_guest2host(basl_acpi_base + off, sb.st_size); + if (gaddr != NULL) { + if (fstat(fd, &sb) < 0 || read(fd, gaddr, sb.st_size) < 0) err = errno; + } else + err = EFAULT; return (err); } diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index ea4b68c..17d60d6 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -157,17 +157,19 @@ usage(int code) } void * -paddr_guest2host(uintptr_t gaddr) +paddr_guest2host(uintptr_t gaddr, size_t len) { - if (lomem_sz == 0) - return (NULL); - if (gaddr < lomem_sz) { + if (gaddr < lomem_sz && gaddr + len <= lomem_sz) return ((void *)(lomem_addr + gaddr)); - } else if (gaddr >= 4*GB && gaddr < (4*GB + himem_sz)) { - return ((void *)(himem_addr + gaddr - 4*GB)); - } else - return (NULL); + + if (gaddr >= 4*GB) { + gaddr -= 4*GB; + if (gaddr < himem_sz && gaddr + len <= himem_sz) + return ((void *)(himem_addr + gaddr)); + } + + return (NULL); } int diff --git a/usr.sbin/bhyve/bhyverun.h b/usr.sbin/bhyve/bhyverun.h index 45033b8..70455bf 100644 --- a/usr.sbin/bhyve/bhyverun.h +++ b/usr.sbin/bhyve/bhyverun.h @@ -43,7 +43,7 @@ extern char *vmname; extern u_long lomem_sz, himem_sz; -void *paddr_guest2host(uintptr_t); +void 
*paddr_guest2host(uintptr_t addr, size_t len); void fbsdrun_addcpu(struct vmctx *ctx, int cpu, uint64_t rip); int fbsdrun_muxed(void); diff --git a/usr.sbin/bhyve/mptbl.c b/usr.sbin/bhyve/mptbl.c index 52790f3..9c68b3d 100644 --- a/usr.sbin/bhyve/mptbl.c +++ b/usr.sbin/bhyve/mptbl.c @@ -41,6 +41,9 @@ __FBSDID("$FreeBSD$"); #define MPTABLE_BASE 0xF0000 +/* floating pointer length + maximum length of configuration table */ +#define MPTABLE_MAX_LENGTH (65536 + 16) + #define LAPIC_PADDR 0xFEE00000 #define LAPIC_VERSION 16 @@ -346,13 +349,13 @@ mptable_build(struct vmctx *ctx, int ncpu, int ioapic) char *curraddr; char *startaddr; - if (paddr_guest2host(0) == NULL) { + startaddr = paddr_guest2host(MPTABLE_BASE, MPTABLE_MAX_LENGTH); + if (startaddr == NULL) { printf("mptable requires mapped mem\n"); return (ENOMEM); } - startaddr = curraddr = paddr_guest2host(MPTABLE_BASE); - + curraddr = startaddr; mpfp = (mpfps_t)curraddr; mpt_build_mpfp(mpfp, MPTABLE_BASE); curraddr += sizeof(*mpfp); diff --git a/usr.sbin/bhyve/pci_virtio_block.c b/usr.sbin/bhyve/pci_virtio_block.c index 62bf801..31ff2e6 100644 --- a/usr.sbin/bhyve/pci_virtio_block.c +++ b/usr.sbin/bhyve/pci_virtio_block.c @@ -222,13 +222,13 @@ pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vring_hqueue *hq) assert(nsegs >= 3); assert(nsegs < VTBLK_MAXSEGS + 2); - vid = paddr_guest2host(vd->vd_addr); + vid = paddr_guest2host(vd->vd_addr, vd->vd_len); assert((vid->vd_flags & VRING_DESC_F_INDIRECT) == 0); /* * The first descriptor will be the read-only fixed header */ - vbh = paddr_guest2host(vid[0].vd_addr); + vbh = paddr_guest2host(vid[0].vd_addr, sizeof(struct virtio_blk_hdr)); assert(vid[0].vd_len == sizeof(struct virtio_blk_hdr)); assert(vid[0].vd_flags & VRING_DESC_F_NEXT); assert((vid[0].vd_flags & VRING_DESC_F_WRITE) == 0); @@ -247,7 +247,8 @@ pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vring_hqueue *hq) * Build up the iovec based on the guest's data descriptors */ for (i = 1, iolen = 0; i < nsegs - 1; 
i++) { - iov[i-1].iov_base = paddr_guest2host(vid[i].vd_addr); + iov[i-1].iov_base = paddr_guest2host(vid[i].vd_addr, + vid[i].vd_len); iov[i-1].iov_len = vid[i].vd_len; iolen += vid[i].vd_len; @@ -265,7 +266,7 @@ pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vring_hqueue *hq) } /* Lastly, get the address of the status byte */ - status = paddr_guest2host(vid[nsegs - 1].vd_addr); + status = paddr_guest2host(vid[nsegs - 1].vd_addr, 1); assert(vid[nsegs - 1].vd_len == 1); assert((vid[nsegs - 1].vd_flags & VRING_DESC_F_NEXT) == 0); assert(vid[nsegs - 1].vd_flags & VRING_DESC_F_WRITE); @@ -341,7 +342,8 @@ pci_vtblk_ring_init(struct pci_vtblk_softc *sc, uint64_t pfn) hq = &sc->vbsc_q; hq->hq_size = VTBLK_RINGSZ; - hq->hq_dtable = paddr_guest2host(pfn << VRING_PFN); + hq->hq_dtable = paddr_guest2host(pfn << VRING_PFN, + vring_size(VTBLK_RINGSZ)); hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size); hq->hq_avail_idx = hq->hq_avail_flags + 1; hq->hq_avail_ring = hq->hq_avail_flags + 2; @@ -372,13 +374,6 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) } /* - * Access to guest memory is required. Fail if - * memory not mapped - */ - if (paddr_guest2host(0) == NULL) - return (1); - - /* * The supplied backing file has to exist */ fd = open(opts, O_RDWR); diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c index 327ebf7..a5cf8b3 100644 --- a/usr.sbin/bhyve/pci_virtio_net.c +++ b/usr.sbin/bhyve/pci_virtio_net.c @@ -326,7 +326,7 @@ pci_vtnet_tap_rx(struct pci_vtnet_softc *sc) * Get a pointer to the rx header, and use the * data immediately following it for the packet buffer. 
*/ - vrx = (struct virtio_net_rxhdr *)paddr_guest2host(vd->vd_addr); + vrx = paddr_guest2host(vd->vd_addr, vd->vd_len); buf = (uint8_t *)(vrx + 1); len = read(sc->vsc_tapfd, buf, @@ -434,7 +434,7 @@ pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vring_hqueue *hq) for (i = 0, plen = 0; i < VTNET_MAXSEGS; i++, vd = &hq->hq_dtable[vd->vd_next]) { - iov[i].iov_base = paddr_guest2host(vd->vd_addr); + iov[i].iov_base = paddr_guest2host(vd->vd_addr, vd->vd_len); iov[i].iov_len = vd->vd_len; plen += vd->vd_len; tlen += vd->vd_len; @@ -517,7 +517,8 @@ pci_vtnet_ring_init(struct pci_vtnet_softc *sc, uint64_t pfn) hq = &sc->vsc_hq[qnum]; hq->hq_size = pci_vtnet_qsize(qnum); - hq->hq_dtable = paddr_guest2host(pfn << VRING_PFN); + hq->hq_dtable = paddr_guest2host(pfn << VRING_PFN, + vring_size(hq->hq_size)); hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size); hq->hq_avail_idx = hq->hq_avail_flags + 1; hq->hq_avail_ring = hq->hq_avail_flags + 2; @@ -541,13 +542,6 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) struct pci_vtnet_softc *sc; const char *env_msi; - /* - * Access to guest memory is required. 
Fail if - * memory not mapped - */ - if (paddr_guest2host(0) == NULL) - return (1); - sc = malloc(sizeof(struct pci_vtnet_softc)); memset(sc, 0, sizeof(struct pci_vtnet_softc)); diff --git a/usr.sbin/bhyve/virtio.h b/usr.sbin/bhyve/virtio.h index 04ef586..fe6fb1a 100644 --- a/usr.sbin/bhyve/virtio.h +++ b/usr.sbin/bhyve/virtio.h @@ -85,4 +85,19 @@ struct virtio_used { #define VTCFG_R_CFG1 24 /* With MSI-X */ #define VTCFG_R_MSIX 20 +/* From section 2.3, "Virtqueue Configuration", of the virtio specification */ +static inline u_int +vring_size(u_int qsz) +{ + u_int size; + + size = sizeof(struct virtio_desc) * qsz + sizeof(uint16_t) * (3 + qsz); + size = roundup2(size, VRING_ALIGN); + + size += sizeof(uint16_t) * 3 + sizeof(struct virtio_used) * qsz; + size = roundup2(size, VRING_ALIGN); + + return (size); +} + #endif /* _VIRTIO_H_ */ -- cgit v1.1 From 1b36da001503ba894e5a06f0dd572ceda06943b4 Mon Sep 17 00:00:00 2001 From: brooks Date: Fri, 1 Mar 2013 03:25:43 +0000 Subject: Provide slightly more helpful feedback when we can't figure out what compiler the user is using. PR: misc/173914 --- share/mk/bsd.compiler.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share/mk/bsd.compiler.mk b/share/mk/bsd.compiler.mk index c38c76e..2d648de 100644 --- a/share/mk/bsd.compiler.mk +++ b/share/mk/bsd.compiler.mk @@ -14,7 +14,7 @@ COMPILER_TYPE:= gcc . elif ${_COMPILER_VERSION:Mclang} COMPILER_TYPE:= clang . else -.error Unable to determine compiler type for ${CC} +.error Unable to determine compiler type for ${CC}. Consider setting COMPILER_TYPE. . endif . undef _COMPILER_VERSION . endif -- cgit v1.1 From 7399e0afc25d4f078e2723db324fbec5475f7795 Mon Sep 17 00:00:00 2001 From: ru Date: Fri, 1 Mar 2013 07:39:14 +0000 Subject: Fixed documented prototype of kinfo_getproc(3). 
--- lib/libutil/kinfo_getproc.3 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/libutil/kinfo_getproc.3 b/lib/libutil/kinfo_getproc.3 index 68b8819..804cb6c 100644 --- a/lib/libutil/kinfo_getproc.3 +++ b/lib/libutil/kinfo_getproc.3 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd February 25, 2012 +.Dd March 1, 2013 .Dt KINFO_GETPROC 3 .Os .Sh NAME @@ -37,7 +37,7 @@ .In sys/types.h .In libutil.h .Ft struct kinfo_proc * -.Fn kinfo_getproc "pid_t pid" "int *cntp" +.Fn kinfo_getproc "pid_t pid" .Sh DESCRIPTION This function is used for obtaining process information from the kernel. .Pp -- cgit v1.1 From 9ecef62d739ff26f081c6ce4d79c74c293ba9fee Mon Sep 17 00:00:00 2001 From: alc Date: Fri, 1 Mar 2013 08:30:31 +0000 Subject: Copy the definition of VM_MAX_AUTOTUNE_MAXUSERS from i386. (See r242847.) Tested by: andrew --- sys/arm/include/vmparam.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sys/arm/include/vmparam.h b/sys/arm/include/vmparam.h index 09bf62c..53c40d2 100644 --- a/sys/arm/include/vmparam.h +++ b/sys/arm/include/vmparam.h @@ -178,4 +178,8 @@ extern vm_offset_t vm_max_kernel_address; #define ZERO_REGION_SIZE (64 * 1024) /* 64KB */ +#ifndef VM_MAX_AUTOTUNE_MAXUSERS +#define VM_MAX_AUTOTUNE_MAXUSERS 384 +#endif + #endif /* _MACHINE_VMPARAM_H_ */ -- cgit v1.1 From e653c470f763b4dff0b51050c92e379b1ff3f646 Mon Sep 17 00:00:00 2001 From: mm Date: Fri, 1 Mar 2013 09:42:58 +0000 Subject: Fix the zfs_ioctl compat layer to support zfs_cmd size change introduced in r247265 (ZFS deadman thread). Both new utilities now support the old kernel and new kernel properly detects old utilities. For future backwards compatibility, the vfs.zfs.version.ioctl read-only sysctl has been introduced. With this sysctl zfs utilities will be able to detect the ioctl interface version of the currently loaded zfs module. As a side effect, the zfs utilities between r247265 and this revision don't support the old kernel module. 
If you are using HEAD newer than or equal to r247265, install the new kernel module (or whole kernel) first. MFC after: 10 days --- .../opensolaris/lib/libzfs/common/libzfs_impl.h | 35 +++- .../opensolaris/common/zfs/zfs_ioctl_compat.c | 207 ++++++++++++++++++--- .../opensolaris/common/zfs/zfs_ioctl_compat.h | 72 ++++++- .../opensolaris/uts/common/fs/zfs/zfs_ioctl.c | 17 +- 4 files changed, 287 insertions(+), 44 deletions(-) diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h index 1c46d32..be07187 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_impl.h @@ -24,6 +24,7 @@ * Copyright (c) 2011 Pawel Jakub Dawidek . * All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2013 Martin Matuska . All rights reserved. */ #ifndef _LIBFS_IMPL_H @@ -216,6 +217,7 @@ extern void libzfs_fru_clear(libzfs_handle_t *, boolean_t); #ifndef sun static int zfs_kernel_version = 0; +static int zfs_ioctl_version = 0; /* * This is FreeBSD version of ioctl, because Solaris' ioctl() updates @@ -225,19 +227,34 @@ static int zfs_kernel_version = 0; static __inline int zcmd_ioctl(int fd, unsigned long cmd, zfs_cmd_t *zc) { - size_t oldsize, zfs_kernel_version_size; + size_t oldsize, zfs_kernel_version_size, zfs_ioctl_version_size; int version, ret, cflag = ZFS_CMD_COMPAT_NONE; - zfs_kernel_version_size = sizeof(zfs_kernel_version); - if (zfs_kernel_version == 0) { - sysctlbyname("vfs.zfs.version.spa", &zfs_kernel_version, - &zfs_kernel_version_size, NULL, 0); + zfs_ioctl_version_size = sizeof(zfs_ioctl_version); + if (zfs_ioctl_version == 0) { + sysctlbyname("vfs.zfs.version.ioctl", &zfs_ioctl_version, + &zfs_ioctl_version_size, NULL, 0); } - if (zfs_kernel_version == SPA_VERSION_15 || - zfs_kernel_version == SPA_VERSION_14 || - zfs_kernel_version == SPA_VERSION_13) - cflag = ZFS_CMD_COMPAT_V15; + /* + * If
vfs.zfs.version.ioctl is not defined, assume we have v28 + * compatible binaries and use vfs.zfs.version.spa to test for v15 + */ + if (zfs_ioctl_version < ZFS_IOCVER_DEADMAN) { + cflag = ZFS_CMD_COMPAT_V28; + zfs_kernel_version_size = sizeof(zfs_kernel_version); + + if (zfs_kernel_version == 0) { + sysctlbyname("vfs.zfs.version.spa", + &zfs_kernel_version, + &zfs_kernel_version_size, NULL, 0); + } + + if (zfs_kernel_version == SPA_VERSION_15 || + zfs_kernel_version == SPA_VERSION_14 || + zfs_kernel_version == SPA_VERSION_13) + cflag = ZFS_CMD_COMPAT_V15; + } oldsize = zc->zc_nvlist_dst_size; ret = zcmd_ioctl_compat(fd, cmd, zc, cflag); diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c b/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c index 0b43c78..4959335 100644 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c +++ b/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Martin Matuska . All rights reserved. + * Copyright 2013 Martin Matuska . All rights reserved. * Portions Copyright 2005, 2010, Oracle and/or its affiliates. * All rights reserved. * Use is subject to license terms. 
@@ -35,22 +35,100 @@ #include #include "zfs_ioctl_compat.h" +static int zfs_version_ioctl = ZFS_IOCVER_CURRENT; +SYSCTL_DECL(_vfs_zfs_version); +SYSCTL_INT(_vfs_zfs_version, OID_AUTO, ioctl, CTLFLAG_RD, &zfs_version_ioctl, + 0, "ZFS_IOCTL_VERSION"); + /* - * FreeBSD zfs_cmd compatibility with v15 and older binaries + * FreeBSD zfs_cmd compatibility with older binaries * appropriately remap/extend the zfs_cmd_t structure */ void zfs_cmd_compat_get(zfs_cmd_t *zc, caddr_t addr, const int cflag) { zfs_cmd_v15_t *zc_c; + zfs_cmd_v28_t *zc28_c; - if (cflag == ZFS_CMD_COMPAT_V15) { + switch (cflag) { + case ZFS_CMD_COMPAT_V28: + zc28_c = (void *)addr; + + /* zc */ + strlcpy(zc->zc_name, zc28_c->zc_name, MAXPATHLEN); + strlcpy(zc->zc_value, zc28_c->zc_value, MAXPATHLEN * 2); + strlcpy(zc->zc_string, zc28_c->zc_string, MAXPATHLEN); + strlcpy(zc->zc_top_ds, zc28_c->zc_top_ds, MAXPATHLEN); + zc->zc_guid = zc28_c->zc_guid; + zc->zc_nvlist_conf = zc28_c->zc_nvlist_conf; + zc->zc_nvlist_conf_size = zc28_c->zc_nvlist_conf_size; + zc->zc_nvlist_src = zc28_c->zc_nvlist_src; + zc->zc_nvlist_src_size = zc28_c->zc_nvlist_src_size; + zc->zc_nvlist_dst = zc28_c->zc_nvlist_dst; + zc->zc_nvlist_dst_size = zc28_c->zc_nvlist_dst_size; + zc->zc_cookie = zc28_c->zc_cookie; + zc->zc_objset_type = zc28_c->zc_objset_type; + zc->zc_perm_action = zc28_c->zc_perm_action; + zc->zc_history = zc28_c->zc_history; + zc->zc_history_len = zc28_c->zc_history_len; + zc->zc_history_offset = zc28_c->zc_history_offset; + zc->zc_obj = zc28_c->zc_obj; + zc->zc_iflags = zc28_c->zc_iflags; + zc->zc_share = zc28_c->zc_share; + zc->zc_jailid = zc28_c->zc_jailid; + zc->zc_objset_stats = zc28_c->zc_objset_stats; + zc->zc_begin_record = zc28_c->zc_begin_record; + zc->zc_defer_destroy = zc28_c->zc_defer_destroy; + zc->zc_temphold = zc28_c->zc_temphold; + zc->zc_action_handle = zc28_c->zc_action_handle; + zc->zc_cleanup_fd = zc28_c->zc_cleanup_fd; + zc->zc_simple = zc28_c->zc_simple; + bcopy(zc28_c->zc_pad, zc->zc_pad, 
sizeof(zc->zc_pad)); + zc->zc_sendobj = zc28_c->zc_sendobj; + zc->zc_fromobj = zc28_c->zc_fromobj; + zc->zc_createtxg = zc28_c->zc_createtxg; + zc->zc_stat = zc28_c->zc_stat; + + /* zc->zc_inject_record */ + zc->zc_inject_record.zi_objset = + zc28_c->zc_inject_record.zi_objset; + zc->zc_inject_record.zi_object = + zc28_c->zc_inject_record.zi_object; + zc->zc_inject_record.zi_start = + zc28_c->zc_inject_record.zi_start; + zc->zc_inject_record.zi_end = + zc28_c->zc_inject_record.zi_end; + zc->zc_inject_record.zi_guid = + zc28_c->zc_inject_record.zi_guid; + zc->zc_inject_record.zi_level = + zc28_c->zc_inject_record.zi_level; + zc->zc_inject_record.zi_error = + zc28_c->zc_inject_record.zi_error; + zc->zc_inject_record.zi_type = + zc28_c->zc_inject_record.zi_type; + zc->zc_inject_record.zi_freq = + zc28_c->zc_inject_record.zi_freq; + zc->zc_inject_record.zi_failfast = + zc28_c->zc_inject_record.zi_failfast; + strlcpy(zc->zc_inject_record.zi_func, + zc28_c->zc_inject_record.zi_func, MAXNAMELEN); + zc->zc_inject_record.zi_iotype = + zc28_c->zc_inject_record.zi_iotype; + zc->zc_inject_record.zi_duration = + zc28_c->zc_inject_record.zi_duration; + zc->zc_inject_record.zi_timer = + zc28_c->zc_inject_record.zi_timer; + zc->zc_inject_record.zi_cmd = ZINJECT_UNINITIALIZED; + zc->zc_inject_record.zi_pad = 0; + break; + + case ZFS_CMD_COMPAT_V15: zc_c = (void *)addr; /* zc */ - strlcpy(zc->zc_name,zc_c->zc_name,MAXPATHLEN); - strlcpy(zc->zc_value,zc_c->zc_value,MAXPATHLEN); - strlcpy(zc->zc_string,zc_c->zc_string,MAXPATHLEN); + strlcpy(zc->zc_name, zc_c->zc_name, MAXPATHLEN); + strlcpy(zc->zc_value, zc_c->zc_value, MAXPATHLEN); + strlcpy(zc->zc_string, zc_c->zc_string, MAXPATHLEN); zc->zc_guid = zc_c->zc_guid; zc->zc_nvlist_conf = zc_c->zc_nvlist_conf; zc->zc_nvlist_conf_size = zc_c->zc_nvlist_conf_size; @@ -91,6 +169,7 @@ zfs_cmd_compat_get(zfs_cmd_t *zc, caddr_t addr, const int cflag) zc_c->zc_inject_record.zi_freq; zc->zc_inject_record.zi_failfast = 
zc_c->zc_inject_record.zi_failfast; + break; } } @@ -98,15 +177,84 @@ void zfs_cmd_compat_put(zfs_cmd_t *zc, caddr_t addr, const int cflag) { zfs_cmd_v15_t *zc_c; + zfs_cmd_v28_t *zc28_c; switch (cflag) { + case ZFS_CMD_COMPAT_V28: + zc28_c = (void *)addr; + + strlcpy(zc28_c->zc_name, zc->zc_name, MAXPATHLEN); + strlcpy(zc28_c->zc_value, zc->zc_value, MAXPATHLEN * 2); + strlcpy(zc28_c->zc_string, zc->zc_string, MAXPATHLEN); + strlcpy(zc28_c->zc_top_ds, zc->zc_top_ds, MAXPATHLEN); + zc28_c->zc_guid = zc->zc_guid; + zc28_c->zc_nvlist_conf = zc->zc_nvlist_conf; + zc28_c->zc_nvlist_conf_size = zc->zc_nvlist_conf_size; + zc28_c->zc_nvlist_src = zc->zc_nvlist_src; + zc28_c->zc_nvlist_src_size = zc->zc_nvlist_src_size; + zc28_c->zc_nvlist_dst = zc->zc_nvlist_dst; + zc28_c->zc_nvlist_dst_size = zc->zc_nvlist_dst_size; + zc28_c->zc_cookie = zc->zc_cookie; + zc28_c->zc_objset_type = zc->zc_objset_type; + zc28_c->zc_perm_action = zc->zc_perm_action; + zc28_c->zc_history = zc->zc_history; + zc28_c->zc_history_len = zc->zc_history_len; + zc28_c->zc_history_offset = zc->zc_history_offset; + zc28_c->zc_obj = zc->zc_obj; + zc28_c->zc_iflags = zc->zc_iflags; + zc28_c->zc_share = zc->zc_share; + zc28_c->zc_jailid = zc->zc_jailid; + zc28_c->zc_objset_stats = zc->zc_objset_stats; + zc28_c->zc_begin_record = zc->zc_begin_record; + zc28_c->zc_defer_destroy = zc->zc_defer_destroy; + zc28_c->zc_temphold = zc->zc_temphold; + zc28_c->zc_action_handle = zc->zc_action_handle; + zc28_c->zc_cleanup_fd = zc->zc_cleanup_fd; + zc28_c->zc_simple = zc->zc_simple; + bcopy(zc->zc_pad, zc28_c->zc_pad, sizeof(zc28_c->zc_pad)); + zc28_c->zc_sendobj = zc->zc_sendobj; + zc28_c->zc_fromobj = zc->zc_fromobj; + zc28_c->zc_createtxg = zc->zc_createtxg; + zc28_c->zc_stat = zc->zc_stat; + + /* zc_inject_record */ + zc28_c->zc_inject_record.zi_objset = + zc->zc_inject_record.zi_objset; + zc28_c->zc_inject_record.zi_object = + zc->zc_inject_record.zi_object; + zc28_c->zc_inject_record.zi_start = + 
zc->zc_inject_record.zi_start; + zc28_c->zc_inject_record.zi_end = + zc->zc_inject_record.zi_end; + zc28_c->zc_inject_record.zi_guid = + zc->zc_inject_record.zi_guid; + zc28_c->zc_inject_record.zi_level = + zc->zc_inject_record.zi_level; + zc28_c->zc_inject_record.zi_error = + zc->zc_inject_record.zi_error; + zc28_c->zc_inject_record.zi_type = + zc->zc_inject_record.zi_type; + zc28_c->zc_inject_record.zi_freq = + zc->zc_inject_record.zi_freq; + zc28_c->zc_inject_record.zi_failfast = + zc->zc_inject_record.zi_failfast; + strlcpy(zc28_c->zc_inject_record.zi_func, + zc->zc_inject_record.zi_func, MAXNAMELEN); + zc28_c->zc_inject_record.zi_iotype = + zc->zc_inject_record.zi_iotype; + zc28_c->zc_inject_record.zi_duration = + zc->zc_inject_record.zi_duration; + zc28_c->zc_inject_record.zi_timer = + zc->zc_inject_record.zi_timer; + break; + case ZFS_CMD_COMPAT_V15: zc_c = (void *)addr; /* zc */ - strlcpy(zc_c->zc_name,zc->zc_name,MAXPATHLEN); - strlcpy(zc_c->zc_value,zc->zc_value,MAXPATHLEN); - strlcpy(zc_c->zc_string,zc->zc_string,MAXPATHLEN); + strlcpy(zc_c->zc_name, zc->zc_name, MAXPATHLEN); + strlcpy(zc_c->zc_value, zc->zc_value, MAXPATHLEN); + strlcpy(zc_c->zc_string, zc->zc_string, MAXPATHLEN); zc_c->zc_guid = zc->zc_guid; zc_c->zc_nvlist_conf = zc->zc_nvlist_conf; zc_c->zc_nvlist_conf_size = zc->zc_nvlist_conf_size; @@ -260,7 +408,7 @@ zfs_ioctl_compat_fix_stats_nvlist(nvlist_t *nvl) } static int -zfs_ioctl_compat_fix_stats(zfs_cmd_t *zc, const int cflag) +zfs_ioctl_compat_fix_stats(zfs_cmd_t *zc, const int nc) { nvlist_t *nv, *nvp = NULL; nvpair_t *elem; @@ -270,7 +418,7 @@ zfs_ioctl_compat_fix_stats(zfs_cmd_t *zc, const int cflag) zc->zc_nvlist_dst_size, zc->zc_iflags, &nv)) != 0) return (error); - if (cflag == 5) { /* ZFS_IOC_POOL_STATS */ + if (nc == 5) { /* ZFS_IOC_POOL_STATS */ elem = NULL; while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) { if (nvpair_value_nvlist(elem, &nvp) == 0) @@ -334,17 +482,22 @@ zcmd_ioctl_compat(int fd, unsigned long cmd, 
zfs_cmd_t *zc, const int cflag) void *zc_c; unsigned long ncmd; - if (cflag == ZFS_CMD_COMPAT_NONE) { + switch (cflag) { + case ZFS_CMD_COMPAT_NONE: ret = ioctl(fd, cmd, zc); return (ret); - } - - if (cflag == ZFS_CMD_COMPAT_V15) { + case ZFS_CMD_COMPAT_V28: + zc_c = malloc(sizeof(zfs_cmd_v28_t)); + ncmd = _IOWR('Z', ZFS_IOC(cmd), struct zfs_cmd_v28); + break; + case ZFS_CMD_COMPAT_V15: nc = zfs_ioctl_v28_to_v15[ZFS_IOC(cmd)]; zc_c = malloc(sizeof(zfs_cmd_v15_t)); ncmd = _IOWR('Z', nc, struct zfs_cmd_v15); - } else + break; + default: return (EINVAL); + } if (ZFS_IOC(ncmd) == ZFS_IOC_COMPAT_FAIL) return (ENOTSUP); @@ -358,16 +511,18 @@ zcmd_ioctl_compat(int fd, unsigned long cmd, zfs_cmd_t *zc, const int cflag) zfs_cmd_compat_get(zc, (caddr_t)zc_c, cflag); free(zc_c); - switch (nc) { - case 2: /* ZFS_IOC_POOL_IMPORT */ - case 4: /* ZFS_IOC_POOL_CONFIGS */ - case 5: /* ZFS_IOC_POOL_STATS */ - case 6: /* ZFS_IOC_POOL_TRYIMPORT */ - zfs_ioctl_compat_fix_stats(zc, nc); - break; - case 41: /* ZFS_IOC_POOL_GET_PROPS (v15) */ - zfs_ioctl_compat_pool_get_props(zc); - break; + if (cflag == ZFS_CMD_COMPAT_V15) { + switch (nc) { + case 2: /* ZFS_IOC_POOL_IMPORT */ + case 4: /* ZFS_IOC_POOL_CONFIGS */ + case 5: /* ZFS_IOC_POOL_STATS */ + case 6: /* ZFS_IOC_POOL_TRYIMPORT */ + zfs_ioctl_compat_fix_stats(zc, nc); + break; + case 41: /* ZFS_IOC_POOL_GET_PROPS (v15) */ + zfs_ioctl_compat_pool_get_props(zc); + break; + } } return (ret); diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.h b/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.h index 03d648c..b20ceca 100644 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.h +++ b/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Martin Matuska . All rights reserved. + * Copyright 2013 Martin Matuska . All rights reserved. * Use is subject to license terms. 
*/ @@ -40,11 +40,21 @@ extern "C" { #endif -#define ZFS_CMD_COMPAT_NONE 0 +/* + * Backwards ioctl compatibility + */ + +/* ioctl versions for vfs.zfs.version.ioctl */ +#define ZFS_IOCVER_DEADMAN 1 +#define ZFS_IOCVER_CURRENT ZFS_IOCVER_DEADMAN + +/* compatibility conversion flag */ +#define ZFS_CMD_COMPAT_NONE 0 #define ZFS_CMD_COMPAT_V15 1 +#define ZFS_CMD_COMPAT_V28 2 -#define ZFS_IOC_COMPAT_PASS 254 -#define ZFS_IOC_COMPAT_FAIL 255 +#define ZFS_IOC_COMPAT_PASS 254 +#define ZFS_IOC_COMPAT_FAIL 255 typedef struct zinject_record_v15 { uint64_t zi_objset; @@ -84,6 +94,60 @@ typedef struct zfs_cmd_v15 { zinject_record_v15_t zc_inject_record; } zfs_cmd_v15_t; +typedef struct zinject_record_v28 { + uint64_t zi_objset; + uint64_t zi_object; + uint64_t zi_start; + uint64_t zi_end; + uint64_t zi_guid; + uint32_t zi_level; + uint32_t zi_error; + uint64_t zi_type; + uint32_t zi_freq; + uint32_t zi_failfast; + char zi_func[MAXNAMELEN]; + uint32_t zi_iotype; + int32_t zi_duration; + uint64_t zi_timer; +} zinject_record_v28_t; + +typedef struct zfs_cmd_v28 { + char zc_name[MAXPATHLEN]; + char zc_value[MAXPATHLEN * 2]; + char zc_string[MAXNAMELEN]; + char zc_top_ds[MAXPATHLEN]; + uint64_t zc_guid; + uint64_t zc_nvlist_conf; /* really (char *) */ + uint64_t zc_nvlist_conf_size; + uint64_t zc_nvlist_src; /* really (char *) */ + uint64_t zc_nvlist_src_size; + uint64_t zc_nvlist_dst; /* really (char *) */ + uint64_t zc_nvlist_dst_size; + uint64_t zc_cookie; + uint64_t zc_objset_type; + uint64_t zc_perm_action; + uint64_t zc_history; /* really (char *) */ + uint64_t zc_history_len; + uint64_t zc_history_offset; + uint64_t zc_obj; + uint64_t zc_iflags; /* internal to zfs(7fs) */ + zfs_share_t zc_share; + uint64_t zc_jailid; + dmu_objset_stats_t zc_objset_stats; + struct drr_begin zc_begin_record; + zinject_record_v28_t zc_inject_record; + boolean_t zc_defer_destroy; + boolean_t zc_temphold; + uint64_t zc_action_handle; + int zc_cleanup_fd; + uint8_t zc_simple; + uint8_t zc_pad[3]; /* 
alignment */ + uint64_t zc_sendobj; + uint64_t zc_fromobj; + uint64_t zc_createtxg; + zfs_stat_t zc_stat; +} zfs_cmd_v28_t; + #ifdef _KERNEL unsigned static long zfs_ioctl_v15_to_v28[] = { 0, /* 0 ZFS_IOC_POOL_CREATE */ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c index 9925a3b..af0c9f7 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c @@ -23,7 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011-2012 Pawel Jakub Dawidek . * All rights reserved. - * Portions Copyright 2011 Martin Matuska + * Copyright 2013 Martin Matuska . All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. @@ -5331,12 +5331,14 @@ zfsdev_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, len = IOCPARM_LEN(cmd); /* - * Check if we have sufficient kernel memory allocated - * for the zfs_cmd_t request. Bail out if not so we - * will not access undefined memory region. + * Check if we are talking to supported older binaries + * and translate zfs_cmd if necessary */ if (len < sizeof(zfs_cmd_t)) - if (len == sizeof(zfs_cmd_v15_t)) { + if (len == sizeof(zfs_cmd_v28_t)) { + cflag = ZFS_CMD_COMPAT_V28; + vec = ZFS_IOC(cmd); + } else if (len == sizeof(zfs_cmd_v15_t)) { cflag = ZFS_CMD_COMPAT_V15; vec = zfs_ioctl_v15_to_v28[ZFS_IOC(cmd)]; } else @@ -5351,6 +5353,11 @@ zfsdev_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, return (ENOTSUP); } + /* + * Check if we have sufficient kernel memory allocated + * for the zfs_cmd_t request. Bail out if not so we + * will not access undefined memory region. 
+ */ if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) return (EINVAL); -- cgit v1.1 From cf931ec7e19af6e1f43e78b5497e62f79bcff29d Mon Sep 17 00:00:00 2001 From: kevlo Date: Fri, 1 Mar 2013 15:45:57 +0000 Subject: Assign the len field of the netbuf structure to the current length of a sockaddr. Obtained from: NetBSD --- lib/libc/rpc/clnt_vc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libc/rpc/clnt_vc.c b/lib/libc/rpc/clnt_vc.c index 881f84d..53e393e 100644 --- a/lib/libc/rpc/clnt_vc.c +++ b/lib/libc/rpc/clnt_vc.c @@ -260,7 +260,7 @@ clnt_vc_create(fd, raddr, prog, vers, sendsz, recvsz) if (ct->ct_addr.buf == NULL) goto err; memcpy(ct->ct_addr.buf, raddr->buf, raddr->len); - ct->ct_addr.len = raddr->maxlen; + ct->ct_addr.len = raddr->len; ct->ct_addr.maxlen = raddr->maxlen; /* -- cgit v1.1 From 0bc64999312aebf9154a527d512d4c04a379fdd6 Mon Sep 17 00:00:00 2001 From: kevlo Date: Fri, 1 Mar 2013 15:48:31 +0000 Subject: Fix typo. --- sys/dev/tws/tws_hdm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/dev/tws/tws_hdm.c b/sys/dev/tws/tws_hdm.c index 3f6bea0..f5bb3f1 100644 --- a/sys/dev/tws/tws_hdm.c +++ b/sys/dev/tws/tws_hdm.c @@ -99,7 +99,7 @@ tws_init_ctlr(struct tws_softc *sc) regh = tws_read_reg(sc, TWS_I2O0_IOPOBQPH, 4); regl = tws_read_reg(sc, TWS_I2O0_IOPOBQPL, 4); reg = (((u_int64_t)regh) << 32) | regl; - TWS_TRACE_DEBUG(sc, "host outbound clenup",reg, regl); + TWS_TRACE_DEBUG(sc, "host outbound cleanup",reg, regl); if ( regh == TWS_FIFO_EMPTY32 ) break; } -- cgit v1.1 From 0da1880cbc10809b659cabb782a330c021999d1a Mon Sep 17 00:00:00 2001 From: kib Date: Fri, 1 Mar 2013 18:40:14 +0000 Subject: Make the default implementation of the VOP_VPTOCNP() fail if the directory entry, matched by the inode number, is ".". NFSv4 client might instantiate the distinct vnodes which have the same inode number, since single v4 export can be combined from several filesystems on the server. 
For instance, a case when the nested server mount point is exactly one directory below the top of the export, causes directory and its parent to have the same inode number 2. The vop_stdvptocnp() algorithm then returns "." as the name of the lower directory. Filtering out the "." entry with ENOENT works around this behaviour, the error forces getcwd(3) to fall back to usermode implementation, which compares both st_dev and st_ino. Based on the submission by: rmacklem Tested by: rmacklem MFC after: 1 week --- sys/kern/vfs_default.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 00d064e..1dd0185 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -856,8 +856,12 @@ vop_stdvptocnp(struct vop_vptocnp_args *ap) error = ENOMEM; goto out; } - bcopy(dp->d_name, buf + i, dp->d_namlen); - error = 0; + if (dp->d_namlen == 1 && dp->d_name[0] == '.') { + error = ENOENT; + } else { + bcopy(dp->d_name, buf + i, dp->d_namlen); + error = 0; + } goto out; } } while (len > 0 || !eofflag); -- cgit v1.1 From 11e6f714beffd0a6169d8c891ae993508fafba2c Mon Sep 17 00:00:00 2001 From: marius Date: Fri, 1 Mar 2013 18:49:14 +0000 Subject: - Use strdup(9) instead of reimplementing it. - Use __DECONST instead of strange casts. - Reduce code duplication and simplify name2oid(). 
PR: 176373 Submitted by: Christoph Mallon MFC after: 1 week --- sys/kern/kern_sysctl.c | 70 +++++++++++++------------------------------------- 1 file changed, 18 insertions(+), 52 deletions(-) diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index 33296d3..68bf453 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -444,9 +444,9 @@ sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse) SYSCTL_SLEEP(&oidp->oid_running, "oidrm", 0); } if (oidp->oid_descr) - free((void *)(uintptr_t)(const void *)oidp->oid_descr, M_SYSCTLOID); - free((void *)(uintptr_t)(const void *)oidp->oid_name, - M_SYSCTLOID); + free(__DECONST(char *, oidp->oid_descr), + M_SYSCTLOID); + free(__DECONST(char *, oidp->oid_name), M_SYSCTLOID); free(oidp, M_SYSCTLOID); } } @@ -462,8 +462,6 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr) { struct sysctl_oid *oidp; - ssize_t len; - char *newname; /* You have to hook up somewhere.. 
*/ if (parent == NULL) @@ -490,11 +488,7 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, SLIST_NEXT(oidp, oid_link) = NULL; oidp->oid_number = number; oidp->oid_refcnt = 1; - len = strlen(name); - newname = malloc(len + 1, M_SYSCTLOID, M_WAITOK); - bcopy(name, newname, len + 1); - newname[len] = '\0'; - oidp->oid_name = newname; + oidp->oid_name = strdup(name, M_SYSCTLOID); oidp->oid_handler = handler; oidp->oid_kind = CTLFLAG_DYN | kind; if ((kind & CTLTYPE) == CTLTYPE_NODE) { @@ -508,12 +502,8 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, oidp->oid_arg2 = arg2; } oidp->oid_fmt = fmt; - if (descr) { - int len = strlen(descr) + 1; - oidp->oid_descr = malloc(len, M_SYSCTLOID, M_WAITOK); - if (oidp->oid_descr) - strcpy((char *)(uintptr_t)(const void *)oidp->oid_descr, descr); - } + if (descr) + oidp->oid_descr = strdup(descr, M_SYSCTLOID); /* Update the context, if used */ if (clist != NULL) sysctl_ctx_entry_add(clist, oidp); @@ -529,16 +519,12 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, void sysctl_rename_oid(struct sysctl_oid *oidp, const char *name) { - ssize_t len; char *newname; - void *oldname; + char *oldname; - len = strlen(name); - newname = malloc(len + 1, M_SYSCTLOID, M_WAITOK); - bcopy(name, newname, len + 1); - newname[len] = '\0'; + newname = strdup(name, M_SYSCTLOID); SYSCTL_XLOCK(); - oldname = (void *)(uintptr_t)(const void *)oidp->oid_name; + oldname = __DECONST(char *, oidp->oid_name); oidp->oid_name = newname; SYSCTL_XUNLOCK(); free(oldname, M_SYSCTLOID); @@ -823,39 +809,26 @@ static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_CAPRD, static int name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp) { - int i; struct sysctl_oid *oidp; struct sysctl_oid_list *lsp = &sysctl__children; char *p; SYSCTL_ASSERT_XLOCKED(); - if (!*name) - return (ENOENT); - - p = name + strlen(name) - 1 ; - if (*p == '.') - *p = '\0'; - - *len = 0; 
+ for (*len = 0; *len < CTL_MAXNAME;) { + p = strsep(&name, "."); - for (p = name; *p && *p != '.'; p++) - ; - i = *p; - if (i == '.') - *p = '\0'; - - oidp = SLIST_FIRST(lsp); - - while (oidp && *len < CTL_MAXNAME) { - if (strcmp(name, oidp->oid_name)) { - oidp = SLIST_NEXT(oidp, oid_link); - continue; + oidp = SLIST_FIRST(lsp); + for (;; oidp = SLIST_NEXT(oidp, oid_link)) { + if (oidp == NULL) + return (ENOENT); + if (strcmp(p, oidp->oid_name) == 0) + break; } *oid++ = oidp->oid_number; (*len)++; - if (!i) { + if (name == NULL || *name == '\0') { if (oidpp) *oidpp = oidp; return (0); @@ -868,13 +841,6 @@ name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp) break; lsp = SYSCTL_CHILDREN(oidp); - oidp = SLIST_FIRST(lsp); - name = p+1; - for (p = name; *p && *p != '.'; p++) - ; - i = *p; - if (i == '.') - *p = '\0'; } return (ENOENT); } -- cgit v1.1 From 9eba1eca856c3d31278587d5869ba5a0889a347e Mon Sep 17 00:00:00 2001 From: alc Date: Fri, 1 Mar 2013 19:02:41 +0000 Subject: Eliminate a redundant #include: machine/pmap.h is already included through vm/pmap.h. 
--- sys/arm/econa/econa_machdep.c | 1 - sys/arm/s3c2xx0/s3c24x0_machdep.c | 1 - sys/arm/xscale/i80321/ep80219_machdep.c | 1 - sys/arm/xscale/i80321/iq31244_machdep.c | 1 - sys/arm/xscale/i8134x/crb_machdep.c | 1 - sys/arm/xscale/ixp425/avila_machdep.c | 1 - sys/arm/xscale/pxa/pxa_machdep.c | 1 - 7 files changed, 7 deletions(-) diff --git a/sys/arm/econa/econa_machdep.c b/sys/arm/econa/econa_machdep.c index d622860..d64e826 100644 --- a/sys/arm/econa/econa_machdep.c +++ b/sys/arm/econa/econa_machdep.c @@ -68,7 +68,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include diff --git a/sys/arm/s3c2xx0/s3c24x0_machdep.c b/sys/arm/s3c2xx0/s3c24x0_machdep.c index e497bc4..1811b52 100644 --- a/sys/arm/s3c2xx0/s3c24x0_machdep.c +++ b/sys/arm/s3c2xx0/s3c24x0_machdep.c @@ -78,7 +78,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include diff --git a/sys/arm/xscale/i80321/ep80219_machdep.c b/sys/arm/xscale/i80321/ep80219_machdep.c index cb3d161..ff2f1c3 100644 --- a/sys/arm/xscale/i80321/ep80219_machdep.c +++ b/sys/arm/xscale/i80321/ep80219_machdep.c @@ -78,7 +78,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include diff --git a/sys/arm/xscale/i80321/iq31244_machdep.c b/sys/arm/xscale/i80321/iq31244_machdep.c index 2375b6b..1412635 100644 --- a/sys/arm/xscale/i80321/iq31244_machdep.c +++ b/sys/arm/xscale/i80321/iq31244_machdep.c @@ -78,7 +78,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include diff --git a/sys/arm/xscale/i8134x/crb_machdep.c b/sys/arm/xscale/i8134x/crb_machdep.c index a1344af..4ae836b 100644 --- a/sys/arm/xscale/i8134x/crb_machdep.c +++ b/sys/arm/xscale/i8134x/crb_machdep.c @@ -78,7 +78,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include diff --git a/sys/arm/xscale/ixp425/avila_machdep.c b/sys/arm/xscale/ixp425/avila_machdep.c index 07026da..82e1aab0 100644 --- 
a/sys/arm/xscale/ixp425/avila_machdep.c +++ b/sys/arm/xscale/ixp425/avila_machdep.c @@ -78,7 +78,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include diff --git a/sys/arm/xscale/pxa/pxa_machdep.c b/sys/arm/xscale/pxa/pxa_machdep.c index 706fb58..5640622 100644 --- a/sys/arm/xscale/pxa/pxa_machdep.c +++ b/sys/arm/xscale/pxa/pxa_machdep.c @@ -80,7 +80,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include -- cgit v1.1 From 8dc4f9c4f210704409703954be38a00a4cbe1951 Mon Sep 17 00:00:00 2001 From: marius Date: Fri, 1 Mar 2013 19:12:38 +0000 Subject: - Make tables, device ID strings etc const. - Use NULL instead of 0 for pointers. - Remove redundant bzero(9)'ing of the softc. - Remove redundant/unused softc members. - Don't allocate MSI/MSI-X as RF_SHAREABLE. - Re-use bus accessor macros instead of duplicating them. - In bce_miibus_{read,write}_reg(), remove superfluous limiting of the PHY address (missed in r213893). MFC after: 1 week --- sys/dev/bce/if_bce.c | 71 +++++++++++++++++-------------------------------- sys/dev/bce/if_bcereg.h | 22 +++++++-------- 2 files changed, 34 insertions(+), 59 deletions(-) diff --git a/sys/dev/bce/if_bce.c b/sys/dev/bce/if_bce.c index 2471f19..8f6bc48 100644 --- a/sys/dev/bce/if_bce.c +++ b/sys/dev/bce/if_bce.c @@ -95,7 +95,7 @@ __FBSDID("$FreeBSD$"); /****************************************************************************/ #define BCE_DEVDESC_MAX 64 -static struct bce_type bce_devs[] = { +static const struct bce_type bce_devs[] = { /* BCM5706C Controllers and OEM boards. */ { BRCM_VENDORID, BRCM_DEVICEID_BCM5706, HP_VENDORID, 0x3101, "HP NC370T Multifunction Gigabit Server Adapter" }, @@ -161,7 +161,7 @@ static struct bce_type bce_devs[] = { /****************************************************************************/ /* Supported Flash NVRAM device data. 
*/ /****************************************************************************/ -static struct flash_spec flash_table[] = +static const struct flash_spec flash_table[] = { #define BUFFERED_FLAGS (BCE_NV_BUFFERED | BCE_NV_TRANSLATE) #define NONBUFFERED_FLAGS (BCE_NV_WREN) @@ -258,7 +258,7 @@ static struct flash_spec flash_table[] = * logical-to-physical mapping is required in the * driver. */ -static struct flash_spec flash_5709 = { +static const struct flash_spec flash_5709 = { .flags = BCE_NV_BUFFERED, .page_bits = BCM5709_FLASH_PAGE_BITS, .page_size = BCM5709_FLASH_PAGE_SIZE, @@ -481,8 +481,8 @@ MODULE_DEPEND(bce, pci, 1, 1, 1); MODULE_DEPEND(bce, ether, 1, 1, 1); MODULE_DEPEND(bce, miibus, 1, 1, 1); -DRIVER_MODULE(bce, pci, bce_driver, bce_devclass, 0, 0); -DRIVER_MODULE(miibus, bce, miibus_driver, miibus_devclass, 0, 0); +DRIVER_MODULE(bce, pci, bce_driver, bce_devclass, NULL, NULL); +DRIVER_MODULE(miibus, bce, miibus_driver, miibus_devclass, NULL, NULL); /****************************************************************************/ @@ -647,7 +647,7 @@ SYSCTL_UINT(_hw_bce, OID_AUTO, rx_ticks, CTLFLAG_RDTUN, static int bce_probe(device_t dev) { - struct bce_type *t; + const struct bce_type *t; struct bce_softc *sc; char *descbuf; u16 vid = 0, did = 0, svid = 0, sdid = 0; @@ -655,7 +655,6 @@ bce_probe(device_t dev) t = bce_devs; sc = device_get_softc(dev); - bzero(sc, sizeof(struct bce_softc)); sc->bce_unit = device_get_unit(dev); sc->bce_dev = dev; @@ -1040,7 +1039,7 @@ bce_attach(device_t dev) struct bce_softc *sc; struct ifnet *ifp; u32 val; - int error, rid, rc = 0; + int count, error, rc = 0, rid; sc = device_get_softc(dev); sc->bce_dev = dev; @@ -1084,14 +1083,14 @@ bce_attach(device_t dev) ((sc->bce_res_irq = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE)) != NULL)) { - msi_needed = sc->bce_msi_count = 1; + msi_needed = count = 1; - if (((error = pci_alloc_msix(dev, &sc->bce_msi_count)) != 0) || - (sc->bce_msi_count != msi_needed)) { + if 
(((error = pci_alloc_msix(dev, &count)) != 0) || + (count != msi_needed)) { BCE_PRINTF("%s(%d): MSI-X allocation failed! Requested = %d," "Received = %d, error = %d\n", __FILE__, __LINE__, - msi_needed, sc->bce_msi_count, error); - sc->bce_msi_count = 0; + msi_needed, count, error); + count = 0; pci_release_msi(dev); bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->bce_res_irq); @@ -1100,19 +1099,18 @@ bce_attach(device_t dev) DBPRINT(sc, BCE_INFO_LOAD, "%s(): Using MSI-X interrupt.\n", __FUNCTION__); sc->bce_flags |= BCE_USING_MSIX_FLAG; - sc->bce_intr = bce_intr; } } #endif /* Try allocating a MSI interrupt. */ if ((sc->bce_cap_flags & BCE_MSI_CAPABLE_FLAG) && - (bce_msi_enable >= 1) && (sc->bce_msi_count == 0)) { - sc->bce_msi_count = 1; - if ((error = pci_alloc_msi(dev, &sc->bce_msi_count)) != 0) { + (bce_msi_enable >= 1) && (count == 0)) { + count = 1; + if ((error = pci_alloc_msi(dev, &count)) != 0) { BCE_PRINTF("%s(%d): MSI allocation failed! " "error = %d\n", __FILE__, __LINE__, error); - sc->bce_msi_count = 0; + count = 0; pci_release_msi(dev); } else { DBPRINT(sc, BCE_INFO_LOAD, "%s(): Using MSI " @@ -1120,23 +1118,19 @@ bce_attach(device_t dev) sc->bce_flags |= BCE_USING_MSI_FLAG; if (BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5709) sc->bce_flags |= BCE_ONE_SHOT_MSI_FLAG; - sc->bce_irq_rid = 1; - sc->bce_intr = bce_intr; + rid = 1; } } /* Try allocating a legacy interrupt. */ - if (sc->bce_msi_count == 0) { + if (count == 0) { DBPRINT(sc, BCE_INFO_LOAD, "%s(): Using INTx interrupt.\n", __FUNCTION__); rid = 0; - sc->bce_intr = bce_intr; } sc->bce_res_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, - &rid, RF_SHAREABLE | RF_ACTIVE); - - sc->bce_irq_rid = rid; + &rid, RF_ACTIVE | (count != 0 ? 0 : RF_SHAREABLE)); /* Report any IRQ allocation errors. 
*/ if (sc->bce_res_irq == NULL) { @@ -1635,7 +1629,7 @@ bce_shutdown(device_t dev) static u32 bce_reg_rd(struct bce_softc *sc, u32 offset) { - u32 val = bus_space_read_4(sc->bce_btag, sc->bce_bhandle, offset); + u32 val = REG_RD(sc, offset); DBPRINT(sc, BCE_INSANE_REG, "%s(); offset = 0x%08X, val = 0x%08X\n", __FUNCTION__, offset, val); return val; @@ -1653,7 +1647,7 @@ bce_reg_wr16(struct bce_softc *sc, u32 offset, u16 val) { DBPRINT(sc, BCE_INSANE_REG, "%s(); offset = 0x%08X, val = 0x%04X\n", __FUNCTION__, offset, val); - bus_space_write_2(sc->bce_btag, sc->bce_bhandle, offset, val); + REG_WR16(sc, offset, val); } @@ -1668,7 +1662,7 @@ bce_reg_wr(struct bce_softc *sc, u32 offset, u32 val) { DBPRINT(sc, BCE_INSANE_REG, "%s(); offset = 0x%08X, val = 0x%08X\n", __FUNCTION__, offset, val); - bus_space_write_4(sc->bce_btag, sc->bce_bhandle, offset, val); + REG_WR(sc, offset, val); } #endif @@ -1879,13 +1873,6 @@ bce_miibus_read_reg(device_t dev, int phy, int reg) sc = device_get_softc(dev); - /* Make sure we are accessing the correct PHY address. */ - if (phy != sc->bce_phy_addr) { - DBPRINT(sc, BCE_INSANE_PHY, "Invalid PHY address %d " - "for PHY read!\n", phy); - return(0); - } - /* * The 5709S PHY is an IEEE Clause 45 PHY * with special mappings to work with IEEE @@ -1968,13 +1955,6 @@ bce_miibus_write_reg(device_t dev, int phy, int reg, int val) sc = device_get_softc(dev); - /* Make sure we are accessing the correct PHY address. 
*/ - if (phy != sc->bce_phy_addr) { - DBPRINT(sc, BCE_INSANE_PHY, "Invalid PHY address %d " - "for PHY write!\n", phy); - return(0); - } - DB_PRINT_PHY_REG(reg, val); /* @@ -2535,7 +2515,7 @@ bce_init_nvram(struct bce_softc *sc) { u32 val; int j, entry_count, rc = 0; - struct flash_spec *flash; + const struct flash_spec *flash; DBENTER(BCE_VERBOSE_NVRAM); @@ -3949,8 +3929,8 @@ bce_release_resources(struct bce_softc *sc) if (sc->bce_res_irq != NULL) { DBPRINT(sc, BCE_INFO_RESET, "Releasing IRQ.\n"); - bus_release_resource(dev, SYS_RES_IRQ, sc->bce_irq_rid, - sc->bce_res_irq); + bus_release_resource(dev, SYS_RES_IRQ, + rman_get_rid(sc->bce_res_irq), sc->bce_res_irq); } if (sc->bce_flags & (BCE_USING_MSI_FLAG | BCE_USING_MSIX_FLAG)) { @@ -11650,4 +11630,3 @@ bce_breakpoint(struct bce_softc *sc) return; } #endif - diff --git a/sys/dev/bce/if_bcereg.h b/sys/dev/bce/if_bcereg.h index fc37b2c..37ff5ed 100644 --- a/sys/dev/bce/if_bcereg.h +++ b/sys/dev/bce/if_bcereg.h @@ -622,7 +622,7 @@ struct bce_type { u_int16_t bce_did; u_int16_t bce_svid; u_int16_t bce_sdid; - char *bce_name; + const char *bce_name; }; /****************************************************************************/ @@ -716,7 +716,7 @@ struct flash_spec { u32 page_size; u32 addr_mask; u32 total_size; - u8 *name; + const u8 *name; }; @@ -2001,7 +2001,7 @@ struct l2_fhdr { #define BCE_MISC_ENABLE_CLR_BITS_UMP_ENABLE (1L<<27) #define BCE_MISC_ENABLE_CLR_BITS_RV2P_CMD_SCHEDULER_ENABLE (1L<<28) #define BCE_MISC_ENABLE_CLR_BITS_RSVD_FUTURE_ENABLE (0x7L<<29) - + #define BCE_MISC_ENABLE_CLR_DEFAULT 0x17ffffff #define BCE_MISC_CLOCK_CONTROL_BITS 0x00000818 @@ -6318,19 +6318,19 @@ struct fw_info { u32 text_addr; u32 text_len; u32 text_index; - u32 *text; + const u32 *text; /* Data section. */ u32 data_addr; u32 data_len; u32 data_index; - u32 *data; + const u32 *data; /* SBSS section. */ u32 sbss_addr; u32 sbss_len; u32 sbss_index; - u32 *sbss; + const u32 *sbss; /* BSS section. 
*/ u32 bss_addr; @@ -6421,7 +6421,7 @@ struct fw_info { struct bce_softc { - /* Interface info. Must be first!! */ + /* Interface info */ struct ifnet *bce_ifp; /* Parent device handle */ @@ -6451,10 +6451,7 @@ struct bce_softc struct mtx bce_mtx; /* Interrupt handler. */ - driver_intr_t *bce_intr; void *bce_intrhand; - int bce_irq_rid; - int bce_msi_count; /* ASIC Chip ID. */ u32 bce_chipid; @@ -6509,7 +6506,7 @@ struct bce_softc u16 link_speed; /* Flash NVRAM settings */ - struct flash_spec *bce_flash_info; + const struct flash_spec *bce_flash_info; /* Flash NVRAM size */ u32 bce_flash_size; @@ -6518,7 +6515,7 @@ struct bce_softc u32 bce_shmem_base; /* Name string */ - char *bce_name; + const char *bce_name; /* Tracks the version of bootcode firmware. */ char bce_bc_ver[32]; @@ -6834,4 +6831,3 @@ struct bce_softc }; #endif /* __BCEREG_H_DEFINED */ - -- cgit v1.1 From 433c85439320c5fb5ab884a7859928377bb59c64 Mon Sep 17 00:00:00 2001 From: marius Date: Fri, 1 Mar 2013 19:55:10 +0000 Subject: - Make tables, device ID strings etc const. This includes #ifdef'ing 0 aac_command_status_table, which is actually unused since r111532. While at it, make aac_if a pointer to the now const interface tables instead of copying them over to the softc (this alone already reduces the size of aac.ko on amd64 by ~1 KiB). - Remove redundant softc members. - Use DEVMETHOD_END. - Use NULL instead of 0 for pointers. - Remove redundant bzero(9)'ing of the softc. - Use pci_enable_busmaster(9) instead of duplicating it. - Remove redundant checking for PCIM_CMD_MEMEN (resource allocation will just fail). - Canonicalize the error messages in case of resource allocation failures. - Add support for using MSI instead of INTx, controllable via the tunable hw.aac.enable_msi (defaulting to on). 
MFC after: 1 month --- sys/dev/aac/aac.c | 59 ++++++++++++----------------- sys/dev/aac/aac_cam.c | 5 +-- sys/dev/aac/aac_disk.c | 4 +- sys/dev/aac/aac_pci.c | 98 +++++++++++++++++++++++++++--------------------- sys/dev/aac/aac_tables.h | 11 +++--- sys/dev/aac/aacvar.h | 45 +++++++++++----------- 6 files changed, 110 insertions(+), 112 deletions(-) diff --git a/sys/dev/aac/aac.c b/sys/dev/aac/aac.c index 9f315b6..d4c7383 100644 --- a/sys/dev/aac/aac.c +++ b/sys/dev/aac/aac.c @@ -117,7 +117,7 @@ static void aac_sa_set_mailbox(struct aac_softc *sc, u_int32_t command, static int aac_sa_get_mailbox(struct aac_softc *sc, int mb); static void aac_sa_set_interrupts(struct aac_softc *sc, int enable); -struct aac_interface aac_sa_interface = { +const struct aac_interface aac_sa_interface = { aac_sa_get_fwstatus, aac_sa_qnotify, aac_sa_get_istatus, @@ -142,7 +142,7 @@ static int aac_rx_send_command(struct aac_softc *sc, struct aac_command *cm); static int aac_rx_get_outb_queue(struct aac_softc *sc); static void aac_rx_set_outb_queue(struct aac_softc *sc, int index); -struct aac_interface aac_rx_interface = { +const struct aac_interface aac_rx_interface = { aac_rx_get_fwstatus, aac_rx_qnotify, aac_rx_get_istatus, @@ -169,7 +169,7 @@ static int aac_rkt_send_command(struct aac_softc *sc, struct aac_command *cm); static int aac_rkt_get_outb_queue(struct aac_softc *sc); static void aac_rkt_set_outb_queue(struct aac_softc *sc, int index); -struct aac_interface aac_rkt_interface = { +const struct aac_interface aac_rkt_interface = { aac_rkt_get_fwstatus, aac_rkt_qnotify, aac_rkt_get_istatus, @@ -183,8 +183,8 @@ struct aac_interface aac_rkt_interface = { }; /* Debugging and Diagnostics */ -static void aac_describe_controller(struct aac_softc *sc); -static char *aac_describe_code(struct aac_code_lookup *table, +static void aac_describe_controller(struct aac_softc *sc); +static const char *aac_describe_code(const struct aac_code_lookup *table, u_int32_t code); /* Management Interface 
*/ @@ -222,7 +222,7 @@ static struct cdevsw aac_cdevsw = { static MALLOC_DEFINE(M_AACBUF, "aacbuf", "Buffers for the AAC driver"); /* sysctl node */ -static SYSCTL_NODE(_hw, OID_AUTO, aac, CTLFLAG_RD, 0, "AAC driver parameters"); +SYSCTL_NODE(_hw, OID_AUTO, aac, CTLFLAG_RD, 0, "AAC driver parameters"); /* * Device Interface @@ -634,8 +634,8 @@ aac_free(struct aac_softc *sc) if (sc->aac_intr) bus_teardown_intr(sc->aac_dev, sc->aac_irq, sc->aac_intr); if (sc->aac_irq != NULL) - bus_release_resource(sc->aac_dev, SYS_RES_IRQ, sc->aac_irq_rid, - sc->aac_irq); + bus_release_resource(sc->aac_dev, SYS_RES_IRQ, + rman_get_rid(sc->aac_irq), sc->aac_irq); /* destroy data-transfer DMA tag */ if (sc->aac_buffer_dmat) @@ -648,10 +648,10 @@ aac_free(struct aac_softc *sc) /* release the register window mapping */ if (sc->aac_regs_res0 != NULL) bus_release_resource(sc->aac_dev, SYS_RES_MEMORY, - sc->aac_regs_rid0, sc->aac_regs_res0); + rman_get_rid(sc->aac_regs_res0), sc->aac_regs_res0); if (sc->aac_hwif == AAC_HWIF_NARK && sc->aac_regs_res1 != NULL) bus_release_resource(sc->aac_dev, SYS_RES_MEMORY, - sc->aac_regs_rid1, sc->aac_regs_res1); + rman_get_rid(sc->aac_regs_res1), sc->aac_regs_res1); } /* @@ -1333,9 +1333,6 @@ aac_bio_complete(struct aac_command *cm) } else { bp->bio_error = EIO; bp->bio_flags |= BIO_ERROR; - /* pass an error string out to the disk layer */ - bp->bio_driver1 = aac_describe_code(aac_command_status_table, - status); } aac_biodone(bp); } @@ -1687,7 +1684,7 @@ static int aac_check_firmware(struct aac_softc *sc) { u_int32_t code, major, minor, options = 0, atu_size = 0; - int status; + int rid, status; time_t then; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); @@ -1765,7 +1762,7 @@ aac_check_firmware(struct aac_softc *sc) sc->flags |= AAC_FLAGS_SG_64BIT; } if ((options & AAC_SUPPORTED_NEW_COMM) - && sc->aac_if.aif_send_command) + && sc->aac_if->aif_send_command) sc->flags |= AAC_FLAGS_NEW_COMM; if (options & AAC_SUPPORTED_64BIT_ARRAYSIZE) sc->flags |= 
AAC_FLAGS_ARRAY_64BIT; @@ -1776,17 +1773,15 @@ aac_check_firmware(struct aac_softc *sc) /* Remap mem. resource, if required */ if ((sc->flags & AAC_FLAGS_NEW_COMM) && - atu_size > rman_get_size(sc->aac_regs_res1)) { - bus_release_resource( - sc->aac_dev, SYS_RES_MEMORY, - sc->aac_regs_rid1, sc->aac_regs_res1); - sc->aac_regs_res1 = bus_alloc_resource( - sc->aac_dev, SYS_RES_MEMORY, &sc->aac_regs_rid1, - 0ul, ~0ul, atu_size, RF_ACTIVE); + atu_size > rman_get_size(sc->aac_regs_res1)) { + rid = rman_get_rid(sc->aac_regs_res1); + bus_release_resource(sc->aac_dev, SYS_RES_MEMORY, rid, + sc->aac_regs_res1); + sc->aac_regs_res1 = bus_alloc_resource(sc->aac_dev, + SYS_RES_MEMORY, &rid, 0ul, ~0ul, atu_size, RF_ACTIVE); if (sc->aac_regs_res1 == NULL) { sc->aac_regs_res1 = bus_alloc_resource_any( - sc->aac_dev, SYS_RES_MEMORY, - &sc->aac_regs_rid1, RF_ACTIVE); + sc->aac_dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->aac_regs_res1 == NULL) { device_printf(sc->aac_dev, "couldn't allocate register window\n"); @@ -1799,7 +1794,6 @@ aac_check_firmware(struct aac_softc *sc) if (sc->aac_hwif == AAC_HWIF_NARK) { sc->aac_regs_res0 = sc->aac_regs_res1; - sc->aac_regs_rid0 = sc->aac_regs_rid1; sc->aac_btag0 = sc->aac_btag1; sc->aac_bhandle0 = sc->aac_bhandle1; } @@ -2003,14 +1997,7 @@ out: static int aac_setup_intr(struct aac_softc *sc) { - sc->aac_irq_rid = 0; - if ((sc->aac_irq = bus_alloc_resource_any(sc->aac_dev, SYS_RES_IRQ, - &sc->aac_irq_rid, - RF_SHAREABLE | - RF_ACTIVE)) == NULL) { - device_printf(sc->aac_dev, "can't allocate interrupt\n"); - return (EINVAL); - } + if (sc->flags & AAC_FLAGS_NEW_COMM) { if (bus_setup_intr(sc->aac_dev, sc->aac_irq, INTR_MPSAFE|INTR_TYPE_BIO, NULL, @@ -2119,7 +2106,7 @@ aac_sync_fib(struct aac_softc *sc, u_int32_t command, u_int32_t xferstate, * Note that the queue implementation here is a little funky; neither the PI or * CI will ever be zero. This behaviour is a controller feature. 
*/ -static struct { +static const struct { int size; int notify; } aac_qinfo[] = { @@ -2786,8 +2773,8 @@ aac_describe_controller(struct aac_softc *sc) * Look up a text description of a numeric error code and return a pointer to * same. */ -static char * -aac_describe_code(struct aac_code_lookup *table, u_int32_t code) +static const char * +aac_describe_code(const struct aac_code_lookup *table, u_int32_t code) { int i; diff --git a/sys/dev/aac/aac_cam.c b/sys/dev/aac/aac_cam.c index 7f0d7ee..60ead24 100644 --- a/sys/dev/aac/aac_cam.c +++ b/sys/dev/aac/aac_cam.c @@ -92,7 +92,7 @@ static device_method_t aac_pass_methods[] = { DEVMETHOD(device_probe, aac_cam_probe), DEVMETHOD(device_attach, aac_cam_attach), DEVMETHOD(device_detach, aac_cam_detach), - { 0, 0 } + DEVMETHOD_END }; static driver_t aac_pass_driver = { @@ -101,7 +101,7 @@ static driver_t aac_pass_driver = { sizeof(struct aac_cam) }; -DRIVER_MODULE(aacp, aac, aac_pass_driver, aac_pass_devclass, 0, 0); +DRIVER_MODULE(aacp, aac, aac_pass_driver, aac_pass_devclass, NULL, NULL); MODULE_DEPEND(aacp, cam, 1, 1, 1); static MALLOC_DEFINE(M_AACCAM, "aaccam", "AAC CAM info"); @@ -685,4 +685,3 @@ aac_cam_term_io(struct cam_sim *sim, union ccb *ccb) { return (CAM_UA_TERMIO); } - diff --git a/sys/dev/aac/aac_disk.c b/sys/dev/aac/aac_disk.c index f2f6636..082be9b 100644 --- a/sys/dev/aac/aac_disk.c +++ b/sys/dev/aac/aac_disk.c @@ -73,7 +73,7 @@ static device_method_t aac_disk_methods[] = { DEVMETHOD(device_probe, aac_disk_probe), DEVMETHOD(device_attach, aac_disk_attach), DEVMETHOD(device_detach, aac_disk_detach), - { 0, 0 } + DEVMETHOD_END }; static driver_t aac_disk_driver = { @@ -82,7 +82,7 @@ static driver_t aac_disk_driver = { sizeof(struct aac_disk) }; -DRIVER_MODULE(aacd, aac, aac_disk_driver, aac_disk_devclass, 0, 0); +DRIVER_MODULE(aacd, aac, aac_disk_driver, aac_disk_devclass, NULL, NULL); /* * Handle open from generic layer. 
diff --git a/sys/dev/aac/aac_pci.c b/sys/dev/aac/aac_pci.c index 163c5ea..87ada1c 100644 --- a/sys/dev/aac/aac_pci.c +++ b/sys/dev/aac/aac_pci.c @@ -60,6 +60,11 @@ __FBSDID("$FreeBSD$"); static int aac_pci_probe(device_t dev); static int aac_pci_attach(device_t dev); +static int aac_enable_msi = 1; +TUNABLE_INT("hw.aac.enable_msi", &aac_enable_msi); +SYSCTL_INT(_hw_aac, OID_AUTO, enable_msi, CTLFLAG_RDTUN, &aac_enable_msi, 0, + "Enable MSI interrupts"); + static device_method_t aac_methods[] = { /* Device interface */ DEVMETHOD(device_probe, aac_pci_probe), @@ -79,11 +84,10 @@ static driver_t aac_pci_driver = { static devclass_t aac_devclass; -DRIVER_MODULE(aac, pci, aac_pci_driver, aac_devclass, 0, 0); +DRIVER_MODULE(aac, pci, aac_pci_driver, aac_devclass, NULL, NULL); MODULE_DEPEND(aac, pci, 1, 1, 1); - -struct aac_ident +static const struct aac_ident { u_int16_t vendor; u_int16_t device; @@ -91,7 +95,7 @@ struct aac_ident u_int16_t subdevice; int hwif; int quirks; - char *desc; + const char *desc; } aac_identifiers[] = { {0x1028, 0x0001, 0x1028, 0x0001, AAC_HWIF_I960RX, 0, "Dell PERC 2/Si"}, @@ -139,7 +143,6 @@ struct aac_ident "Adaptec SCSI RAID 2230S"}, {0x9005, 0x0286, 0x9005, 0x028d, AAC_HWIF_RKT, 0, "Adaptec SCSI RAID 2130S"}, - {0x9005, 0x0285, 0x9005, 0x0287, AAC_HWIF_I960RX, AAC_FLAGS_NO4GB | AAC_FLAGS_256FIBS, "Adaptec SCSI RAID 2200S"}, {0x9005, 0x0285, 0x17aa, 0x0286, AAC_HWIF_I960RX, AAC_FLAGS_NO4GB | @@ -276,7 +279,8 @@ struct aac_ident "AOC-USAS-S8iR-LP"}, {0, 0, 0, 0, 0, 0, 0} }; -struct aac_ident + +static const struct aac_ident aac_family_identifiers[] = { {0x9005, 0x0285, 0, 0, AAC_HWIF_I960RX, 0, "Adaptec RAID Controller"}, @@ -285,10 +289,10 @@ aac_family_identifiers[] = { {0, 0, 0, 0, 0, 0, 0} }; -static struct aac_ident * +static const struct aac_ident * aac_find_ident(device_t dev) { - struct aac_ident *m; + const struct aac_ident *m; u_int16_t vendid, devid, sub_vendid, sub_devid; vendid = pci_get_vendor(dev); @@ -317,7 +321,7 @@ 
aac_find_ident(device_t dev) static int aac_pci_probe(device_t dev) { - struct aac_ident *id; + const struct aac_ident *id; fwprintf(NULL, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); @@ -335,9 +339,8 @@ static int aac_pci_attach(device_t dev) { struct aac_softc *sc; - struct aac_ident *id; - int error; - u_int32_t command; + const struct aac_ident *id; + int count, error, reg, rid; fwprintf(NULL, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); @@ -345,7 +348,6 @@ aac_pci_attach(device_t dev) * Initialise softc. */ sc = device_get_softc(dev); - bzero(sc, sizeof(*sc)); sc->aac_dev = dev; /* assume failure is 'not configured' */ @@ -354,55 +356,65 @@ aac_pci_attach(device_t dev) /* * Verify that the adapter is correctly set up in PCI space. */ - command = pci_read_config(sc->aac_dev, PCIR_COMMAND, 2); - command |= PCIM_CMD_BUSMASTEREN; - pci_write_config(dev, PCIR_COMMAND, command, 2); - command = pci_read_config(sc->aac_dev, PCIR_COMMAND, 2); - if (!(command & PCIM_CMD_BUSMASTEREN)) { - device_printf(sc->aac_dev, "can't enable bus-master feature\n"); - goto out; - } - if ((command & PCIM_CMD_MEMEN) == 0) { - device_printf(sc->aac_dev, "memory window not available\n"); + pci_enable_busmaster(dev); + if (!(pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_BUSMASTEREN)) { + device_printf(dev, "can't enable bus-master feature\n"); goto out; } /* - * Allocate the PCI register window. + * Allocate the PCI register window(s). 
*/ - sc->aac_regs_rid0 = PCIR_BAR(0); - if ((sc->aac_regs_res0 = bus_alloc_resource_any(sc->aac_dev, - SYS_RES_MEMORY, &sc->aac_regs_rid0, RF_ACTIVE)) == NULL) { - device_printf(sc->aac_dev, - "couldn't allocate register window 0\n"); + rid = PCIR_BAR(0); + if ((sc->aac_regs_res0 = bus_alloc_resource_any(dev, + SYS_RES_MEMORY, &rid, RF_ACTIVE)) == NULL) { + device_printf(dev, "can't allocate register window 0\n"); goto out; } sc->aac_btag0 = rman_get_bustag(sc->aac_regs_res0); sc->aac_bhandle0 = rman_get_bushandle(sc->aac_regs_res0); if (sc->aac_hwif == AAC_HWIF_NARK) { - sc->aac_regs_rid1 = PCIR_BAR(1); - if ((sc->aac_regs_res1 = bus_alloc_resource_any(sc->aac_dev, - SYS_RES_MEMORY, &sc->aac_regs_rid1, RF_ACTIVE)) == NULL) { - device_printf(sc->aac_dev, - "couldn't allocate register window 1\n"); + rid = PCIR_BAR(1); + if ((sc->aac_regs_res1 = bus_alloc_resource_any(dev, + SYS_RES_MEMORY, &rid, RF_ACTIVE)) == NULL) { + device_printf(dev, + "can't allocate register window 1\n"); goto out; } sc->aac_btag1 = rman_get_bustag(sc->aac_regs_res1); sc->aac_bhandle1 = rman_get_bushandle(sc->aac_regs_res1); } else { sc->aac_regs_res1 = sc->aac_regs_res0; - sc->aac_regs_rid1 = sc->aac_regs_rid0; sc->aac_btag1 = sc->aac_btag0; sc->aac_bhandle1 = sc->aac_bhandle0; } /* + * Allocate the interrupt. + */ + rid = 0; + if (aac_enable_msi != 0 && pci_find_cap(dev, PCIY_MSI, &reg) == 0) { + count = pci_msi_count(dev); + if (count > 1) + count = 1; + else + count = 0; + if (count == 1 && pci_alloc_msi(dev, &count) == 0) + rid = 1; + } + if ((sc->aac_irq = bus_alloc_resource_any(sc->aac_dev, SYS_RES_IRQ, + &rid, RF_ACTIVE | (count != 0 ? 0 : RF_SHAREABLE))) == NULL) { + device_printf(dev, "can't allocate interrupt\n"); + goto out; + } + + /* + * Allocate the parent bus DMA tag appropriate for our PCI interface. + * * Note that some of these controllers are 64-bit capable.
*/ - if (bus_dma_tag_create(bus_get_dma_tag(sc->aac_dev), /* parent */ + if (bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ PAGE_SIZE, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ @@ -413,7 +425,7 @@ aac_pci_attach(device_t dev) 0, /* flags */ NULL, NULL, /* No locking needed */ &sc->aac_parent_dmat)) { - device_printf(sc->aac_dev, "can't allocate parent DMA tag\n"); + device_printf(dev, "can't allocate parent DMA tag\n"); goto out; } @@ -427,19 +439,19 @@ aac_pci_attach(device_t dev) case AAC_HWIF_I960RX: case AAC_HWIF_NARK: fwprintf(sc, HBA_FLAGS_DBG_INIT_B, "set hardware up for i960Rx/NARK"); - sc->aac_if = aac_rx_interface; + sc->aac_if = &aac_rx_interface; break; case AAC_HWIF_STRONGARM: fwprintf(sc, HBA_FLAGS_DBG_INIT_B, "set hardware up for StrongARM"); - sc->aac_if = aac_sa_interface; + sc->aac_if = &aac_sa_interface; break; case AAC_HWIF_RKT: fwprintf(sc, HBA_FLAGS_DBG_INIT_B, "set hardware up for Rocket/MIPS"); - sc->aac_if = aac_rkt_interface; + sc->aac_if = &aac_rkt_interface; break; default: sc->aac_hwif = AAC_HWIF_UNKNOWN; - device_printf(sc->aac_dev, "unknown hardware type\n"); + device_printf(dev, "unknown hardware type\n"); error = ENXIO; goto out; } @@ -472,7 +484,7 @@ static device_method_t aacch_methods[] = { DEVMETHOD(device_probe, aacch_probe), DEVMETHOD(device_attach, aacch_attach), DEVMETHOD(device_detach, aacch_detach), - { 0, 0 } + DEVMETHOD_END }; struct aacch_softc { @@ -486,7 +498,7 @@ static driver_t aacch_driver = { }; static devclass_t aacch_devclass; -DRIVER_MODULE(aacch, pci, aacch_driver, aacch_devclass, 0, 0); +DRIVER_MODULE(aacch, pci, aacch_driver, aacch_devclass, NULL, NULL); static int aacch_probe(device_t dev) diff --git a/sys/dev/aac/aac_tables.h b/sys/dev/aac/aac_tables.h index fe687fb..6f52d5d 100644 --- a/sys/dev/aac/aac_tables.h +++ b/sys/dev/aac/aac_tables.h @@ -27,13 +27,14 @@ * $FreeBSD$ */ +#if 0 /* * Status codes for block read/write commands, etc. 
* * XXX many of these would not normally be returned, as they are * relevant only to FSA operations. */ -static struct aac_code_lookup aac_command_status_table[] = { +static const struct aac_code_lookup aac_command_status_table[] = { {"OK", ST_OK}, {"operation not permitted", ST_PERM}, {"not found", ST_NOENT}, @@ -75,8 +76,9 @@ static struct aac_code_lookup aac_command_status_table[] = { }; #define AAC_COMMAND_STATUS(x) aac_describe_code(aac_command_status_table, x) +#endif -static struct aac_code_lookup aac_cpu_variant[] = { +static const struct aac_code_lookup aac_cpu_variant[] = { {"i960JX", CPUI960_JX}, {"i960CX", CPUI960_CX}, {"i960HX", CPUI960_HX}, @@ -93,7 +95,7 @@ static struct aac_code_lookup aac_cpu_variant[] = { {"Unknown processor", 0} }; -static struct aac_code_lookup aac_battery_platform[] = { +static const struct aac_code_lookup aac_battery_platform[] = { {"required battery present", PLATFORM_BAT_REQ_PRESENT}, {"REQUIRED BATTERY NOT PRESENT", PLATFORM_BAT_REQ_NOTPRESENT}, {"optional battery present", PLATFORM_BAT_OPT_PRESENT}, @@ -103,7 +105,7 @@ static struct aac_code_lookup aac_battery_platform[] = { {"unknown battery platform", 0} }; -static struct aac_code_lookup aac_container_types[] = { +static const struct aac_code_lookup aac_container_types[] = { {"Volume", CT_VOLUME}, {"RAID 1 (Mirror)", CT_MIRROR}, {"RAID 0 (Stripe)", CT_STRIPE}, @@ -126,4 +128,3 @@ static struct aac_code_lookup aac_container_types[] = { {NULL, 0}, {"unknown", 0} }; - diff --git a/sys/dev/aac/aacvar.h b/sys/dev/aac/aacvar.h index d994acf..6dc7795 100644 --- a/sys/dev/aac/aacvar.h +++ b/sys/dev/aac/aacvar.h @@ -33,10 +33,13 @@ #include #include #include -#include #include +#include +#include #include +SYSCTL_DECL(_hw_aac); + #define AAC_TYPE_DEVO 1 #define AAC_TYPE_ALPHA 2 #define AAC_TYPE_BETA 3 @@ -242,28 +245,28 @@ struct aac_interface int (*aif_get_outb_queue)(struct aac_softc *sc); void (*aif_set_outb_queue)(struct aac_softc *sc, int index); }; -extern struct 
aac_interface aac_rx_interface; -extern struct aac_interface aac_sa_interface; -extern struct aac_interface aac_fa_interface; -extern struct aac_interface aac_rkt_interface; - -#define AAC_GET_FWSTATUS(sc) ((sc)->aac_if.aif_get_fwstatus((sc))) -#define AAC_QNOTIFY(sc, qbit) ((sc)->aac_if.aif_qnotify((sc), (qbit))) -#define AAC_GET_ISTATUS(sc) ((sc)->aac_if.aif_get_istatus((sc))) -#define AAC_CLEAR_ISTATUS(sc, mask) ((sc)->aac_if.aif_clr_istatus((sc), \ +extern const struct aac_interface aac_rx_interface; +extern const struct aac_interface aac_sa_interface; +extern const struct aac_interface aac_fa_interface; +extern const struct aac_interface aac_rkt_interface; + +#define AAC_GET_FWSTATUS(sc) ((sc)->aac_if->aif_get_fwstatus((sc))) +#define AAC_QNOTIFY(sc, qbit) ((sc)->aac_if->aif_qnotify((sc), (qbit))) +#define AAC_GET_ISTATUS(sc) ((sc)->aac_if->aif_get_istatus((sc))) +#define AAC_CLEAR_ISTATUS(sc, mask) ((sc)->aac_if->aif_clr_istatus((sc), \ (mask))) #define AAC_SET_MAILBOX(sc, command, arg0, arg1, arg2, arg3) \ - ((sc)->aac_if.aif_set_mailbox((sc), (command), (arg0), (arg1), (arg2), \ + ((sc)->aac_if->aif_set_mailbox((sc), (command), (arg0), (arg1), (arg2), \ (arg3))) -#define AAC_GET_MAILBOX(sc, mb) ((sc)->aac_if.aif_get_mailbox((sc), \ +#define AAC_GET_MAILBOX(sc, mb) ((sc)->aac_if->aif_get_mailbox((sc), \ (mb))) -#define AAC_MASK_INTERRUPTS(sc) ((sc)->aac_if.aif_set_interrupts((sc), \ +#define AAC_MASK_INTERRUPTS(sc) ((sc)->aac_if->aif_set_interrupts((sc), \ 0)) -#define AAC_UNMASK_INTERRUPTS(sc) ((sc)->aac_if.aif_set_interrupts((sc), \ +#define AAC_UNMASK_INTERRUPTS(sc) ((sc)->aac_if->aif_set_interrupts((sc), \ 1)) -#define AAC_SEND_COMMAND(sc, cm) ((sc)->aac_if.aif_send_command((sc), (cm))) -#define AAC_GET_OUTB_QUEUE(sc) ((sc)->aac_if.aif_get_outb_queue((sc))) -#define AAC_SET_OUTB_QUEUE(sc, idx) ((sc)->aac_if.aif_set_outb_queue((sc), (idx))) +#define AAC_SEND_COMMAND(sc, cm) ((sc)->aac_if->aif_send_command((sc), (cm))) +#define AAC_GET_OUTB_QUEUE(sc) 
((sc)->aac_if->aif_get_outb_queue((sc))) +#define AAC_SET_OUTB_QUEUE(sc, idx) ((sc)->aac_if->aif_set_outb_queue((sc), (idx))) #define AAC_MEM0_SETREG4(sc, reg, val) bus_space_write_4(sc->aac_btag0, \ sc->aac_bhandle0, reg, val) @@ -307,14 +310,12 @@ struct aac_softc /* bus connections */ device_t aac_dev; struct resource *aac_regs_res0, *aac_regs_res1; /* reg. if. window */ - int aac_regs_rid0, aac_regs_rid1; /* resource ID */ bus_space_handle_t aac_bhandle0, aac_bhandle1; /* bus space handle */ bus_space_tag_t aac_btag0, aac_btag1; /* bus space tag */ bus_dma_tag_t aac_parent_dmat; /* parent DMA tag */ bus_dma_tag_t aac_buffer_dmat; /* data buffer/command * DMA tag */ struct resource *aac_irq; /* interrupt */ - int aac_irq_rid; void *aac_intr; /* interrupt handle */ eventhandler_tag eh; @@ -339,7 +340,7 @@ struct aac_softc * DMA map */ struct aac_common *aac_common; u_int32_t aac_common_busaddr; - struct aac_interface aac_if; + const struct aac_interface *aac_if; /* command/fib resources */ bus_dma_tag_t aac_fib_dmat; /* DMA tag for allocing FIBs */ @@ -499,7 +500,7 @@ extern void aac_print_aif(struct aac_softc *sc, #endif struct aac_code_lookup { - char *string; + const char *string; u_int32_t code; }; @@ -581,7 +582,6 @@ aac_remove_ ## name (struct aac_command *cm) \ cm->cm_flags &= ~AAC_ON_ ## index; \ AACQ_REMOVE(cm->cm_sc, index); \ } \ -struct hack AACQ_COMMAND_QUEUE(free, AACQ_FREE); AACQ_COMMAND_QUEUE(ready, AACQ_READY); @@ -644,4 +644,3 @@ aac_release_sync_fib(struct aac_softc *sc) mtx_assert(&sc->aac_io_lock, MA_OWNED); } - -- cgit v1.1 From 8e8db171cbe74d53a2ad5144bc9a8bfcfad26b42 Mon Sep 17 00:00:00 2001 From: marius Date: Fri, 1 Mar 2013 20:16:06 +0000 Subject: - Apparently, r186520 was just wrong and the clock of Oxford OX16PCI958 is neither DEFAULT_RCLK * 2 nor DEFAULT_RCLK * 10 but plain DEFAULT_RCLK and there's no (open) source indicating otherwise. 
This was tested with an EXSYS EX-41098-2, whose clock is not configurable and identifies as: puc0@pci0:5:1:0: class=0x070200 card=0x06711415 chip=0x95381415 rev=0x01 hdr=0x00 vendor = 'Oxford Semiconductor Ltd' class = simple comms subclass = multiport serial Note that this exactly matches the card mentioned in PR 129665 so no sub-device/sub-vendor based quirking of the latter is possible. So maybe we should grow some sort of tunable, in case non-default cards such as the latter aren't configurable either (this also wouldn't be the first time an allegedly tested commit turns out to be wrong though). - Make the TiMedia tables const. MFC after: 1 week --- sys/dev/puc/pucdata.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/sys/dev/puc/pucdata.c b/sys/dev/puc/pucdata.c index 216a30b..6d933e8 100644 --- a/sys/dev/puc/pucdata.c +++ b/sys/dev/puc/pucdata.c @@ -769,7 +769,7 @@ const struct puc_cfg puc_pci_devices[] = { { 0x1415, 0x9538, 0xffff, 0, "Oxford Semiconductor OX16PCI958 UARTs", - DEFAULT_RCLK * 10, + DEFAULT_RCLK, PUC_PORT_8S, 0x18, 0, 8, }, @@ -918,6 +918,7 @@ const struct puc_cfg puc_pci_devices[] = { DEFAULT_RCLK * 8, PUC_PORT_4S, 0x10, 0, 8, }, + { 0x14d2, 0xa004, 0xffff, 0, "Titan PCI-800H", DEFAULT_RCLK * 8, @@ -1060,7 +1061,7 @@ const struct puc_cfg puc_pci_devices[] = { { 0x9710, 0x9865, 0xa000, 0x3004, "NetMos NM9865 Quad UART", DEFAULT_RCLK, - PUC_PORT_4S, 0x10, 4, 0,0 + PUC_PORT_4S, 0x10, 4, 0, }, { 0x9710, 0x9865, 0xa000, 0x3011, @@ -1420,26 +1421,26 @@ static int puc_config_timedia(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port, intptr_t *res) { - static uint16_t dual[] = { + static const uint16_t dual[] = { 0x0002, 0x4036, 0x4037, 0x4038, 0x4078, 0x4079, 0x4085, 0x4088, 0x4089, 0x5037, 0x5078, 0x5079, 0x5085, 0x6079, 0x7079, 0x8079, 0x8137, 0x8138, 0x8237, 0x8238, 0x9079, 0x9137, 0x9138, 0x9237, 0x9238, 0xA079, 0xB079, 0xC079, 0xD079, 0 }; - static uint16_t quad[] = { + static const uint16_t quad[] = { 
0x4055, 0x4056, 0x4095, 0x4096, 0x5056, 0x8156, 0x8157, 0x8256, 0x8257, 0x9056, 0x9156, 0x9157, 0x9158, 0x9159, 0x9256, 0x9257, 0xA056, 0xA157, 0xA158, 0xA159, 0xB056, 0xB157, 0 }; - static uint16_t octa[] = { + static const uint16_t octa[] = { 0x4065, 0x4066, 0x5065, 0x5066, 0x8166, 0x9066, 0x9166, 0x9167, 0x9168, 0xA066, 0xA167, 0xA168, 0 }; - static struct { + static const struct { int ports; - uint16_t *ids; + const uint16_t *ids; } subdevs[] = { { 2, dual }, { 4, quad }, -- cgit v1.1 From 944a48f5cd4b3de81295d4b289362610ee533cd2 Mon Sep 17 00:00:00 2001 From: marius Date: Fri, 1 Mar 2013 20:34:02 +0000 Subject: - Remove an unused header. - Use NULL instead of 0 for pointers. - Let ofw_pcib_probe() return BUS_PROBE_DEFAULT instead of 0 so specialized PCI-PCI-bridge drivers may attach instead. - Add WARs for PLX Technology PEX 8114 bridges and PEX 8532 switches. Ideally, these should live in MI code but at least for the latter we're missing the necessary infrastructure there. MFC after: 1 week --- sys/sparc64/pci/ofw_pcib.c | 47 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/sys/sparc64/pci/ofw_pcib.c b/sys/sparc64/pci/ofw_pcib.c index 69bdb72..7944237 100644 --- a/sys/sparc64/pci/ofw_pcib.c +++ b/sys/sparc64/pci/ofw_pcib.c @@ -47,8 +47,6 @@ __FBSDID("$FreeBSD$"); #include #include -#include - #include #include #include @@ -58,8 +56,12 @@ __FBSDID("$FreeBSD$"); #include #include +#define PCI_DEVID_ALI_M5249 0x524910b9 +#define PCI_VENDOR_PLX 0x10b5 + static device_probe_t ofw_pcib_probe; static device_attach_t ofw_pcib_attach; +static ofw_pci_setup_device_t ofw_pcib_setup_device; static device_method_t ofw_pcib_methods[] = { /* Device interface */ @@ -73,6 +75,7 @@ static device_method_t ofw_pcib_methods[] = { /* ofw_bus interface */ DEVMETHOD(ofw_bus_get_node, ofw_pcib_gen_get_node), + DEVMETHOD(ofw_pci_setup_device, ofw_pcib_setup_device), DEVMETHOD_END }; @@ -81,7 +84,7 @@ static devclass_t 
pcib_devclass; DEFINE_CLASS_1(pcib, ofw_pcib_driver, ofw_pcib_methods, sizeof(struct ofw_pcib_gen_softc), pcib_driver); -EARLY_DRIVER_MODULE(ofw_pcib, pci, ofw_pcib_driver, pcib_devclass, 0, 0, +EARLY_DRIVER_MODULE(ofw_pcib, pci, ofw_pcib_driver, pcib_devclass, NULL, NULL, BUS_PASS_BUS); MODULE_DEPEND(ofw_pcib, pci, 1, 1, 1); @@ -104,7 +107,7 @@ ofw_pcib_probe(device_t dev) ISDTYPE(pbdtype, OFW_TYPE_PCIE) ? "e" : "", ISDTYPE(dtype, OFW_TYPE_PCIE) ? "e" : ""); device_set_desc_copy(dev, desc); - return (0); + return (BUS_PROBE_DEFAULT); } #undef ISDTYPE @@ -119,7 +122,6 @@ ofw_pcib_attach(device_t dev) sc = device_get_softc(dev); - /* Quirk handling */ switch (pci_get_devid(dev)) { /* * The ALi M5249 found in Fire-based machines by definition must me @@ -127,13 +129,46 @@ ofw_pcib_attach(device_t dev) * don't indicate this in the class code although the ISA I/O range * isn't included in their bridge decode. */ - case 0x524910b9: + case PCI_DEVID_ALI_M5249: sc->ops_pcib_sc.flags |= PCIB_SUBTRACTIVE; break; } + switch (pci_get_vendor(dev)) { + /* + * Concurrently write the primary and secondary bus numbers in order + * to work around a bug in PLX PEX 8114 causing the internal shadow + * copies of these not to be updated when setting them bytewise. + */ + case PCI_VENDOR_PLX: + pci_write_config(dev, PCIR_PRIBUS_1, + pci_read_config(dev, PCIR_SECBUS_1, 1) << 8 | + pci_read_config(dev, PCIR_PRIBUS_1, 1), 2); + break; + } + ofw_pcib_gen_setup(dev); pcib_attach_common(dev); device_add_child(dev, "pci", -1); return (bus_generic_attach(dev)); } + +static void +ofw_pcib_setup_device(device_t bus, device_t child) +{ + int i; + uint16_t reg; + + switch (pci_get_vendor(bus)) { + /* + * For PLX PEX 8532 issue 64 TLPs to the child from the downstream + * port to the child device in order to work around a hardware bug. 
+ */ + case PCI_VENDOR_PLX: + for (i = 0, reg = 0; i < 64; i++) + reg |= pci_get_devid(child); + break; + } + + OFW_PCI_SETUP_DEVICE(device_get_parent(bus), child); +} -- cgit v1.1 From 0c5e0b209e594a9f0dd463bef0be7c42d11b6ef6 Mon Sep 17 00:00:00 2001 From: marius Date: Fri, 1 Mar 2013 20:36:59 +0000 Subject: - In sbbc_pci_attach() just pass the already obtained bus tag and handle instead of acquiring these anew. - Use NULL instead of 0 for pointers. MFC after: 1 week --- sys/sparc64/pci/sbbc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sys/sparc64/pci/sbbc.c b/sys/sparc64/pci/sbbc.c index 491f653..218093f 100644 --- a/sys/sparc64/pci/sbbc.c +++ b/sys/sparc64/pci/sbbc.c @@ -299,7 +299,7 @@ static device_method_t sbbc_pci_methods[] = { static devclass_t sbbc_devclass; DEFINE_CLASS_0(sbbc, sbbc_driver, sbbc_pci_methods, sizeof(struct sbbc_softc)); -DRIVER_MODULE(sbbc, pci, sbbc_driver, sbbc_devclass, 0, 0); +DRIVER_MODULE(sbbc, pci, sbbc_driver, sbbc_devclass, NULL, NULL); static int sbbc_pci_probe(device_t dev) @@ -358,8 +358,7 @@ sbbc_pci_attach(device_t dev) if (error != 0) device_printf(dev, "failed to attach UART device\n"); } else { - error = sbbc_parse_toc(rman_get_bustag(sc->sc_res), - rman_get_bushandle(sc->sc_res)); + error = sbbc_parse_toc(bst, bsh); if (error != 0) { device_printf(dev, "failed to parse TOC\n"); if (sbbc_console != 0) { @@ -609,7 +608,7 @@ static device_method_t sbbc_uart_sbbc_methods[] = { DEFINE_CLASS_0(uart, sbbc_uart_driver, sbbc_uart_sbbc_methods, sizeof(struct uart_softc)); -DRIVER_MODULE(uart, sbbc, sbbc_uart_driver, uart_devclass, 0, 0); +DRIVER_MODULE(uart, sbbc, sbbc_uart_driver, uart_devclass, NULL, NULL); static int sbbc_uart_sbbc_probe(device_t dev) -- cgit v1.1 From 749e286432e014be6c8ae18a5494fb60b465b88f Mon Sep 17 00:00:00 2001 From: marius Date: Fri, 1 Mar 2013 20:51:53 +0000 Subject: - Move reporting of failures to disable RX/TX MAC under bootverbose as at least the Saturn chips of 501-6738 
cards may fail to do so the first time, which isn't fatal though. Reported by: Paul Keusemann - Explain why we don't enable infinite bursts on sparc64. - Given that these chips support memory write invalidate, make sure that it's enabled in the command register. Also make sure that PERR# and SERR# assertion is enabled. MFC after: 1 week --- sys/dev/cas/if_cas.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sys/dev/cas/if_cas.c b/sys/dev/cas/if_cas.c index f98d4f0..eec8fa4 100644 --- a/sys/dev/cas/if_cas.c +++ b/sys/dev/cas/if_cas.c @@ -824,7 +824,8 @@ cas_disable_rx(struct cas_softc *sc) BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (cas_bitwait(sc, CAS_MAC_RX_CONF, CAS_MAC_RX_CONF_EN, 0)) return (1); - device_printf(sc->sc_dev, "cannot disable RX MAC\n"); + if (bootverbose) + device_printf(sc->sc_dev, "cannot disable RX MAC\n"); return (0); } @@ -838,7 +839,8 @@ cas_disable_tx(struct cas_softc *sc) BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (cas_bitwait(sc, CAS_MAC_TX_CONF, CAS_MAC_TX_CONF_EN, 0)) return (1); - device_printf(sc->sc_dev, "cannot disable TX MAC\n"); + if (bootverbose) + device_printf(sc->sc_dev, "cannot disable TX MAC\n"); return (0); } @@ -1041,7 +1043,8 @@ cas_init_locked(struct cas_softc *sc) /* * Enable infinite bursts for revisions without PCI issues if * applicable. Doing so greatly improves the TX performance on - * !__sparc64__. + * !__sparc64__ (on sparc64, setting CAS_INF_BURST improves TX + * performance only marginally but hurts RX throughput quite a bit). 
*/ CAS_WRITE_4(sc, CAS_INF_BURST, #if !defined(__sparc64__) @@ -2691,7 +2694,10 @@ cas_pci_attach(device_t dev) return (ENXIO); } - pci_enable_busmaster(dev); + /* PCI configuration */ + pci_write_config(dev, PCIR_COMMAND, + pci_read_config(dev, PCIR_COMMAND, 2) | PCIM_CMD_BUSMASTEREN | + PCIM_CMD_MWRICEN | PCIM_CMD_PERRESPEN | PCIM_CMD_SERRESPEN, 2); sc->sc_dev = dev; if (sc->sc_variant == CAS_CAS && pci_get_devid(dev) < 0x02) -- cgit v1.1 From ffd2079a18a79d32aee02f64cfe1de0ccd450177 Mon Sep 17 00:00:00 2001 From: pjd Date: Fri, 1 Mar 2013 21:57:02 +0000 Subject: Reduce lock scope a little. --- sys/kern/vfs_syscalls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index bbda70d..bd44a3a 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -2633,9 +2633,9 @@ setfflags(td, vp, flags) if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); VATTR_NULL(&vattr); vattr.va_flags = flags; + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); #ifdef MAC error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); if (error == 0) -- cgit v1.1 From 2bed8f5691b7572d6f31bfc68e9f762a938af863 Mon Sep 17 00:00:00 2001 From: mm Date: Fri, 1 Mar 2013 21:58:51 +0000 Subject: MFV r247316: Merge new read-only zfs properties from vendor (illumos) Illumos ZFS issues: 3588 provide zfs properties for logical (uncompressed) space used and referenced References: https://www.illumos.org/issues/3588 MFC after: 2 weeks --- cddl/contrib/opensolaris/cmd/zfs/zfs.8 | 33 ++++++++++++++++++++++ sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c | 6 +++- .../opensolaris/uts/common/fs/zfs/dsl_dataset.c | 2 ++ .../opensolaris/uts/common/fs/zfs/dsl_dir.c | 2 ++ .../contrib/opensolaris/uts/common/sys/fs/zfs.h | 2 ++ 5 files changed, 44 insertions(+), 1 deletion(-) diff --git a/cddl/contrib/opensolaris/cmd/zfs/zfs.8 b/cddl/contrib/opensolaris/cmd/zfs/zfs.8 index 
a3009a2..be1cf9c 100644 --- a/cddl/contrib/opensolaris/cmd/zfs/zfs.8 +++ b/cddl/contrib/opensolaris/cmd/zfs/zfs.8 @@ -526,6 +526,39 @@ if the snapshot has been marked for deferred destroy by using the .Qq Nm Cm destroy -d command. Otherwise, the property is .Cm off . +.It Sy logicalreferenced +The amount of space that is +.Qq logically +accessible by this dataset. +See the +.Sy referenced +property. +The logical space ignores the effect of the +.Sy compression +and +.Sy copies +properties, giving a quantity closer to the amount of data that applications +see. +However, it does include space consumed by metadata. +.Pp +This property can also be referred to by its shortened column name, +.Sy lrefer . +.It Sy logicalused +The amount of space that is +.Qq logically +consumed by this dataset and all its descendents. +See the +.Sy used +property. +The logical space ignores the effect of the +.Sy compression +and +.Sy copies +properties, giving a quantity closer to the amount of data that applications +see. +.Pp +This property can also be referred to by its shortened column name, +.Sy lused . .It Sy mounted For file systems, indicates whether the file system is currently mounted. This property can be either diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c b/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c index dc0cb33..4243c20 100644 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c +++ b/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. 
*/ @@ -350,6 +350,10 @@ zfs_prop_init(void) ZFS_TYPE_SNAPSHOT, "", "USERREFS"); zprop_register_number(ZFS_PROP_WRITTEN, "written", 0, PROP_READONLY, ZFS_TYPE_DATASET, "", "WRITTEN"); + zprop_register_number(ZFS_PROP_LOGICALUSED, "logicalused", 0, + PROP_READONLY, ZFS_TYPE_DATASET, "", "LUSED"); + zprop_register_number(ZFS_PROP_LOGICALREFERENCED, "logicalreferenced", + 0, PROP_READONLY, ZFS_TYPE_DATASET, "", "LREFER"); /* default number properties */ zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT, diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c index e8bf701..b294ff02 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c @@ -2344,6 +2344,8 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) (ds->ds_phys->ds_uncompressed_bytes * 100 / ds->ds_phys->ds_compressed_bytes); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED, + ds->ds_phys->ds_uncompressed_bytes); if (ds->ds_phys->ds_next_snap_obj) { /* diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c index 4d954bd..79e5121 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c @@ -541,6 +541,8 @@ dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) dd->dd_phys->dd_compressed_bytes == 0 ? 
100 : (dd->dd_phys->dd_uncompressed_bytes * 100 / dd->dd_phys->dd_compressed_bytes)); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALUSED, + dd->dd_phys->dd_uncompressed_bytes); if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP, dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]); diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h b/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h index 64fd2e6..38deab6 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h +++ b/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h @@ -131,6 +131,8 @@ typedef enum { ZFS_PROP_REFRATIO, ZFS_PROP_WRITTEN, ZFS_PROP_CLONES, + ZFS_PROP_LOGICALUSED, + ZFS_PROP_LOGICALREFERENCED, ZFS_NUM_PROPS } zfs_prop_t; -- cgit v1.1 From 54fb726d9b32179cee99e8e32cc6fffe36dfca7c Mon Sep 17 00:00:00 2001 From: pjd Date: Fri, 1 Mar 2013 21:58:56 +0000 Subject: Remove unnecessary variables. --- sys/kern/vfs_vnops.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 32c0978..96ce9e2 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -1860,7 +1860,6 @@ vn_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td) { struct vnode *vp; - int error; vp = fp->f_vnode; #ifdef AUDIT @@ -1868,8 +1867,7 @@ vn_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, AUDIT_ARG_VNODE1(vp); VOP_UNLOCK(vp, 0); #endif - error = setfmode(td, active_cred, vp, mode); - return (error); + return (setfmode(td, active_cred, vp, mode)); } int @@ -1877,7 +1875,6 @@ vn_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, struct thread *td) { struct vnode *vp; - int error; vp = fp->f_vnode; #ifdef AUDIT @@ -1885,8 +1882,7 @@ vn_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, AUDIT_ARG_VNODE1(vp); VOP_UNLOCK(vp, 0); #endif - error = setfown(td, active_cred, vp, uid, gid); - return (error); + return 
(setfown(td, active_cred, vp, uid, gid)); } void -- cgit v1.1 From 0542e230f823d21b65362dd6575a212e82d79bf5 Mon Sep 17 00:00:00 2001 From: andrew Date: Fri, 1 Mar 2013 21:59:23 +0000 Subject: Increase the maximum text size on ARM to 64MiB. Without this clang would be sent a SIGABRT when it is loaded as it is too large. This is the smallest power of two MiB value that allows us to execute clang. While here wrap it in an #ifndef to be consistent with the other architectures. Submitted by: Daisuke Aoyama --- sys/arm/include/vmparam.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sys/arm/include/vmparam.h b/sys/arm/include/vmparam.h index 53c40d2..aec94f8 100644 --- a/sys/arm/include/vmparam.h +++ b/sys/arm/include/vmparam.h @@ -153,7 +153,9 @@ VM_MIN_KERNEL_ADDRESS + 1) * 2 / 5) #endif -#define MAXTSIZ (16*1024*1024) +#ifndef MAXTSIZ +#define MAXTSIZ (64*1024*1024) +#endif #ifndef DFLDSIZ #define DFLDSIZ (128*1024*1024) #endif -- cgit v1.1 From 8857575b13cf118cc89efb1b462dd314df09c180 Mon Sep 17 00:00:00 2001 From: jhb Date: Fri, 1 Mar 2013 22:03:31 +0000 Subject: Replace the TDP_NOSLEEPING flag with a counter so that the THREAD_NO_SLEEPING() and THREAD_SLEEPING_OK() macros can nest. Reviewed by: attilio --- sys/dev/mps/mps.c | 8 ++++---- sys/kern/subr_sleepqueue.c | 4 ++-- sys/kern/subr_trap.c | 2 +- sys/sys/proc.h | 15 ++++----------- sys/sys/rmlock.h | 2 +- 5 files changed, 12 insertions(+), 19 deletions(-) diff --git a/sys/dev/mps/mps.c b/sys/dev/mps/mps.c index 7a3e4f7..5e41b0a 100644 --- a/sys/dev/mps/mps.c +++ b/sys/dev/mps/mps.c @@ -136,8 +136,8 @@ mps_diag_reset(struct mps_softc *sc,int sleep_flag) /*Force NO_SLEEP for threads prohibited to sleep * e.a Thread from interrupt handler are prohibited to sleep. 
- */ - if(curthread->td_pflags & TDP_NOSLEEPING) + */ + if (curthread->td_no_sleeping != 0) sleep_flag = NO_SLEEP; /* Push the magic sequence */ @@ -469,8 +469,8 @@ mps_request_sync(struct mps_softc *sc, void *req, MPI2_DEFAULT_REPLY *reply, uint16_t *data16; int i, count, ioc_sz, residual; int sleep_flags = CAN_SLEEP; - - if(curthread->td_pflags & TDP_NOSLEEPING) + + if (curthread->td_no_sleeping != 0) sleep_flags = NO_SLEEP; /* Step 1 */ diff --git a/sys/kern/subr_sleepqueue.c b/sys/kern/subr_sleepqueue.c index b6bd8fc..f187544 100644 --- a/sys/kern/subr_sleepqueue.c +++ b/sys/kern/subr_sleepqueue.c @@ -296,8 +296,8 @@ sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags, MPASS((queue >= 0) && (queue < NR_SLEEPQS)); /* If this thread is not allowed to sleep, die a horrible death. */ - KASSERT(!(td->td_pflags & TDP_NOSLEEPING), - ("%s: td %p to sleep on wchan %p with TDP_NOSLEEPING on", + KASSERT(td->td_no_sleeping == 0, + ("%s: td %p to sleep on wchan %p with sleeping prohibited", __func__, td, wchan)); /* Look up the sleep queue associated with the wait channel 'wchan'. */ diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index bd06f20..1f24e88 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -158,7 +158,7 @@ userret(struct thread *td, struct trapframe *frame) ("userret: Returning with %d locks held", td->td_locks)); KASSERT((td->td_pflags & TDP_NOFAULTING) == 0, ("userret: Returning with pagefaults disabled")); - KASSERT((td->td_pflags & TDP_NOSLEEPING) == 0, + KASSERT(td->td_no_sleeping == 0, ("userret: Returning with sleep disabled")); KASSERT(td->td_pinned == 0 || (td->td_pflags & TDP_CALLCHAIN) != 0, ("userret: Returning with with pinned thread")); diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 46f5820..f6cf2e4 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -273,6 +273,7 @@ struct thread { struct vm_map_entry *td_map_def_user; /* (k) Deferred entries. 
*/ pid_t td_dbg_forked; /* (c) Child pid for debugger. */ u_int td_vp_reserv; /* (k) Count of reserved vnodes. */ + int td_no_sleeping; /* (k) Sleeping disabled count. */ #define td_endzero td_sigmask /* Copied during fork1() or create_thread(). */ @@ -404,7 +405,7 @@ do { \ #define TDP_ALTSTACK 0x00000020 /* Have alternate signal stack. */ #define TDP_DEADLKTREAT 0x00000040 /* Lock aquisition - deadlock treatment. */ #define TDP_NOFAULTING 0x00000080 /* Do not handle page faults. */ -#define TDP_NOSLEEPING 0x00000100 /* Thread is not allowed to sleep on a sq. */ +#define TDP_UNUSED9 0x00000100 /* --available-- */ #define TDP_OWEUPC 0x00000200 /* Call addupc() at next AST. */ #define TDP_ITHREAD 0x00000400 /* Thread is an interrupt thread. */ #define TDP_SYNCIO 0x00000800 /* Local override, disable async i/o. */ @@ -790,17 +791,9 @@ extern pid_t pid_max; #define thread_safetoswapout(td) ((td)->td_flags & TDF_CANSWAP) /* Control whether or not it is safe for curthread to sleep. */ -#define THREAD_NO_SLEEPING() do { \ - KASSERT(!(curthread->td_pflags & TDP_NOSLEEPING), \ - ("nested no sleeping")); \ - curthread->td_pflags |= TDP_NOSLEEPING; \ -} while (0) +#define THREAD_NO_SLEEPING() ((curthread)->td_no_sleeping++) -#define THREAD_SLEEPING_OK() do { \ - KASSERT((curthread->td_pflags & TDP_NOSLEEPING), \ - ("nested sleeping ok")); \ - curthread->td_pflags &= ~TDP_NOSLEEPING; \ -} while (0) +#define THREAD_SLEEPING_OK() ((curthread)->td_no_sleeping--) #define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash]) extern LIST_HEAD(pidhashhead, proc) *pidhashtbl; diff --git a/sys/sys/rmlock.h b/sys/sys/rmlock.h index 0ae2099..5a0fa8a 100644 --- a/sys/sys/rmlock.h +++ b/sys/sys/rmlock.h @@ -40,7 +40,7 @@ #ifdef _KERNEL /* - * Flags passed to rm_init(9). + * Flags passed to rm_init_flags(9). 
*/ #define RM_NOWITNESS 0x00000001 #define RM_RECURSE 0x00000002 -- cgit v1.1 From d85a1ce450d43074c2b7a0b3598366de11994d72 Mon Sep 17 00:00:00 2001 From: marius Date: Fri, 1 Mar 2013 22:05:20 +0000 Subject: Initialize count in order to appease clang. Submitted by: delphij --- sys/dev/bce/if_bce.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/dev/bce/if_bce.c b/sys/dev/bce/if_bce.c index 8f6bc48..032ca5f 100644 --- a/sys/dev/bce/if_bce.c +++ b/sys/dev/bce/if_bce.c @@ -1076,6 +1076,7 @@ bce_attach(device_t dev) bce_probe_pci_caps(dev, sc); rid = 1; + count = 0; #if 0 /* Try allocating MSI-X interrupts. */ if ((sc->bce_cap_flags & BCE_MSIX_CAPABLE_FLAG) && -- cgit v1.1 From 55ff3cee07ed79aca01bab6297b24f104a622058 Mon Sep 17 00:00:00 2001 From: marius Date: Fri, 1 Mar 2013 22:09:08 +0000 Subject: Initialize count in order to appease clang. Submitted by: delphij --- sys/dev/aac/aac_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/dev/aac/aac_pci.c b/sys/dev/aac/aac_pci.c index 87ada1c..f768c8b 100644 --- a/sys/dev/aac/aac_pci.c +++ b/sys/dev/aac/aac_pci.c @@ -394,6 +394,7 @@ aac_pci_attach(device_t dev) * Allocate the interrupt. */ rid = 0; + count = 0; if (aac_enable_msi != 0 && pci_find_cap(dev, PCIY_MSI, &reg) == 0) { count = pci_msi_count(dev); if (count > 1) -- cgit v1.1 From cfe29a2eeb5894e142b69de584de6a99e5a432c9 Mon Sep 17 00:00:00 2001 From: delphij Date: Fri, 1 Mar 2013 22:20:13 +0000 Subject: MFV r247575: Import a fix tighten assertion on SPA versions from vendor (Illumos).
Illumos ZFS issue: 3543 Feature flags causes assertion in spa.c to miss certain cases MFC after: 2 weeks --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c index 92b7e06..ecfc1b0 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c @@ -6018,7 +6018,7 @@ spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx) */ ASSERT(tx->tx_txg != TXG_INITIAL); - ASSERT(version <= SPA_VERSION); + ASSERT(SPA_VERSION_IS_SUPPORTED(version)); ASSERT(version >= spa_version(spa)); spa->spa_uberblock.ub_version = version; @@ -6559,7 +6559,7 @@ spa_upgrade(spa_t *spa, uint64_t version) * future version would result in an unopenable pool, this shouldn't be * possible. */ - ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION); + ASSERT(SPA_VERSION_IS_SUPPORTED(spa->spa_uberblock.ub_version)); ASSERT(version >= spa->spa_uberblock.ub_version); spa->spa_uberblock.ub_version = version; -- cgit v1.1 From 3201c3850bb49872c01f455ac4221c806576b0b8 Mon Sep 17 00:00:00 2001 From: delphij Date: Fri, 1 Mar 2013 23:18:20 +0000 Subject: Fix a typo in mfi_stp_cmd() that would give wrong assignment. 
Submitted by: Sascha Wildner Obtained from: DragonFly rev 0dc98fff2206d7bb78ce5e07ac34d6954e4bd96a MFC after: 3 days --- sys/dev/mfi/mfi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/dev/mfi/mfi.c b/sys/dev/mfi/mfi.c index e2401f6..cf2d505 100644 --- a/sys/dev/mfi/mfi.c +++ b/sys/dev/mfi/mfi.c @@ -2997,7 +2997,7 @@ mfi_stp_cmd(struct mfi_softc *sc, struct mfi_command *cm,caddr_t arg) cm->cm_frame->stp.sgl.sg64[i].len = ioc->mfi_sgl[i].iov_len; } else { - cm->cm_frame->stp.sgl.sg32[i].len = + cm->cm_frame->stp.sgl.sg32[i].addr = kern_sge[i].phys_addr; cm->cm_frame->stp.sgl.sg32[i].len = ioc->mfi_sgl[i].iov_len; -- cgit v1.1 From b1482c7ae70107bd26f8a6e340c220353b02be86 Mon Sep 17 00:00:00 2001 From: delphij Date: Fri, 1 Mar 2013 23:21:18 +0000 Subject: Fix wrong assignment. Submitted by: Sascha Wildner Obtained from: DragonFly rev 9568dd07a22a136e380e6c19a8ea188eb92976d5 MFC after: 2 weeks --- sys/compat/ndis/kern_ndis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/compat/ndis/kern_ndis.c b/sys/compat/ndis/kern_ndis.c index 23abaf1..e094997 100644 --- a/sys/compat/ndis/kern_ndis.c +++ b/sys/compat/ndis/kern_ndis.c @@ -566,7 +566,7 @@ ndis_convert_res(arg) return (ENOMEM); rl->cprl_version = 5; - rl->cprl_version = 1; + rl->cprl_revision = 1; rl->cprl_count = sc->ndis_rescnt; prd = rl->cprl_partial_descs; -- cgit v1.1 From 3e4a1731aa17cbd6fdf9c9def3c5eb9b47353aa1 Mon Sep 17 00:00:00 2001 From: delphij Date: Fri, 1 Mar 2013 23:26:13 +0000 Subject: Fix assignment of maximum boundary.
Submitted by: Sascha Wildner Obtained from: DragonFly rev fd39c81ba220f7ad6e4dc9b30d45e828cf58a1ad MFC after: 2 weeks --- lib/libc/regex/regcomp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libc/regex/regcomp.c b/lib/libc/regex/regcomp.c index 68ffa66..f3a41e9 100644 --- a/lib/libc/regex/regcomp.c +++ b/lib/libc/regex/regcomp.c @@ -1212,7 +1212,7 @@ CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max) } cs->ranges = newranges; cs->ranges[cs->nranges].min = min; - cs->ranges[cs->nranges].min = max; + cs->ranges[cs->nranges].max = max; cs->nranges++; } -- cgit v1.1 From 24853370053f911de81d645be20801b842a3dda9 Mon Sep 17 00:00:00 2001 From: pjd Date: Sat, 2 Mar 2013 00:11:27 +0000 Subject: Provide cap_sandboxed(3) function, which is a wrapper around cap_getmode(2) system call, which has a nice property - it never fails, so it is a bit easier to use. If there is no support for capability mode in the kernel the function will return false (not in a sandbox). If the kernel is compiled with the support for capability mode, the function will return true or false depending if the calling process is in the capability mode sandbox or not respectively. 
Sponsored by: The FreeBSD Foundation --- lib/libc/gen/Makefile.inc | 2 ++ lib/libc/gen/cap_sandboxed.3 | 70 ++++++++++++++++++++++++++++++++++++++++++++ lib/libc/gen/cap_sandboxed.c | 50 +++++++++++++++++++++++++++++++ lib/libc/sys/Symbol.map | 1 + lib/libc/sys/cap_enter.2 | 1 + 5 files changed, 124 insertions(+) create mode 100644 lib/libc/gen/cap_sandboxed.3 create mode 100644 lib/libc/gen/cap_sandboxed.c diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc index 711cbff..89689a0 100644 --- a/lib/libc/gen/Makefile.inc +++ b/lib/libc/gen/Makefile.inc @@ -16,6 +16,7 @@ SRCS+= __getosreldate.c \ assert.c \ auxv.c \ basename.c \ + cap_sandboxed.c \ check_utility_compat.c \ clock.c \ clock_getcpuclockid.c \ @@ -168,6 +169,7 @@ SYM_MAPS+=${.CURDIR}/gen/Symbol.map MAN+= alarm.3 \ arc4random.3 \ basename.3 \ + cap_sandboxed.3 \ check_utility_compat.3 \ clock.3 \ clock_getcpuclockid.3 \ diff --git a/lib/libc/gen/cap_sandboxed.3 b/lib/libc/gen/cap_sandboxed.3 new file mode 100644 index 0000000..067d6d2 --- /dev/null +++ b/lib/libc/gen/cap_sandboxed.3 @@ -0,0 +1,70 @@ +.\" Copyright (c) 2012 The FreeBSD Foundation +.\" All rights reserved. +.\" +.\" This documentation was written by Pawel Jakub Dawidek under sponsorship +.\" from the FreeBSD Foundation. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd September 18, 2012 +.Dt CAP_SANDBOXED 3 +.Os +.Sh NAME +.Nm cap_sandboxed +.Nd Check if in a capability mode sandbox +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/capability.h +.In stdbool.h +.Ft bool +.Fn cap_sandboxed "void" +.Sh DESCRIPTION +.Fn cap_sandboxed +returns +.Va true +if the process is in a capability mode sandbox or +.Va false +if it is not. +This function is a more handy alternative to the +.Xr cap_getmode 2 +system call as it always succeeds, so there is no need for error checking. +If the support for capability mode is not compiled into the kernel, +.Fn cap_sandboxed +will always return +.Va false . +.Sh RETURN VALUES +Function +.Fn cap_sandboxed +is always successful and will return either +.Va true +or +.Va false . +.Sh SEE ALSO +.Xr cap_enter 2 , +.Xr capsicum 4 +.Sh AUTHORS +This function was implemented and manual page was written by +.An Pawel Jakub Dawidek Aq pawel@dawidek.net +under sponsorship of the FreeBSD Foundation. 
diff --git a/lib/libc/gen/cap_sandboxed.c b/lib/libc/gen/cap_sandboxed.c new file mode 100644 index 0000000..baa9b3f --- /dev/null +++ b/lib/libc/gen/cap_sandboxed.c @@ -0,0 +1,50 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/capability.h> + +#include <assert.h> +#include <errno.h> +#include <stdbool.h> + +bool +cap_sandboxed(void) +{ + u_int mode; + + if (cap_getmode(&mode) != 0) { + assert(errno == ENOSYS); + return (false); + } + assert(mode == 0 || mode == 1); + return (mode == 1); +} diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map index babae30..d126255 100644 --- a/lib/libc/sys/Symbol.map +++ b/lib/libc/sys/Symbol.map @@ -379,6 +379,7 @@ FBSD_1.2 { }; FBSD_1.3 { + cap_sandboxed; clock_getcpuclockid2; ffclock_getcounter; ffclock_getestimate; diff --git a/lib/libc/sys/cap_enter.2 b/lib/libc/sys/cap_enter.2 index 5454ec9..c3cefe8 100644 --- a/lib/libc/sys/cap_enter.2 +++ b/lib/libc/sys/cap_enter.2 @@ -90,6 +90,7 @@ acquired rights as possible. .Sh SEE ALSO .Xr cap_new 2 , .Xr fexecve 2 , +.Xr cap_sandboxed 3 , .Xr capsicum 4 .Sh HISTORY Support for capabilities and capabilities mode was developed as part of the -- cgit v1.1 From 2774e0404e6c83cc801bf5edbedcb8ceb5ee1e16 Mon Sep 17 00:00:00 2001 From: marius Date: Sat, 2 Mar 2013 00:37:31 +0000 Subject: - While Netra X1 generally show no ill effects when registering a power fail interrupt handler, there seems to be either a broken batch of them or a tendency to develop a defect which causes this interrupt to fire inadvertently. Given that apart from this problem these machines work just fine, add a tunable allowing the setup of the power fail interrupt to be disabled. While at it, remove the DEBUGGER_ON_POWERFAIL compile time option and make that behavior also selectable via the newly added tunable. - Apparently, it's no longer a problem to call shutdown_nice(9) from within an interrupt filter (some other drivers in the tree do the same). So change the power fail interrupt from a handler to a filter in order to simplify the code and get rid of a !INTR_MPSAFE handler. - Use NULL instead of 0 for pointers.
MFC after: 1 week --- sys/conf/options.sparc64 | 1 - sys/sparc64/pci/psycho.c | 71 ++++++++++++++++++++++++++++++------------------ 2 files changed, 44 insertions(+), 28 deletions(-) diff --git a/sys/conf/options.sparc64 b/sys/conf/options.sparc64 index bc6af5a..883db16 100644 --- a/sys/conf/options.sparc64 +++ b/sys/conf/options.sparc64 @@ -23,7 +23,6 @@ PSM_DEBUG opt_psm.h PSM_HOOKRESUME opt_psm.h PSM_RESETAFTERSUSPEND opt_psm.h -DEBUGGER_ON_POWERFAIL opt_psycho.h PSYCHO_DEBUG opt_psycho.h SCHIZO_DEBUG opt_schizo.h diff --git a/sys/sparc64/pci/psycho.c b/sys/sparc64/pci/psycho.c index a44af7c..8f4f23e 100644 --- a/sys/sparc64/pci/psycho.c +++ b/sys/sparc64/pci/psycho.c @@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -80,7 +81,7 @@ static const struct psycho_desc *psycho_find_desc(const struct psycho_desc *, const char *); static const struct psycho_desc *psycho_get_desc(device_t); static void psycho_set_intr(struct psycho_softc *, u_int, bus_addr_t, - driver_filter_t, driver_intr_t); + driver_filter_t); static int psycho_find_intrmap(struct psycho_softc *, u_int, bus_addr_t *, bus_addr_t *, u_long *); static void sabre_dmamap_sync(bus_dma_tag_t dt, bus_dmamap_t map, @@ -94,8 +95,9 @@ static void psycho_intr_clear(void *); static driver_filter_t psycho_ue; static driver_filter_t psycho_ce; static driver_filter_t psycho_pci_bus; -static driver_filter_t psycho_powerfail; -static driver_intr_t psycho_overtemp; +static driver_filter_t psycho_powerdebug; +static driver_filter_t psycho_powerdown; +static driver_filter_t psycho_overtemp; #ifdef PSYCHO_MAP_WAKEUP static driver_filter_t psycho_wakeup; #endif @@ -159,9 +161,16 @@ static devclass_t psycho_devclass; DEFINE_CLASS_0(pcib, psycho_driver, psycho_methods, sizeof(struct psycho_softc)); -EARLY_DRIVER_MODULE(psycho, nexus, psycho_driver, psycho_devclass, 0, 0, +EARLY_DRIVER_MODULE(psycho, nexus, psycho_driver, psycho_devclass, NULL, NULL, BUS_PASS_BUS); +static 
SYSCTL_NODE(_hw, OID_AUTO, psycho, CTLFLAG_RD, 0, "psycho parameters"); + +static u_int psycho_powerfail = 1; +TUNABLE_INT("hw.psycho.powerfail", &psycho_powerfail); +SYSCTL_UINT(_hw_psycho, OID_AUTO, powerfail, CTLFLAG_RDTUN, &psycho_powerfail, + 0, "powerfail action (0: none, 1: shutdown (default), 2: debugger)"); + static SLIST_HEAD(, psycho_softc) psycho_softcs = SLIST_HEAD_INITIALIZER(psycho_softcs); @@ -610,15 +619,20 @@ psycho_attach(device_t dev) * XXX Not all controllers have these, but installing them * is better than trying to sort through this mess. */ - psycho_set_intr(sc, 1, PSR_UE_INT_MAP, psycho_ue, NULL); - psycho_set_intr(sc, 2, PSR_CE_INT_MAP, psycho_ce, NULL); -#ifdef DEBUGGER_ON_POWERFAIL - psycho_set_intr(sc, 3, PSR_POWER_INT_MAP, psycho_powerfail, - NULL); -#else - psycho_set_intr(sc, 3, PSR_POWER_INT_MAP, NULL, - (driver_intr_t *)psycho_powerfail); -#endif + psycho_set_intr(sc, 1, PSR_UE_INT_MAP, psycho_ue); + psycho_set_intr(sc, 2, PSR_CE_INT_MAP, psycho_ce); + switch (psycho_powerfail) { + case 0: + break; + case 2: + psycho_set_intr(sc, 3, PSR_POWER_INT_MAP, + psycho_powerdebug); + break; + default: + psycho_set_intr(sc, 3, PSR_POWER_INT_MAP, + psycho_powerdown); + break; + } if (sc->sc_mode == PSYCHO_MODE_PSYCHO) { /* * Hummingbirds/Sabres do not have the following two @@ -630,14 +644,14 @@ psycho_attach(device_t dev) * over-temperature interrupt. */ psycho_set_intr(sc, 4, PSR_SPARE_INT_MAP, - NULL, psycho_overtemp); + psycho_overtemp); #ifdef PSYCHO_MAP_WAKEUP /* * psycho_wakeup() doesn't do anything useful right * now. */ psycho_set_intr(sc, 5, PSR_PWRMGT_INT_MAP, - psycho_wakeup, NULL); + psycho_wakeup); #endif /* PSYCHO_MAP_WAKEUP */ } } @@ -647,7 +661,7 @@ psycho_attach(device_t dev) * interrupt but they are also only used for PCI bus A. */ psycho_set_intr(sc, 0, sc->sc_half == 0 ? 
PSR_PCIAERR_INT_MAP : - PSR_PCIBERR_INT_MAP, psycho_pci_bus, NULL); + PSR_PCIBERR_INT_MAP, psycho_pci_bus); /* * Set the latency timer register as this isn't always done by the @@ -687,7 +701,7 @@ psycho_attach(device_t dev) static void psycho_set_intr(struct psycho_softc *sc, u_int index, bus_addr_t intrmap, - driver_filter_t filt, driver_intr_t intr) + driver_filter_t handler) { u_long vec; int rid; @@ -708,7 +722,7 @@ psycho_set_intr(struct psycho_softc *sc, u_int index, bus_addr_t intrmap, INTVEC(PSYCHO_READ8(sc, intrmap)) != vec || intr_vectors[vec].iv_ic != &psycho_ic || bus_setup_intr(sc->sc_dev, sc->sc_irq_res[index], - INTR_TYPE_MISC | INTR_BRIDGE, filt, intr, sc, + INTR_TYPE_MISC | INTR_BRIDGE, handler, NULL, sc, &sc->sc_ihand[index]) != 0) panic("%s: failed to set up interrupt %d", __func__, index); } @@ -837,13 +851,16 @@ psycho_pci_bus(void *arg) } static int -psycho_powerfail(void *arg) +psycho_powerdebug(void *arg __unused) { -#ifdef DEBUGGER_ON_POWERFAIL - struct psycho_softc *sc = arg; kdb_enter(KDB_WHY_POWERFAIL, "powerfail"); -#else + return (FILTER_HANDLED); +} + +static int +psycho_powerdown(void *arg __unused) +{ static int shutdown; /* As the interrupt is cleared we may be called multiple times. */ @@ -851,22 +868,22 @@ psycho_powerfail(void *arg) return (FILTER_HANDLED); shutdown++; printf("Power Failure Detected: Shutting down NOW.\n"); - shutdown_nice(0); -#endif + shutdown_nice(RB_POWEROFF); return (FILTER_HANDLED); } -static void -psycho_overtemp(void *arg) +static int +psycho_overtemp(void *arg __unused) { static int shutdown; /* As the interrupt is cleared we may be called multiple times. 
*/ if (shutdown != 0) - return; + return (FILTER_HANDLED); shutdown++; printf("DANGER: OVER TEMPERATURE detected.\nShutting down NOW.\n"); shutdown_nice(RB_POWEROFF); + return (FILTER_HANDLED); } #ifdef PSYCHO_MAP_WAKEUP -- cgit v1.1 From dd15932a159ec60641cd20e4fb689fa28d75465d Mon Sep 17 00:00:00 2001 From: marius Date: Sat, 2 Mar 2013 00:41:51 +0000 Subject: - Apparently, it's no longer a problem to call shutdown_nice(9) from within an interrupt filter (some other drivers in the tree do the same). So change the overtemperature and power fail interrupts from handlers to filters in order to simplify the code and get rid of !INTR_MPSAFE handlers. - Mark unused parameters as such. - Use NULL instead of 0 for pointers. MFC after: 1 week --- sys/sparc64/sbus/sbus.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/sys/sparc64/sbus/sbus.c b/sys/sparc64/sbus/sbus.c index dbf1ccc..6c69e2d 100644 --- a/sys/sparc64/sbus/sbus.c +++ b/sys/sparc64/sbus/sbus.c @@ -152,8 +152,8 @@ static void sbus_intr_assign(void *); static void sbus_intr_clear(void *); static int sbus_find_intrmap(struct sbus_softc *, u_int, bus_addr_t *, bus_addr_t *); -static driver_intr_t sbus_overtemp; -static driver_intr_t sbus_pwrfail; +static driver_filter_t sbus_overtemp; +static driver_filter_t sbus_pwrfail; static int sbus_print_res(struct sbus_devinfo *); static device_method_t sbus_methods[] = { @@ -199,7 +199,7 @@ static driver_t sbus_driver = { static devclass_t sbus_devclass; -EARLY_DRIVER_MODULE(sbus, nexus, sbus_driver, sbus_devclass, 0, 0, +EARLY_DRIVER_MODULE(sbus, nexus, sbus_driver, sbus_devclass, NULL, NULL, BUS_PASS_BUS); MODULE_DEPEND(sbus, nexus, 1, 1, 1); MODULE_VERSION(sbus, 1); @@ -410,7 +410,7 @@ sbus_attach(device_t dev) INTVEC(SYSIO_READ8(sc, SBR_THERM_INT_MAP)) != vec || intr_vectors[vec].iv_ic != &sbus_ic || bus_setup_intr(dev, sc->sc_ot_ires, INTR_TYPE_MISC | INTR_BRIDGE, - NULL, sbus_overtemp, sc, + sbus_overtemp, NULL, sc, 
&sc->sc_ot_ihand) != 0) panic("%s: failed to set up temperature interrupt", __func__); i = 3; sc->sc_pf_ires = bus_alloc_resource_any(dev, SYS_RES_IRQ, &i, @@ -420,7 +420,7 @@ sbus_attach(device_t dev) INTVEC(SYSIO_READ8(sc, SBR_POWER_INT_MAP)) != vec || intr_vectors[vec].iv_ic != &sbus_ic || bus_setup_intr(dev, sc->sc_pf_ires, INTR_TYPE_MISC | INTR_BRIDGE, - NULL, sbus_pwrfail, sc, &sc->sc_pf_ihand) != 0) + sbus_pwrfail, NULL, sc, &sc->sc_pf_ihand) != 0) panic("%s: failed to set up power fail interrupt", __func__); /* Initialize the counter-timer. */ @@ -897,31 +897,33 @@ sbus_get_devinfo(device_t bus, device_t child) * This handles the interrupt and powers off the machine. * The same needs to be done to PCI controller drivers. */ -static void -sbus_overtemp(void *arg) +static int +sbus_overtemp(void *arg __unused) { static int shutdown; /* As the interrupt is cleared we may be called multiple times. */ if (shutdown != 0) - return; + return (FILTER_HANDLED); shutdown++; printf("DANGER: OVER TEMPERATURE detected\nShutting down NOW.\n"); shutdown_nice(RB_POWEROFF); + return (FILTER_HANDLED); } /* Try to shut down in time in case of power failure. */ -static void -sbus_pwrfail(void *arg) +static int +sbus_pwrfail(void *arg __unused) { static int shutdown; /* As the interrupt is cleared we may be called multiple times. */ if (shutdown != 0) - return; + return (FILTER_HANDLED); shutdown++; printf("Power failure detected\nShutting down NOW.\n"); - shutdown_nice(0); + shutdown_nice(RB_POWEROFF); + return (FILTER_HANDLED); } static int -- cgit v1.1 From f07ebb8888ea42f744890a727e8f6799a1086915 Mon Sep 17 00:00:00 2001 From: pjd Date: Sat, 2 Mar 2013 00:53:12 +0000 Subject: Merge Capsicum overhaul: - Capability is no longer separate descriptor type. Now every descriptor has set of its own capability rights. - The cap_new(2) system call is left, but it is no longer documented and should not be used in new code. 
- The new syscall cap_rights_limit(2), which limits capability rights of the given descriptor without creating a new one, should be used instead of cap_new(2). - The cap_getrights(2) syscall is renamed to cap_rights_get(2). - If CAP_IOCTL capability right is present we can further reduce allowed ioctls list with the new cap_ioctls_limit(2) syscall. List of allowed ioctls can be retrieved with cap_ioctls_get(2) syscall. - If CAP_FCNTL capability right is present we can further reduce fcntls that can be used with the new cap_fcntls_limit(2) syscall and retrieve them with cap_fcntls_get(2). - To support ioctl and fcntl white-listing the filedesc structure was heavily modified. - The audit subsystem, kdump and procstat tools were updated to recognize new syscalls. - Capability rights were revised and even though I tried hard to provide backward API and ABI compatibility there are some incompatible changes that are described in detail below: CAP_CREATE old behaviour: - Allow for openat(2)+O_CREAT. - Allow for linkat(2). - Allow for symlinkat(2). CAP_CREATE new behaviour: - Allow for openat(2)+O_CREAT. Added CAP_LINKAT: - Allow for linkat(2). ABI: Reuses CAP_RMDIR bit. - Allow to be target for renameat(2). Added CAP_SYMLINKAT: - Allow for symlinkat(2). Removed CAP_DELETE. Old behaviour: - Allow for unlinkat(2) when removing non-directory object. - Allow to be source for renameat(2). Removed CAP_RMDIR. Old behaviour: - Allow for unlinkat(2) when removing directory. Added CAP_RENAMEAT: - Required for source directory for the renameat(2) syscall. Added CAP_UNLINKAT (effectively it replaces CAP_DELETE and CAP_RMDIR): - Allow for unlinkat(2) on any object. - Required if target of renameat(2) exists and will be removed by this call. Removed CAP_MAPEXEC. CAP_MMAP old behaviour: - Allow for mmap(2) with any combination of PROT_NONE, PROT_READ and PROT_WRITE. CAP_MMAP new behaviour: - Allow for mmap(2)+PROT_NONE. Added CAP_MMAP_R: - Allow for mmap(PROT_READ). 
Added CAP_MMAP_W: - Allow for mmap(PROT_WRITE). Added CAP_MMAP_X: - Allow for mmap(PROT_EXEC). Added CAP_MMAP_RW: - Allow for mmap(PROT_READ | PROT_WRITE). Added CAP_MMAP_RX: - Allow for mmap(PROT_READ | PROT_EXEC). Added CAP_MMAP_WX: - Allow for mmap(PROT_WRITE | PROT_EXEC). Added CAP_MMAP_RWX: - Allow for mmap(PROT_READ | PROT_WRITE | PROT_EXEC). Renamed CAP_MKDIR to CAP_MKDIRAT. Renamed CAP_MKFIFO to CAP_MKFIFOAT. Renamed CAP_MKNOD to CAP_MKNODAT. CAP_READ old behaviour: - Allow pread(2). - Disallow read(2), readv(2) (if there is no CAP_SEEK). CAP_READ new behaviour: - Allow read(2), readv(2). - Disallow pread(2) (CAP_SEEK is now also required). CAP_WRITE old behaviour: - Allow pwrite(2). - Disallow write(2), writev(2) (if there is no CAP_SEEK). CAP_WRITE new behaviour: - Allow write(2), writev(2). - Disallow pwrite(2) (CAP_SEEK is now also required). Added convenient defines: #define CAP_PREAD (CAP_SEEK | CAP_READ) #define CAP_PWRITE (CAP_SEEK | CAP_WRITE) #define CAP_MMAP_R (CAP_MMAP | CAP_SEEK | CAP_READ) #define CAP_MMAP_W (CAP_MMAP | CAP_SEEK | CAP_WRITE) #define CAP_MMAP_X (CAP_MMAP | CAP_SEEK | 0x0000000000000008ULL) #define CAP_MMAP_RW (CAP_MMAP_R | CAP_MMAP_W) #define CAP_MMAP_RX (CAP_MMAP_R | CAP_MMAP_X) #define CAP_MMAP_WX (CAP_MMAP_W | CAP_MMAP_X) #define CAP_MMAP_RWX (CAP_MMAP_R | CAP_MMAP_W | CAP_MMAP_X) #define CAP_RECV CAP_READ #define CAP_SEND CAP_WRITE #define CAP_SOCK_CLIENT \ (CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \ CAP_PEELOFF | CAP_RECV | CAP_SEND | CAP_SETSOCKOPT | CAP_SHUTDOWN) #define CAP_SOCK_SERVER \ (CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \ CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_RECV | CAP_SEND | \ CAP_SETSOCKOPT | CAP_SHUTDOWN) Added defines for backward API compatibility: #define CAP_MAPEXEC CAP_MMAP_X #define CAP_DELETE CAP_UNLINKAT #define CAP_MKDIR CAP_MKDIRAT #define CAP_RMDIR CAP_UNLINKAT #define CAP_MKFIFO CAP_MKFIFOAT #define CAP_MKNOD CAP_MKNODAT #define CAP_SOCK_ALL 
(CAP_SOCK_CLIENT | CAP_SOCK_SERVER) Sponsored by: The FreeBSD Foundation Reviewed by: Christoph Mallon Many aspects discussed with: rwatson, benl, jonathan ABI compatibility discussed with: kib --- contrib/openbsm/etc/audit_event | 7 +- lib/libc/include/compat.h | 2 + lib/libc/sys/Makefile.inc | 8 +- lib/libc/sys/Symbol.map | 7 +- lib/libc/sys/cap_enter.2 | 38 +- lib/libc/sys/cap_fcntls_limit.2 | 127 ++++ lib/libc/sys/cap_ioctls_limit.2 | 158 +++++ lib/libc/sys/cap_new.2 | 475 --------------- lib/libc/sys/cap_rights_limit.2 | 603 +++++++++++++++++++ lib/libc/sys/dup.2 | 6 - lib/libprocstat/libprocstat.c | 1 - lib/libprocstat/libprocstat.h | 1 - sys/bsm/audit_kevents.h | 7 +- sys/cddl/compat/opensolaris/sys/file.h | 8 +- .../opensolaris/uts/common/fs/zfs/zfs_ioctl.c | 6 +- .../opensolaris/uts/common/fs/zfs/zfs_onexit.c | 2 +- sys/compat/freebsd32/syscalls.master | 12 +- sys/compat/linux/linux_file.c | 9 +- sys/compat/svr4/svr4_fcntl.c | 6 +- sys/compat/svr4/svr4_filio.c | 10 +- sys/compat/svr4/svr4_misc.c | 7 +- sys/compat/svr4/svr4_stream.c | 4 +- sys/dev/iscsi/initiator/iscsi.c | 4 +- sys/fs/fdescfs/fdesc_vfsops.c | 2 +- sys/fs/fdescfs/fdesc_vnops.c | 2 +- sys/fs/nfs/nfsdport.h | 2 - sys/fs/nfsclient/nfs_clport.c | 2 +- sys/fs/nfsserver/nfs_nfsdport.c | 8 +- sys/i386/ibcs2/ibcs2_misc.c | 7 +- sys/kern/capabilities.conf | 18 +- sys/kern/kern_descrip.c | 663 ++++++++++----------- sys/kern/kern_exec.c | 3 - sys/kern/kern_exit.c | 2 +- sys/kern/kern_fork.c | 2 +- sys/kern/sys_capability.c | 645 +++++++++++--------- sys/kern/sys_generic.c | 131 ++-- sys/kern/syscalls.master | 12 +- sys/kern/tty.c | 16 +- sys/kern/uipc_mqueue.c | 9 +- sys/kern/uipc_sem.c | 4 +- sys/kern/uipc_shm.c | 4 +- sys/kern/uipc_syscalls.c | 46 +- sys/kern/uipc_usrreq.c | 89 +-- sys/kern/vfs_aio.c | 6 +- sys/kern/vfs_lookup.c | 13 +- sys/kern/vfs_syscalls.c | 88 ++- sys/netsmb/smb_dev.c | 4 +- sys/nfsserver/nfs_srvkrpc.c | 3 +- sys/ofed/include/linux/file.h | 9 +- sys/security/audit/audit.h | 7 + 
sys/security/audit/audit_arg.c | 13 + sys/security/audit/audit_bsm.c | 15 +- sys/security/audit/audit_private.h | 2 + sys/sys/capability.h | 197 ++++-- sys/sys/file.h | 7 +- sys/sys/filedesc.h | 45 +- sys/sys/namei.h | 5 +- sys/sys/user.h | 4 +- sys/vm/vm_mmap.c | 6 +- usr.bin/kdump/kdump.c | 9 + usr.bin/kdump/mksubr | 1 + usr.bin/procstat/procstat_files.c | 51 +- 62 files changed, 2188 insertions(+), 1472 deletions(-) create mode 100644 lib/libc/sys/cap_fcntls_limit.2 create mode 100644 lib/libc/sys/cap_ioctls_limit.2 delete mode 100644 lib/libc/sys/cap_new.2 create mode 100644 lib/libc/sys/cap_rights_limit.2 diff --git a/contrib/openbsm/etc/audit_event b/contrib/openbsm/etc/audit_event index 0350389..f82841a 100644 --- a/contrib/openbsm/etc/audit_event +++ b/contrib/openbsm/etc/audit_event @@ -548,7 +548,7 @@ 43184:AUE_OPENAT:openat(2) - attr only:fa 43185:AUE_POSIX_OPENPT:posix_openpt(2):ip 43186:AUE_CAP_NEW:cap_new(2):fm -43187:AUE_CAP_GETRIGHTS:cap_getrights(2):fm +43187:AUE_CAP_RIGHTS_GET:cap_rights_get(2):fm 43188:AUE_CAP_ENTER:cap_enter(2):pc 43189:AUE_CAP_GETMODE:cap_getmode(2):pc 43190:AUE_POSIX_SPAWN:posix_spawn(2):pc @@ -563,6 +563,11 @@ 43199:AUE_PDGETPID:pdgetpid(2):pc 43200:AUE_PDWAIT:pdwait(2):pc 43201:AUE_WAIT6:wait6(2):pc +43202:AUE_CAP_RIGHTS_LIMIT:cap_rights_limit(2):fm +43203:AUE_CAP_IOCTLS_LIMIT:cap_ioctls_limit(2):fm +43204:AUE_CAP_IOCTLS_GET:cap_ioctls_get(2):fm +43205:AUE_CAP_FCNTLS_LIMIT:cap_fcntls_limit(2):fm +43206:AUE_CAP_FCNTLS_GET:cap_fcntls_get(2):fm # # Solaris userspace events. 
# diff --git a/lib/libc/include/compat.h b/lib/libc/include/compat.h index 7694540..3739fe1 100644 --- a/lib/libc/include/compat.h +++ b/lib/libc/include/compat.h @@ -42,6 +42,8 @@ __sym_compat(__semctl, freebsd7___semctl, FBSD_1.0); __sym_compat(msgctl, freebsd7_msgctl, FBSD_1.0); __sym_compat(shmctl, freebsd7_shmctl, FBSD_1.0); +__sym_compat(cap_getrights, cap_rights_get, FBSD_1.2); + #undef __sym_compat #endif /* __LIBC_COMPAT_H__ */ diff --git a/lib/libc/sys/Makefile.inc b/lib/libc/sys/Makefile.inc index 9f216dc..03c0090 100644 --- a/lib/libc/sys/Makefile.inc +++ b/lib/libc/sys/Makefile.inc @@ -93,7 +93,9 @@ MAN+= abort2.2 \ bind.2 \ brk.2 \ cap_enter.2 \ - cap_new.2 \ + cap_fcntls_limit.2 \ + cap_ioctls_limit.2 \ + cap_rights_limit.2 \ chdir.2 \ chflags.2 \ chmod.2 \ @@ -270,7 +272,9 @@ MLINKS+=access.2 eaccess.2 \ access.2 faccessat.2 MLINKS+=brk.2 sbrk.2 MLINKS+=cap_enter.2 cap_getmode.2 -MLINKS+=cap_new.2 cap_getrights.2 +MLINKS+=cap_fcntls_limit.2 cap_fcntls_get.2 +MLINKS+=cap_ioctls_limit.2 cap_ioctls_get.2 +MLINKS+=cap_rights_limit.2 cap_rights_get.2 MLINKS+=chdir.2 fchdir.2 MLINKS+=chflags.2 fchflags.2 \ chflags.2 lchflags.2 diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map index d126255..7738e46 100644 --- a/lib/libc/sys/Symbol.map +++ b/lib/libc/sys/Symbol.map @@ -364,7 +364,6 @@ FBSD_1.2 { cap_enter; cap_getmode; cap_new; - cap_getrights; getloginclass; pdfork; pdgetpid; @@ -379,6 +378,12 @@ FBSD_1.2 { }; FBSD_1.3 { + cap_fcntls_get; + cap_fcntls_limit; + cap_ioctls_get; + cap_ioctls_limit; + cap_rights_get; + cap_rights_limit; cap_sandboxed; clock_getcpuclockid2; ffclock_getcounter; diff --git a/lib/libc/sys/cap_enter.2 b/lib/libc/sys/cap_enter.2 index c3cefe8..3369669 100644 --- a/lib/libc/sys/cap_enter.2 +++ b/lib/libc/sys/cap_enter.2 @@ -58,8 +58,10 @@ or .Xr pdfork 2 will be placed in capability mode from inception. 
.Pp -When combined with capabilities created with -.Xr cap_new 2 , +When combined with +.Xr cap_rights_limit 2 , +.Xr cap_ioctls_limit 2 , +.Xr cap_fcntls_limit 2 , .Fn cap_enter may be used to create kernel-enforced sandboxes in which appropriately-crafted applications or application components may be run. @@ -71,11 +73,6 @@ sandbox. Creating effective process sandboxes is a tricky process that involves identifying the least possible rights required by the process and then passing those rights into the process in a safe manner. -See the CAVEAT -section of -.Xr cap_new 2 -for why this is particularly tricky with UNIX file descriptors as the -canonical representation of a right. Consumers of .Fn cap_enter should also be aware of other inherited rights, such as access to VM @@ -87,8 +84,33 @@ to create a runtime environment inside the sandbox that has as few implicitly acquired rights as possible. .Sh RETURN VALUES .Rv -std cap_enter cap_getmode +.Sh ERRORS +The +.Fn cap_enter +and +.Fn cap_getmode +system calls +will fail if: +.Bl -tag -width Er +.It Bq Er ENOSYS +The kernel is compiled without: +.Pp +.Cd "options CAPABILITY_MODE" +.El +.Pp +The +.Fn cap_getmode +system call may also return the following error: +.Bl -tag -width Er +.It Bq Er EFAULT +Pointer +.Fa modep +points outside the process's allocated address space. +.El .Sh SEE ALSO -.Xr cap_new 2 , +.Xr cap_fcntls_limit 2 , +.Xr cap_ioctls_limit 2 , +.Xr cap_rights_limit 2 , .Xr fexecve 2 , .Xr cap_sandboxed 3 , .Xr capsicum 4 diff --git a/lib/libc/sys/cap_fcntls_limit.2 b/lib/libc/sys/cap_fcntls_limit.2 new file mode 100644 index 0000000..8fa7463 --- /dev/null +++ b/lib/libc/sys/cap_fcntls_limit.2 @@ -0,0 +1,127 @@ +.\" +.\" Copyright (c) 2012 The FreeBSD Foundation +.\" All rights reserved. +.\" +.\" This documentation was written by Pawel Jakub Dawidek under sponsorship +.\" the FreeBSD Foundation. 
+.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd September 20, 2012 +.Dt CAP_FCNTLS_LIMIT 2 +.Os +.Sh NAME +.Nm cap_fcntls_limit , +.Nm cap_fcntls_get +.Nd manage allowed fcntl commands +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/capability.h +.Ft int +.Fn cap_fcntls_limit "int fd" "uint32_t fcntlrights" +.Ft int +.Fn cap_fcntls_get "int fd" "uint32_t *fcntlrightsp" +.Sh DESCRIPTION +If a file descriptor is granted the +.Dv CAP_FCNTL +capability right, the list of allowed +.Xr fcntl 2 +commands can be selectively reduced (but never expanded) with the +.Fn cap_fcntls_limit +system call. 
+.Pp +A bitmask of allowed fcntl commands for a given file descriptor can be obtained +with the +.Fn cap_fcntls_get +system call. +.Sh FLAGS +The following flags may be specified in the +.Fa fcntlrights +argument or returned in the +.Fa fcntlrightsp +argument: +.Bl -tag -width CAP_FCNTL_GETOWN +.It Dv CAP_FCNTL_GETFL +Permit +.Dv F_GETFL +command. +.It Dv CAP_FCNTL_SETFL +Permit +.Dv F_SETFL +command. +.It Dv CAP_FCNTL_GETOWN +Permit +.Dv F_GETOWN +command. +.It Dv CAP_FCNTL_SETOWN +Permit +.Dv F_SETOWN +command. +.El +.Sh RETURN VALUES +.Rv -std +.Sh ERRORS +.Fn cap_fcntls_limit +succeeds unless: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa fd +argument is not a valid descriptor. +.It Bq Er EINVAL +An invalid flag has been passed in +.Fa fcntlrights . +.It Bq Er ENOTCAPABLE +.Fa fcntlrights +would expand the list of allowed +.Xr fcntl 2 +commands. +.El +.Pp +.Fn cap_fcntls_get +succeeds unless: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa fd +argument is not a valid descriptor. +.It Bq Er EFAULT +The +.Fa fcntlrightsp +argument points at an invalid address. +.El +.Sh SEE ALSO +.Xr cap_ioctls_limit 2 , +.Xr cap_rights_limit 2 , +.Xr fcntl 2 +.Sh HISTORY +Support for capabilities and capabilities mode was developed as part of the +.Tn TrustedBSD +Project. +.Pp +.Sh AUTHORS +These functions were created by +.An Pawel Jakub Dawidek Aq pawel@dawidek.net +under sponsorship of the FreeBSD Foundation. diff --git a/lib/libc/sys/cap_ioctls_limit.2 b/lib/libc/sys/cap_ioctls_limit.2 new file mode 100644 index 0000000..5eca18c --- /dev/null +++ b/lib/libc/sys/cap_ioctls_limit.2 @@ -0,0 +1,158 @@ +.\" +.\" Copyright (c) 2012 The FreeBSD Foundation +.\" All rights reserved. +.\" +.\" This documentation was written by Pawel Jakub Dawidek under sponsorship +.\" from the FreeBSD Foundation. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd September 20, 2012 +.Dt CAP_IOCTLS_LIMIT 2 +.Os +.Sh NAME +.Nm cap_ioctls_limit , +.Nm cap_ioctls_get +.Nd manage allowed ioctl commands +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/capability.h +.Ft int +.Fn cap_ioctls_limit "int fd" "const unsigned long *cmds" "size_t ncmds" +.Ft ssize_t +.Fn cap_ioctls_get "int fd" "unsigned long *cmds" "size_t maxcmds" +.Sh DESCRIPTION +If a file descriptor is granted the +.Dv CAP_IOCTL +capability right, the list of allowed +.Xr ioctl 2 +commands can be selectively reduced (but never expanded) with the +.Fn cap_ioctls_limit +system call. +The +.Fa cmds +argument is an array of +.Xr ioctl 2 +commands and the +.Fa ncmds +argument specifies the number of elements in the array. +There might be up to +.Va 256 +elements in the array. 
+.Pp +The list of allowed ioctl commands for a given file descriptor can be obtained +with the +.Fn cap_ioctls_get +system call. +The +.Fa cmds +argument points at memory that can hold up to +.Fa maxcmds +values. +The function populates the provided buffer with up to +.Fa maxcmds +elements, but always returns the total number of ioctl commands allowed for the +given file descriptor. +The total number of ioctl commands for the given file descriptor can be +obtained by passing +.Dv NULL as the +.Fa cmds +argument and +.Va 0 +as the +.Fa maxcmds +argument. +If all ioctl commands are allowed +.Dv ( CAP_IOCTL +capability right is assigned to the file descriptor and the +.Fn cap_ioctls_limit +system call was never called for this file descriptor), the +.Fn cap_ioctls_get +system call will return +.Dv CAP_IOCTLS_ALL +and won't modify the buffer pointed to by the +.Fa cmds +argument. +.Sh RETURN VALUES +.Rv -std cap_ioctls_limit +.Pp +The +.Fn cap_ioctls_get +function, if successful, returns the total number of allowed ioctl commands or +the value +.Dv CAP_IOCTLS_ALL +if all ioctl commands are allowed. +On failure the value +.Va -1 +is returned and the global variable errno is set to indicate the error. +.Sh ERRORS +.Fn cap_ioctls_limit +succeeds unless: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa fd +argument is not a valid descriptor. +.It Bq Er EFAULT +The +.Fa cmds +argument points at an invalid address. +.It Bq Er EINVAL +The +.Fa ncmds +argument is greater than +.Va 256 . +.It Bq Er ENOTCAPABLE +.Fa cmds +would expand the list of allowed +.Xr ioctl 2 +commands. +.El +.Pp +.Fn cap_ioctls_get +succeeds unless: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa fd +argument is not a valid descriptor. +.It Bq Er EFAULT +The +.Fa cmds +argument points at an invalid address. +.El +.Sh SEE ALSO +.Xr cap_fcntls_limit 2 , +.Xr cap_rights_limit 2 , +.Xr ioctl 2 +.Sh HISTORY +Support for capabilities and capabilities mode was developed as part of the +.Tn TrustedBSD +Project. 
+.Pp +.Sh AUTHORS +This function was created by +.An Pawel Jakub Dawidek Aq pawel@dawidek.net +under sponsorship of the FreeBSD Foundation. diff --git a/lib/libc/sys/cap_new.2 b/lib/libc/sys/cap_new.2 deleted file mode 100644 index a18fd3b..0000000 --- a/lib/libc/sys/cap_new.2 +++ /dev/null @@ -1,475 +0,0 @@ -.\" -.\" Copyright (c) 2008-2010 Robert N. M. Watson -.\" All rights reserved. -.\" -.\" This software was developed at the University of Cambridge Computer -.\" Laboratory with support from a grant from Google, Inc. -.\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" -.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. 
-.\" -.\" $FreeBSD$ -.\" -.Dd July 20, 2011 -.Dt CAP_NEW 2 -.Os -.Sh NAME -.Nm cap_new , -.Nm cap_getrights -.Nd System calls to manipulate capabilities -.Sh LIBRARY -.Lb libc -.Sh SYNOPSIS -.In sys/capability.h -.Ft int -.Fn cap_new "int fd" "cap_rights_t rights" -.Ft int -.Fn cap_getrights "int fd" "cap_rights_t *rightsp" -.Sh DESCRIPTION -Capabilities are special file descriptors derived from an existing file -descriptor, such as one returned by -.Xr fhopen 2 , -.Xr kqueue 2 , -.Xr mq_open 2 , -.Xr open 2 , -.Xr pipe 2 , -.Xr shm_open 2 , -.Xr socket 2 , -or -.Xr socketpair 2 , -but with a restricted set of permitted operations determined by a rights -mask set when the capability is created. -These restricted rights cannot be changed after the capability is created, -although further capabilities with yet more restricted rights may be created -from an existing capability. -In every other sense, a capability behaves in the same way as the file -descriptor it was created from. -.Pp -.Fn cap_new -creates a new capability for the existing file descriptor -.Fa fd , -and returns a file descriptor for it. -Operations on the capability will be limited to those permitted by -.Fa rights , -which is static for the lifetime of the capability. -If -.Fa fd -refers to an existing capability, then -.Fa rights -must be equal to or a subset of the rights on that capability. -As with -.Xr dup 2 -and -.Xr dup2 2 , -many properties are shared between the new capability and the existing file -descriptor, including open file flags, blocking disposition, and file offset. -Many applications will prefer to use the -.Xr cap_limitfd 3 -library call, part of -.Xr libcapsicum 3 , -as it offers a more convenient interface. -.Pp -.Fn cap_getrights -queries the rights associated with the capability referred to by file -descriptor -.Fa fd . -.Pp -These system calls, when combined with -.Xr cap_enter 2 , -may be used to construct process sandboxes with highly granular rights -assignment. 
-.Sh RIGHTS -The following rights may be specified in a new capability rights mask: -.Bl -tag -width CAP_EXTATTR_DELETE -.It Dv CAP_ACCEPT -Permit -.Xr accept 2 . -.It Dv CAP_ACL_CHECK -Permit checking of an ACL on a file descriptor; there is no cross-reference -for this system call. -.It Dv CAP_ACL_DELETE -Permit -.Xr acl_delete_fd_np 3 . -.It Dv CAP_ACL_GET -Permit -.Xr acl_get_fd 3 -and -.Xr acl_get_fd_np 3 . -.It Dv CAP_ACL_SET -Permit -.Xr acl_set_fd 3 -and -.Xr acl_set_fd_np 3 . -.It Dv CAP_BIND -Permit -.Xr bind 2 . -Note that sockets can also become bound implicitly as a result of -.Xr connect 2 -or -.Xr send 2 , -and that socket options set with -.Xr setsockopt 2 -may also affect binding behavior. -.It Dv CAP_CONNECT -Permit -.Xr connect 2 ; -also required for -.Xr sendto 2 -with a non-NULL destination address. -.It Dv CAP_EVENT -Permit -.Xr select 2 , -.Xr poll 2 , -and -.Xr kevent 2 -to be used in monitoring the file descriptor for events. -.It Dv CAP_FEXECVE -Permit -.Xr fexecve 2 ; -.Dv CAP_READ -will also be required. -.It Dv CAP_EXTATTR_DELETE -Permit -.Xr extattr_delete_fd 2 . -.It Dv CAP_EXTATTR_GET -Permit -.Xr extattr_get_fd 2 . -.It Dv CAP_EXTATTR_LIST -Permit -.Xr extattr_list_fd 2 . -.It Dv CAP_EXTATTR_SET -Permit -.Xr extattr_set_fd 2 . -.It Dv CAP_FCHDIR -Permit -.Xr fchdir 2 . -.It Dv CAP_FCHFLAGS -Permit -.Xr fchflags 2 . -.It Dv CAP_FCHMOD -Permit -.Xr fchmod 2 . -.It Dv CAP_FCHOWN -Permit -.Xr fchown 2 . -.It Dv CAP_FCNTL -Permit -.Xr fcntl 2 ; -be aware that this call provides indirect access to other operations, such as -.Xr flock 2 . -.It Dv CAP_FLOCK -Permit -.Xr flock 2 -and related calls. -.It Dv CAP_FPATHCONF -Permit -.Xr fpathconf 2 . -.It Dv CAP_FSCK -Permit UFS background-fsck operations on the descriptor. -.It Dv CAP_FSTAT -Permit -.Xr fstat 2 . -.It Dv CAP_FSTATFS -Permit -.Xr fstatfs 2 . -.It Dv CAP_FSYNC -Permit -.Xr aio_fsync 2 -and -.Xr fsync 2 . -.Pp -.It Dv CAP_FTRUNCATE -Permit -.Xr ftruncate 2 . 
-.It Dv CAP_FUTIMES -Permit -.Xr futimes 2 . -.It Dv CAP_GETPEERNAME -Permit -.Xr getpeername 2 . -.It Dv CAP_GETSOCKNAME -Permit -.Xr getsockname 2 . -.It Dv CAP_GETSOCKOPT -Permit -.Xr getsockopt 2 . -.It Dv CAP_IOCTL -Permit -.Xr ioctl 2 . -Be aware that this system call has enormous scope, including potentially -global scope for some objects. -.It Dv CAP_KEVENT -Permit -.Xr kevent 2 ; -.Dv CAP_EVENT -is also required on file descriptors that will be monitored using -.Xr kevent 2 . -.It Dv CAP_LISTEN -Permit -.Xr listen 2 ; -not much use (generally) without -.Dv CAP_BIND . -.It Dv CAP_LOOKUP -Permit the file descriptor to be used as a starting directory for calls such -as -.Xr linkat 2 , -.Xr openat 2 , -and -.Xr unlinkat 2 . -Note that these calls are not available in capability mode as they manipulate -a global name space; see -.Xr cap_enter 2 -for details. -.It Dv CAP_MAC_GET -Permit -.Xr mac_get_fd 3 . -.It Dv CAP_MAC_SET -Permit -.Xr mac_set_fd 3 . -.It Dv CAP_MMAP -Permit -.Xr mmap 2 ; -specific invocations may also require -.Dv CAP_READ -or -.Dv CAP_WRITE . -.Pp -.It Dv CAP_PDGETPID -Permit -.Xr pdgetpid 2 . -.It Dv CAP_PDKILL -Permit -.Xr pdkill 2 . -.It Dv CAP_PDWAIT -Permit -.Xr pdwait4 2 . -.It Dv CAP_PEELOFF -Permit -.Xr sctp_peeloff 2 . -.It Dv CAP_READ -Allow -.Xr aio_read 2 , -.Xr pread 2 , -.Xr read 2 , -.Xr recv 2 , -.Xr recvfrom 2 , -.Xr recvmsg 2 , -and related system calls. -.Pp -For files and other seekable objects, -.Dv CAP_SEEK -may also be required. -.It Dv CAP_REVOKE -Permit -.Xr frevoke 2 -in certain ABI compatibility modes that support this system call. -.It Dv CAP_SEEK -Permit operations that seek on the file descriptor, such as -.Xr lseek 2 , -but also required for I/O system calls that modify the file offset, such as -.Xr read 2 -and -.Xr write 2 . -.It Dv CAP_SEM_GETVALUE -Permit -.Xr sem_getvalue 3 . -.It Dv CAP_SEM_POST -Permit -.Xr sem_post 3 . -.It Dv CAP_SEM_WAIT -Permit -.Xr sem_wait 3 -and -.Xr sem_trywait 3 . 
-.It Dv CAP_SETSOCKOPT -Permit -.Xr setsockopt 2 ; -this controls various aspects of socket behavior and may affect binding, -connecting, and other behaviors with global scope. -.It Dv CAP_SHUTDOWN -Permit explicit -.Xr shutdown 2 ; -closing the socket will also generally shut down any connections on it. -.It Dv CAP_TTYHOOK -Allow configuration of TTY hooks, such as -.Xr snp 4 , -on the file descriptor. -.It Dv CAP_WRITE -Allow -.Xr aio_write 2 , -.Xr pwrite 2 , -.Xr send 2 , -.Xr sendmsg 2 , -.Xr sendto 2 , -.Xr write 2 , -and related system calls. -.Pp -For files and other seekable objects, -.Dv CAP_SEEK -may also be required. -.Pp -For -.Xr sendto 2 -with a non-NULL connection address, -.Dv CAP_CONNECT -is also required. -.El -.Sh CAVEAT -The -.Fn cap_new -system call and the capabilities it creates may be used to assign -fine-grained rights to sandboxed processes running in capability mode. -However, the semantics of objects accessed via file descriptors are complex, -so caution should be exercised in passing object capabilities into sandboxes. -.Sh RETURN VALUES -If successful, -.Fn cap_new -returns a non-negative integer, termed a file descriptor. -It returns -1 on failure, and sets -.Va errno -to indicate the error. -.Pp -.Rv -std cap_getrights -.Sh ERRORS -.Fn cap_new -may return the following errors: -.Bl -tag -width Er -.It Bq Er EBADF -The -.Fa fd -argument is not a valid active descriptor. -.It Bq Er EINVAL -An invalid right has been requested in -.Fa rights . -.It Bq Er EMFILE -The process has already reached its limit for open file descriptors. -.It Bq Er ENFILE -The system file table is full. -.It Bq Er EPERM -.Fa rights -contains requested rights not present in the current rights mask associated -with the capability referenced by -.Fa fd , -if any. -.El -.Pp -.Fn cap_getrights -may return the following errors: -.Bl -tag -width Er -.It Bq Er EBADF -The -.Fa fd -argument is not a valid active descriptor. 
-.It Bq Er EINVAL -The -.Fa fd -argument is not a capability. -.El -.Sh SEE ALSO -.Xr accept 2 , -.Xr aio_fsync 2 , -.Xr aio_read 2 , -.Xr aio_write 2 , -.Xr bind 2 , -.Xr cap_enter 2 , -.Xr connect 2 , -.Xr dup 2 , -.Xr dup2 2 , -.Xr extattr_delete_fd 2 , -.Xr extattr_get_fd 2 , -.Xr extattr_list_fd 2 , -.Xr extattr_set_fd 2 , -.Xr fchflags 2 , -.Xr fchown 2 , -.Xr fcntl 2 , -.Xr fexecve 2 , -.Xr fhopen 2 , -.Xr flock 2 , -.Xr fpathconf 2 , -.Xr fstat 2 , -.Xr fstatfs 2 , -.Xr fsync 2 , -.Xr ftruncate 2 , -.Xr futimes 2 , -.Xr getpeername 2 , -.Xr getsockname 2 , -.Xr getsockopt 2 , -.Xr ioctl 2 , -.Xr kevent 2 , -.Xr kqueue 2 , -.Xr linkat 2 , -.Xr listen 2 , -.Xr mmap 2 , -.Xr mq_open 2 , -.Xr open 2 , -.Xr openat 2 , -.Xr pdgetpid 2 , -.Xr pdkill 2 , -.Xr pdwait4 2 , -.Xr pipe 2 , -.Xr poll 2 , -.Xr pread 2 , -.Xr pwrite 2 , -.Xr read 2 , -.Xr recv 2 , -.Xr recvfrom 2 , -.Xr recvmsg 2 , -.Xr sctp_peeloff 2 , -.Xr select 2 , -.Xr send 2 , -.Xr sendmsg 2 , -.Xr sendto 2 , -.Xr setsockopt 2 , -.Xr shm_open 2 , -.Xr shutdown 2 , -.Xr socket 2 , -.Xr socketpair 2 , -.Xr unlinkat 2 , -.Xr write 2 , -.Xr acl_delete_fd_np 3 , -.Xr acl_get_fd 3 , -.Xr acl_get_fd_np 3 , -.Xr acl_set_fd_np 3 , -.Xr cap_limitfd 3 , -.Xr libcapsicum 3 , -.Xr mac_get_fd 3 , -.Xr mac_set_fd 3 , -.Xr sem_getvalue 3 , -.Xr sem_post 3 , -.Xr sem_trywait 3 , -.Xr sem_wait 3 , -.Xr capsicum 4 , -.Xr snp 4 -.Sh HISTORY -Support for capabilities and capabilities mode was developed as part of the -.Tn TrustedBSD -Project. -.Sh AUTHORS -These functions and the capability facility were created by -.An "Robert N. M. Watson" -at the University of Cambridge Computer Laboratory with support from a grant -from Google, Inc. -.Sh BUGS -This man page should list the set of permitted system calls more specifically -for each capability right. -.Pp -Capability rights sometimes have unclear indirect impacts, which should be -documented, or at least hinted at. 
diff --git a/lib/libc/sys/cap_rights_limit.2 b/lib/libc/sys/cap_rights_limit.2 new file mode 100644 index 0000000..d8d8777 --- /dev/null +++ b/lib/libc/sys/cap_rights_limit.2 @@ -0,0 +1,603 @@ +.\" +.\" Copyright (c) 2008-2010 Robert N. M. Watson +.\" Copyright (c) 2012-2013 The FreeBSD Foundation +.\" All rights reserved. +.\" +.\" This software was developed at the University of Cambridge Computer +.\" Laboratory with support from a grant from Google, Inc. +.\" +.\" Portions of this documentation were written by Pawel Jakub Dawidek +.\" under sponsorship from the FreeBSD Foundation. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" $FreeBSD$ +.\" +.Dd February 23, 2013 +.Dt CAP_RIGHTS_LIMIT 2 +.Os +.Sh NAME +.Nm cap_rights_limit , +.Nm cap_rights_get +.Nd manage capability rights +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/capability.h +.Ft int +.Fn cap_rights_limit "int fd" "cap_rights_t rights" +.Ft int +.Fn cap_rights_get "int fd" "cap_rights_t *rightsp" +.Sh DESCRIPTION +When a file descriptor is created by a function such as +.Xr fhopen 2 , +.Xr kqueue 2 , +.Xr mq_open 2 , +.Xr open 2 , +.Xr openat 2 , +.Xr pdfork 2 , +.Xr pipe 2 , +.Xr shm_open 2 , +.Xr socket 2 , +or +.Xr socketpair 2 , +it is assigned all capability rights. +Those rights can be reduced (but never expanded) by using the +.Fn cap_rights_limit +system call. +Once capability rights are reduced, operations on the file descriptor will be +limited to those permitted by +.Fa rights . +.Pp +A bitmask of capability rights assigned to a file descriptor can be obtained with +the +.Fn cap_rights_get +system call. +.Sh RIGHTS +The following rights may be specified in a rights mask: +.Bl -tag -width CAP_EXTATTR_DELETE +.It Dv CAP_ACCEPT +Permit +.Xr accept 2 . +.It Dv CAP_ACL_CHECK +Permit checking of an ACL on a file descriptor; there is no cross-reference +for this system call. +.It Dv CAP_ACL_DELETE +Permit +.Xr acl_delete_fd_np 3 . +.It Dv CAP_ACL_GET +Permit +.Xr acl_get_fd 3 +and +.Xr acl_get_fd_np 3 . +.It Dv CAP_ACL_SET +Permit +.Xr acl_set_fd 3 +and +.Xr acl_set_fd_np 3 . +.It Dv CAP_BIND +Permit +.Xr bind 2 . +Note that sockets can also become bound implicitly as a result of +.Xr connect 2 +or +.Xr send 2 , +and that socket options set with +.Xr setsockopt 2 +may also affect binding behavior. +.It Dv CAP_CONNECT +Permit +.Xr connect 2 ; +also required for +.Xr sendto 2 +with a non-NULL destination address. +.It Dv CAP_CREATE +Permit +.Xr openat 2 +with the +.Dv O_CREAT +flag. +.\" XXXPJD: Doesn't exist anymore. 
+.It Dv CAP_EVENT +Permit +.Xr select 2 , +.Xr poll 2 , +and +.Xr kevent 2 +to be used in monitoring the file descriptor for events. +.It Dv CAP_FEXECVE +Permit +.Xr fexecve 2 +and +.Xr openat 2 +with the +.Dv O_EXEC +flag; +.Dv CAP_READ +will also be required. +.It Dv CAP_EXTATTR_DELETE +Permit +.Xr extattr_delete_fd 2 . +.It Dv CAP_EXTATTR_GET +Permit +.Xr extattr_get_fd 2 . +.It Dv CAP_EXTATTR_LIST +Permit +.Xr extattr_list_fd 2 . +.It Dv CAP_EXTATTR_SET +Permit +.Xr extattr_set_fd 2 . +.It Dv CAP_FCHDIR +Permit +.Xr fchdir 2 . +.It Dv CAP_FCHFLAGS +Permit +.Xr fchflags 2 . +.It Dv CAP_FCHMOD +Permit +.Xr fchmod 2 +and +.Xr fchmodat 2 . +.It Dv CAP_FCHMODAT +An alias to +.Dv CAP_FCHMOD . +.It Dv CAP_FCHOWN +Permit +.Xr fchown 2 +and +.Xr fchownat 2 . +.It Dv CAP_FCHOWNAT +An alias to +.Dv CAP_FCHOWN . +.It Dv CAP_FCNTL +Permit +.Xr fcntl 2 . +Note that only the +.Dv F_GETFL , +.Dv F_SETFL , +.Dv F_GETOWN +and +.Dv F_SETOWN +commands require this capability right. +Also note that the list of permitted commands can be further limited with the +.Xr cap_fcntls_limit 2 +system call. +.It Dv CAP_FLOCK +Permit +.Xr flock 2 , +.Xr fcntl 2 +(with +.Dv F_GETLK , +.Dv F_SETLK +or +.Dv F_SETLKW +flag) and +.Xr openat 2 +(with +.Dv O_EXLOCK +or +.Dv O_SHLOCK +flag). +.It Dv CAP_FPATHCONF +Permit +.Xr fpathconf 2 . +.It Dv CAP_FSCK +Permit UFS background-fsck operations on the descriptor. +.It Dv CAP_FSTAT +Permit +.Xr fstat 2 +and +.Xr fstatat 2 . +.It Dv CAP_FSTATAT +An alias to +.Dv CAP_FSTAT . +.It Dv CAP_FSTATFS +Permit +.Xr fstatfs 2 . +.It Dv CAP_FSYNC +Permit +.Xr aio_fsync 2 , +.Xr fsync 2 +and +.Xr openat 2 +with +.Dv O_FSYNC +or +.Dv O_SYNC +flag. +.It Dv CAP_FTRUNCATE +Permit +.Xr ftruncate 2 +and +.Xr openat 2 +with the +.Dv O_TRUNC +flag. +.It Dv CAP_FUTIMES +Permit +.Xr futimes 2 +and +.Xr futimesat 2 . +.It Dv CAP_FUTIMESAT +An alias to +.Dv CAP_FUTIMES . +.It Dv CAP_GETPEERNAME +Permit +.Xr getpeername 2 . +.It Dv CAP_GETSOCKNAME +Permit +.Xr getsockname 2 . 
+.It Dv CAP_GETSOCKOPT +Permit +.Xr getsockopt 2 . +.It Dv CAP_IOCTL +Permit +.Xr ioctl 2 . +Be aware that this system call has enormous scope, including potentially +global scope for some objects. +The list of permitted ioctl commands can be further limited with the +.Xr cap_ioctls_limit 2 +system call. +.\" XXXPJD: Doesn't exist anymore. +.It Dv CAP_KEVENT +Permit +.Xr kevent 2 ; +.Dv CAP_EVENT +is also required on file descriptors that will be monitored using +.Xr kevent 2 . +.It Dv CAP_LINKAT +Permit +.Xr linkat 2 +and +.Xr renameat 2 . +This right is required for the destination directory descriptor. +.It Dv CAP_LISTEN +Permit +.Xr listen 2 ; +not much use (generally) without +.Dv CAP_BIND . +.It Dv CAP_LOOKUP +Permit the file descriptor to be used as a starting directory for calls such as +.Xr linkat 2 , +.Xr openat 2 , +and +.Xr unlinkat 2 . +.It Dv CAP_MAC_GET +Permit +.Xr mac_get_fd 3 . +.It Dv CAP_MAC_SET +Permit +.Xr mac_set_fd 3 . +.It Dv CAP_MKDIRAT +Permit +.Xr mkdirat 2 . +.It Dv CAP_MKFIFOAT +Permit +.Xr mkfifoat 2 . +.It Dv CAP_MKNODAT +Permit +.Xr mknodat 2 . +.It Dv CAP_MMAP +Permit +.Xr mmap 2 +with the +.Dv PROT_NONE +protection. +.It Dv CAP_MMAP_R +Permit +.Xr mmap 2 +with the +.Dv PROT_READ +protection. +This also implies +.Dv CAP_READ +and +.Dv CAP_SEEK +rights. +.It Dv CAP_MMAP_W +Permit +.Xr mmap 2 +with the +.Dv PROT_WRITE +protection. +This also implies +.Dv CAP_WRITE +and +.Dv CAP_SEEK +rights. +.It Dv CAP_MMAP_X +Permit +.Xr mmap 2 +with the +.Dv PROT_EXEC +protection. +This also implies +.Dv CAP_SEEK +right. +.It Dv CAP_MMAP_RW +Implies +.Dv CAP_MMAP_R +and +.Dv CAP_MMAP_W . +.It Dv CAP_MMAP_RX +Implies +.Dv CAP_MMAP_R +and +.Dv CAP_MMAP_X . +.It Dv CAP_MMAP_WX +Implies +.Dv CAP_MMAP_W +and +.Dv CAP_MMAP_X . +.It Dv CAP_MMAP_RWX +Implies +.Dv CAP_MMAP_R , +.Dv CAP_MMAP_W +and +.Dv CAP_MMAP_X . +.It Dv CAP_PDGETPID +Permit +.Xr pdgetpid 2 . +.It Dv CAP_PDKILL +Permit +.Xr pdkill 2 . +.It Dv CAP_PDWAIT +Permit +.Xr pdwait4 2 . 
+.It Dv CAP_PEELOFF +Permit +.Xr sctp_peeloff 2 . +.\" XXXPJD: Not documented. +.It Dv CAP_POLL_EVENT +.\" XXXPJD: Not documented. +.It Dv CAP_POST_EVENT +.It Dv CAP_PREAD +Implies +.Dv CAP_SEEK +and +.Dv CAP_READ . +.It Dv CAP_PWRITE +Implies +.Dv CAP_SEEK +and +.Dv CAP_WRITE . +.It Dv CAP_READ +Allow +.Xr aio_read 2 , +.Xr openat 2 +with the +.Dv O_RDONLY No flag , +.Xr read 2 , +.Xr recv 2 , +.Xr recvfrom 2 , +.Xr recvmsg 2 +and related system calls. +.It Dv CAP_RECV +An alias to +.Dv CAP_READ . +.It Dv CAP_RENAMEAT +Permit +.Xr renameat 2 . +This right is required for the source directory descriptor. +.It Dv CAP_SEEK +Permit operations that seek on the file descriptor, such as +.Xr lseek 2 , +but also required for I/O system calls that can read or write at any position +in the file, such as +.Xr pread 2 +and +.Xr pwrite 2 . +.It Dv CAP_SEM_GETVALUE +Permit +.Xr sem_getvalue 3 . +.It Dv CAP_SEM_POST +Permit +.Xr sem_post 3 . +.It Dv CAP_SEM_WAIT +Permit +.Xr sem_wait 3 +and +.Xr sem_trywait 3 . +.It Dv CAP_SEND +An alias to +.Dv CAP_WRITE . +.It Dv CAP_SETSOCKOPT +Permit +.Xr setsockopt 2 ; +this controls various aspects of socket behavior and may affect binding, +connecting, and other behaviors with global scope. +.It Dv CAP_SHUTDOWN +Permit explicit +.Xr shutdown 2 ; +closing the socket will also generally shut down any connections on it. +.It Dv CAP_SYMLINKAT +Permit +.Xr symlinkat 2 . +.It Dv CAP_TTYHOOK +Allow configuration of TTY hooks, such as +.Xr snp 4 , +on the file descriptor. +.It Dv CAP_UNLINKAT +Permit +.Xr unlinkat 2 +and +.Xr renameat 2 . +This right is only required for +.Xr renameat 2 +on the destination directory descriptor if the destination object already +exists and will be removed by the rename. +.It Dv CAP_WRITE +Allow +.Xr aio_write 2 , +.Xr openat 2 +with +.Dv O_WRONLY +and +.Dv O_APPEND +flags, +.Xr send 2 , +.Xr sendmsg 2 , +.Xr sendto 2 , +.Xr write 2 , +and related system calls.
+For +.Xr sendto 2 +with a non-NULL connection address, +.Dv CAP_CONNECT +is also required. +For +.Xr openat 2 +with the +.Dv O_WRONLY +flag, but without the +.Dv O_APPEND +flag, +.Dv CAP_SEEK +is also required. +.El +.Sh RETURN VALUES +.Rv -std +.Sh ERRORS +.Fn cap_rights_limit +succeeds unless: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa fd +argument is not a valid active descriptor. +.It Bq Er EINVAL +An invalid right has been requested in +.Fa rights . +.It Bq Er ENOTCAPABLE +.Fa rights +contains requested rights not present in the current rights mask associated +with the given file descriptor. +.El +.Pp +.Fn cap_rights_get +succeeds unless: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa fd +argument is not a valid active descriptor. +.It Bq Er EFAULT +The +.Fa rightsp +argument points at an invalid address. +.El +.Sh SEE ALSO +.Xr accept 2 , +.Xr aio_fsync 2 , +.Xr aio_read 2 , +.Xr aio_write 2 , +.Xr bind 2 , +.Xr cap_enter 2 , +.Xr cap_fcntls_limit 2 , +.Xr cap_ioctls_limit 2 , +.Xr cap_rights_limit 2 , +.Xr connect 2 , +.Xr dup 2 , +.Xr dup2 2 , +.Xr extattr_delete_fd 2 , +.Xr extattr_get_fd 2 , +.Xr extattr_list_fd 2 , +.Xr extattr_set_fd 2 , +.Xr fchflags 2 , +.Xr fchown 2 , +.Xr fcntl 2 , +.Xr fexecve 2 , +.Xr fhopen 2 , +.Xr flock 2 , +.Xr fpathconf 2 , +.Xr fstat 2 , +.Xr fstatfs 2 , +.Xr fsync 2 , +.Xr ftruncate 2 , +.Xr futimes 2 , +.Xr getpeername 2 , +.Xr getsockname 2 , +.Xr getsockopt 2 , +.Xr ioctl 2 , +.Xr kevent 2 , +.Xr kqueue 2 , +.Xr linkat 2 , +.Xr listen 2 , +.Xr mmap 2 , +.Xr mq_open 2 , +.Xr open 2 , +.Xr openat 2 , +.Xr pdfork 2 , +.Xr pdgetpid 2 , +.Xr pdkill 2 , +.Xr pdwait4 2 , +.Xr pipe 2 , +.Xr poll 2 , +.Xr pread 2 , +.Xr pwrite 2 , +.Xr read 2 , +.Xr recv 2 , +.Xr recvfrom 2 , +.Xr recvmsg 2 , +.Xr renameat 2 , +.Xr sctp_peeloff 2 , +.Xr select 2 , +.Xr send 2 , +.Xr sendmsg 2 , +.Xr sendto 2 , +.Xr setsockopt 2 , +.Xr shm_open 2 , +.Xr shutdown 2 , +.Xr socket 2 , +.Xr socketpair 2 , +.Xr symlinkat 2 , +.Xr unlinkat 2 , +.Xr 
write 2 , +.Xr acl_delete_fd_np 3 , +.Xr acl_get_fd 3 , +.Xr acl_get_fd_np 3 , +.Xr acl_set_fd_np 3 , +.Xr cap_limitfd 3 , +.Xr libcapsicum 3 , +.Xr mac_get_fd 3 , +.Xr mac_set_fd 3 , +.Xr sem_getvalue 3 , +.Xr sem_post 3 , +.Xr sem_trywait 3 , +.Xr sem_wait 3 , +.Xr capsicum 4 , +.Xr snp 4 +.Sh HISTORY +Support for capabilities and capabilities mode was developed as part of the +.Tn TrustedBSD +Project. +.Pp +.Sh AUTHORS +This function was created by +.An Pawel Jakub Dawidek Aq pawel@dawidek.net +under sponsorship of the FreeBSD Foundation. +.Sh BUGS +This man page should list the set of permitted system calls more specifically +for each capability right. +.Pp +Capability rights sometimes have unclear indirect impacts, which should be +documented, or at least hinted at. diff --git a/lib/libc/sys/dup.2 b/lib/libc/sys/dup.2 index 7a07c21..6e1de20 100644 --- a/lib/libc/sys/dup.2 +++ b/lib/libc/sys/dup.2 @@ -115,11 +115,6 @@ and is a valid descriptor, then .Fn dup2 is successful, and does nothing. -.Pp -The related -.Xr cap_new 2 -system call allows file descriptors to be duplicated with restrictions on -their use. .Sh RETURN VALUES The value -1 is returned if an error occurs in either call. 
The external variable @@ -152,7 +147,6 @@ argument is negative or exceeds the maximum allowable descriptor number .El .Sh SEE ALSO .Xr accept 2 , -.Xr cap_new 2 , .Xr close 2 , .Xr fcntl 2 , .Xr getdtablesize 2 , diff --git a/lib/libprocstat/libprocstat.c b/lib/libprocstat/libprocstat.c index 9d9c111..f23ec96 100644 --- a/lib/libprocstat/libprocstat.c +++ b/lib/libprocstat/libprocstat.c @@ -600,7 +600,6 @@ kinfo_fflags2fst(int kfflags) } kfflags2fst[] = { { KF_FLAG_APPEND, PS_FST_FFLAG_APPEND }, { KF_FLAG_ASYNC, PS_FST_FFLAG_ASYNC }, - { KF_FLAG_CAPABILITY, PS_FST_FFLAG_CAPABILITY }, { KF_FLAG_CREAT, PS_FST_FFLAG_CREAT }, { KF_FLAG_DIRECT, PS_FST_FFLAG_DIRECT }, { KF_FLAG_EXCL, PS_FST_FFLAG_EXCL }, diff --git a/lib/libprocstat/libprocstat.h b/lib/libprocstat/libprocstat.h index 662ea37..1c55aa7 100644 --- a/lib/libprocstat/libprocstat.h +++ b/lib/libprocstat/libprocstat.h @@ -88,7 +88,6 @@ #define PS_FST_FFLAG_DIRECT 0x1000 #define PS_FST_FFLAG_EXEC 0x2000 #define PS_FST_FFLAG_HASLOCK 0x4000 -#define PS_FST_FFLAG_CAPABILITY 0x8000 struct procstat; struct filestat { diff --git a/sys/bsm/audit_kevents.h b/sys/bsm/audit_kevents.h index d227981..9d71fa2 100644 --- a/sys/bsm/audit_kevents.h +++ b/sys/bsm/audit_kevents.h @@ -588,7 +588,7 @@ #define AUE_OPENAT 43184 /* FreeBSD. */ #define AUE_POSIX_OPENPT 43185 /* FreeBSD. */ #define AUE_CAP_NEW 43186 /* TrustedBSD. */ -#define AUE_CAP_GETRIGHTS 43187 /* TrustedBSD. */ +#define AUE_CAP_RIGHTS_GET 43187 /* TrustedBSD. */ #define AUE_CAP_ENTER 43188 /* TrustedBSD. */ #define AUE_CAP_GETMODE 43189 /* TrustedBSD. */ #define AUE_POSIX_SPAWN 43190 /* Darwin. */ @@ -603,6 +603,11 @@ #define AUE_PDGETPID 43199 /* FreeBSD. */ #define AUE_PDWAIT 43200 /* FreeBSD. */ #define AUE_WAIT6 43201 /* FreeBSD. */ +#define AUE_CAP_RIGHTS_LIMIT 43202 /* TrustedBSD. */ +#define AUE_CAP_IOCTLS_LIMIT 43203 /* TrustedBSD. */ +#define AUE_CAP_IOCTLS_GET 43204 /* TrustedBSD. */ +#define AUE_CAP_FCNTLS_LIMIT 43205 /* TrustedBSD. 
*/ +#define AUE_CAP_FCNTLS_GET 43206 /* TrustedBSD. */ /* * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the diff --git a/sys/cddl/compat/opensolaris/sys/file.h b/sys/cddl/compat/opensolaris/sys/file.h index 7a3df36..0b8f875 100644 --- a/sys/cddl/compat/opensolaris/sys/file.h +++ b/sys/cddl/compat/opensolaris/sys/file.h @@ -39,15 +39,11 @@ typedef struct file file_t; #include static __inline file_t * -getf(int fd) +getf(int fd, cap_rights_t rights) { struct file *fp; - /* - * We wouldn't need all of these rights on every invocation - * if we had more information about intent. - */ - if (fget(curthread, fd, CAP_READ | CAP_WRITE | CAP_SEEK, &fp) == 0) + if (fget(curthread, fd, rights, &fp) == 0) return (fp); return (NULL); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c index af0c9f7..fce4bb5 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c @@ -3822,7 +3822,7 @@ zfs_ioc_recv(zfs_cmd_t *zc) return (error); fd = zc->zc_cookie; - fp = getf(fd); + fp = getf(fd, CAP_PREAD); if (fp == NULL) { nvlist_free(props); return (EBADF); @@ -4079,7 +4079,7 @@ zfs_ioc_send(zfs_cmd_t *zc) error = dmu_send_estimate(tosnap, fromsnap, zc->zc_obj, &zc->zc_objset_type); } else { - file_t *fp = getf(zc->zc_cookie); + file_t *fp = getf(zc->zc_cookie, CAP_WRITE); if (fp == NULL) { dsl_dataset_rele(ds, FTAG); if (dsfrom) @@ -4675,7 +4675,7 @@ zfs_ioc_diff(zfs_cmd_t *zc) return (error); } - fp = getf(zc->zc_cookie); + fp = getf(zc->zc_cookie, CAP_WRITE); if (fp == NULL) { dmu_objset_rele(fromsnap, FTAG); dmu_objset_rele(tosnap, FTAG); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c index ca0acfd..c12826f 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c +++ 
b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c @@ -124,7 +124,7 @@ zfs_onexit_fd_hold(int fd, minor_t *minorp) void *data; int error; - fp = getf(fd); + fp = getf(fd, CAP_NONE); if (fp == NULL) return (EBADF); diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master index 4106447..6552d13 100644 --- a/sys/compat/freebsd32/syscalls.master +++ b/sys/compat/freebsd32/syscalls.master @@ -963,7 +963,7 @@ struct shmid_ds32 *buf); } 513 AUE_LPATHCONF NOPROTO { int lpathconf(char *path, int name); } 514 AUE_CAP_NEW NOPROTO { int cap_new(int fd, uint64_t rights); } -515 AUE_CAP_GETRIGHTS NOPROTO { int cap_getrights(int fd, \ +515 AUE_CAP_RIGHTS_GET NOPROTO { int cap_rights_get(int fd, \ uint64_t *rightsp); } 516 AUE_CAP_ENTER NOPROTO { int cap_enter(void); } 517 AUE_CAP_GETMODE NOPROTO { int cap_getmode(u_int *modep); } @@ -1005,3 +1005,13 @@ struct wrusage32 *wrusage, \ siginfo_t *info); } +533 AUE_CAP_RIGHTS_LIMIT NOPROTO { int cap_rights_limit(int fd, \ + uint64_t rights); } +534 AUE_CAP_IOCTLS_LIMIT NOPROTO { int cap_ioctls_limit(int fd, \ + const u_long *cmds, size_t ncmds); } +535 AUE_CAP_IOCTLS_GET NOPROTO { ssize_t cap_ioctls_get(int fd, \ + u_long *cmds, size_t maxcmds); } +536 AUE_CAP_FCNTLS_LIMIT NOPROTO { int cap_fcntls_limit(int fd, \ + uint32_t fcntlrights); } +537 AUE_CAP_FCNTLS_GET NOPROTO { int cap_fcntls_get(int fd, \ + uint32_t *fcntlrightsp); } diff --git a/sys/compat/linux/linux_file.c b/sys/compat/linux/linux_file.c index 0318a5c..346d178 100644 --- a/sys/compat/linux/linux_file.c +++ b/sys/compat/linux/linux_file.c @@ -154,6 +154,7 @@ linux_common_open(struct thread *td, int dirfd, char *path, int l_flags, int mod SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) { PROC_UNLOCK(p); sx_unlock(&proctree_lock); + /* XXXPJD: Verify if TIOCSCTTY is allowed. 
*/ if (fp->f_type == DTYPE_VNODE) (void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0, td->td_ucred, td); @@ -1038,11 +1039,11 @@ linux_pread(td, uap) error = sys_pread(td, &bsd); if (error == 0) { - /* This seems to violate POSIX but linux does it */ - if ((error = fgetvp(td, uap->fd, CAP_READ, &vp)) != 0) - return (error); + /* This seems to violate POSIX but linux does it */ + if ((error = fgetvp(td, uap->fd, CAP_PREAD, &vp)) != 0) + return (error); if (vp->v_type == VDIR) { - vrele(vp); + vrele(vp); return (EISDIR); } vrele(vp); diff --git a/sys/compat/svr4/svr4_fcntl.c b/sys/compat/svr4/svr4_fcntl.c index b9d3ace..86fab78 100644 --- a/sys/compat/svr4/svr4_fcntl.c +++ b/sys/compat/svr4/svr4_fcntl.c @@ -265,14 +265,14 @@ fd_revoke(td, fd) /* * If we ever want to support Capsicum on SVR4 processes (unlikely) * or FreeBSD grows a native frevoke() (more likely), we will need a - * CAP_REVOKE here. + * CAP_FREVOKE here. * - * In the meantime, use CAP_MASK_VALID: if a SVR4 process wants to + * In the meantime, use CAP_ALL: if a SVR4 process wants to * do an frevoke(), it needs to do it on either a regular file * descriptor or a fully-privileged capability (which is effectively * the same as a non-capability-restricted file descriptor). 
*/ - if ((error = fgetvp(td, fd, CAP_MASK_VALID, &vp)) != 0) + if ((error = fgetvp(td, fd, CAP_ALL, &vp)) != 0) return (error); if (vp->v_type != VCHR && vp->v_type != VBLK) { diff --git a/sys/compat/svr4/svr4_filio.c b/sys/compat/svr4/svr4_filio.c index 967169b..0fbba07 100644 --- a/sys/compat/svr4/svr4_filio.c +++ b/sys/compat/svr4/svr4_filio.c @@ -197,22 +197,24 @@ svr4_fil_ioctl(fp, td, retval, fd, cmd, data) u_long cmd; caddr_t data; { - int error; - int num; struct filedesc *fdp = td->td_proc->p_fd; + struct filedescent *fde; + int error, num; *retval = 0; switch (cmd) { case SVR4_FIOCLEX: FILEDESC_XLOCK(fdp); - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; + fde = &fdp->fd_ofiles[fd]; + fde->fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); return 0; case SVR4_FIONCLEX: FILEDESC_XLOCK(fdp); - fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; + fde = &fdp->fd_ofiles[fd]; + fde->fde_flags &= ~UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); return 0; diff --git a/sys/compat/svr4/svr4_misc.c b/sys/compat/svr4/svr4_misc.c index d6bc4eb..0cfaeae 100644 --- a/sys/compat/svr4/svr4_misc.c +++ b/sys/compat/svr4/svr4_misc.c @@ -247,10 +247,8 @@ svr4_sys_getdents64(td, uap) DPRINTF(("svr4_sys_getdents64(%d, *, %d)\n", uap->fd, uap->nbytes)); - if ((error = getvnode(td->td_proc->p_fd, uap->fd, - CAP_READ | CAP_SEEK, &fp)) != 0) { + if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ, &fp)) != 0) return (error); - } if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); @@ -426,8 +424,7 @@ svr4_sys_getdents(td, uap) if (uap->nbytes < 0) return (EINVAL); - if ((error = getvnode(td->td_proc->p_fd, uap->fd, - CAP_READ | CAP_SEEK, &fp)) != 0) + if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ, &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { diff --git a/sys/compat/svr4/svr4_stream.c b/sys/compat/svr4/svr4_stream.c index 27014e3..1c7e83e 100644 --- a/sys/compat/svr4/svr4_stream.c +++ b/sys/compat/svr4/svr4_stream.c @@ -1449,7 +1449,7 @@ svr4_sys_putmsg(td, uap) struct file *fp; int 
error; - if ((error = fget(td, uap->fd, CAP_WRITE, &fp)) != 0) { + if ((error = fget(td, uap->fd, CAP_SEND, &fp)) != 0) { #ifdef DEBUG_SVR4 uprintf("putmsg: bad fp\n"); #endif @@ -1621,7 +1621,7 @@ svr4_sys_getmsg(td, uap) struct file *fp; int error; - if ((error = fget(td, uap->fd, CAP_READ, &fp)) != 0) { + if ((error = fget(td, uap->fd, CAP_RECV, &fp)) != 0) { #ifdef DEBUG_SVR4 uprintf("getmsg: bad fp\n"); #endif diff --git a/sys/dev/iscsi/initiator/iscsi.c b/sys/dev/iscsi/initiator/iscsi.c index a93a685..3737b7f 100644 --- a/sys/dev/iscsi/initiator/iscsi.c +++ b/sys/dev/iscsi/initiator/iscsi.c @@ -387,11 +387,11 @@ i_setsoc(isc_session_t *sp, int fd, struct thread *td) if(sp->soc != NULL) isc_stop_receiver(sp); - error = fget(td, fd, CAP_SOCK_ALL, &sp->fp); + error = fget(td, fd, CAP_SOCK_CLIENT, &sp->fp); if(error) return error; - if((error = fgetsock(td, fd, CAP_SOCK_ALL, &sp->soc, 0)) == 0) { + if((error = fgetsock(td, fd, CAP_SOCK_CLIENT, &sp->soc, 0)) == 0) { sp->td = td; isc_start_receiver(sp); } diff --git a/sys/fs/fdescfs/fdesc_vfsops.c b/sys/fs/fdescfs/fdesc_vfsops.c index c3dbccf..cb5e3c0 100644 --- a/sys/fs/fdescfs/fdesc_vfsops.c +++ b/sys/fs/fdescfs/fdesc_vfsops.c @@ -205,7 +205,7 @@ fdesc_statfs(mp, sbp) last = min(fdp->fd_nfiles, lim); freefd = 0; for (i = fdp->fd_freefile; i < last; i++) - if (fdp->fd_ofiles[i] == NULL) + if (fdp->fd_ofiles[i].fde_file == NULL) freefd++; /* diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c index 3c4f44d..7923fc6 100644 --- a/sys/fs/fdescfs/fdesc_vnops.c +++ b/sys/fs/fdescfs/fdesc_vnops.c @@ -534,7 +534,7 @@ fdesc_readdir(ap) dp->d_type = DT_DIR; break; default: - if (fdp->fd_ofiles[fcnt] == NULL) + if (fdp->fd_ofiles[fcnt].fde_file == NULL) break; dp->d_namlen = sprintf(dp->d_name, "%d", fcnt); dp->d_reclen = UIO_MX; diff --git a/sys/fs/nfs/nfsdport.h b/sys/fs/nfs/nfsdport.h index 529ada2..a09a6dd 100644 --- a/sys/fs/nfs/nfsdport.h +++ b/sys/fs/nfs/nfsdport.h @@ -94,8 +94,6 @@ struct 
nfsexstuff { #define NFSFPCRED(f) ((f)->f_cred) #define NFSFPFLAG(f) ((f)->f_flag) -int fp_getfvp(NFSPROC_T *, int, struct file **, struct vnode **); - #define NFSNAMEICNDSET(n, c, o, f) do { \ (n)->cn_cred = (c); \ (n)->cn_nameiop = (o); \ diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c index 1179eb5..a07a67f 100644 --- a/sys/fs/nfsclient/nfs_clport.c +++ b/sys/fs/nfsclient/nfs_clport.c @@ -1215,7 +1215,7 @@ nfssvc_nfscl(struct thread *td, struct nfssvc_args *uap) * pretend that we need them all. It is better to be too * careful than too reckless. */ - if ((error = fget(td, nfscbdarg.sock, CAP_SOCK_ALL, &fp)) + if ((error = fget(td, nfscbdarg.sock, CAP_SOCK_CLIENT, &fp)) != 0) { return (error); } diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c index 1731c72..ef98e2b 100644 --- a/sys/fs/nfsserver/nfs_nfsdport.c +++ b/sys/fs/nfsserver/nfs_nfsdport.c @@ -2767,7 +2767,7 @@ out: /* * glue for fp. */ -int +static int fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp) { struct filedesc *fdp; @@ -2775,8 +2775,8 @@ fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp) int error = 0; fdp = p->td_proc->p_fd; - if (fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL) { + if (fd < 0 || fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd].fde_file) == NULL) { error = EBADF; goto out; } @@ -3041,7 +3041,7 @@ nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap) * pretend that we need them all. It is better to be too * careful than too reckless. 
*/ - if ((error = fget(td, sockarg.sock, CAP_SOCK_ALL, &fp)) != 0) + if ((error = fget(td, sockarg.sock, CAP_SOCK_SERVER, &fp)) != 0) goto out; if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, td); diff --git a/sys/i386/ibcs2/ibcs2_misc.c b/sys/i386/ibcs2/ibcs2_misc.c index 0692122..9f382aa 100644 --- a/sys/i386/ibcs2/ibcs2_misc.c +++ b/sys/i386/ibcs2/ibcs2_misc.c @@ -337,8 +337,7 @@ ibcs2_getdents(td, uap) #define BSD_DIRENT(cp) ((struct dirent *)(cp)) #define IBCS2_RECLEN(reclen) (reclen + sizeof(u_short)) - if ((error = getvnode(td->td_proc->p_fd, uap->fd, - CAP_READ | CAP_SEEK, &fp)) != 0) + if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ, &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); @@ -491,8 +490,8 @@ ibcs2_read(td, uap) u_long *cookies = NULL, *cookiep; int ncookies; - if ((error = getvnode(td->td_proc->p_fd, uap->fd, - CAP_READ | CAP_SEEK, &fp)) != 0) { + if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ, + &fp)) != 0) { if (error == EINVAL) return sys_read(td, (struct read_args *)uap); else diff --git a/sys/kern/capabilities.conf b/sys/kern/capabilities.conf index 11aad16..3c08782 100644 --- a/sys/kern/capabilities.conf +++ b/sys/kern/capabilities.conf @@ -110,9 +110,14 @@ aio_write ## Allow capability mode and capability system calls. ## cap_enter +cap_fcntls_get +cap_fcntls_limit cap_getmode -cap_getrights +cap_ioctls_get +cap_ioctls_limit cap_new +cap_rights_get +cap_rights_limit ## ## Allow read-only clock operations. @@ -239,7 +244,7 @@ getcontext ## Allow directory I/O on a file descriptor, subject to capability rights. ## Originally we had separate capabilities for directory-specific read ## operations, but on BSD we allow reading the raw directory data, so we just -## rely on CAP_READ and CAP_SEEK now. +## rely on CAP_READ now. 
## getdents getdirentries @@ -317,13 +322,10 @@ gettimeofday getuid ## -## Disallow ioctl(2) for now, as frequently ioctl(2) operations have global -## scope, but this is a tricky one as it is also required for tty control. -## We do have a capability right for this operation. +## Allow ioctl(2), which hopefully will be limited by applications only to +## required commands with cap_ioctls_limit(2) syscall. ## -## XXXRW: This needs to be revisited. -## -#ioctl +ioctl ## ## Allow querying current process credential state. diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index acdea40..b146bab 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -110,15 +110,8 @@ MALLOC_DECLARE(M_FADVISE); static uma_zone_t file_zone; -/* Flags for do_dup() */ -#define DUP_FIXED 0x1 /* Force fixed allocation */ -#define DUP_FCNTL 0x2 /* fcntl()-style errors */ -#define DUP_CLOEXEC 0x4 /* Atomically set FD_CLOEXEC. */ - static int closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, int holdleaders); -static int do_dup(struct thread *td, int flags, int old, int new, - register_t *retval); static int fd_first_free(struct filedesc *fdp, int low, int size); static int fd_last_used(struct filedesc *fdp, int size); static void fdgrowtable(struct filedesc *fdp, int nfd); @@ -166,7 +159,7 @@ static int fill_vnode_info(struct vnode *vp, struct kinfo_file *kif); * the process exits. 
*/ struct freetable { - struct file **ft_table; + struct filedescent *ft_table; SLIST_ENTRY(freetable) ft_next; }; @@ -177,8 +170,7 @@ struct freetable { struct filedesc0 { struct filedesc fd_fd; SLIST_HEAD(, freetable) fd_free; - struct file *fd_dfiles[NDFILE]; - char fd_dfileflags[NDFILE]; + struct filedescent fd_dfiles[NDFILE]; NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)]; }; @@ -284,7 +276,8 @@ fdunused(struct filedesc *fdp, int fd) FILEDESC_XLOCK_ASSERT(fdp); KASSERT(fdisused(fdp, fd), ("fd=%d is already unused", fd)); - KASSERT(fdp->fd_ofiles[fd] == NULL, ("fd=%d is still in use", fd)); + KASSERT(fdp->fd_ofiles[fd].fde_file == NULL, + ("fd=%d is still in use", fd)); fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd); if (fd < fdp->fd_freefile) @@ -294,6 +287,20 @@ fdunused(struct filedesc *fdp, int fd) } /* + * Free a file descriptor. + */ +static inline void +fdfree(struct filedesc *fdp, int fd) +{ + struct filedescent *fde; + + fde = &fdp->fd_ofiles[fd]; + filecaps_free(&fde->fde_caps); + bzero(fde, sizeof(*fde)); + fdunused(fdp, fd); +} + +/* * System calls on descriptors. 
*/ #ifndef _SYS_SYSPROTO_H_ @@ -434,36 +441,14 @@ sys_fcntl(struct thread *td, struct fcntl_args *uap) return (error); } -static inline int -fdunwrap(int fd, cap_rights_t rights, struct filedesc *fdp, struct file **fpp) -{ - - FILEDESC_LOCK_ASSERT(fdp); - - *fpp = fget_locked(fdp, fd); - if (*fpp == NULL) - return (EBADF); - -#ifdef CAPABILITIES - if ((*fpp)->f_type == DTYPE_CAPABILITY) { - int err = cap_funwrap(*fpp, rights, fpp); - if (err != 0) { - *fpp = NULL; - return (err); - } - } -#endif /* CAPABILITIES */ - return (0); -} - int kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) { struct filedesc *fdp; struct flock *flp; - struct file *fp; + struct file *fp, *fp2; + struct filedescent *fde; struct proc *p; - char *pop; struct vnode *vp; int error, flg, tmp; u_int old, new; @@ -505,8 +490,9 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) error = EBADF; break; } - pop = &fdp->fd_ofileflags[fd]; - td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0; + fde = &fdp->fd_ofiles[fd]; + td->td_retval[0] = + (fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0; FILEDESC_SUNLOCK(fdp); break; @@ -517,32 +503,24 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) error = EBADF; break; } - pop = &fdp->fd_ofileflags[fd]; - *pop = (*pop &~ UF_EXCLOSE) | + fde = &fdp->fd_ofiles[fd]; + fde->fde_flags = (fde->fde_flags & ~UF_EXCLOSE) | (arg & FD_CLOEXEC ? 
UF_EXCLOSE : 0); FILEDESC_XUNLOCK(fdp); break; case F_GETFL: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FCNTL, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FCNTL, F_GETFL, &fp, NULL); + if (error != 0) break; - } td->td_retval[0] = OFLAGS(fp->f_flag); - FILEDESC_SUNLOCK(fdp); + fdrop(fp, td); break; case F_SETFL: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FCNTL, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FCNTL, F_SETFL, &fp, NULL); + if (error != 0) break; - } - fhold(fp); - FILEDESC_SUNLOCK(fdp); do { tmp = flg = fp->f_flag; tmp &= ~FCNTLFLAGS; @@ -550,7 +528,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) } while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0); tmp = fp->f_flag & FNONBLOCK; error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); - if (error) { + if (error != 0) { fdrop(fp, td); break; } @@ -567,14 +545,9 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) break; case F_GETOWN: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FCNTL, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FCNTL, F_GETOWN, &fp, NULL); + if (error != 0) break; - } - fhold(fp); - FILEDESC_SUNLOCK(fdp); error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td); if (error == 0) td->td_retval[0] = tmp; @@ -582,14 +555,9 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) break; case F_SETOWN: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FCNTL, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FCNTL, F_SETOWN, &fp, NULL); + if (error != 0) break; - } - fhold(fp); - FILEDESC_SUNLOCK(fdp); tmp = arg; error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td); fdrop(fp, td); @@ -608,17 +576,15 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) case F_SETLK: do_setlk: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FLOCK, fdp, &fp); 
- if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FLOCK, 0, &fp, NULL); + if (error != 0) break; - } if (fp->f_type != DTYPE_VNODE) { - FILEDESC_SUNLOCK(fdp); error = EBADF; + fdrop(fp, td); break; } + flp = (struct flock *)arg; if (flp->l_whence == SEEK_CUR) { foffset = foffset_get(fp); @@ -627,16 +593,12 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) foffset > OFF_MAX - flp->l_start)) { FILEDESC_SUNLOCK(fdp); error = EOVERFLOW; + fdrop(fp, td); break; } flp->l_start += foffset; } - /* - * VOP_ADVLOCK() may block. - */ - fhold(fp); - FILEDESC_SUNLOCK(fdp); vp = fp->f_vnode; switch (flp->l_type) { case F_RDLCK: @@ -703,37 +665,37 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) * that the closing thread was a bit slower and that the * advisory lock succeeded before the close. */ - FILEDESC_SLOCK(fdp); - if (fget_locked(fdp, fd) != fp) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, 0, 0, &fp2, NULL); + if (error != 0) { + fdrop(fp, td); + break; + } + if (fp != fp2) { flp->l_whence = SEEK_SET; flp->l_start = 0; flp->l_len = 0; flp->l_type = F_UNLCK; (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, flp, F_POSIX); - } else - FILEDESC_SUNLOCK(fdp); + } fdrop(fp, td); + fdrop(fp2, td); break; case F_GETLK: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FLOCK, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FLOCK, 0, &fp, NULL); + if (error != 0) break; - } if (fp->f_type != DTYPE_VNODE) { - FILEDESC_SUNLOCK(fdp); error = EBADF; + fdrop(fp, td); break; } flp = (struct flock *)arg; if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK && flp->l_type != F_UNLCK) { - FILEDESC_SUNLOCK(fdp); error = EINVAL; + fdrop(fp, td); break; } if (flp->l_whence == SEEK_CUR) { @@ -744,15 +706,11 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) foffset < OFF_MIN - flp->l_start)) { FILEDESC_SUNLOCK(fdp); error = EOVERFLOW; + fdrop(fp, td); break; } 
flp->l_start += foffset; } - /* - * VOP_ADVLOCK() may block. - */ - fhold(fp); - FILEDESC_SUNLOCK(fdp); vp = fp->f_vnode; error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp, F_POSIX); @@ -763,19 +721,14 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) arg = arg ? 128 * 1024: 0; /* FALLTHROUGH */ case F_READAHEAD: - FILEDESC_SLOCK(fdp); - if ((fp = fget_locked(fdp, fd)) == NULL) { - FILEDESC_SUNLOCK(fdp); - error = EBADF; + error = fget_unlocked(fdp, fd, 0, 0, &fp, NULL); + if (error != 0) break; - } if (fp->f_type != DTYPE_VNODE) { - FILEDESC_SUNLOCK(fdp); + fdrop(fp, td); error = EBADF; break; } - fhold(fp); - FILEDESC_SUNLOCK(fdp); if (arg >= 0) { vp = fp->f_vnode; error = vn_lock(vp, LK_SHARED); @@ -809,11 +762,12 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) /* * Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD). */ -static int +int do_dup(struct thread *td, int flags, int old, int new, register_t *retval) { struct filedesc *fdp; + struct filedescent *oldfde, *newfde; struct proc *p; struct file *fp; struct file *delfp; @@ -842,14 +796,15 @@ do_dup(struct thread *td, int flags, int old, int new, FILEDESC_XUNLOCK(fdp); return (EBADF); } + oldfde = &fdp->fd_ofiles[old]; if (flags & DUP_FIXED && old == new) { *retval = new; if (flags & DUP_CLOEXEC) - fdp->fd_ofileflags[new] |= UF_EXCLOSE; + fdp->fd_ofiles[new].fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); return (0); } - fp = fdp->fd_ofiles[old]; + fp = oldfde->fde_file; fhold(fp); /* @@ -878,8 +833,10 @@ do_dup(struct thread *td, int flags, int old, int new, } #endif fdgrowtable(fdp, new + 1); + oldfde = &fdp->fd_ofiles[old]; } - if (fdp->fd_ofiles[new] == NULL) + newfde = &fdp->fd_ofiles[new]; + if (newfde->fde_file == NULL) fdused(fdp, new); } else { if ((error = fdalloc(td, new, &new)) != 0) { @@ -887,20 +844,23 @@ do_dup(struct thread *td, int flags, int old, int new, fdrop(fp, td); return (error); } + newfde = &fdp->fd_ofiles[new]; } - KASSERT(fp == 
fdp->fd_ofiles[old], ("old fd has been modified")); + KASSERT(fp == oldfde->fde_file, ("old fd has been modified")); KASSERT(old != new, ("new fd is same as old")); - delfp = fdp->fd_ofiles[new]; + delfp = newfde->fde_file; + /* * Duplicate the source descriptor. */ - fdp->fd_ofiles[new] = fp; + *newfde = *oldfde; + filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps); if ((flags & DUP_CLOEXEC) != 0) - fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] | UF_EXCLOSE; + newfde->fde_flags = oldfde->fde_flags | UF_EXCLOSE; else - fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE; + newfde->fde_flags = oldfde->fde_flags & ~UF_EXCLOSE; if (new > fdp->fd_lastfile) fdp->fd_lastfile = new; *retval = new; @@ -1141,7 +1101,6 @@ static int closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, int holdleaders) { - struct file *fp_object; int error; FILEDESC_XLOCK_ASSERT(fdp); @@ -1167,12 +1126,10 @@ closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, knote_fdclose(td, fd); /* - * When we're closing an fd with a capability, we need to notify - * mqueue if the underlying object is of type mqueue. + * We need to notify mqueue if the object is of type mqueue. */ - (void)cap_funwrap(fp, 0, &fp_object); - if (fp_object->f_type == DTYPE_MQUEUE) - mq_fdclose(td, fd, fp_object); + if (fp->f_type == DTYPE_MQUEUE) + mq_fdclose(td, fd, fp); FILEDESC_XUNLOCK(fdp); error = closef(fp, td); @@ -1224,9 +1181,7 @@ kern_close(td, fd) FILEDESC_XUNLOCK(fdp); return (EBADF); } - fdp->fd_ofiles[fd] = NULL; - fdp->fd_ofileflags[fd] = 0; - fdunused(fdp, fd); + fdfree(fdp, fd); /* closefp() drops the FILEDESC lock for us. 
*/ return (closefp(fdp, fd, fp, td, 1)); @@ -1258,7 +1213,7 @@ sys_closefrom(struct thread *td, struct closefrom_args *uap) uap->lowfd = 0; FILEDESC_SLOCK(fdp); for (fd = uap->lowfd; fd < fdp->fd_nfiles; fd++) { - if (fdp->fd_ofiles[fd] != NULL) { + if (fdp->fd_ofiles[fd].fde_file != NULL) { FILEDESC_SUNLOCK(fdp); (void)kern_close(td, fd); FILEDESC_SLOCK(fdp); @@ -1410,6 +1365,91 @@ out: } /* + * Initialize filecaps structure. + */ +void +filecaps_init(struct filecaps *fcaps) +{ + + bzero(fcaps, sizeof(*fcaps)); + fcaps->fc_nioctls = -1; +} + +/* + * Copy filecaps structure allocating memory for ioctls array if needed. + */ +void +filecaps_copy(const struct filecaps *src, struct filecaps *dst) +{ + size_t size; + + *dst = *src; + if (src->fc_ioctls != NULL) { + KASSERT(src->fc_nioctls > 0, + ("fc_ioctls != NULL, but fc_nioctls=%hd", src->fc_nioctls)); + + size = sizeof(src->fc_ioctls[0]) * src->fc_nioctls; + dst->fc_ioctls = malloc(size, M_TEMP, M_WAITOK); + bcopy(src->fc_ioctls, dst->fc_ioctls, size); + } +} + +/* + * Move filecaps structure to the new place and clear the old place. + */ +static void +filecaps_move(struct filecaps *src, struct filecaps *dst) +{ + + *dst = *src; + bzero(src, sizeof(*src)); +} + +/* + * Fill the given filecaps structure with full rights. + */ +static void +filecaps_fill(struct filecaps *fcaps) +{ + + fcaps->fc_rights = CAP_ALL; + fcaps->fc_ioctls = NULL; + fcaps->fc_nioctls = -1; + fcaps->fc_fcntls = CAP_FCNTL_ALL; +} + +/* + * Free memory allocated within filecaps structure. + */ +void +filecaps_free(struct filecaps *fcaps) +{ + + free(fcaps->fc_ioctls, M_TEMP); + bzero(fcaps, sizeof(*fcaps)); +} + +/* + * Validate the given filecaps structure. 
+ */ +static void +filecaps_validate(const struct filecaps *fcaps, const char *func) +{ + + KASSERT((fcaps->fc_rights & ~CAP_MASK_VALID) == 0, + ("%s: invalid rights", func)); + KASSERT((fcaps->fc_fcntls & ~CAP_FCNTL_ALL) == 0, + ("%s: invalid fcntls", func)); + KASSERT(fcaps->fc_fcntls == 0 || (fcaps->fc_rights & CAP_FCNTL) != 0, + ("%s: fcntls without CAP_FCNTL", func)); + KASSERT(fcaps->fc_ioctls != NULL ? fcaps->fc_nioctls > 0 : + (fcaps->fc_nioctls == -1 || fcaps->fc_nioctls == 0), + ("%s: invalid ioctls", func)); + KASSERT(fcaps->fc_nioctls == 0 || (fcaps->fc_rights & CAP_IOCTL) != 0, + ("%s: ioctls without CAP_IOCTL", func)); +} + +/* * Grow the file table to accomodate (at least) nfd descriptors. */ static void @@ -1417,9 +1457,8 @@ fdgrowtable(struct filedesc *fdp, int nfd) { struct filedesc0 *fdp0; struct freetable *ft; - struct file **ntable; - struct file **otable; - char *nfileflags, *ofileflags; + struct filedescent *ntable; + struct filedescent *otable; int nnfiles, onfiles; NDSLOTTYPE *nmap, *omap; @@ -1430,7 +1469,6 @@ fdgrowtable(struct filedesc *fdp, int nfd) /* save old values */ onfiles = fdp->fd_nfiles; otable = fdp->fd_ofiles; - ofileflags = fdp->fd_ofileflags; omap = fdp->fd_map; /* compute the size of the new table */ @@ -1440,27 +1478,25 @@ fdgrowtable(struct filedesc *fdp, int nfd) return; /* - * Allocate a new table and map. We need enough space for a) the - * file entries themselves, b) the file flags, and c) the struct - * freetable we will use when we decommission the table and place - * it on the freelist. We place the struct freetable in the - * middle so we don't have to worry about padding. + * Allocate a new table and map. We need enough space for the + * file entries themselves and the struct freetable we will use + * when we decommission the table and place it on the freelist. + * We place the struct freetable in the middle so we don't have + * to worry about padding. 
*/ - ntable = malloc(nnfiles * sizeof(*ntable) + sizeof(struct freetable) + - nnfiles * sizeof(*nfileflags), M_FILEDESC, M_ZERO | M_WAITOK); - nfileflags = (char *)&ntable[nnfiles] + sizeof(struct freetable); + ntable = malloc(nnfiles * sizeof(ntable[0]) + sizeof(struct freetable), + M_FILEDESC, M_ZERO | M_WAITOK); nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE, M_FILEDESC, M_ZERO | M_WAITOK); /* copy the old data over and point at the new tables */ memcpy(ntable, otable, onfiles * sizeof(*otable)); - memcpy(nfileflags, ofileflags, onfiles * sizeof(*ofileflags)); memcpy(nmap, omap, NDSLOTS(onfiles) * sizeof(*omap)); /* update the pointers and counters */ fdp->fd_nfiles = nnfiles; + memcpy(ntable, otable, onfiles * sizeof(ntable[0])); fdp->fd_ofiles = ntable; - fdp->fd_ofileflags = nfileflags; fdp->fd_map = nmap; /* @@ -1536,8 +1572,9 @@ fdalloc(struct thread *td, int minfd, int *result) ("invalid descriptor %d", fd)); KASSERT(!fdisused(fdp, fd), ("fd_first_free() returned non-free descriptor")); - KASSERT(fdp->fd_ofiles[fd] == NULL, ("file descriptor isn't free")); - KASSERT(fdp->fd_ofileflags[fd] == 0, ("file flags are set")); + KASSERT(fdp->fd_ofiles[fd].fde_file == NULL, + ("file descriptor isn't free")); + KASSERT(fdp->fd_ofiles[fd].fde_flags == 0, ("file flags are set")); fdused(fdp, fd); *result = fd; return (0); @@ -1568,7 +1605,7 @@ fdavail(struct thread *td, int n) return (1); last = min(fdp->fd_nfiles, lim); for (i = fdp->fd_freefile; i < last; i++) { - if (fdp->fd_ofiles[i] == NULL && --n <= 0) + if (fdp->fd_ofiles[i].fde_file == NULL && --n <= 0) return (1); } return (0); @@ -1591,7 +1628,7 @@ falloc(struct thread *td, struct file **resultfp, int *resultfd, int flags) if (error) return (error); /* no reference held on error */ - error = finstall(td, fp, &fd, flags); + error = finstall(td, fp, &fd, flags, NULL); if (error) { fdrop(fp, td); /* one reference (fp only) */ return (error); @@ -1645,13 +1682,17 @@ falloc_noinstall(struct thread *td, struct file 
**resultfp) * Install a file in a file descriptor table. */ int -finstall(struct thread *td, struct file *fp, int *fd, int flags) +finstall(struct thread *td, struct file *fp, int *fd, int flags, + struct filecaps *fcaps) { struct filedesc *fdp = td->td_proc->p_fd; + struct filedescent *fde; int error; KASSERT(fd != NULL, ("%s: fd == NULL", __func__)); KASSERT(fp != NULL, ("%s: fp == NULL", __func__)); + if (fcaps != NULL) + filecaps_validate(fcaps, __func__); FILEDESC_XLOCK(fdp); if ((error = fdalloc(td, 0, fd))) { @@ -1659,9 +1700,14 @@ finstall(struct thread *td, struct file *fp, int *fd, int flags) return (error); } fhold(fp); - fdp->fd_ofiles[*fd] = fp; + fde = &fdp->fd_ofiles[*fd]; + fde->fde_file = fp; if ((flags & O_CLOEXEC) != 0) - fdp->fd_ofileflags[*fd] |= UF_EXCLOSE; + fde->fde_flags |= UF_EXCLOSE; + if (fcaps != NULL) + filecaps_move(fcaps, &fde->fde_caps); + else + filecaps_fill(&fde->fde_caps); FILEDESC_XUNLOCK(fdp); return (0); } @@ -1696,7 +1742,6 @@ fdinit(struct filedesc *fdp) newfdp->fd_fd.fd_holdcnt = 1; newfdp->fd_fd.fd_cmask = CMASK; newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; - newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; newfdp->fd_fd.fd_nfiles = NDFILE; newfdp->fd_fd.fd_map = newfdp->fd_dmap; newfdp->fd_fd.fd_lastfile = -1; @@ -1764,7 +1809,7 @@ fdunshare(struct proc *p, struct thread *td) FILEDESC_XUNLOCK(p->p_fd); tmp = fdcopy(p->p_fd); - fdfree(td); + fdescfree(td); p->p_fd = tmp; } else FILEDESC_XUNLOCK(p->p_fd); @@ -1778,6 +1823,7 @@ struct filedesc * fdcopy(struct filedesc *fdp) { struct filedesc *newfdp; + struct filedescent *nfde, *ofde; int i; /* Certain daemons might not have file descriptors. */ @@ -1796,12 +1842,14 @@ fdcopy(struct filedesc *fdp) /* copy all passable descriptors (i.e. 
not kqueue) */ newfdp->fd_freefile = -1; for (i = 0; i <= fdp->fd_lastfile; ++i) { + ofde = &fdp->fd_ofiles[i]; if (fdisused(fdp, i) && - (fdp->fd_ofiles[i]->f_ops->fo_flags & DFLAG_PASSABLE) && - fdp->fd_ofiles[i]->f_ops != &badfileops) { - newfdp->fd_ofiles[i] = fdp->fd_ofiles[i]; - newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i]; - fhold(newfdp->fd_ofiles[i]); + (ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) && + ofde->fde_file->f_ops != &badfileops) { + nfde = &newfdp->fd_ofiles[i]; + *nfde = *ofde; + filecaps_copy(&ofde->fde_caps, &nfde->fde_caps); + fhold(nfde->fde_file); newfdp->fd_lastfile = i; } else { if (newfdp->fd_freefile == -1) @@ -1811,9 +1859,10 @@ fdcopy(struct filedesc *fdp) newfdp->fd_cmask = fdp->fd_cmask; FILEDESC_SUNLOCK(fdp); FILEDESC_XLOCK(newfdp); - for (i = 0; i <= newfdp->fd_lastfile; ++i) - if (newfdp->fd_ofiles[i] != NULL) + for (i = 0; i <= newfdp->fd_lastfile; ++i) { + if (newfdp->fd_ofiles[i].fde_file != NULL) fdused(newfdp, i); + } if (newfdp->fd_freefile == -1) newfdp->fd_freefile = i; FILEDESC_XUNLOCK(newfdp); @@ -1824,7 +1873,7 @@ fdcopy(struct filedesc *fdp) * Release a filedesc structure. 
*/ void -fdfree(struct thread *td) +fdescfree(struct thread *td) { struct filedesc *fdp; int i; @@ -1849,12 +1898,12 @@ fdfree(struct thread *td) if (fdtol != NULL) { FILEDESC_XLOCK(fdp); KASSERT(fdtol->fdl_refcount > 0, - ("filedesc_to_refcount botch: fdl_refcount=%d", - fdtol->fdl_refcount)); + ("filedesc_to_refcount botch: fdl_refcount=%d", + fdtol->fdl_refcount)); if (fdtol->fdl_refcount == 1 && (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { for (i = 0; i <= fdp->fd_lastfile; i++) { - fp = fdp->fd_ofiles[i]; + fp = fdp->fd_ofiles[i].fde_file; if (fp == NULL || fp->f_type != DTYPE_VNODE) continue; fhold(fp); @@ -1914,10 +1963,10 @@ fdfree(struct thread *td) return; for (i = 0; i <= fdp->fd_lastfile; i++) { - fp = fdp->fd_ofiles[i]; + fp = fdp->fd_ofiles[i].fde_file; if (fp != NULL) { FILEDESC_XLOCK(fdp); - fdp->fd_ofiles[i] = NULL; + fdfree(fdp, i); FILEDESC_XUNLOCK(fdp); (void) closef(fp, td); } @@ -1982,6 +2031,7 @@ void setugidsafety(struct thread *td) { struct filedesc *fdp; + struct file *fp; int i; /* Certain daemons might not have file descriptors. */ @@ -1997,18 +2047,14 @@ setugidsafety(struct thread *td) for (i = 0; i <= fdp->fd_lastfile; i++) { if (i > 2) break; - if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) { - struct file *fp; - + fp = fdp->fd_ofiles[i].fde_file; + if (fp != NULL && is_unsafe(fp)) { knote_fdclose(td, i); /* * NULL-out descriptor prior to close to avoid * a race while close blocks. 
*/ - fp = fdp->fd_ofiles[i]; - fdp->fd_ofiles[i] = NULL; - fdp->fd_ofileflags[i] = 0; - fdunused(fdp, i); + fdfree(fdp, i); FILEDESC_XUNLOCK(fdp); (void) closef(fp, td); FILEDESC_XLOCK(fdp); @@ -2029,9 +2075,8 @@ fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td) { FILEDESC_XLOCK(fdp); - if (fdp->fd_ofiles[idx] == fp) { - fdp->fd_ofiles[idx] = NULL; - fdunused(fdp, idx); + if (fdp->fd_ofiles[idx].fde_file == fp) { + fdfree(fdp, idx); FILEDESC_XUNLOCK(fdp); fdrop(fp, td); } else @@ -2045,6 +2090,7 @@ void fdcloseexec(struct thread *td) { struct filedesc *fdp; + struct filedescent *fde; struct file *fp; int i; @@ -2054,17 +2100,16 @@ fdcloseexec(struct thread *td) return; /* - * We cannot cache fd_ofiles or fd_ofileflags since operations + * We cannot cache fd_ofiles since operations * may block and rip them out from under us. */ FILEDESC_XLOCK(fdp); for (i = 0; i <= fdp->fd_lastfile; i++) { - fp = fdp->fd_ofiles[i]; + fde = &fdp->fd_ofiles[i]; + fp = fde->fde_file; if (fp != NULL && (fp->f_type == DTYPE_MQUEUE || - (fdp->fd_ofileflags[i] & UF_EXCLOSE))) { - fdp->fd_ofiles[i] = NULL; - fdp->fd_ofileflags[i] = 0; - fdunused(fdp, i); + (fde->fde_flags & UF_EXCLOSE))) { + fdfree(fdp, i); (void) closefp(fdp, i, fp, td, 0); /* closefp() drops the FILEDESC lock. */ FILEDESC_XLOCK(fdp); @@ -2094,7 +2139,7 @@ fdcheckstd(struct thread *td) devnull = -1; error = 0; for (i = 0; i < 3; i++) { - if (fdp->fd_ofiles[i] != NULL) + if (fdp->fd_ofiles[i].fde_file != NULL) continue; if (devnull < 0) { save = td->td_retval[0]; @@ -2129,7 +2174,6 @@ closef(struct file *fp, struct thread *td) struct flock lf; struct filedesc_to_leader *fdtol; struct filedesc *fdp; - struct file *fp_object; /* * POSIX record locking dictates that any close releases ALL @@ -2142,13 +2186,9 @@ closef(struct file *fp, struct thread *td) * NULL thread pointer when there really is no owning * context that might have locks, or the locks will be * leaked. 
- * - * If this is a capability, we do lock processing under the underlying - * node, not the capability itself. */ - (void)cap_funwrap(fp, 0, &fp_object); - if (fp_object->f_type == DTYPE_VNODE && td != NULL) { - vp = fp_object->f_vnode; + if (fp->f_type == DTYPE_VNODE && td != NULL) { + vp = fp->f_vnode; if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { lf.l_whence = SEEK_SET; lf.l_start = 0; @@ -2177,7 +2217,7 @@ closef(struct file *fp, struct thread *td) lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; - vp = fp_object->f_vnode; + vp = fp->f_vnode; (void) VOP_ADVLOCK(vp, (caddr_t)fdtol->fdl_leader, F_UNLCK, &lf, F_POSIX); @@ -2211,14 +2251,19 @@ finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops) atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops); } -struct file * -fget_unlocked(struct filedesc *fdp, int fd) +int +fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t needrights, + int needfcntl, struct file **fpp, cap_rights_t *haverightsp) { struct file *fp; u_int count; +#ifdef CAPABILITIES + cap_rights_t haverights; + int error; +#endif if (fd < 0 || fd >= fdp->fd_nfiles) - return (NULL); + return (EBADF); /* * Fetch the descriptor locklessly. We avoid fdrop() races by * never raising a refcount above 0. To accomplish this we have @@ -2228,9 +2273,20 @@ fget_unlocked(struct filedesc *fdp, int fd) * due to preemption. 
*/ for (;;) { - fp = fdp->fd_ofiles[fd]; + fp = fdp->fd_ofiles[fd].fde_file; if (fp == NULL) - break; + return (EBADF); +#ifdef CAPABILITIES + haverights = cap_rights(fdp, fd); + error = cap_check(haverights, needrights); + if (error != 0) + return (error); + if ((needrights & CAP_FCNTL) != 0) { + error = cap_fcntl_check(fdp, fd, needfcntl); + if (error != 0) + return (error); + } +#endif count = fp->f_count; if (count == 0) continue; @@ -2240,12 +2296,19 @@ fget_unlocked(struct filedesc *fdp, int fd) */ if (atomic_cmpset_acq_int(&fp->f_count, count, count + 1) != 1) continue; - if (fp == fdp->fd_ofiles[fd]) + if (fp == fdp->fd_ofiles[fd].fde_file) break; fdrop(fp, curthread); } - - return (fp); + *fpp = fp; + if (haverightsp != NULL) { +#ifdef CAPABILITIES + *haverightsp = haverights; +#else + *haverightsp = CAP_ALL; +#endif + } + return (0); } /* @@ -2255,33 +2318,29 @@ fget_unlocked(struct filedesc *fdp, int fd) * If the descriptor doesn't exist or doesn't match 'flags', EBADF is * returned. * - * If the FGET_GETCAP flag is set, the capability itself will be returned. - * Calling _fget() with FGET_GETCAP on a non-capability will return EINVAL. - * Otherwise, if the file is a capability, its rights will be checked against - * the capability rights mask, and if successful, the object will be unwrapped. + * File's rights will be checked against the capability rights mask. * * If an error occured the non-zero error is returned and *fpp is set to * NULL. Otherwise *fpp is held and set and zero is returned. Caller is * responsible for fdrop(). 
*/ -#define FGET_GETCAP 0x00000001 static __inline int _fget(struct thread *td, int fd, struct file **fpp, int flags, - cap_rights_t needrights, cap_rights_t *haverightsp, u_char *maxprotp, - int fget_flags) + cap_rights_t needrights, u_char *maxprotp) { struct filedesc *fdp; struct file *fp; -#ifdef CAPABILITIES - struct file *fp_fromcap; -#endif + cap_rights_t haverights; int error; *fpp = NULL; if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) return (EBADF); - if ((fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); + if (maxprotp != NULL) + needrights |= CAP_MMAP; + error = fget_unlocked(fdp, fd, needrights, 0, &fp, &haverights); + if (error != 0) + return (error); if (fp->f_ops == &badfileops) { fdrop(fp, td); return (EBADF); @@ -2289,50 +2348,11 @@ _fget(struct thread *td, int fd, struct file **fpp, int flags, #ifdef CAPABILITIES /* - * If this is a capability, what rights does it have? + * If requested, convert capability rights to access flags. */ - if (haverightsp != NULL) { - if (fp->f_type == DTYPE_CAPABILITY) - *haverightsp = cap_rights(fp); - else - *haverightsp = CAP_MASK_VALID; - } - - /* - * If a capability has been requested, return the capability directly. - * Otherwise, check capability rights, extract the underlying object, - * and check its access flags. - */ - if (fget_flags & FGET_GETCAP) { - if (fp->f_type != DTYPE_CAPABILITY) { - fdrop(fp, td); - return (EINVAL); - } - } else { - if (maxprotp == NULL) - error = cap_funwrap(fp, needrights, &fp_fromcap); - else - error = cap_funwrap_mmap(fp, needrights, maxprotp, - &fp_fromcap); - if (error != 0) { - fdrop(fp, td); - return (error); - } - - /* - * If we've unwrapped a file, drop the original capability - * and hold the new descriptor. fp after this point refers to - * the actual (unwrapped) object, not the capability. 
- */ - if (fp != fp_fromcap) { - fhold(fp_fromcap); - fdrop(fp, td); - fp = fp_fromcap; - } - } + if (maxprotp != NULL) + *maxprotp = cap_rights_to_vmprot(haverights); #else /* !CAPABILITIES */ - KASSERT(fp->f_type != DTYPE_CAPABILITY, - ("%s: saw capability", __func__)); if (maxprotp != NULL) *maxprotp = VM_PROT_ALL; #endif /* CAPABILITIES */ @@ -2371,7 +2391,7 @@ int fget(struct thread *td, int fd, cap_rights_t rights, struct file **fpp) { - return(_fget(td, fd, fpp, 0, rights, NULL, NULL, 0)); + return(_fget(td, fd, fpp, 0, rights, NULL)); } int @@ -2379,37 +2399,24 @@ fget_mmap(struct thread *td, int fd, cap_rights_t rights, u_char *maxprotp, struct file **fpp) { - return (_fget(td, fd, fpp, 0, rights, NULL, maxprotp, 0)); + return (_fget(td, fd, fpp, 0, rights, maxprotp)); } int fget_read(struct thread *td, int fd, cap_rights_t rights, struct file **fpp) { - return(_fget(td, fd, fpp, FREAD, rights, NULL, NULL, 0)); + return(_fget(td, fd, fpp, FREAD, rights, NULL)); } int fget_write(struct thread *td, int fd, cap_rights_t rights, struct file **fpp) { - return (_fget(td, fd, fpp, FWRITE, rights, NULL, NULL, 0)); + return (_fget(td, fd, fpp, FWRITE, rights, NULL)); } /* - * Unlike the other fget() calls, which accept and check capability rights - * but never return capabilities, fgetcap() returns the capability but doesn't - * check capability rights. - */ -int -fgetcap(struct thread *td, int fd, struct file **fpp) -{ - - return (_fget(td, fd, fpp, 0, 0, NULL, NULL, FGET_GETCAP)); -} - - -/* * Like fget() but loads the underlying vnode, or returns an error if the * descriptor does not represent a vnode. Note that pipes use vnodes but * never have VM objects. The returned vnode will be vref()'d. 
@@ -2418,14 +2425,14 @@ fgetcap(struct thread *td, int fd, struct file **fpp) */ static __inline int _fgetvp(struct thread *td, int fd, int flags, cap_rights_t needrights, - cap_rights_t *haverightsp, struct vnode **vpp) + struct vnode **vpp) { struct file *fp; int error; *vpp = NULL; - if ((error = _fget(td, fd, &fp, flags, needrights, haverightsp, - NULL, 0)) != 0) + error = _fget(td, fd, &fp, flags, needrights, NULL); + if (error) return (error); if (fp->f_vnode == NULL) { error = EINVAL; @@ -2442,28 +2449,54 @@ int fgetvp(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp) { - return (_fgetvp(td, fd, 0, rights, NULL, vpp)); + return (_fgetvp(td, fd, 0, rights, vpp)); } int -fgetvp_rights(struct thread *td, int fd, cap_rights_t need, cap_rights_t *have, - struct vnode **vpp) +fgetvp_rights(struct thread *td, int fd, cap_rights_t need, + struct filecaps *havecaps, struct vnode **vpp) { - return (_fgetvp(td, fd, 0, need, have, vpp)); + struct filedesc *fdp; + struct file *fp; +#ifdef CAPABILITIES + int error; +#endif + + if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) + return (EBADF); + + fp = fget_locked(fdp, fd); + if (fp == NULL || fp->f_ops == &badfileops) + return (EBADF); + +#ifdef CAPABILITIES + error = cap_check(cap_rights(fdp, fd), need); + if (error != 0) + return (error); +#endif + + if (fp->f_vnode == NULL) + return (EINVAL); + + *vpp = fp->f_vnode; + vref(*vpp); + filecaps_copy(&fdp->fd_ofiles[fd].fde_caps, havecaps); + + return (0); } int fgetvp_read(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp) { - return (_fgetvp(td, fd, FREAD, rights, NULL, vpp)); + return (_fgetvp(td, fd, FREAD, rights, vpp)); } int fgetvp_exec(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp) { - return (_fgetvp(td, fd, FEXEC, rights, NULL, vpp)); + return (_fgetvp(td, fd, FEXEC, rights, vpp)); } #ifdef notyet @@ -2472,7 +2505,7 @@ fgetvp_write(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp) { - 
return (_fgetvp(td, fd, FWRITE, rights, NULL, vpp)); + return (_fgetvp(td, fd, FWRITE, rights, vpp)); } #endif @@ -2497,7 +2530,7 @@ fgetsock(struct thread *td, int fd, cap_rights_t rights, struct socket **spp, *spp = NULL; if (fflagp != NULL) *fflagp = 0; - if ((error = _fget(td, fd, &fp, 0, rights, NULL, NULL, 0)) != 0) + if ((error = _fget(td, fd, &fp, 0, rights, NULL)) != 0) return (error); if (fp->f_type != DTYPE_SOCKET) { error = ENOTSOCK; @@ -2533,9 +2566,6 @@ fputsock(struct socket *so) /* * Handle the last reference to a file being closed. - * - * No special capability handling here, as the capability's fo_close will run - * instead of the object here, and perform any necessary drop on the object. */ int _fdrop(struct file *fp, struct thread *td) @@ -2612,7 +2642,8 @@ done2: * Duplicate the specified descriptor to a free descriptor. */ int -dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, int openerror, int *indxp) +dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, + int openerror, int *indxp) { struct file *fp; int error, indx; @@ -2656,18 +2687,17 @@ dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, int opener FILEDESC_XUNLOCK(fdp); return (EACCES); } - fdp->fd_ofiles[indx] = fp; - fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; fhold(fp); + fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; + filecaps_copy(&fdp->fd_ofiles[dfd].fde_caps, + &fdp->fd_ofiles[indx].fde_caps); break; case ENXIO: /* * Steal away the file pointer from dfd and stuff it into indx. 
*/ - fdp->fd_ofiles[indx] = fp; - fdp->fd_ofiles[dfd] = NULL; - fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; - fdp->fd_ofileflags[dfd] = 0; + fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; + bzero(&fdp->fd_ofiles[dfd], sizeof(fdp->fd_ofiles[dfd])); fdunused(fdp, dfd); break; } @@ -2823,7 +2853,7 @@ sysctl_kern_file(SYSCTL_HANDLER_ARGS) continue; FILEDESC_SLOCK(fdp); for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) { - if ((fp = fdp->fd_ofiles[n]) == NULL) + if ((fp = fdp->fd_ofiles[n].fde_file) == NULL) continue; xf.xf_fd = n; xf.xf_file = fp; @@ -2935,7 +2965,7 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS) export_vnode_for_osysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif, fdp, req); for (i = 0; i < fdp->fd_nfiles; i++) { - if ((fp = fdp->fd_ofiles[i]) == NULL) + if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) continue; bzero(kif, sizeof(*kif)); kif->kf_structsize = sizeof(*kif); @@ -2945,21 +2975,6 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS) shmfd = NULL; kif->kf_fd = i; -#ifdef CAPABILITIES - /* - * When reporting a capability, most fields will be from the - * underlying object, but do mark as a capability. With - * ofiledesc, we don't have a field to export the cap_rights_t, - * but we do with the new filedesc. 
- */ - if (fp->f_type == DTYPE_CAPABILITY) { - kif->kf_flags |= KF_FLAG_CAPABILITY; - (void)cap_funwrap(fp, 0, &fp); - } -#else - KASSERT(fp->f_type != DTYPE_CAPABILITY, - ("sysctl_kern_proc_ofiledesc: saw capability")); -#endif switch (fp->f_type) { case DTYPE_VNODE: kif->kf_type = KF_TYPE_VNODE; @@ -3128,8 +3143,8 @@ CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE); static int export_fd_for_sysctl(void *data, int type, int fd, int fflags, int refcnt, - int64_t offset, int fd_is_cap, cap_rights_t fd_cap_rights, - struct kinfo_file *kif, struct sysctl_req *req) + int64_t offset, cap_rights_t fd_cap_rights, struct kinfo_file *kif, + struct sysctl_req *req) { struct { int fflag; @@ -3191,10 +3206,7 @@ export_fd_for_sysctl(void *data, int type, int fd, int fflags, int refcnt, for (i = 0; i < NFFLAGS; i++) if (fflags & fflags_table[i].fflag) kif->kf_flags |= fflags_table[i].kf_fflag; - if (fd_is_cap) - kif->kf_flags |= KF_FLAG_CAPABILITY; - if (fd_is_cap) - kif->kf_cap_rights = fd_cap_rights; + kif->kf_cap_rights = fd_cap_rights; kif->kf_fd = fd; kif->kf_type = type; kif->kf_ref_count = refcnt; @@ -3222,7 +3234,7 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) int64_t offset; void *data; int error, i, *name; - int fd_is_cap, type, refcnt, fflags; + int type, refcnt, fflags; cap_rights_t fd_cap_rights; name = (int *)arg1; @@ -3252,13 +3264,13 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK); if (tracevp != NULL) export_fd_for_sysctl(tracevp, KF_TYPE_VNODE, KF_FD_TYPE_TRACE, - FREAD | FWRITE, -1, -1, 0, 0, kif, req); + FREAD | FWRITE, -1, -1, 0, kif, req); if (textvp != NULL) export_fd_for_sysctl(textvp, KF_TYPE_VNODE, KF_FD_TYPE_TEXT, - FREAD, -1, -1, 0, 0, kif, req); + FREAD, -1, -1, 0, kif, req); if (cttyvp != NULL) export_fd_for_sysctl(cttyvp, KF_TYPE_VNODE, KF_FD_TYPE_CTTY, - FREAD | FWRITE, -1, -1, 0, 0, kif, req); + FREAD | FWRITE, -1, -1, 0, kif, req); if (fdp == NULL) goto fail; FILEDESC_SLOCK(fdp); @@ 
-3268,7 +3280,7 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) data = fdp->fd_cdir; FILEDESC_SUNLOCK(fdp); export_fd_for_sysctl(data, KF_TYPE_VNODE, KF_FD_TYPE_CWD, - FREAD, -1, -1, 0, 0, kif, req); + FREAD, -1, -1, 0, kif, req); FILEDESC_SLOCK(fdp); } /* root directory */ @@ -3277,7 +3289,7 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) data = fdp->fd_rdir; FILEDESC_SUNLOCK(fdp); export_fd_for_sysctl(data, KF_TYPE_VNODE, KF_FD_TYPE_ROOT, - FREAD, -1, -1, 0, 0, kif, req); + FREAD, -1, -1, 0, kif, req); FILEDESC_SLOCK(fdp); } /* jail directory */ @@ -3286,30 +3298,17 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) data = fdp->fd_jdir; FILEDESC_SUNLOCK(fdp); export_fd_for_sysctl(data, KF_TYPE_VNODE, KF_FD_TYPE_JAIL, - FREAD, -1, -1, 0, 0, kif, req); + FREAD, -1, -1, 0, kif, req); FILEDESC_SLOCK(fdp); } for (i = 0; i < fdp->fd_nfiles; i++) { - if ((fp = fdp->fd_ofiles[i]) == NULL) + if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) continue; data = NULL; - fd_is_cap = 0; - fd_cap_rights = 0; - #ifdef CAPABILITIES - /* - * When reporting a capability, most fields will be from the - * underlying object, but do mark as a capability and export - * the capability rights mask. 
- */ - if (fp->f_type == DTYPE_CAPABILITY) { - fd_is_cap = 1; - fd_cap_rights = cap_rights(fp); - (void)cap_funwrap(fp, 0, &fp); - } + fd_cap_rights = cap_rights(fdp, i); #else /* !CAPABILITIES */ - KASSERT(fp->f_type != DTYPE_CAPABILITY, - ("sysctl_kern_proc_filedesc: saw capability")); + fd_cap_rights = 0; #endif switch (fp->f_type) { case DTYPE_VNODE: @@ -3385,7 +3384,7 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) if (type == KF_TYPE_VNODE || type == KF_TYPE_FIFO) FILEDESC_SUNLOCK(fdp); error = export_fd_for_sysctl(data, type, i, fflags, refcnt, - offset, fd_is_cap, fd_cap_rights, kif, req); + offset, fd_cap_rights, kif, req); if (type == KF_TYPE_VNODE || type == KF_TYPE_FIFO) FILEDESC_SLOCK(fdp); if (error) { @@ -3644,7 +3643,7 @@ file_to_first_proc(struct file *fp) if (fdp == NULL) continue; for (n = 0; n < fdp->fd_nfiles; n++) { - if (fp == fdp->fd_ofiles[n]) + if (fp == fdp->fd_ofiles[n].fde_file) return (p); } } @@ -3694,7 +3693,7 @@ DB_SHOW_COMMAND(files, db_show_files) if ((fdp = p->p_fd) == NULL) continue; for (n = 0; n < fdp->fd_nfiles; ++n) { - if ((fp = fdp->fd_ofiles[n]) == NULL) + if ((fp = fdp->fd_ofiles[n].fde_file) == NULL) continue; db_print_file(fp, header); header = 0; diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 965ce31..7c0d2d6 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -438,9 +438,6 @@ interpret: } else { AUDIT_ARG_FD(args->fd); /* - * Some might argue that CAP_READ and/or CAP_MMAP should also - * be required here; such arguments will be entertained. - * * Descriptors opened only with O_EXEC or O_RDONLY are allowed. */ error = fgetvp_exec(td, args->fd, CAP_FEXECVE, &binvp); diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 82f0344..5bd2daa 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -297,7 +297,7 @@ exit1(struct thread *td, int rv) * Close open files and release open-file table. * This may block! 
*/ - fdfree(td); + fdescfree(td); /* * If this thread tickled GEOM, we need to wait for the giggling to diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 287d202..b5a4934 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -342,7 +342,7 @@ fork_norfproc(struct thread *td, int flags) if (flags & RFCFDG) { struct filedesc *fdtmp; fdtmp = fdinit(td->td_proc->p_fd); - fdfree(td); + fdescfree(td); p1->p_fd = fdtmp; } diff --git a/sys/kern/sys_capability.c b/sys/kern/sys_capability.c index 6fb4fee..ba168e9 100644 --- a/sys/kern/sys_capability.c +++ b/sys/kern/sys_capability.c @@ -1,11 +1,15 @@ /*- * Copyright (c) 2008-2011 Robert N. M. Watson * Copyright (c) 2010-2011 Jonathan Anderson + * Copyright (c) 2012 FreeBSD Foundation * All rights reserved. * * This software was developed at the University of Cambridge Computer * Laboratory with support from a grant from Google, Inc. * + * Portions of this software were developed by Pawel Jakub Dawidek under + * sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -62,6 +66,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -139,90 +144,48 @@ sys_cap_getmode(struct thread *td, struct cap_getmode_args *uap) FEATURE(security_capabilities, "Capsicum Capabilities"); -/* - * struct capability describes a capability, and is hung off of its struct - * file f_data field. cap_file and cap_rightss are static once hooked up, as - * neither the object it references nor the rights it encapsulates are - * permitted to change. - */ -struct capability { - struct file *cap_object; /* Underlying object's file. */ - struct file *cap_file; /* Back-pointer to cap's file. */ - cap_rights_t cap_rights; /* Mask of rights on object. 
*/ -}; +static inline int +_cap_check(cap_rights_t have, cap_rights_t need, enum ktr_cap_fail_type type) +{ + + + if ((need & ~have) != 0) { +#ifdef KTRACE + if (KTRPOINT(curthread, KTR_CAPFAIL)) + ktrcapfail(type, need, have); +#endif + return (ENOTCAPABLE); + } + return (0); +} /* - * Capabilities have a fileops vector, but in practice none should ever be - * called except for fo_close, as the capability will normally not be - * returned during a file descriptor lookup in the system call code. + * Test whether a capability grants the requested rights. */ -static fo_rdwr_t capability_read; -static fo_rdwr_t capability_write; -static fo_truncate_t capability_truncate; -static fo_ioctl_t capability_ioctl; -static fo_poll_t capability_poll; -static fo_kqfilter_t capability_kqfilter; -static fo_stat_t capability_stat; -static fo_close_t capability_close; -static fo_chmod_t capability_chmod; -static fo_chown_t capability_chown; - -static struct fileops capability_ops = { - .fo_read = capability_read, - .fo_write = capability_write, - .fo_truncate = capability_truncate, - .fo_ioctl = capability_ioctl, - .fo_poll = capability_poll, - .fo_kqfilter = capability_kqfilter, - .fo_stat = capability_stat, - .fo_close = capability_close, - .fo_chmod = capability_chmod, - .fo_chown = capability_chown, - .fo_flags = DFLAG_PASSABLE, -}; - -static struct fileops capability_ops_unpassable = { - .fo_read = capability_read, - .fo_write = capability_write, - .fo_truncate = capability_truncate, - .fo_ioctl = capability_ioctl, - .fo_poll = capability_poll, - .fo_kqfilter = capability_kqfilter, - .fo_stat = capability_stat, - .fo_close = capability_close, - .fo_chmod = capability_chmod, - .fo_chown = capability_chown, - .fo_flags = 0, -}; - -static uma_zone_t capability_zone; - -static void -capability_init(void *dummy __unused) +int +cap_check(cap_rights_t have, cap_rights_t need) { - capability_zone = uma_zcreate("capability", sizeof(struct capability), - NULL, NULL, NULL, NULL, 
UMA_ALIGN_PTR, 0); - if (capability_zone == NULL) - panic("capability_init: capability_zone not initialized"); + return (_cap_check(have, need, CAPFAIL_NOTCAPABLE)); } -SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, capability_init, NULL); /* - * Test whether a capability grants the requested rights. + * Convert capability rights into VM access flags. */ -static int -cap_check(struct capability *c, cap_rights_t rights) +u_char +cap_rights_to_vmprot(cap_rights_t have) { + u_char maxprot; - if ((c->cap_rights | rights) != c->cap_rights) { -#ifdef KTRACE - if (KTRPOINT(curthread, KTR_CAPFAIL)) - ktrcapfail(CAPFAIL_NOTCAPABLE, rights, c->cap_rights); -#endif - return (ENOTCAPABLE); - } - return (0); + maxprot = VM_PROT_NONE; + if (have & CAP_MMAP_R) + maxprot |= VM_PROT_READ; + if (have & CAP_MMAP_W) + maxprot |= VM_PROT_WRITE; + if (have & CAP_MMAP_X) + maxprot |= VM_PROT_EXECUTE; + + return (maxprot); } /* @@ -231,43 +194,49 @@ cap_check(struct capability *c, cap_rights_t rights) * this one file. */ cap_rights_t -cap_rights(struct file *fp_cap) +cap_rights(struct filedesc *fdp, int fd) { - struct capability *c; - - KASSERT(fp_cap->f_type == DTYPE_CAPABILITY, - ("cap_rights: !capability")); - c = fp_cap->f_data; - return (c->cap_rights); + return (fdp->fd_ofiles[fd].fde_rights); } /* - * System call to create a new capability reference to either an existing - * file object or an an existing capability. + * System call to limit rights of the given capability. 
*/ int -sys_cap_new(struct thread *td, struct cap_new_args *uap) +sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap) { - int error, capfd; - int fd = uap->fd; - struct file *fp; - cap_rights_t rights = uap->rights; + struct filedesc *fdp; + cap_rights_t rights; + int error, fd; + + fd = uap->fd; + rights = uap->rights; AUDIT_ARG_FD(fd); AUDIT_ARG_RIGHTS(rights); - error = fget(td, fd, rights, &fp); - if (error) - return (error); - AUDIT_ARG_FILE(td->td_proc, fp); - error = kern_capwrap(td, fp, rights, &capfd); - /* - * Release our reference to the file (kern_capwrap has held a reference - * for the filedesc array). - */ - fdrop(fp, td); - if (error == 0) - td->td_retval[0] = capfd; + + if ((rights & ~CAP_ALL) != 0) + return (EINVAL); + + fdp = td->td_proc->p_fd; + FILEDESC_XLOCK(fdp); + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_XUNLOCK(fdp); + return (EBADF); + } + error = _cap_check(cap_rights(fdp, fd), rights, CAPFAIL_INCREASE); + if (error == 0) { + fdp->fd_ofiles[fd].fde_rights = rights; + if ((rights & CAP_IOCTL) == 0) { + free(fdp->fd_ofiles[fd].fde_ioctls, M_TEMP); + fdp->fd_ofiles[fd].fde_ioctls = NULL; + fdp->fd_ofiles[fd].fde_nioctls = 0; + } + if ((rights & CAP_FCNTL) == 0) + fdp->fd_ofiles[fd].fde_fcntls = 0; + } + FILEDESC_XUNLOCK(fdp); return (error); } @@ -275,247 +244,321 @@ sys_cap_new(struct thread *td, struct cap_new_args *uap) * System call to query the rights mask associated with a capability. 
*/ int -sys_cap_getrights(struct thread *td, struct cap_getrights_args *uap) +sys_cap_rights_get(struct thread *td, struct cap_rights_get_args *uap) { - struct capability *cp; - struct file *fp; - int error; + struct filedesc *fdp; + cap_rights_t rights; + int fd; - AUDIT_ARG_FD(uap->fd); - error = fgetcap(td, uap->fd, &fp); - if (error) - return (error); - cp = fp->f_data; - error = copyout(&cp->cap_rights, uap->rightsp, sizeof(*uap->rightsp)); - fdrop(fp, td); - return (error); + fd = uap->fd; + + AUDIT_ARG_FD(fd); + + fdp = td->td_proc->p_fd; + FILEDESC_SLOCK(fdp); + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_SUNLOCK(fdp); + return (EBADF); + } + rights = cap_rights(fdp, fd); + FILEDESC_SUNLOCK(fdp); + return (copyout(&rights, uap->rightsp, sizeof(*uap->rightsp))); } /* - * Create a capability to wrap around an existing file. + * Test whether a capability grants the given ioctl command. + * If descriptor doesn't have CAP_IOCTL, then ioctls list is empty and + * ENOTCAPABLE will be returned. */ int -kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights, - int *capfdp) +cap_ioctl_check(struct filedesc *fdp, int fd, u_long cmd) { - struct capability *cp, *cp_old; - struct file *fp_object, *fcapp; - int error; - - if ((rights | CAP_MASK_VALID) != CAP_MASK_VALID) - return (EINVAL); + u_long *cmds; + ssize_t ncmds; + long i; - /* - * If a new capability is being derived from an existing capability, - * then the new capability rights must be a subset of the existing - * rights. - */ - if (fp->f_type == DTYPE_CAPABILITY) { - cp_old = fp->f_data; - if ((cp_old->cap_rights | rights) != cp_old->cap_rights) { -#ifdef KTRACE - if (KTRPOINT(curthread, KTR_CAPFAIL)) - ktrcapfail(CAPFAIL_INCREASE, - rights, cp_old->cap_rights); -#endif - return (ENOTCAPABLE); - } - } + FILEDESC_LOCK_ASSERT(fdp); + KASSERT(fd >= 0 && fd < fdp->fd_nfiles, + ("%s: invalid fd=%d", __func__, fd)); - /* - * Allocate a new file descriptor to hang the capability off of. 
- */ - error = falloc(td, &fcapp, capfdp, fp->f_flag); - if (error) - return (error); + ncmds = fdp->fd_ofiles[fd].fde_nioctls; + if (ncmds == -1) + return (0); - /* - * Rather than nesting capabilities, directly reference the object an - * existing capability references. There's nothing else interesting - * to preserve for future use, as we've incorporated the previous - * rights mask into the new one. This prevents us from having to - * deal with capability chains. - */ - if (fp->f_type == DTYPE_CAPABILITY) - fp_object = ((struct capability *)fp->f_data)->cap_object; - else - fp_object = fp; - fhold(fp_object); - cp = uma_zalloc(capability_zone, M_WAITOK | M_ZERO); - cp->cap_rights = rights; - cp->cap_object = fp_object; - cp->cap_file = fcapp; - if (fp->f_flag & DFLAG_PASSABLE) - finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp, - &capability_ops); - else - finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp, - &capability_ops_unpassable); + cmds = fdp->fd_ofiles[fd].fde_ioctls; + for (i = 0; i < ncmds; i++) { + if (cmds[i] == cmd) + return (0); + } - /* - * Release our private reference (the proc filedesc still has one). - */ - fdrop(fcapp, td); - return (0); + return (ENOTCAPABLE); } /* - * Given a file descriptor, test it against a capability rights mask and then - * return the file descriptor on which to actually perform the requested - * operation. As long as the reference to fp_cap remains valid, the returned - * pointer in *fp will remain valid, so no extra reference management is - * required, and the caller should fdrop() fp_cap as normal when done with - * both. + * Check if the current ioctls list can be replaced by the new one. 
*/ -int -cap_funwrap(struct file *fp_cap, cap_rights_t rights, struct file **fpp) +static int +cap_ioctl_limit_check(struct filedesc *fdp, int fd, const u_long *cmds, + size_t ncmds) { - struct capability *c; - int error; + u_long *ocmds; + ssize_t oncmds; + u_long i; + long j; - if (fp_cap->f_type != DTYPE_CAPABILITY) { - *fpp = fp_cap; + oncmds = fdp->fd_ofiles[fd].fde_nioctls; + if (oncmds == -1) return (0); + if (oncmds < (ssize_t)ncmds) + return (ENOTCAPABLE); + + ocmds = fdp->fd_ofiles[fd].fde_ioctls; + for (i = 0; i < ncmds; i++) { + for (j = 0; j < oncmds; j++) { + if (cmds[i] == ocmds[j]) + break; + } + if (j == oncmds) + return (ENOTCAPABLE); } - c = fp_cap->f_data; - error = cap_check(c, rights); - if (error) - return (error); - *fpp = c->cap_object; + return (0); } -/* - * Slightly different routine for memory mapping file descriptors: unwrap the - * capability and check CAP_MMAP, but also return a bitmask representing the - * maximum mapping rights the capability allows on the object. - */ int -cap_funwrap_mmap(struct file *fp_cap, cap_rights_t rights, u_char *maxprotp, - struct file **fpp) +sys_cap_ioctls_limit(struct thread *td, struct cap_ioctls_limit_args *uap) { - struct capability *c; - u_char maxprot; - int error; + struct filedesc *fdp; + u_long *cmds, *ocmds; + size_t ncmds; + int error, fd; - if (fp_cap->f_type != DTYPE_CAPABILITY) { - *fpp = fp_cap; - *maxprotp = VM_PROT_ALL; - return (0); + fd = uap->fd; + ncmds = uap->ncmds; + + AUDIT_ARG_FD(fd); + + if (ncmds > 256) /* XXX: Is 256 sane? 
*/ + return (EINVAL); + + if (ncmds == 0) { + cmds = NULL; + } else { + cmds = malloc(sizeof(cmds[0]) * ncmds, M_TEMP, M_WAITOK); + error = copyin(uap->cmds, cmds, sizeof(cmds[0]) * ncmds); + if (error != 0) { + free(cmds, M_TEMP); + return (error); + } } - c = fp_cap->f_data; - error = cap_check(c, rights | CAP_MMAP); - if (error) - return (error); - *fpp = c->cap_object; - maxprot = 0; - if (c->cap_rights & CAP_READ) - maxprot |= VM_PROT_READ; - if (c->cap_rights & CAP_WRITE) - maxprot |= VM_PROT_WRITE; - if (c->cap_rights & CAP_MAPEXEC) - maxprot |= VM_PROT_EXECUTE; - *maxprotp = maxprot; - return (0); + + fdp = td->td_proc->p_fd; + FILEDESC_XLOCK(fdp); + + if (fget_locked(fdp, fd) == NULL) { + error = EBADF; + goto out; + } + + error = cap_ioctl_limit_check(fdp, fd, cmds, ncmds); + if (error != 0) + goto out; + + ocmds = fdp->fd_ofiles[fd].fde_ioctls; + fdp->fd_ofiles[fd].fde_ioctls = cmds; + fdp->fd_ofiles[fd].fde_nioctls = ncmds; + + cmds = ocmds; + error = 0; +out: + FILEDESC_XUNLOCK(fdp); + free(cmds, M_TEMP); + return (error); } -/* - * When a capability is closed, simply drop the reference on the underlying - * object and free the capability. fdrop() will handle the case where the - * underlying object also needs to close, and the caller will have already - * performed any object-specific lock or mqueue handling. 
- */ -static int -capability_close(struct file *fp, struct thread *td) +int +sys_cap_ioctls_get(struct thread *td, struct cap_ioctls_get_args *uap) { - struct capability *c; - struct file *fp_object; - - KASSERT(fp->f_type == DTYPE_CAPABILITY, - ("capability_close: !capability")); - - c = fp->f_data; - fp->f_ops = &badfileops; - fp->f_data = NULL; - fp_object = c->cap_object; - uma_zfree(capability_zone, c); - return (fdrop(fp_object, td)); + struct filedesc *fdp; + struct filedescent *fdep; + u_long *cmds; + size_t maxcmds; + int error, fd; + + fd = uap->fd; + cmds = uap->cmds; + maxcmds = uap->maxcmds; + + AUDIT_ARG_FD(fd); + + fdp = td->td_proc->p_fd; + FILEDESC_SLOCK(fdp); + + if (fget_locked(fdp, fd) == NULL) { + error = EBADF; + goto out; + } + + /* + * If all ioctls are allowed (fde_nioctls == -1 && fde_ioctls == NULL) + * the only sane thing we can do is to not populate the given array and + * return CAP_IOCTLS_ALL. + */ + + fdep = &fdp->fd_ofiles[fd]; + if (cmds != NULL && fdep->fde_ioctls != NULL) { + error = copyout(fdep->fde_ioctls, cmds, + sizeof(cmds[0]) * MIN(fdep->fde_nioctls, maxcmds)); + if (error != 0) + goto out; + } + if (fdep->fde_nioctls == -1) + td->td_retval[0] = CAP_IOCTLS_ALL; + else + td->td_retval[0] = fdep->fde_nioctls; + + error = 0; +out: + FILEDESC_SUNLOCK(fdp); + return (error); } /* - * In general, file descriptor operations should never make it to the - * capability, only the underlying file descriptor operation vector, so panic - * if we do turn up here. + * Test whether a capability grants the given fcntl command. 
*/ -static int -capability_read(struct file *fp, struct uio *uio, struct ucred *active_cred, - int flags, struct thread *td) +int +cap_fcntl_check(struct filedesc *fdp, int fd, int cmd) { + uint32_t fcntlcap; - panic("capability_read"); -} - -static int -capability_write(struct file *fp, struct uio *uio, struct ucred *active_cred, - int flags, struct thread *td) -{ + KASSERT(fd >= 0 && fd < fdp->fd_nfiles, + ("%s: invalid fd=%d", __func__, fd)); - panic("capability_write"); -} + fcntlcap = (1 << cmd); + KASSERT((CAP_FCNTL_ALL & fcntlcap) != 0, + ("Unsupported fcntl=%d.", cmd)); -static int -capability_truncate(struct file *fp, off_t length, struct ucred *active_cred, - struct thread *td) -{ + if ((fdp->fd_ofiles[fd].fde_fcntls & fcntlcap) != 0) + return (0); - panic("capability_truncate"); + return (ENOTCAPABLE); } -static int -capability_ioctl(struct file *fp, u_long com, void *data, - struct ucred *active_cred, struct thread *td) +int +sys_cap_fcntls_limit(struct thread *td, struct cap_fcntls_limit_args *uap) { + struct filedesc *fdp; + uint32_t fcntlrights; + int fd; - panic("capability_ioctl"); -} + fd = uap->fd; + fcntlrights = uap->fcntlrights; -static int -capability_poll(struct file *fp, int events, struct ucred *active_cred, - struct thread *td) -{ + AUDIT_ARG_FD(fd); + AUDIT_ARG_FCNTL_RIGHTS(fcntlrights); - panic("capability_poll"); -} + if ((fcntlrights & ~CAP_FCNTL_ALL) != 0) + return (EINVAL); -static int -capability_kqfilter(struct file *fp, struct knote *kn) -{ + fdp = td->td_proc->p_fd; + FILEDESC_XLOCK(fdp); - panic("capability_kqfilter"); -} + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_XUNLOCK(fdp); + return (EBADF); + } -static int -capability_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, - struct thread *td) -{ + if ((fcntlrights & ~fdp->fd_ofiles[fd].fde_fcntls) != 0) { + FILEDESC_XUNLOCK(fdp); + return (ENOTCAPABLE); + } - panic("capability_stat"); + fdp->fd_ofiles[fd].fde_fcntls = fcntlrights; + FILEDESC_XUNLOCK(fdp); 
+ + return (0); } int -capability_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, - struct thread *td) +sys_cap_fcntls_get(struct thread *td, struct cap_fcntls_get_args *uap) { + struct filedesc *fdp; + uint32_t rights; + int fd; + + fd = uap->fd; - panic("capability_chmod"); + AUDIT_ARG_FD(fd); + + fdp = td->td_proc->p_fd; + FILEDESC_SLOCK(fdp); + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_SUNLOCK(fdp); + return (EBADF); + } + rights = fdp->fd_ofiles[fd].fde_fcntls; + FILEDESC_SUNLOCK(fdp); + + return (copyout(&rights, uap->fcntlrightsp, sizeof(rights))); } +/* + * For backward compatibility. + */ int -capability_chown(struct file *fp, uid_t uid, gid_t gid, - struct ucred *active_cred, struct thread *td) +sys_cap_new(struct thread *td, struct cap_new_args *uap) { + struct filedesc *fdp; + cap_rights_t rights; + register_t newfd; + int error, fd; + + fd = uap->fd; + rights = uap->rights; + + AUDIT_ARG_FD(fd); + AUDIT_ARG_RIGHTS(rights); + + if ((rights & ~CAP_ALL) != 0) + return (EINVAL); + + fdp = td->td_proc->p_fd; + FILEDESC_SLOCK(fdp); + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_SUNLOCK(fdp); + return (EBADF); + } + error = _cap_check(cap_rights(fdp, fd), rights, CAPFAIL_INCREASE); + FILEDESC_SUNLOCK(fdp); + if (error != 0) + return (error); + + error = do_dup(td, 0, fd, 0, &newfd); + if (error != 0) + return (error); - panic("capability_chown"); + FILEDESC_XLOCK(fdp); + /* + * We don't really care about the race between checking capability + * rights for the source descriptor and now. If capability rights + * were ok at that earlier point, the process had this descriptor + * with those rights, so we don't increase them in security sense, + * the process might have done the cap_new(2) a bit earlier to get + * the same effect. 
+ */ + fdp->fd_ofiles[newfd].fde_rights = rights; + if ((rights & CAP_IOCTL) == 0) { + free(fdp->fd_ofiles[newfd].fde_ioctls, M_TEMP); + fdp->fd_ofiles[newfd].fde_ioctls = NULL; + fdp->fd_ofiles[newfd].fde_nioctls = 0; + } + if ((rights & CAP_FCNTL) == 0) + fdp->fd_ofiles[newfd].fde_fcntls = 0; + FILEDESC_XUNLOCK(fdp); + + td->td_retval[0] = newfd; + + return (0); } #else /* !CAPABILITIES */ @@ -524,42 +567,54 @@ capability_chown(struct file *fp, uid_t uid, gid_t gid, * Stub Capability functions for when options CAPABILITIES isn't compiled * into the kernel. */ + int -sys_cap_new(struct thread *td, struct cap_new_args *uap) +sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap) { return (ENOSYS); } int -sys_cap_getrights(struct thread *td, struct cap_getrights_args *uap) +sys_cap_rights_get(struct thread *td, struct cap_rights_get_args *uap) { return (ENOSYS); } int -cap_funwrap(struct file *fp_cap, cap_rights_t rights, struct file **fpp) +sys_cap_ioctls_limit(struct thread *td, struct cap_ioctls_limit_args *uap) { - KASSERT(fp_cap->f_type != DTYPE_CAPABILITY, - ("cap_funwrap: saw capability")); + return (ENOSYS); +} - *fpp = fp_cap; - return (0); +int +sys_cap_ioctls_get(struct thread *td, struct cap_ioctls_get_args *uap) +{ + + return (ENOSYS); } int -cap_funwrap_mmap(struct file *fp_cap, cap_rights_t rights, u_char *maxprotp, - struct file **fpp) +sys_cap_fcntls_limit(struct thread *td, struct cap_fcntls_limit_args *uap) { - KASSERT(fp_cap->f_type != DTYPE_CAPABILITY, - ("cap_funwrap_mmap: saw capability")); + return (ENOSYS); +} - *fpp = fp_cap; - *maxprotp = VM_PROT_ALL; - return (0); +int +sys_cap_fcntls_get(struct thread *td, struct cap_fcntls_get_args *uap) +{ + + return (ENOSYS); +} + +int +sys_cap_new(struct thread *td, struct cap_new_args *uap) +{ + + return (ENOSYS); } #endif /* CAPABILITIES */ diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index b97ff7f..39f33f3 100644 --- a/sys/kern/sys_generic.c +++ 
b/sys/kern/sys_generic.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -244,7 +245,7 @@ kern_readv(struct thread *td, int fd, struct uio *auio) struct file *fp; int error; - error = fget_read(td, fd, CAP_READ | CAP_SEEK, &fp); + error = fget_read(td, fd, CAP_READ, &fp); if (error) return (error); error = dofileread(td, fd, fp, auio, (off_t)-1, 0); @@ -287,7 +288,7 @@ kern_preadv(td, fd, auio, offset) struct file *fp; int error; - error = fget_read(td, fd, CAP_READ, &fp); + error = fget_read(td, fd, CAP_PREAD, &fp); if (error) return (error); if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) @@ -453,7 +454,7 @@ kern_writev(struct thread *td, int fd, struct uio *auio) struct file *fp; int error; - error = fget_write(td, fd, CAP_WRITE | CAP_SEEK, &fp); + error = fget_write(td, fd, CAP_WRITE, &fp); if (error) return (error); error = dofilewrite(td, fd, fp, auio, (off_t)-1, 0); @@ -496,7 +497,7 @@ kern_pwritev(td, fd, auio, offset) struct file *fp; int error; - error = fget_write(td, fd, CAP_WRITE, &fp); + error = fget_write(td, fd, CAP_PWRITE, &fp); if (error) return (error); if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) @@ -704,28 +705,60 @@ kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data) { struct file *fp; struct filedesc *fdp; - int error; - int tmp; + int error, tmp, locked; AUDIT_ARG_FD(fd); AUDIT_ARG_CMD(com); - if ((error = fget(td, fd, CAP_IOCTL, &fp)) != 0) - return (error); - if ((fp->f_flag & (FREAD | FWRITE)) == 0) { - fdrop(fp, td); - return (EBADF); - } + fdp = td->td_proc->p_fd; + switch (com) { case FIONCLEX: + case FIOCLEX: FILEDESC_XLOCK(fdp); - fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; - FILEDESC_XUNLOCK(fdp); + locked = LA_XLOCKED; + break; + default: +#ifdef CAPABILITIES + FILEDESC_SLOCK(fdp); + locked = LA_SLOCKED; +#else + locked = LA_UNLOCKED; +#endif + break; + } + +#ifdef CAPABILITIES + if ((fp = fget_locked(fdp, fd)) == NULL) { + error = EBADF; + goto out; + } + if ((error = 
cap_ioctl_check(fdp, fd, com)) != 0) { + fp = NULL; /* fhold() was not called yet */ + goto out; + } + fhold(fp); + if (locked == LA_SLOCKED) { + FILEDESC_SUNLOCK(fdp); + locked = LA_UNLOCKED; + } +#else + if ((error = fget(td, fd, CAP_IOCTL, &fp)) != 0) { + fp = NULL; + goto out; + } +#endif + if ((fp->f_flag & (FREAD | FWRITE)) == 0) { + error = EBADF; + goto out; + } + + switch (com) { + case FIONCLEX: + fdp->fd_ofiles[fd].fde_flags &= ~UF_EXCLOSE; goto out; case FIOCLEX: - FILEDESC_XLOCK(fdp); - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; - FILEDESC_XUNLOCK(fdp); + fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; goto out; case FIONBIO: if ((tmp = *(int *)data)) @@ -745,7 +778,21 @@ kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data) error = fo_ioctl(fp, com, data, td->td_ucred, td); out: - fdrop(fp, td); + switch (locked) { + case LA_XLOCKED: + FILEDESC_XUNLOCK(fdp); + break; +#ifdef CAPABILITIES + case LA_SLOCKED: + FILEDESC_SUNLOCK(fdp); + break; +#endif + default: + FILEDESC_UNLOCK_ASSERT(fdp); + break; + } + if (fp != NULL) + fdrop(fp, td); return (error); } @@ -1130,32 +1177,8 @@ selsetbits(fd_mask **ibits, fd_mask **obits, int idx, fd_mask bit, int events) static __inline int getselfd_cap(struct filedesc *fdp, int fd, struct file **fpp) { - struct file *fp; -#ifdef CAPABILITIES - struct file *fp_fromcap; - int error; -#endif - if ((fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); -#ifdef CAPABILITIES - /* - * If the file descriptor is for a capability, test rights and use - * the file descriptor references by the capability. 
- */ - error = cap_funwrap(fp, CAP_POLL_EVENT, &fp_fromcap); - if (error) { - fdrop(fp, curthread); - return (error); - } - if (fp != fp_fromcap) { - fhold(fp_fromcap); - fdrop(fp, curthread); - fp = fp_fromcap; - } -#endif /* CAPABILITIES */ - *fpp = fp; - return (0); + return (fget_unlocked(fdp, fd, CAP_POLL_EVENT, 0, fpp, NULL)); } /* @@ -1349,13 +1372,14 @@ pollrescan(struct thread *td) /* If the selinfo wasn't cleared the event didn't fire. */ if (si != NULL) continue; - fp = fdp->fd_ofiles[fd->fd]; + fp = fdp->fd_ofiles[fd->fd].fde_file; #ifdef CAPABILITIES - if ((fp == NULL) - || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) { + if (fp == NULL || + cap_check(cap_rights(fdp, fd->fd), CAP_POLL_EVENT) != 0) #else - if (fp == NULL) { + if (fp == NULL) #endif + { fd->revents = POLLNVAL; n++; continue; @@ -1408,9 +1432,8 @@ pollscan(td, fds, nfd) u_int nfd; { struct filedesc *fdp = td->td_proc->p_fd; - int i; struct file *fp; - int n = 0; + int i, n = 0; FILEDESC_SLOCK(fdp); for (i = 0; i < nfd; i++, fds++) { @@ -1420,13 +1443,15 @@ pollscan(td, fds, nfd) } else if (fds->fd < 0) { fds->revents = 0; } else { - fp = fdp->fd_ofiles[fds->fd]; + fp = fdp->fd_ofiles[fds->fd].fde_file; #ifdef CAPABILITIES - if ((fp == NULL) - || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) { + if (fp == NULL || + cap_check(cap_rights(fdp, fds->fd), + CAP_POLL_EVENT) != 0) #else - if (fp == NULL) { + if (fp == NULL) #endif + { fds->revents = POLLNVAL; n++; } else { diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index 148dea3..1a89010 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -917,7 +917,7 @@ struct shmid_ds *buf); } 513 AUE_LPATHCONF STD { int lpathconf(char *path, int name); } 514 AUE_CAP_NEW STD { int cap_new(int fd, uint64_t rights); } -515 AUE_CAP_GETRIGHTS STD { int cap_getrights(int fd, \ +515 AUE_CAP_RIGHTS_GET STD { int cap_rights_get(int fd, \ uint64_t *rightsp); } 516 AUE_CAP_ENTER STD { int cap_enter(void); } 517 AUE_CAP_GETMODE 
STD { int cap_getmode(u_int *modep); } @@ -955,5 +955,15 @@ int *status, int options, \ struct __wrusage *wrusage, \ siginfo_t *info); } +533 AUE_CAP_RIGHTS_LIMIT STD { int cap_rights_limit(int fd, \ + uint64_t rights); } +534 AUE_CAP_IOCTLS_LIMIT STD { int cap_ioctls_limit(int fd, \ + const u_long *cmds, size_t ncmds); } +535 AUE_CAP_IOCTLS_GET STD { ssize_t cap_ioctls_get(int fd, \ + u_long *cmds, size_t maxcmds); } +536 AUE_CAP_FCNTLS_LIMIT STD { int cap_fcntls_limit(int fd, \ + uint32_t fcntlrights); } +537 AUE_CAP_FCNTLS_GET STD { int cap_fcntls_get(int fd, \ + uint32_t *fcntlrightsp); } ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master diff --git a/sys/kern/tty.c b/sys/kern/tty.c index 5c7b753..02eccd7 100644 --- a/sys/kern/tty.c +++ b/sys/kern/tty.c @@ -1840,23 +1840,15 @@ ttyhook_register(struct tty **rtp, struct proc *p, int fd, int error, ref; /* Validate the file descriptor. */ - if ((fdp = p->p_fd) == NULL) - return (EBADF); - - fp = fget_unlocked(fdp, fd); - if (fp == NULL) - return (EBADF); + fdp = p->p_fd; + error = fget_unlocked(fdp, fd, CAP_TTYHOOK, 0, &fp, NULL); + if (error != 0) + return (error); if (fp->f_ops == &badfileops) { error = EBADF; goto done1; } -#ifdef CAPABILITIES - error = cap_funwrap(fp, CAP_TTYHOOK, &fp); - if (error) - goto done1; -#endif - /* * Make sure the vnode is bound to a character device. 
* Unlocked check for the vnode type is ok there, because we diff --git a/sys/kern/uipc_mqueue.c b/sys/kern/uipc_mqueue.c index 9da464c..2d18e77 100644 --- a/sys/kern/uipc_mqueue.c +++ b/sys/kern/uipc_mqueue.c @@ -45,6 +45,7 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_capsicum.h" #include "opt_compat.h" #include @@ -2032,8 +2033,8 @@ kern_kmq_open(struct thread *td, const char *upath, int flags, mode_t mode, &mqueueops); FILEDESC_XLOCK(fdp); - if (fdp->fd_ofiles[fd] == fp) - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; + if (fdp->fd_ofiles[fd].fde_file == fp) + fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); td->td_retval[0] = fd; fdrop(fp, td); @@ -2275,11 +2276,13 @@ again: error = EBADF; goto out; } - error = cap_funwrap(fp2, CAP_POLL_EVENT, &fp2); +#ifdef CAPABILITIES + error = cap_check(cap_rights(fdp, uap->mqd), CAP_POLL_EVENT); if (error) { FILEDESC_SUNLOCK(fdp); goto out; } +#endif if (fp2 != fp) { FILEDESC_SUNLOCK(fdp); error = EBADF; diff --git a/sys/kern/uipc_sem.c b/sys/kern/uipc_sem.c index c219844..2de3409 100644 --- a/sys/kern/uipc_sem.c +++ b/sys/kern/uipc_sem.c @@ -579,8 +579,8 @@ ksem_create(struct thread *td, const char *name, semid_t *semidp, mode_t mode, finit(fp, FREAD | FWRITE, DTYPE_SEM, ks, &ksem_ops); FILEDESC_XLOCK(fdp); - if (fdp->fd_ofiles[fd] == fp) - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; + if (fdp->fd_ofiles[fd].fde_file == fp) + fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); fdrop(fp, td); diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c index 7f75bdc..0cbb8b3 100644 --- a/sys/kern/uipc_shm.c +++ b/sys/kern/uipc_shm.c @@ -629,8 +629,8 @@ sys_shm_open(struct thread *td, struct shm_open_args *uap) finit(fp, FFLAGS(uap->flags & O_ACCMODE), DTYPE_SHM, shmfd, &shm_ops); FILEDESC_XLOCK(fdp); - if (fdp->fd_ofiles[fd] == fp) - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; + if (fdp->fd_ofiles[fd].fde_file == fp) + fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); td->td_retval[0] = fd; 
fdrop(fp, td); diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index 665eb6d..847db35 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -121,38 +121,20 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, "Number of sendfile(2) sf_bufs in use"); /* - * Convert a user file descriptor to a kernel file entry and check that, if - * it is a capability, the right rights are present. A reference on the file - * entry is held upon returning. + * Convert a user file descriptor to a kernel file entry and check if required + * capability rights are present. + * A reference on the file entry is held upon returning. */ static int getsock_cap(struct filedesc *fdp, int fd, cap_rights_t rights, struct file **fpp, u_int *fflagp) { struct file *fp; -#ifdef CAPABILITIES - struct file *fp_fromcap; int error; -#endif - if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); -#ifdef CAPABILITIES - /* - * If the file descriptor is for a capability, test rights and use - * the file descriptor referenced by the capability. 
- */ - error = cap_funwrap(fp, rights, &fp_fromcap); - if (error) { - fdrop(fp, curthread); + error = fget_unlocked(fdp, fd, rights, 0, &fp, NULL); + if (error != 0) return (error); - } - if (fp != fp_fromcap) { - fhold(fp_fromcap); - fdrop(fp, curthread); - fp = fp_fromcap; - } -#endif /* CAPABILITIES */ if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, curthread); return (ENOTSOCK); @@ -765,7 +747,7 @@ kern_sendit(td, s, mp, flags, control, segflg) #endif AUDIT_ARG_FD(s); - rights = CAP_WRITE; + rights = CAP_SEND; if (mp->msg_name != NULL) { AUDIT_ARG_SOCKADDR(td, mp->msg_name); rights |= CAP_CONNECT; @@ -974,7 +956,7 @@ kern_recvit(td, s, mp, fromseg, controlp) *controlp = NULL; AUDIT_ARG_FD(s); - error = getsock_cap(td->td_proc->p_fd, s, CAP_READ, &fp, NULL); + error = getsock_cap(td->td_proc->p_fd, s, CAP_RECV, &fp, NULL); if (error) return (error); so = fp->f_data; @@ -1850,7 +1832,11 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap, * we send only the header/trailer and no payload data. */ AUDIT_ARG_FD(uap->fd); - if ((error = fgetvp_read(td, uap->fd, CAP_READ, &vp)) != 0) + /* + * sendfile(2) can start at any offset within a file so we require + * CAP_READ+CAP_SEEK = CAP_PREAD. + */ + if ((error = fgetvp_read(td, uap->fd, CAP_PREAD, &vp)) != 0) goto out; vn_lock(vp, LK_SHARED | LK_RETRY); if (vp->v_type == VREG) { @@ -1886,7 +1872,7 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap, * The socket must be a stream socket and connected. * Remember if it a blocking or non-blocking socket. 
*/ - if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_WRITE, + if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SEND, &sock_fp, NULL)) != 0) goto out; so = sock_fp->f_data; @@ -2423,7 +2409,7 @@ sys_sctp_generic_sendmsg (td, uap) u_sinfo = &sinfo; } - rights = CAP_WRITE; + rights = CAP_SEND; if (uap->tolen) { error = getsockaddr(&to, uap->to, uap->tolen); if (error) { @@ -2534,7 +2520,7 @@ sys_sctp_generic_sendmsg_iov(td, uap) return (error); u_sinfo = &sinfo; } - rights = CAP_WRITE; + rights = CAP_SEND; if (uap->tolen) { error = getsockaddr(&to, uap->to, uap->tolen); if (error) { @@ -2658,7 +2644,7 @@ sys_sctp_generic_recvmsg(td, uap) #endif AUDIT_ARG_FD(uap->sd); - error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_READ, &fp, NULL); + error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_RECV, &fp, NULL); if (error) { return (error); } diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index a6c308f..dcfd009 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -279,7 +279,7 @@ static void unp_drop(struct unpcb *, int); static void unp_gc(__unused void *, int); static void unp_scan(struct mbuf *, void (*)(struct file *)); static void unp_discard(struct file *); -static void unp_freerights(struct file **, int); +static void unp_freerights(struct filedescent *, int); static void unp_init(void); static int unp_internalize(struct mbuf **, struct thread *); static void unp_internalize_fp(struct file *); @@ -1642,14 +1642,14 @@ unp_drop(struct unpcb *unp, int errno) } static void -unp_freerights(struct file **rp, int fdcount) +unp_freerights(struct filedescent *fde, int fdcount) { - int i; struct file *fp; + int i; - for (i = 0; i < fdcount; i++) { - fp = *rp; - *rp++ = NULL; + for (i = 0; i < fdcount; i++, fde++) { + fp = fde->fde_file; + bzero(fde, sizeof(*fde)); unp_discard(fp); } } @@ -1661,8 +1661,8 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp) struct cmsghdr *cm = mtod(control, struct cmsghdr *); int i; int 
*fdp; - struct file **rp; - struct file *fp; + struct filedesc *fdesc = td->td_proc->p_fd; + struct filedescent *fde, *fdep; void *data; socklen_t clen = control->m_len, datalen; int error, newfds; @@ -1683,20 +1683,20 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp) datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) { - newfds = datalen / sizeof(struct file *); - rp = data; + newfds = datalen / sizeof(*fdep); + fdep = data; /* If we're not outputting the descriptors free them. */ if (error || controlp == NULL) { - unp_freerights(rp, newfds); + unp_freerights(fdep, newfds); goto next; } - FILEDESC_XLOCK(td->td_proc->p_fd); + FILEDESC_XLOCK(fdesc); /* if the new FD's will not fit free them. */ if (!fdavail(td, newfds)) { - FILEDESC_XUNLOCK(td->td_proc->p_fd); + FILEDESC_XUNLOCK(fdesc); error = EMSGSIZE; - unp_freerights(rp, newfds); + unp_freerights(fdep, newfds); goto next; } @@ -1710,23 +1710,24 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp) *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { - FILEDESC_XUNLOCK(td->td_proc->p_fd); + FILEDESC_XUNLOCK(fdesc); error = E2BIG; - unp_freerights(rp, newfds); + unp_freerights(fdep, newfds); goto next; } fdp = (int *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); - for (i = 0; i < newfds; i++) { + for (i = 0; i < newfds; i++, fdep++, fdp++) { if (fdalloc(td, 0, &f)) panic("unp_externalize fdalloc failed"); - fp = *rp++; - td->td_proc->p_fd->fd_ofiles[f] = fp; - unp_externalize_fp(fp); - *fdp++ = f; + fde = &fdesc->fd_ofiles[f]; + fde->fde_file = fdep->fde_file; + filecaps_copy(&fdep->fde_caps, &fde->fde_caps); + unp_externalize_fp(fde->fde_file); + *fdp = f; } - FILEDESC_XUNLOCK(td->td_proc->p_fd); + FILEDESC_XUNLOCK(fdesc); } else { /* We can just copy anything else across. 
*/ if (error || controlp == NULL) @@ -1797,11 +1798,11 @@ unp_internalize(struct mbuf **controlp, struct thread *td) { struct mbuf *control = *controlp; struct proc *p = td->td_proc; - struct filedesc *fdescp = p->p_fd; + struct filedesc *fdesc = p->p_fd; struct bintime *bt; struct cmsghdr *cm = mtod(control, struct cmsghdr *); struct cmsgcred *cmcred; - struct file **rp; + struct filedescent *fde, *fdep; struct file *fp; struct timeval *tv; int i, fd, *fdp; @@ -1854,18 +1855,17 @@ unp_internalize(struct mbuf **controlp, struct thread *td) * files. If not, reject the entire operation. */ fdp = data; - FILEDESC_SLOCK(fdescp); + FILEDESC_SLOCK(fdesc); for (i = 0; i < oldfds; i++) { fd = *fdp++; - if (fd < 0 || fd >= fdescp->fd_nfiles || - fdescp->fd_ofiles[fd] == NULL) { - FILEDESC_SUNLOCK(fdescp); + if (fget_locked(fdesc, fd) == NULL) { + FILEDESC_SUNLOCK(fdesc); error = EBADF; goto out; } - fp = fdescp->fd_ofiles[fd]; + fp = fdesc->fd_ofiles[fd].fde_file; if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) { - FILEDESC_SUNLOCK(fdescp); + FILEDESC_SUNLOCK(fdesc); error = EOPNOTSUPP; goto out; } @@ -1874,25 +1874,26 @@ unp_internalize(struct mbuf **controlp, struct thread *td) /* * Now replace the integer FDs with pointers to the - * associated global file table entry.. + * file structure and capability rights. 
*/ - newlen = oldfds * sizeof(struct file *); + newlen = oldfds * sizeof(*fdep); *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { - FILEDESC_SUNLOCK(fdescp); + FILEDESC_SUNLOCK(fdesc); error = E2BIG; goto out; } fdp = data; - rp = (struct file **) + fdep = (struct filedescent *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); - for (i = 0; i < oldfds; i++) { - fp = fdescp->fd_ofiles[*fdp++]; - *rp++ = fp; - unp_internalize_fp(fp); + for (i = 0; i < oldfds; i++, fdep++, fdp++) { + fde = &fdesc->fd_ofiles[*fdp]; + fdep->fde_file = fde->fde_file; + filecaps_copy(&fde->fde_caps, &fdep->fde_caps); + unp_internalize_fp(fdep->fde_file); } - FILEDESC_SUNLOCK(fdescp); + FILEDESC_SUNLOCK(fdesc); break; case SCM_TIMESTAMP: @@ -2252,7 +2253,7 @@ static void unp_scan(struct mbuf *m0, void (*op)(struct file *)) { struct mbuf *m; - struct file **rp; + struct filedescent *fdep; struct cmsghdr *cm; void *data; int i; @@ -2277,10 +2278,10 @@ unp_scan(struct mbuf *m0, void (*op)(struct file *)) if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) { - qfds = datalen / sizeof (struct file *); - rp = data; - for (i = 0; i < qfds; i++) - (*op)(*rp++); + qfds = datalen / sizeof(*fdep); + fdep = data; + for (i = 0; i < qfds; i++, fdep++) + (*op)(fdep->fde_file); } if (CMSG_SPACE(datalen) < clen) { diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index 99b0197..cba1638 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -1593,16 +1593,16 @@ aio_aqueue(struct thread *td, struct aiocb *job, struct aioliojob *lj, fd = aiocbe->uaiocb.aio_fildes; switch (opcode) { case LIO_WRITE: - error = fget_write(td, fd, CAP_WRITE | CAP_SEEK, &fp); + error = fget_write(td, fd, CAP_PWRITE, &fp); break; case LIO_READ: - error = fget_read(td, fd, CAP_READ | CAP_SEEK, &fp); + error = fget_read(td, fd, CAP_PREAD, &fp); break; case LIO_SYNC: error = fget(td, fd, CAP_FSYNC, &fp); break; case LIO_NOP: - error = fget(td, fd, 0, &fp); + error = fget(td, 
fd, CAP_NONE, &fp); break; default: error = EINVAL; diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index fbde152..94d11f2 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -227,17 +227,18 @@ namei(struct nameidata *ndp) AUDIT_ARG_ATFD2(ndp->ni_dirfd); error = fgetvp_rights(td, ndp->ni_dirfd, ndp->ni_rightsneeded | CAP_LOOKUP, - &(ndp->ni_baserights), &dp); + &ndp->ni_filecaps, &dp); #ifdef CAPABILITIES /* - * Lookups relative to a capability must also be + * If file descriptor doesn't have all rights, + * all lookups relative to it must also be * strictly relative. - * - * Note that a capability with rights CAP_MASK_VALID - * is treated exactly like a regular file descriptor. */ - if (ndp->ni_baserights != CAP_MASK_VALID) + if (ndp->ni_filecaps.fc_rights != CAP_ALL || + ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL || + ndp->ni_filecaps.fc_nioctls != -1) { ndp->ni_strictrelative = 1; + } #endif } if (error != 0 || dp != NULL) { diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index bd44a3a..787399a 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -970,6 +970,8 @@ flags_to_rights(int flags) /* FALLTHROUGH */ case O_WRONLY: rights |= CAP_WRITE; + if (!(flags & O_APPEND)) + rights |= CAP_SEEK; break; } } @@ -1143,19 +1145,22 @@ success: * If we haven't already installed the FD (for dupfdopen), do so now. */ if (indx == -1) { + struct filecaps *fcaps; + #ifdef CAPABILITIES - if (nd.ni_strictrelative == 1) { - /* - * We are doing a strict relative lookup; wrap the - * result in a capability. - */ - if ((error = kern_capwrap(td, fp, nd.ni_baserights, - &indx)) != 0) - goto bad; - } else + if (nd.ni_strictrelative == 1) + fcaps = &nd.ni_filecaps; + else #endif - if ((error = finstall(td, fp, &indx, flags)) != 0) - goto bad; + fcaps = NULL; + error = finstall(td, fp, &indx, flags, fcaps); + /* On success finstall() consumes fcaps. 
*/ + if (error != 0) { + filecaps_free(&nd.ni_filecaps); + goto bad; + } + } else { + filecaps_free(&nd.ni_filecaps); } /* @@ -1279,7 +1284,7 @@ kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, - pathseg, path, fd, CAP_MKNOD, td); + pathseg, path, fd, CAP_MKNODAT, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; @@ -1399,7 +1404,7 @@ kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, - pathseg, path, fd, CAP_MKFIFO, td); + pathseg, path, fd, CAP_MKFIFOAT, td); if ((error = namei(&nd)) != 0) return (error); if (nd.ni_vp != NULL) { @@ -1553,7 +1558,7 @@ kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, return (error); } NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2, - segflg, path2, fd2, CAP_CREATE, td); + segflg, path2, fd2, CAP_LINKAT, td); if ((error = namei(&nd)) == 0) { if (nd.ni_vp != NULL) { if (nd.ni_dvp == nd.ni_vp) @@ -1646,7 +1651,7 @@ kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, - segflg, path2, fd, CAP_CREATE, td); + segflg, path2, fd, CAP_SYMLINKAT, td); if ((error = namei(&nd)) != 0) goto out; if (nd.ni_vp) { @@ -1798,7 +1803,7 @@ kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, - pathseg, path, fd, CAP_DELETE, td); + pathseg, path, fd, CAP_UNLINKAT, td); if ((error = namei(&nd)) != 0) return (error == EINVAL ? 
EPERM : error); vp = nd.ni_vp; @@ -3502,10 +3507,10 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, bwillwrite(); #ifdef MAC NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | - AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td); + AUDITVNODE1, pathseg, old, oldfd, CAP_RENAMEAT, td); #else NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, - pathseg, old, oldfd, CAP_DELETE, td); + pathseg, old, oldfd, CAP_RENAMEAT, td); #endif if ((error = namei(&fromnd)) != 0) @@ -3527,7 +3532,7 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, goto out1; } NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | - SAVESTART | AUDITVNODE2, pathseg, new, newfd, CAP_CREATE, td); + SAVESTART | AUDITVNODE2, pathseg, new, newfd, CAP_LINKAT, td); if (fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&tond)) != 0) { @@ -3550,6 +3555,15 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, error = EISDIR; goto out; } +#ifdef CAPABILITIES + /* + * If the target already exists we require CAP_UNLINKAT + * from 'newfd'. 
+ */ + error = cap_check(tond.ni_filecaps.fc_rights, CAP_UNLINKAT); + if (error != 0) + goto out; +#endif } if (fvp == tdvp) { error = EINVAL; @@ -3650,7 +3664,7 @@ kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, - segflg, path, fd, CAP_MKDIR, td); + segflg, path, fd, CAP_MKDIRAT, td); nd.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&nd)) != 0) return (error); @@ -3734,7 +3748,7 @@ kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, - pathseg, path, fd, CAP_RMDIR, td); + pathseg, path, fd, CAP_UNLINKAT, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; @@ -3987,8 +4001,7 @@ kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, if (count > IOSIZE_MAX) return (EINVAL); auio.uio_resid = count; - if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ | CAP_SEEK, - &fp)) != 0) + if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ, &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); @@ -4151,33 +4164,14 @@ out: * entry is held upon returning. */ int -getvnode(struct filedesc *fdp, int fd, cap_rights_t rights, - struct file **fpp) +getvnode(struct filedesc *fdp, int fd, cap_rights_t rights, struct file **fpp) { struct file *fp; -#ifdef CAPABILITIES - struct file *fp_fromcap; int error; -#endif - if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); -#ifdef CAPABILITIES - /* - * If the file descriptor is for a capability, test rights and use the - * file descriptor referenced by the capability. 
- */ - error = cap_funwrap(fp, rights, &fp_fromcap); - if (error) { - fdrop(fp, curthread); + error = fget_unlocked(fdp, fd, rights, 0, &fp, NULL); + if (error != 0) return (error); - } - if (fp != fp_fromcap) { - fhold(fp_fromcap); - fdrop(fp, curthread); - fp = fp_fromcap; - } -#endif /* CAPABILITIES */ /* * The file could be not of the vnode type, or it may be not @@ -4361,7 +4355,7 @@ sys_fhopen(td, uap) goto bad; } - error = finstall(td, fp, &indx, fmode); + error = finstall(td, fp, &indx, fmode, NULL); bad: fdrop(fp, td); td->td_retval[0] = indx; @@ -4614,7 +4608,7 @@ kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, return (EINVAL); } /* XXX: CAP_POSIX_FADVISE? */ - error = fget(td, fd, 0, &fp); + error = fget(td, fd, CAP_NONE, &fp); if (error != 0) goto out; diff --git a/sys/netsmb/smb_dev.c b/sys/netsmb/smb_dev.c index dfedd88..a09d74d 100644 --- a/sys/netsmb/smb_dev.c +++ b/sys/netsmb/smb_dev.c @@ -399,9 +399,7 @@ nsmb_getfp(struct filedesc* fdp, int fd, int flag) struct file* fp; FILEDESC_SLOCK(fdp); - if (fd < 0 || fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL || - (fp->f_flag & flag) == 0) { + if ((fp = fget_locked(fdp, fd)) == NULL || (fp->f_flag & flag) == 0) { FILEDESC_SUNLOCK(fdp); return (NULL); } diff --git a/sys/nfsserver/nfs_srvkrpc.c b/sys/nfsserver/nfs_srvkrpc.c index 64f2aaa..6b3a6b7 100644 --- a/sys/nfsserver/nfs_srvkrpc.c +++ b/sys/nfsserver/nfs_srvkrpc.c @@ -174,7 +174,8 @@ nfssvc_nfsserver(struct thread *td, struct nfssvc_args *uap) sizeof(addsockarg)); if (error) return (error); - if ((error = fget(td, addsockarg.sock, CAP_SOCK_ALL, &fp)) != 0) + error = fget(td, addsockarg.sock, CAP_SOCK_SERVER, &fp); + if (error) return (error); if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, td); diff --git a/sys/ofed/include/linux/file.h b/sys/ofed/include/linux/file.h index cbeec39..b9bd8b1 100644 --- a/sys/ofed/include/linux/file.h +++ b/sys/ofed/include/linux/file.h @@ -47,7 +47,8 @@ linux_fget(unsigned int fd) { 
struct file *file; - file = fget_unlocked(curthread->td_proc->p_fd, fd); + if (fget_unlocked(curthread->td_proc->p_fd, fd, 0, 0, &file, NULL) != 0) + return (NULL); return (struct linux_file *)file->f_data; } @@ -69,8 +70,7 @@ put_unused_fd(unsigned int fd) { struct file *file; - file = fget_unlocked(curthread->td_proc->p_fd, fd); - if (file == NULL) + if (fget_unlocked(curthread->td_proc->p_fd, fd, 0, 0, &file, NULL) != 0) return; fdclose(curthread->td_proc->p_fd, file, fd, curthread); } @@ -80,7 +80,8 @@ fd_install(unsigned int fd, struct linux_file *filp) { struct file *file; - file = fget_unlocked(curthread->td_proc->p_fd, fd); + if (fget_unlocked(curthread->td_proc->p_fd, fd, 0, 0, &file, NULL) != 0) + file = NULL; filp->_file = file; finit(file, filp->f_mode, DTYPE_DEV, filp, &linuxfileops); } diff --git a/sys/security/audit/audit.h b/sys/security/audit/audit.h index f43f6c8..733a3c7 100644 --- a/sys/security/audit/audit.h +++ b/sys/security/audit/audit.h @@ -115,6 +115,7 @@ void audit_arg_file(struct proc *p, struct file *fp); void audit_arg_argv(char *argv, int argc, int length); void audit_arg_envv(char *envv, int envc, int length); void audit_arg_rights(cap_rights_t rights); +void audit_arg_fcntl_rights(uint32_t fcntlrights); void audit_sysclose(struct thread *td, int fd); void audit_cred_copy(struct ucred *src, struct ucred *dest); void audit_cred_destroy(struct ucred *cred); @@ -241,6 +242,11 @@ void audit_thread_free(struct thread *td); audit_arg_rights((rights)); \ } while (0) +#define AUDIT_ARG_FCNTL_RIGHTS(fcntlrights) do { \ + if (AUDITING_TD(curthread)) \ + audit_arg_fcntl_rights((fcntlrights)); \ +} while (0) + #define AUDIT_ARG_RUID(ruid) do { \ if (AUDITING_TD(curthread)) \ audit_arg_ruid((ruid)); \ @@ -354,6 +360,7 @@ void audit_thread_free(struct thread *td); #define AUDIT_ARG_PROCESS(p) #define AUDIT_ARG_RGID(rgid) #define AUDIT_ARG_RIGHTS(rights) +#define AUDIT_ARG_FCNTL_RIGHTS(fcntlrights) #define AUDIT_ARG_RUID(ruid) #define 
AUDIT_ARG_SIGNUM(signum) #define AUDIT_ARG_SGID(sgid) diff --git a/sys/security/audit/audit_arg.c b/sys/security/audit/audit_arg.c index 41d6b42..ec04b8b 100644 --- a/sys/security/audit/audit_arg.c +++ b/sys/security/audit/audit_arg.c @@ -871,6 +871,19 @@ audit_arg_rights(cap_rights_t rights) ARG_SET_VALID(ar, ARG_RIGHTS); } +void +audit_arg_fcntl_rights(uint32_t fcntlrights) +{ + struct kaudit_record *ar; + + ar = currecord(); + if (ar == NULL) + return; + + ar->k_ar.ar_arg_fcntl_rights = fcntlrights; + ARG_SET_VALID(ar, ARG_FCNTL_RIGHTS); +} + /* * The close() system call uses it's own audit call to capture the path/vnode * information because those pieces are not easily obtained within the system diff --git a/sys/security/audit/audit_bsm.c b/sys/security/audit/audit_bsm.c index 8881cea..9c69b1e 100644 --- a/sys/security/audit/audit_bsm.c +++ b/sys/security/audit/audit_bsm.c @@ -1597,6 +1597,7 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau) break; case AUE_CAP_NEW: + case AUE_CAP_RIGHTS_LIMIT: /* * XXXRW/XXXJA: Would be nice to audit socket/etc information. 
*/ @@ -1607,13 +1608,25 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau) } break; - case AUE_CAP_GETRIGHTS: + case AUE_CAP_FCNTLS_GET: + case AUE_CAP_IOCTLS_GET: + case AUE_CAP_IOCTLS_LIMIT: + case AUE_CAP_RIGHTS_GET: if (ARG_IS_VALID(kar, ARG_FD)) { tok = au_to_arg32(1, "fd", ar->ar_arg_fd); kau_write(rec, tok); } break; + case AUE_CAP_FCNTLS_LIMIT: + FD_VNODE1_TOKENS; + if (ARG_IS_VALID(kar, ARG_FCNTL_RIGHTS)) { + tok = au_to_arg32(2, "fcntlrights", + ar->ar_arg_fcntl_rights); + kau_write(rec, tok); + } + break; + case AUE_CAP_ENTER: case AUE_CAP_GETMODE: break; diff --git a/sys/security/audit/audit_private.h b/sys/security/audit/audit_private.h index 10ccd5b..e23ba08 100644 --- a/sys/security/audit/audit_private.h +++ b/sys/security/audit/audit_private.h @@ -230,6 +230,7 @@ struct audit_record { int ar_arg_exitretval; struct sockaddr_storage ar_arg_sockaddr; cap_rights_t ar_arg_rights; + uint32_t ar_arg_fcntl_rights; char ar_jailname[MAXHOSTNAMELEN]; }; @@ -291,6 +292,7 @@ struct audit_record { #define ARG_ATFD1 0x0004000000000000ULL #define ARG_ATFD2 0x0008000000000000ULL #define ARG_RIGHTS 0x0010000000000000ULL +#define ARG_FCNTL_RIGHTS 0x0020000000000000ULL #define ARG_NONE 0x0000000000000000ULL #define ARG_ALL 0xFFFFFFFFFFFFFFFFULL diff --git a/sys/sys/capability.h b/sys/sys/capability.h index a163c4c..27e56c2 100644 --- a/sys/sys/capability.h +++ b/sys/sys/capability.h @@ -1,10 +1,14 @@ /*- * Copyright (c) 2008-2010 Robert N. M. Watson + * Copyright (c) 2012 FreeBSD Foundation * All rights reserved. * * This software was developed at the University of Cambridge Computer * Laboratory with support from a grant from Google, Inc. * + * Portions of this software were developed by Pawel Jakub Dawidek under + * sponsorship from the FreeBSD Foundation. 
+ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -36,9 +40,10 @@ #define _SYS_CAPABILITY_H_ #include -#include +#include #include +#include /* * Possible rights on capabilities. @@ -54,34 +59,69 @@ * involve reads or writes depending a great deal on context. */ -/* General file I/O. */ -#define CAP_READ 0x0000000000000001ULL /* read/recv */ -#define CAP_WRITE 0x0000000000000002ULL /* write/send */ -#define CAP_MMAP 0x0000000000000004ULL /* mmap */ -#define CAP_MAPEXEC 0x0000000000000008ULL /* mmap(2) as exec */ +#define CAP_NONE 0x0000000000000000ULL + +/* + * General file I/O. + */ +/* Allows for openat(O_RDONLY), read(2), readv(2). */ +#define CAP_READ 0x0000000000000001ULL +/* Allows for openat(O_WRONLY | O_APPEND), write(2), writev(2). */ +#define CAP_WRITE 0x0000000000000002ULL +/* Allows for lseek(2). */ +#define CAP_SEEK 0x0000000000000080ULL +/* Allows for pread(2), preadv(2). */ +#define CAP_PREAD (CAP_SEEK | CAP_READ) +/* Allows for openat(O_WRONLY) (without O_APPEND), pwrite(2), pwritev(2). */ +#define CAP_PWRITE (CAP_SEEK | CAP_WRITE) +/* Allows for mmap(PROT_NONE). */ +#define CAP_MMAP 0x0000000000000004ULL +/* Allows for mmap(PROT_READ). */ +#define CAP_MMAP_R (CAP_MMAP | CAP_SEEK | CAP_READ) +/* Allows for mmap(PROT_WRITE). */ +#define CAP_MMAP_W (CAP_MMAP | CAP_SEEK | CAP_WRITE) +/* Allows for mmap(PROT_EXEC). */ +#define CAP_MMAP_X (CAP_MMAP | CAP_SEEK | 0x0000000000000008ULL) +/* Allows for mmap(PROT_READ | PROT_WRITE). */ +#define CAP_MMAP_RW (CAP_MMAP_R | CAP_MMAP_W) +/* Allows for mmap(PROT_READ | PROT_EXEC). */ +#define CAP_MMAP_RX (CAP_MMAP_R | CAP_MMAP_X) +/* Allows for mmap(PROT_WRITE | PROT_EXEC). */ +#define CAP_MMAP_WX (CAP_MMAP_W | CAP_MMAP_X) +/* Allows for mmap(PROT_READ | PROT_WRITE | PROT_EXEC). */ +#define CAP_MMAP_RWX (CAP_MMAP_R | CAP_MMAP_W | CAP_MMAP_X) +/* Allows for openat(O_CREAT). 
*/ +#define CAP_CREATE 0x0000000000080000ULL +/* Allows for openat(O_EXEC) and fexecve(2) in turn. */ #define CAP_FEXECVE 0x0000000000000010ULL +/* Allows for openat(O_SYNC), openat(O_FSYNC), fsync(2). */ #define CAP_FSYNC 0x0000000000000020ULL +/* Allows for openat(O_TRUNC), ftruncate(2). */ #define CAP_FTRUNCATE 0x0000000000000040ULL -#define CAP_SEEK 0x0000000000000080ULL /* VFS methods. */ -#define CAP_FCHFLAGS 0x0000000000000100ULL #define CAP_FCHDIR 0x0000000000000200ULL +#define CAP_FCHFLAGS 0x0000000000000100ULL #define CAP_FCHMOD 0x0000000000000400ULL +#define CAP_FCHMODAT CAP_FCHMOD #define CAP_FCHOWN 0x0000000000000800ULL +#define CAP_FCHOWNAT CAP_FCHOWN #define CAP_FCNTL 0x0000000000001000ULL -#define CAP_FPATHCONF 0x0000000000002000ULL #define CAP_FLOCK 0x0000000000004000ULL +#define CAP_FPATHCONF 0x0000000000002000ULL #define CAP_FSCK 0x0000000000008000ULL #define CAP_FSTAT 0x0000000000010000ULL +#define CAP_FSTATAT CAP_FSTAT #define CAP_FSTATFS 0x0000000000020000ULL #define CAP_FUTIMES 0x0000000000040000ULL -#define CAP_CREATE 0x0000000000080000ULL -#define CAP_DELETE 0x0000000000100000ULL -#define CAP_MKDIR 0x0000000000200000ULL -#define CAP_RMDIR 0x0000000000400000ULL -#define CAP_MKFIFO 0x0000000000800000ULL -#define CAP_MKNOD 0x0080000000000000ULL +#define CAP_FUTIMESAT CAP_FUTIMES +#define CAP_LINKAT 0x0000000000400000ULL +#define CAP_MKDIRAT 0x0000000000200000ULL +#define CAP_MKFIFOAT 0x0000000000800000ULL +#define CAP_MKNODAT 0x0080000000000000ULL +#define CAP_RENAMEAT 0x0200000000000000ULL +#define CAP_SYMLINKAT 0x0100000000000000ULL +#define CAP_UNLINKAT 0x0000000000100000ULL /* Lookups - used to constrain *at() calls. 
*/ #define CAP_LOOKUP 0x0000000001000000ULL @@ -107,13 +147,18 @@ #define CAP_GETSOCKOPT 0x0000004000000000ULL #define CAP_LISTEN 0x0000008000000000ULL #define CAP_PEELOFF 0x0000010000000000ULL +#define CAP_RECV CAP_READ +#define CAP_SEND CAP_WRITE #define CAP_SETSOCKOPT 0x0000020000000000ULL #define CAP_SHUTDOWN 0x0000040000000000ULL -#define CAP_SOCK_ALL \ - (CAP_ACCEPT | CAP_BIND | CAP_CONNECT \ - | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT \ - | CAP_LISTEN | CAP_PEELOFF | CAP_SETSOCKOPT | CAP_SHUTDOWN) +#define CAP_SOCK_CLIENT \ + (CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \ + CAP_PEELOFF | CAP_RECV | CAP_SEND | CAP_SETSOCKOPT | CAP_SHUTDOWN) +#define CAP_SOCK_SERVER \ + (CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \ + CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_RECV | CAP_SEND | \ + CAP_SETSOCKOPT | CAP_SHUTDOWN) /* Mandatory Access Control. */ #define CAP_MAC_GET 0x0000080000000000ULL @@ -138,40 +183,77 @@ #define CAP_PDKILL 0x0040000000000000ULL /* The mask of all valid method rights. */ -#define CAP_MASK_VALID 0x00ffffffffffffffULL +#define CAP_MASK_VALID 0x03ffffffffffffffULL +#define CAP_ALL CAP_MASK_VALID -#ifdef _KERNEL +/* Available bits. */ +#define CAP_UNUSED5 0x0400000000000000ULL +#define CAP_UNUSED4 0x0800000000000000ULL +#define CAP_UNUSED3 0x1000000000000000ULL +#define CAP_UNUSED2 0x2000000000000000ULL +#define CAP_UNUSED1 0x4000000000000000ULL +#define CAP_UNUSED0 0x8000000000000000ULL -#define IN_CAPABILITY_MODE(td) (td->td_ucred->cr_flags & CRED_FLAG_CAPMODE) +/* + * The following defines are provided for backward API compatibility and + * should not be used in new code. + */ +#define CAP_MAPEXEC CAP_MMAP_X +#define CAP_DELETE CAP_UNLINKAT +#define CAP_MKDIR CAP_MKDIRAT +#define CAP_RMDIR CAP_UNLINKAT +#define CAP_MKFIFO CAP_MKFIFOAT +#define CAP_MKNOD CAP_MKNODAT +#define CAP_SOCK_ALL (CAP_SOCK_CLIENT | CAP_SOCK_SERVER) /* - * Create a capability to wrap a file object. 
+ * Allowed fcntl(2) commands. */ -int kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights, - int *capfd); +#define CAP_FCNTL_GETFL (1 << F_GETFL) +#define CAP_FCNTL_SETFL (1 << F_SETFL) +#if __BSD_VISIBLE || __XSI_VISIBLE || __POSIX_VISIBLE >= 200112 +#define CAP_FCNTL_GETOWN (1 << F_GETOWN) +#define CAP_FCNTL_SETOWN (1 << F_SETOWN) +#endif +#if __BSD_VISIBLE || __XSI_VISIBLE || __POSIX_VISIBLE >= 200112 +#define CAP_FCNTL_ALL (CAP_FCNTL_GETFL | CAP_FCNTL_SETFL | \ + CAP_FCNTL_GETOWN | CAP_FCNTL_SETOWN) +#else +#define CAP_FCNTL_ALL (CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) +#endif + +#define CAP_IOCTLS_ALL SSIZE_MAX + +#ifdef _KERNEL + +#include + +#define IN_CAPABILITY_MODE(td) ((td->td_ucred->cr_flags & CRED_FLAG_CAPMODE) != 0) + +struct filedesc; /* - * Unwrap a capability if its rights mask is a superset of 'rights'. - * - * Unwrapping a non-capability is effectively a no-op; the value of fp_cap - * is simply copied into fpp. + * Test whether a capability grants the requested rights. + */ +int cap_check(cap_rights_t have, cap_rights_t need); +/* + * Convert capability rights into VM access flags. */ -int cap_funwrap(struct file *fp_cap, cap_rights_t rights, - struct file **fpp); -int cap_funwrap_mmap(struct file *fp_cap, cap_rights_t rights, - u_char *maxprotp, struct file **fpp); +u_char cap_rights_to_vmprot(cap_rights_t have); /* * For the purposes of procstat(1) and similar tools, allow kern_descrip.c to - * extract the rights from a capability. However, this should not be used by - * kernel code generally, instead cap_funwrap() should be used in order to - * keep all access control in one place. + * extract the rights from a capability. 
*/ -cap_rights_t cap_rights(struct file *fp_cap); +cap_rights_t cap_rights(struct filedesc *fdp, int fd); + +int cap_ioctl_check(struct filedesc *fdp, int fd, u_long cmd); +int cap_fcntl_check(struct filedesc *fdp, int fd, int cmd); #else /* !_KERNEL */ __BEGIN_DECLS +#include /* * cap_enter(): Cause the process to enter capability mode, which will @@ -187,21 +269,46 @@ __BEGIN_DECLS int cap_enter(void); /* - * cap_getmode(): Are we in capability mode? + * Are we sandboxed (in capability mode)? + * This is a libc wrapper around the cap_getmode(2) system call. */ -int cap_getmode(u_int* modep); +bool cap_sandboxed(void); /* - * cap_new(): Create a new capability derived from an existing file - * descriptor with the specified rights. If the existing file descriptor is - * a capability, then the new rights must be a subset of the existing rights. + * cap_getmode(): Are we in capability mode? */ -int cap_new(int fd, cap_rights_t rights); +int cap_getmode(u_int *modep); /* - * cap_getrights(): Query the rights on a capability. + * Limits capability rights for the given descriptor (CAP_*). + */ +int cap_rights_limit(int fd, cap_rights_t rights); +/* + * Returns bitmask of capability rights for the given descriptor. */ -int cap_getrights(int fd, cap_rights_t *rightsp); +int cap_rights_get(int fd, cap_rights_t *rightsp); +/* + * Limits allowed ioctls for the given descriptor. + */ +int cap_ioctls_limit(int fd, const unsigned long *cmds, size_t ncmds); +/* + * Returns array of allowed ioctls for the given descriptor. + * If all ioctls are allowed, the cmds array is not populated and + * the function returns CAP_IOCTLS_ALL. + */ +ssize_t cap_ioctls_get(int fd, unsigned long *cmds, size_t maxcmds); +/* + * Limits allowed fcntls for the given descriptor (CAP_FCNTL_*). + */ +int cap_fcntls_limit(int fd, uint32_t fcntlrights); +/* + * Returns bitmask of allowed fcntls for the given descriptor. 
+ */ +int cap_fcntls_get(int fd, uint32_t *fcntlrightsp); + +/* For backward compatibility. */ +int cap_new(int fd, cap_rights_t rights); +#define cap_getrights(fd, rightsp) cap_rights_get((fd), (rightsp)) __END_DECLS diff --git a/sys/sys/file.h b/sys/sys/file.h index cf5f1ea..cfdc1d8 100644 --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -64,12 +64,12 @@ struct socket; #define DTYPE_SEM 9 /* posix semaphore */ #define DTYPE_PTS 10 /* pseudo teletype master device */ #define DTYPE_DEV 11 /* Device specific fd type */ -#define DTYPE_CAPABILITY 12 /* capability */ -#define DTYPE_PROCDESC 13 /* process descriptor */ +#define DTYPE_PROCDESC 12 /* process descriptor */ #ifdef _KERNEL struct file; +struct filecaps; struct ucred; #define FOF_OFFSET 0x01 /* Use the offset in uio argument */ @@ -217,7 +217,6 @@ int fget_read(struct thread *td, int fd, cap_rights_t rights, struct file **fpp); int fget_write(struct thread *td, int fd, cap_rights_t rights, struct file **fpp); -int fgetcap(struct thread *td, int fd, struct file **fpp); int _fdrop(struct file *fp, struct thread *td); /* @@ -242,7 +241,7 @@ int fgetvp(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp); int fgetvp_exec(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp); int fgetvp_rights(struct thread *td, int fd, cap_rights_t need, - cap_rights_t *have, struct vnode **vpp); + struct filecaps *havecaps, struct vnode **vpp); int fgetvp_read(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp); int fgetvp_write(struct thread *td, int fd, cap_rights_t rights, diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h index 07cfb1f..f3e3a09 100644 --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -41,6 +41,23 @@ #include +struct filecaps { + cap_rights_t fc_rights; /* per-descriptor capability rights */ + uint32_t fc_fcntls; /* per-descriptor allowed fcntls */ + u_long *fc_ioctls; /* per-descriptor allowed ioctls */ + int16_t fc_nioctls; /* fc_ioctls array size */ +}; + 
+struct filedescent { + struct file *fde_file; /* file structure for open file */ + struct filecaps fde_caps; /* per-descriptor rights */ + uint8_t fde_flags; /* per-process open file flags */ +}; +#define fde_rights fde_caps.fc_rights +#define fde_fcntls fde_caps.fc_fcntls +#define fde_ioctls fde_caps.fc_ioctls +#define fde_nioctls fde_caps.fc_nioctls + /* * This structure is used for the management of descriptors. It may be * shared by multiple processes. @@ -48,8 +65,7 @@ #define NDSLOTTYPE u_long struct filedesc { - struct file **fd_ofiles; /* file structures for open files */ - char *fd_ofileflags; /* per-process open file flags */ + struct filedescent *fd_ofiles; /* open files */ struct vnode *fd_cdir; /* current directory */ struct vnode *fd_rdir; /* root directory */ struct vnode *fd_jdir; /* jail root directory */ @@ -92,6 +108,15 @@ struct filedesc_to_leader { #ifdef _KERNEL +#include /* CTASSERT() */ + +CTASSERT(sizeof(cap_rights_t) == sizeof(uint64_t)); + +/* Flags for do_dup() */ +#define DUP_FIXED 0x1 /* Force fixed allocation. */ +#define DUP_FCNTL 0x2 /* fcntl()-style errors. */ +#define DUP_CLOEXEC 0x4 /* Atomically set FD_CLOEXEC. */ + /* Lock a file descriptor table. 
*/ #define FILEDESC_LOCK_INIT(fdp) sx_init(&(fdp)->fd_sx, "filedesc structure") #define FILEDESC_LOCK_DESTROY(fdp) sx_destroy(&(fdp)->fd_sx) @@ -109,13 +134,20 @@ struct filedesc_to_leader { struct thread; +void filecaps_init(struct filecaps *fcaps); +void filecaps_copy(const struct filecaps *src, struct filecaps *dst); +void filecaps_free(struct filecaps *fcaps); + int closef(struct file *fp, struct thread *td); +int do_dup(struct thread *td, int flags, int old, int new, + register_t *retval); int dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, int openerror, int *indxp); int falloc(struct thread *td, struct file **resultfp, int *resultfd, int flags); int falloc_noinstall(struct thread *td, struct file **resultfp); -int finstall(struct thread *td, struct file *fp, int *resultfp, int flags); +int finstall(struct thread *td, struct file *fp, int *resultfp, int flags, + struct filecaps *fcaps); int fdalloc(struct thread *td, int minfd, int *result); int fdavail(struct thread *td, int n); int fdcheckstd(struct thread *td); @@ -123,7 +155,7 @@ void fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td); void fdcloseexec(struct thread *td); struct filedesc *fdcopy(struct filedesc *fdp); void fdunshare(struct proc *p, struct thread *td); -void fdfree(struct thread *td); +void fdescfree(struct thread *td); struct filedesc *fdinit(struct filedesc *fdp); struct filedesc *fdshare(struct filedesc *fdp); struct filedesc_to_leader * @@ -135,7 +167,8 @@ void mountcheckdirs(struct vnode *olddp, struct vnode *newdp); void setugidsafety(struct thread *td); /* Return a referenced file from an unlocked descriptor. */ -struct file *fget_unlocked(struct filedesc *fdp, int fd); +int fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t needrights, + int needfcntl, struct file **fpp, cap_rights_t *haverightsp); /* Requires a FILEDESC_{S,X}LOCK held and returns without a ref. 
*/ static __inline struct file * @@ -147,7 +180,7 @@ fget_locked(struct filedesc *fdp, int fd) if (fd < 0 || fd >= fdp->fd_nfiles) return (NULL); - return (fdp->fd_ofiles[fd]); + return (fdp->fd_ofiles[fd].fde_file); } #endif /* _KERNEL */ diff --git a/sys/sys/namei.h b/sys/sys/namei.h index 3b43916..a9992f4 100644 --- a/sys/sys/namei.h +++ b/sys/sys/namei.h @@ -33,6 +33,7 @@ #ifndef _SYS_NAMEI_H_ #define _SYS_NAMEI_H_ +#include #include #include @@ -75,7 +76,7 @@ struct nameidata { /* * Results: returned from namei */ - cap_rights_t ni_baserights; /* rights the *at base has (or -1) */ + struct filecaps ni_filecaps; /* rights the *at base has */ /* * Results: returned from/manipulated by lookup */ @@ -180,7 +181,7 @@ NDINIT_ALL(struct nameidata *ndp, ndp->ni_startdir = startdir; ndp->ni_strictrelative = 0; ndp->ni_rightsneeded = rights; - ndp->ni_baserights = 0; + filecaps_init(&ndp->ni_filecaps); ndp->ni_cnd.cn_thread = td; } diff --git a/sys/sys/user.h b/sys/sys/user.h index ddaccb8..5de76ac 100644 --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -251,8 +251,7 @@ struct user { #define KF_TYPE_SHM 8 #define KF_TYPE_SEM 9 #define KF_TYPE_PTS 10 -/* no KF_TYPE_CAPABILITY (11), since capabilities wrap other file objects */ -#define KF_TYPE_PROCDESC 12 +#define KF_TYPE_PROCDESC 11 #define KF_TYPE_UNKNOWN 255 #define KF_VTYPE_VNON 0 @@ -288,7 +287,6 @@ struct user { #define KF_FLAG_TRUNC 0x00001000 #define KF_FLAG_EXCL 0x00002000 #define KF_FLAG_EXEC 0x00004000 -#define KF_FLAG_CAPABILITY 0x00008000 /* * Old format. Has variable hidden padding due to alignment. 
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 05bb8ae..cf94fe5 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -305,13 +305,13 @@ sys_mmap(td, uap) */ rights = CAP_MMAP; if (prot & PROT_READ) - rights |= CAP_READ; + rights |= CAP_MMAP_R; if ((flags & MAP_SHARED) != 0) { if (prot & PROT_WRITE) - rights |= CAP_WRITE; + rights |= CAP_MMAP_W; } if (prot & PROT_EXEC) - rights |= CAP_MAPEXEC; + rights |= CAP_MMAP_X; if ((error = fget_mmap(td, uap->fd, rights, &cap_maxprot, &fp)) != 0) goto done; diff --git a/usr.bin/kdump/kdump.c b/usr.bin/kdump/kdump.c index 2769832..82317f2 100644 --- a/usr.bin/kdump/kdump.c +++ b/usr.bin/kdump/kdump.c @@ -1008,6 +1008,7 @@ ktrsyscall(struct ktr_syscall *ktr, u_int flags) narg--; break; case SYS_cap_new: + case SYS_cap_rights_limit: print_number(ip, narg, c); putchar(','); arg = *ip; @@ -1035,6 +1036,14 @@ ktrsyscall(struct ktr_syscall *ktr, u_int flags) } capname(arg); break; + case SYS_cap_fcntls_limit: + print_number(ip, narg, c); + putchar(','); + arg = *ip; + ip++; + narg--; + capfcntlname(arg); + break; case SYS_posix_fadvise: print_number(ip, narg, c); print_number(ip, narg, c); diff --git a/usr.bin/kdump/mksubr b/usr.bin/kdump/mksubr index d56f030..aed8291 100644 --- a/usr.bin/kdump/mksubr +++ b/usr.bin/kdump/mksubr @@ -361,6 +361,7 @@ _EOF_ auto_or_type "accessmodename" "[A-Z]_OK[[:space:]]+0?x?[0-9A-Fa-f]+" "sys/unistd.h" auto_switch_type "acltypename" "ACL_TYPE_[A-Z4_]+[[:space:]]+0x[0-9]+" "sys/acl.h" auto_or_type "capname" "CAP_[A-Z]+[[:space:]]+0x[01248]{16}ULL" "sys/capability.h" +auto_or_type "capfcntlname" "CAP_FCNTL_[A-Z]+[[:space:]]+\(1" "sys/capability.h" auto_switch_type "extattrctlname" "EXTATTR_NAMESPACE_[A-Z]+[[:space:]]+0x[0-9]+" "sys/extattr.h" auto_switch_type "fadvisebehavname" "POSIX_FADV_[A-Z]+[[:space:]]+[0-9]+" "sys/fcntl.h" auto_or_type "flagsname" "O_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/fcntl.h" diff --git a/usr.bin/procstat/procstat_files.c b/usr.bin/procstat/procstat_files.c index 
3773900..030bba1 100644 --- a/usr.bin/procstat/procstat_files.c +++ b/usr.bin/procstat/procstat_files.c @@ -139,33 +139,34 @@ static struct cap_desc { /* General file I/O. */ { CAP_READ, "rd" }, { CAP_WRITE, "wr" }, + { CAP_SEEK, "se" }, { CAP_MMAP, "mm" }, - { CAP_MAPEXEC, "me" }, + { CAP_CREATE, "cr" }, { CAP_FEXECVE, "fe" }, { CAP_FSYNC, "fy" }, { CAP_FTRUNCATE, "ft" }, - { CAP_SEEK, "se" }, /* VFS methods. */ - { CAP_FCHFLAGS, "cf" }, { CAP_FCHDIR, "cd" }, + { CAP_FCHFLAGS, "cf" }, { CAP_FCHMOD, "cm" }, { CAP_FCHOWN, "cn" }, { CAP_FCNTL, "fc" }, - { CAP_FPATHCONF, "fp" }, { CAP_FLOCK, "fl" }, + { CAP_FPATHCONF, "fp" }, { CAP_FSCK, "fk" }, { CAP_FSTAT, "fs" }, { CAP_FSTATFS, "sf" }, { CAP_FUTIMES, "fu" }, - { CAP_CREATE, "cr" }, - { CAP_DELETE, "de" }, - { CAP_MKDIR, "md" }, - { CAP_RMDIR, "rm" }, - { CAP_MKFIFO, "mf" }, - { CAP_MKNOD, "mn" }, - - /* Lookups - used to constraint *at() calls. */ + { CAP_LINKAT, "li" }, + { CAP_MKDIRAT, "md" }, + { CAP_MKFIFOAT, "mf" }, + { CAP_MKNODAT, "mn" }, + { CAP_RENAMEAT, "rn" }, + { CAP_SYMLINKAT, "sl" }, + { CAP_UNLINKAT, "un" }, + + /* Lookups - used to constrain *at() calls. */ { CAP_LOOKUP, "lo" }, /* Extended attributes. */ @@ -213,6 +214,24 @@ static struct cap_desc { { CAP_PDGETPID, "pg" }, { CAP_PDWAIT, "pw" }, { CAP_PDKILL, "pk" }, + + /* Aliases and defines that combine multiple rights. 
*/ + { CAP_PREAD, "prd" }, + { CAP_PWRITE, "pwr" }, + + { CAP_MMAP_R, "mmr" }, + { CAP_MMAP_W, "mmw" }, + { CAP_MMAP_X, "mmx" }, + { CAP_MMAP_RW, "mrw" }, + { CAP_MMAP_RX, "mrx" }, + { CAP_MMAP_WX, "mwx" }, + { CAP_MMAP_RWX, "mma" }, + + { CAP_RECV, "re" }, + { CAP_SEND, "sd" }, + + { CAP_SOCK_CLIENT, "scl" }, + { CAP_SOCK_SERVER, "ssr" }, }; static const u_int cap_desc_count = sizeof(cap_desc) / sizeof(cap_desc[0]); @@ -225,7 +244,7 @@ width_capability(cap_rights_t rights) count = 0; width = 0; for (i = 0; i < cap_desc_count; i++) { - if (rights & cap_desc[i].cd_right) { + if ((cap_desc[i].cd_right & ~rights) == 0) { width += strlen(cap_desc[i].cd_desc); if (count) width++; @@ -249,7 +268,7 @@ print_capability(cap_rights_t rights, u_int capwidth) printf("-"); } for (i = 0; i < cap_desc_count; i++) { - if (rights & cap_desc[i].cd_right) { + if ((cap_desc[i].cd_right & ~rights) == 0) { printf("%s%s", count ? "," : "", cap_desc[i].cd_desc); width += strlen(cap_desc[i].cd_desc); if (count) @@ -261,7 +280,7 @@ print_capability(cap_rights_t rights, u_int capwidth) void procstat_files(struct procstat *procstat, struct kinfo_proc *kipp) -{ +{ struct sockstat sock; struct filestat_list *head; struct filestat *fst; @@ -423,8 +442,6 @@ procstat_files(struct procstat *procstat, struct kinfo_proc *kipp) printf("%s", fst->fs_fflags & PS_FST_FFLAG_NONBLOCK ? "n" : "-"); printf("%s", fst->fs_fflags & PS_FST_FFLAG_DIRECT ? "d" : "-"); printf("%s", fst->fs_fflags & PS_FST_FFLAG_HASLOCK ? "l" : "-"); - printf("%s ", fst->fs_fflags & PS_FST_FFLAG_CAPABILITY ? - "c" : "-"); if (!Cflag) { if (fst->fs_ref_count > -1) printf("%3d ", fst->fs_ref_count); -- cgit v1.1 From 48e0f13795816420f399e2606735c7ef9dab3d7c Mon Sep 17 00:00:00 2001 From: pjd Date: Sat, 2 Mar 2013 00:55:09 +0000 Subject: Regen after r247602. 
--- sys/compat/freebsd32/freebsd32_proto.h | 2 +- sys/compat/freebsd32/freebsd32_syscall.h | 11 +- sys/compat/freebsd32/freebsd32_syscalls.c | 9 +- sys/compat/freebsd32/freebsd32_sysent.c | 9 +- sys/compat/freebsd32/freebsd32_systrace_args.c | 146 ++++++++++++++++++++++++- sys/kern/init_sysent.c | 11 +- sys/kern/syscalls.c | 9 +- sys/kern/systrace_args.c | 146 ++++++++++++++++++++++++- sys/sys/syscall.h | 11 +- sys/sys/syscall.mk | 11 +- sys/sys/sysproto.h | 40 ++++++- 11 files changed, 374 insertions(+), 31 deletions(-) diff --git a/sys/compat/freebsd32/freebsd32_proto.h b/sys/compat/freebsd32/freebsd32_proto.h index 2b241df..52156bd 100644 --- a/sys/compat/freebsd32/freebsd32_proto.h +++ b/sys/compat/freebsd32/freebsd32_proto.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: head/sys/compat/freebsd32/syscalls.master 242958 2012-11-13 12:52:31Z kib + * created from FreeBSD: head/sys/compat/freebsd32/syscalls.master 247602 2013-03-02 00:53:12Z pjd */ #ifndef _FREEBSD32_SYSPROTO_H_ diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h index 76311ba..4b689d3 100644 --- a/sys/compat/freebsd32/freebsd32_syscall.h +++ b/sys/compat/freebsd32/freebsd32_syscall.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD: head/sys/compat/freebsd32/syscalls.master 242958 2012-11-13 12:52:31Z kib + * created from FreeBSD: head/sys/compat/freebsd32/syscalls.master 247602 2013-03-02 00:53:12Z pjd */ #define FREEBSD32_SYS_syscall 0 @@ -416,7 +416,7 @@ #define FREEBSD32_SYS_freebsd32_shmctl 512 #define FREEBSD32_SYS_lpathconf 513 #define FREEBSD32_SYS_cap_new 514 -#define FREEBSD32_SYS_cap_getrights 515 +#define FREEBSD32_SYS_cap_rights_get 515 #define FREEBSD32_SYS_cap_enter 516 #define FREEBSD32_SYS_cap_getmode 517 #define FREEBSD32_SYS_freebsd32_pselect 522 @@ -430,4 +430,9 @@ #define FREEBSD32_SYS_freebsd32_posix_fallocate 530 #define FREEBSD32_SYS_freebsd32_posix_fadvise 531 #define FREEBSD32_SYS_freebsd32_wait6 532 -#define FREEBSD32_SYS_MAXSYSCALL 533 +#define FREEBSD32_SYS_cap_rights_limit 533 +#define FREEBSD32_SYS_cap_ioctls_limit 534 +#define FREEBSD32_SYS_cap_ioctls_get 535 +#define FREEBSD32_SYS_cap_fcntls_limit 536 +#define FREEBSD32_SYS_cap_fcntls_get 537 +#define FREEBSD32_SYS_MAXSYSCALL 538 diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c index 499b3e8..4878b200 100644 --- a/sys/compat/freebsd32/freebsd32_syscalls.c +++ b/sys/compat/freebsd32/freebsd32_syscalls.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD: head/sys/compat/freebsd32/syscalls.master 242958 2012-11-13 12:52:31Z kib + * created from FreeBSD: head/sys/compat/freebsd32/syscalls.master 247602 2013-03-02 00:53:12Z pjd */ const char *freebsd32_syscallnames[] = { @@ -538,7 +538,7 @@ const char *freebsd32_syscallnames[] = { "freebsd32_shmctl", /* 512 = freebsd32_shmctl */ "lpathconf", /* 513 = lpathconf */ "cap_new", /* 514 = cap_new */ - "cap_getrights", /* 515 = cap_getrights */ + "cap_rights_get", /* 515 = cap_rights_get */ "cap_enter", /* 516 = cap_enter */ "cap_getmode", /* 517 = cap_getmode */ "#518", /* 518 = pdfork */ @@ -556,4 +556,9 @@ const char *freebsd32_syscallnames[] = { "freebsd32_posix_fallocate", /* 530 = freebsd32_posix_fallocate */ "freebsd32_posix_fadvise", /* 531 = freebsd32_posix_fadvise */ "freebsd32_wait6", /* 532 = freebsd32_wait6 */ + "cap_rights_limit", /* 533 = cap_rights_limit */ + "cap_ioctls_limit", /* 534 = cap_ioctls_limit */ + "cap_ioctls_get", /* 535 = cap_ioctls_get */ + "cap_fcntls_limit", /* 536 = cap_fcntls_limit */ + "cap_fcntls_get", /* 537 = cap_fcntls_get */ }; diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c index 814d88e..65da2c7 100644 --- a/sys/compat/freebsd32/freebsd32_sysent.c +++ b/sys/compat/freebsd32/freebsd32_sysent.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD: head/sys/compat/freebsd32/syscalls.master 242958 2012-11-13 12:52:31Z kib + * created from FreeBSD: head/sys/compat/freebsd32/syscalls.master 247602 2013-03-02 00:53:12Z pjd */ #include "opt_compat.h" @@ -575,7 +575,7 @@ struct sysent freebsd32_sysent[] = { { AS(freebsd32_shmctl_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 512 = freebsd32_shmctl */ { AS(lpathconf_args), (sy_call_t *)sys_lpathconf, AUE_LPATHCONF, NULL, 0, 0, 0, SY_THR_STATIC }, /* 513 = lpathconf */ { AS(cap_new_args), (sy_call_t *)sys_cap_new, AUE_CAP_NEW, NULL, 0, 0, 0, SY_THR_STATIC }, /* 514 = cap_new */ - { AS(cap_getrights_args), (sy_call_t *)sys_cap_getrights, AUE_CAP_GETRIGHTS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 515 = cap_getrights */ + { AS(cap_rights_get_args), (sy_call_t *)sys_cap_rights_get, AUE_CAP_RIGHTS_GET, NULL, 0, 0, 0, SY_THR_STATIC }, /* 515 = cap_rights_get */ { 0, (sy_call_t *)sys_cap_enter, AUE_CAP_ENTER, NULL, 0, 0, 0, SY_THR_STATIC }, /* 516 = cap_enter */ { AS(cap_getmode_args), (sy_call_t *)sys_cap_getmode, AUE_CAP_GETMODE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 517 = cap_getmode */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 518 = pdfork */ @@ -593,4 +593,9 @@ struct sysent freebsd32_sysent[] = { { AS(freebsd32_posix_fallocate_args), (sy_call_t *)freebsd32_posix_fallocate, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 530 = freebsd32_posix_fallocate */ { AS(freebsd32_posix_fadvise_args), (sy_call_t *)freebsd32_posix_fadvise, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 531 = freebsd32_posix_fadvise */ { AS(freebsd32_wait6_args), (sy_call_t *)freebsd32_wait6, AUE_WAIT6, NULL, 0, 0, 0, SY_THR_STATIC }, /* 532 = freebsd32_wait6 */ + { AS(cap_rights_limit_args), (sy_call_t *)sys_cap_rights_limit, AUE_CAP_RIGHTS_LIMIT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 533 = cap_rights_limit */ + { AS(cap_ioctls_limit_args), (sy_call_t *)sys_cap_ioctls_limit, AUE_CAP_IOCTLS_LIMIT, NULL, 0, 0, 0, 
SY_THR_STATIC }, /* 534 = cap_ioctls_limit */ + { AS(cap_ioctls_get_args), (sy_call_t *)sys_cap_ioctls_get, AUE_CAP_IOCTLS_GET, NULL, 0, 0, 0, SY_THR_STATIC }, /* 535 = cap_ioctls_get */ + { AS(cap_fcntls_limit_args), (sy_call_t *)sys_cap_fcntls_limit, AUE_CAP_FCNTLS_LIMIT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 536 = cap_fcntls_limit */ + { AS(cap_fcntls_get_args), (sy_call_t *)sys_cap_fcntls_get, AUE_CAP_FCNTLS_GET, NULL, 0, 0, 0, SY_THR_STATIC }, /* 537 = cap_fcntls_get */ }; diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c b/sys/compat/freebsd32/freebsd32_systrace_args.c index 36a646c..e2d7adb 100644 --- a/sys/compat/freebsd32/freebsd32_systrace_args.c +++ b/sys/compat/freebsd32/freebsd32_systrace_args.c @@ -2956,9 +2956,9 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 2; break; } - /* cap_getrights */ + /* cap_rights_get */ case 515: { - struct cap_getrights_args *p = params; + struct cap_rights_get_args *p = params; iarg[0] = p->fd; /* int */ uarg[1] = (intptr_t) p->rightsp; /* uint64_t * */ *n_args = 2; @@ -3088,6 +3088,48 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 6; break; } + /* cap_rights_limit */ + case 533: { + struct cap_rights_limit_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = p->rights; /* uint64_t */ + *n_args = 2; + break; + } + /* cap_ioctls_limit */ + case 534: { + struct cap_ioctls_limit_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = (intptr_t) p->cmds; /* const u_long * */ + uarg[2] = p->ncmds; /* size_t */ + *n_args = 3; + break; + } + /* cap_ioctls_get */ + case 535: { + struct cap_ioctls_get_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = (intptr_t) p->cmds; /* u_long * */ + uarg[2] = p->maxcmds; /* size_t */ + *n_args = 3; + break; + } + /* cap_fcntls_limit */ + case 536: { + struct cap_fcntls_limit_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = p->fcntlrights; /* uint32_t */ + *n_args = 2; + break; + } + /* 
cap_fcntls_get */ + case 537: { + struct cap_fcntls_get_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = (intptr_t) p->fcntlrightsp; /* uint32_t * */ + *n_args = 2; + break; + } default: *n_args = 0; break; @@ -8002,7 +8044,7 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; - /* cap_getrights */ + /* cap_rights_get */ case 515: switch(ndx) { case 0: @@ -8243,6 +8285,77 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; + /* cap_rights_limit */ + case 533: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "uint64_t"; + break; + default: + break; + }; + break; + /* cap_ioctls_limit */ + case 534: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "const u_long *"; + break; + case 2: + p = "size_t"; + break; + default: + break; + }; + break; + /* cap_ioctls_get */ + case 535: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "u_long *"; + break; + case 2: + p = "size_t"; + break; + default: + break; + }; + break; + /* cap_fcntls_limit */ + case 536: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "uint32_t"; + break; + default: + break; + }; + break; + /* cap_fcntls_get */ + case 537: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "uint32_t *"; + break; + default: + break; + }; + break; default: break; }; @@ -9938,7 +10051,7 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) if (ndx == 0 || ndx == 1) p = "int"; break; - /* cap_getrights */ + /* cap_rights_get */ case 515: if (ndx == 0 || ndx == 1) p = "int"; @@ -10005,6 +10118,31 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) if (ndx == 0 || ndx == 1) p = "int"; break; + /* cap_rights_limit */ + case 533: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* cap_ioctls_limit */ + case 534: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* cap_ioctls_get */ + case 535: + if (ndx == 0 || 
ndx == 1) + p = "ssize_t"; + break; + /* cap_fcntls_limit */ + case 536: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* cap_fcntls_get */ + case 537: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c index ce47f56..b5ed57b 100644 --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: head/sys/kern/syscalls.master 242958 2012-11-13 12:52:31Z kib + * created from FreeBSD: head/sys/kern/syscalls.master 247602 2013-03-02 00:53:12Z pjd */ #include "opt_compat.h" @@ -88,7 +88,7 @@ struct sysent sysent[] = { { AS(acct_args), (sy_call_t *)sys_acct, AUE_ACCT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 51 = acct */ { compat(0,sigpending), AUE_SIGPENDING, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 52 = old sigpending */ { AS(sigaltstack_args), (sy_call_t *)sys_sigaltstack, AUE_SIGALTSTACK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 53 = sigaltstack */ - { AS(ioctl_args), (sy_call_t *)sys_ioctl, AUE_IOCTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 54 = ioctl */ + { AS(ioctl_args), (sy_call_t *)sys_ioctl, AUE_IOCTL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 54 = ioctl */ { AS(reboot_args), (sy_call_t *)sys_reboot, AUE_REBOOT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 55 = reboot */ { AS(revoke_args), (sy_call_t *)sys_revoke, AUE_REVOKE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 56 = revoke */ { AS(symlink_args), (sy_call_t *)sys_symlink, AUE_SYMLINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 57 = symlink */ @@ -549,7 +549,7 @@ struct sysent sysent[] = { { AS(shmctl_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 512 = shmctl */ { AS(lpathconf_args), (sy_call_t *)sys_lpathconf, AUE_LPATHCONF, NULL, 0, 0, 0, SY_THR_STATIC }, /* 513 = lpathconf */ { AS(cap_new_args), (sy_call_t *)sys_cap_new, AUE_CAP_NEW, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 514 = cap_new */ - { 
AS(cap_getrights_args), (sy_call_t *)sys_cap_getrights, AUE_CAP_GETRIGHTS, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 515 = cap_getrights */ + { AS(cap_rights_get_args), (sy_call_t *)sys_cap_rights_get, AUE_CAP_RIGHTS_GET, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 515 = cap_rights_get */ { 0, (sy_call_t *)sys_cap_enter, AUE_CAP_ENTER, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 516 = cap_enter */ { AS(cap_getmode_args), (sy_call_t *)sys_cap_getmode, AUE_CAP_GETMODE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 517 = cap_getmode */ { AS(pdfork_args), (sy_call_t *)sys_pdfork, AUE_PDFORK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 518 = pdfork */ @@ -567,4 +567,9 @@ struct sysent sysent[] = { { AS(posix_fallocate_args), (sy_call_t *)sys_posix_fallocate, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 530 = posix_fallocate */ { AS(posix_fadvise_args), (sy_call_t *)sys_posix_fadvise, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 531 = posix_fadvise */ { AS(wait6_args), (sy_call_t *)sys_wait6, AUE_WAIT6, NULL, 0, 0, 0, SY_THR_STATIC }, /* 532 = wait6 */ + { AS(cap_rights_limit_args), (sy_call_t *)sys_cap_rights_limit, AUE_CAP_RIGHTS_LIMIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 533 = cap_rights_limit */ + { AS(cap_ioctls_limit_args), (sy_call_t *)sys_cap_ioctls_limit, AUE_CAP_IOCTLS_LIMIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 534 = cap_ioctls_limit */ + { AS(cap_ioctls_get_args), (sy_call_t *)sys_cap_ioctls_get, AUE_CAP_IOCTLS_GET, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 535 = cap_ioctls_get */ + { AS(cap_fcntls_limit_args), (sy_call_t *)sys_cap_fcntls_limit, AUE_CAP_FCNTLS_LIMIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 536 = cap_fcntls_limit */ + { AS(cap_fcntls_get_args), (sy_call_t *)sys_cap_fcntls_get, AUE_CAP_FCNTLS_GET, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 537 = cap_fcntls_get */ }; diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c index 96f2400..e066014 100644 --- a/sys/kern/syscalls.c +++ 
b/sys/kern/syscalls.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: head/sys/kern/syscalls.master 242958 2012-11-13 12:52:31Z kib + * created from FreeBSD: head/sys/kern/syscalls.master 247602 2013-03-02 00:53:12Z pjd */ const char *syscallnames[] = { @@ -522,7 +522,7 @@ const char *syscallnames[] = { "shmctl", /* 512 = shmctl */ "lpathconf", /* 513 = lpathconf */ "cap_new", /* 514 = cap_new */ - "cap_getrights", /* 515 = cap_getrights */ + "cap_rights_get", /* 515 = cap_rights_get */ "cap_enter", /* 516 = cap_enter */ "cap_getmode", /* 517 = cap_getmode */ "pdfork", /* 518 = pdfork */ @@ -540,4 +540,9 @@ const char *syscallnames[] = { "posix_fallocate", /* 530 = posix_fallocate */ "posix_fadvise", /* 531 = posix_fadvise */ "wait6", /* 532 = wait6 */ + "cap_rights_limit", /* 533 = cap_rights_limit */ + "cap_ioctls_limit", /* 534 = cap_ioctls_limit */ + "cap_ioctls_get", /* 535 = cap_ioctls_get */ + "cap_fcntls_limit", /* 536 = cap_fcntls_limit */ + "cap_fcntls_get", /* 537 = cap_fcntls_get */ }; diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c index c755f92..d811da2 100644 --- a/sys/kern/systrace_args.c +++ b/sys/kern/systrace_args.c @@ -3134,9 +3134,9 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 2; break; } - /* cap_getrights */ + /* cap_rights_get */ case 515: { - struct cap_getrights_args *p = params; + struct cap_rights_get_args *p = params; iarg[0] = p->fd; /* int */ uarg[1] = (intptr_t) p->rightsp; /* uint64_t * */ *n_args = 2; @@ -3286,6 +3286,48 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 6; break; } + /* cap_rights_limit */ + case 533: { + struct cap_rights_limit_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = p->rights; /* uint64_t */ + *n_args = 2; + break; + } + /* cap_ioctls_limit */ + case 534: { + struct cap_ioctls_limit_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = 
(intptr_t) p->cmds; /* const u_long * */ + uarg[2] = p->ncmds; /* size_t */ + *n_args = 3; + break; + } + /* cap_ioctls_get */ + case 535: { + struct cap_ioctls_get_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = (intptr_t) p->cmds; /* u_long * */ + uarg[2] = p->maxcmds; /* size_t */ + *n_args = 3; + break; + } + /* cap_fcntls_limit */ + case 536: { + struct cap_fcntls_limit_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = p->fcntlrights; /* uint32_t */ + *n_args = 2; + break; + } + /* cap_fcntls_get */ + case 537: { + struct cap_fcntls_get_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = (intptr_t) p->fcntlrightsp; /* uint32_t * */ + *n_args = 2; + break; + } default: *n_args = 0; break; @@ -8477,7 +8519,7 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; - /* cap_getrights */ + /* cap_rights_get */ case 515: switch(ndx) { case 0: @@ -8745,6 +8787,77 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; + /* cap_rights_limit */ + case 533: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "uint64_t"; + break; + default: + break; + }; + break; + /* cap_ioctls_limit */ + case 534: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "const u_long *"; + break; + case 2: + p = "size_t"; + break; + default: + break; + }; + break; + /* cap_ioctls_get */ + case 535: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "u_long *"; + break; + case 2: + p = "size_t"; + break; + default: + break; + }; + break; + /* cap_fcntls_limit */ + case 536: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "uint32_t"; + break; + default: + break; + }; + break; + /* cap_fcntls_get */ + case 537: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "uint32_t *"; + break; + default: + break; + }; + break; default: break; }; @@ -10556,7 +10669,7 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t 
descsz) if (ndx == 0 || ndx == 1) p = "int"; break; - /* cap_getrights */ + /* cap_rights_get */ case 515: if (ndx == 0 || ndx == 1) p = "int"; @@ -10638,6 +10751,31 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) if (ndx == 0 || ndx == 1) p = "int"; break; + /* cap_rights_limit */ + case 533: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* cap_ioctls_limit */ + case 534: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* cap_ioctls_get */ + case 535: + if (ndx == 0 || ndx == 1) + p = "ssize_t"; + break; + /* cap_fcntls_limit */ + case 536: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* cap_fcntls_get */ + case 537: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h index 6b0cd67..5a2a1f6 100644 --- a/sys/sys/syscall.h +++ b/sys/sys/syscall.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: head/sys/kern/syscalls.master 242958 2012-11-13 12:52:31Z kib + * created from FreeBSD: head/sys/kern/syscalls.master 247602 2013-03-02 00:53:12Z pjd */ #define SYS_syscall 0 @@ -435,7 +435,7 @@ #define SYS_shmctl 512 #define SYS_lpathconf 513 #define SYS_cap_new 514 -#define SYS_cap_getrights 515 +#define SYS_cap_rights_get 515 #define SYS_cap_enter 516 #define SYS_cap_getmode 517 #define SYS_pdfork 518 @@ -452,4 +452,9 @@ #define SYS_posix_fallocate 530 #define SYS_posix_fadvise 531 #define SYS_wait6 532 -#define SYS_MAXSYSCALL 533 +#define SYS_cap_rights_limit 533 +#define SYS_cap_ioctls_limit 534 +#define SYS_cap_ioctls_get 535 +#define SYS_cap_fcntls_limit 536 +#define SYS_cap_fcntls_get 537 +#define SYS_MAXSYSCALL 538 diff --git a/sys/sys/syscall.mk b/sys/sys/syscall.mk index 25f0470..3e2a3d4 100644 --- a/sys/sys/syscall.mk +++ b/sys/sys/syscall.mk @@ -1,7 +1,7 @@ # FreeBSD system call names. # DO NOT EDIT-- this file is automatically generated. 
# $FreeBSD$ -# created from FreeBSD: head/sys/kern/syscalls.master 242958 2012-11-13 12:52:31Z kib +# created from FreeBSD: head/sys/kern/syscalls.master 247602 2013-03-02 00:53:12Z pjd MIASM = \ syscall.o \ exit.o \ @@ -384,7 +384,7 @@ MIASM = \ shmctl.o \ lpathconf.o \ cap_new.o \ - cap_getrights.o \ + cap_rights_get.o \ cap_enter.o \ cap_getmode.o \ pdfork.o \ @@ -400,4 +400,9 @@ MIASM = \ rctl_remove_rule.o \ posix_fallocate.o \ posix_fadvise.o \ - wait6.o + wait6.o \ + cap_rights_limit.o \ + cap_ioctls_limit.o \ + cap_ioctls_get.o \ + cap_fcntls_limit.o \ + cap_fcntls_get.o diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h index ef59ad5..8ef4080 100644 --- a/sys/sys/sysproto.h +++ b/sys/sys/sysproto.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: head/sys/kern/syscalls.master 242958 2012-11-13 12:52:31Z kib + * created from FreeBSD: head/sys/kern/syscalls.master 247602 2013-03-02 00:53:12Z pjd */ #ifndef _SYS_SYSPROTO_H_ @@ -1676,7 +1676,7 @@ struct cap_new_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; char rights_l_[PADL_(uint64_t)]; uint64_t rights; char rights_r_[PADR_(uint64_t)]; }; -struct cap_getrights_args { +struct cap_rights_get_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; char rightsp_l_[PADL_(uint64_t *)]; uint64_t * rightsp; char rightsp_r_[PADR_(uint64_t *)]; }; @@ -1762,6 +1762,28 @@ struct wait6_args { char wrusage_l_[PADL_(struct __wrusage *)]; struct __wrusage * wrusage; char wrusage_r_[PADR_(struct __wrusage *)]; char info_l_[PADL_(siginfo_t *)]; siginfo_t * info; char info_r_[PADR_(siginfo_t *)]; }; +struct cap_rights_limit_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char rights_l_[PADL_(uint64_t)]; uint64_t rights; char rights_r_[PADR_(uint64_t)]; +}; +struct cap_ioctls_limit_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char cmds_l_[PADL_(const u_long *)]; const u_long * cmds; char 
cmds_r_[PADR_(const u_long *)]; + char ncmds_l_[PADL_(size_t)]; size_t ncmds; char ncmds_r_[PADR_(size_t)]; +}; +struct cap_ioctls_get_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char cmds_l_[PADL_(u_long *)]; u_long * cmds; char cmds_r_[PADR_(u_long *)]; + char maxcmds_l_[PADL_(size_t)]; size_t maxcmds; char maxcmds_r_[PADR_(size_t)]; +}; +struct cap_fcntls_limit_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char fcntlrights_l_[PADL_(uint32_t)]; uint32_t fcntlrights; char fcntlrights_r_[PADR_(uint32_t)]; +}; +struct cap_fcntls_get_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char fcntlrightsp_l_[PADL_(uint32_t *)]; uint32_t * fcntlrightsp; char fcntlrightsp_r_[PADR_(uint32_t *)]; +}; int nosys(struct thread *, struct nosys_args *); void sys_sys_exit(struct thread *, struct sys_exit_args *); int sys_fork(struct thread *, struct fork_args *); @@ -2127,7 +2149,7 @@ int sys_msgctl(struct thread *, struct msgctl_args *); int sys_shmctl(struct thread *, struct shmctl_args *); int sys_lpathconf(struct thread *, struct lpathconf_args *); int sys_cap_new(struct thread *, struct cap_new_args *); -int sys_cap_getrights(struct thread *, struct cap_getrights_args *); +int sys_cap_rights_get(struct thread *, struct cap_rights_get_args *); int sys_cap_enter(struct thread *, struct cap_enter_args *); int sys_cap_getmode(struct thread *, struct cap_getmode_args *); int sys_pdfork(struct thread *, struct pdfork_args *); @@ -2144,6 +2166,11 @@ int sys_rctl_remove_rule(struct thread *, struct rctl_remove_rule_args *); int sys_posix_fallocate(struct thread *, struct posix_fallocate_args *); int sys_posix_fadvise(struct thread *, struct posix_fadvise_args *); int sys_wait6(struct thread *, struct wait6_args *); +int sys_cap_rights_limit(struct thread *, struct cap_rights_limit_args *); +int sys_cap_ioctls_limit(struct thread *, struct cap_ioctls_limit_args *); +int sys_cap_ioctls_get(struct thread *, struct 
cap_ioctls_get_args *); +int sys_cap_fcntls_limit(struct thread *, struct cap_fcntls_limit_args *); +int sys_cap_fcntls_get(struct thread *, struct cap_fcntls_get_args *); #ifdef COMPAT_43 @@ -2823,7 +2850,7 @@ int freebsd7_shmctl(struct thread *, struct freebsd7_shmctl_args *); #define SYS_AUE_shmctl AUE_SHMCTL #define SYS_AUE_lpathconf AUE_LPATHCONF #define SYS_AUE_cap_new AUE_CAP_NEW -#define SYS_AUE_cap_getrights AUE_CAP_GETRIGHTS +#define SYS_AUE_cap_rights_get AUE_CAP_RIGHTS_GET #define SYS_AUE_cap_enter AUE_CAP_ENTER #define SYS_AUE_cap_getmode AUE_CAP_GETMODE #define SYS_AUE_pdfork AUE_PDFORK @@ -2840,6 +2867,11 @@ int freebsd7_shmctl(struct thread *, struct freebsd7_shmctl_args *); #define SYS_AUE_posix_fallocate AUE_NULL #define SYS_AUE_posix_fadvise AUE_NULL #define SYS_AUE_wait6 AUE_WAIT6 +#define SYS_AUE_cap_rights_limit AUE_CAP_RIGHTS_LIMIT +#define SYS_AUE_cap_ioctls_limit AUE_CAP_IOCTLS_LIMIT +#define SYS_AUE_cap_ioctls_get AUE_CAP_IOCTLS_GET +#define SYS_AUE_cap_fcntls_limit AUE_CAP_FCNTLS_LIMIT +#define SYS_AUE_cap_fcntls_get AUE_CAP_FCNTLS_GET #undef PAD_ #undef PADL_ -- cgit v1.1 From 782c8022a25e4abf89eefcab8b95ee74400b7caf Mon Sep 17 00:00:00 2001 From: pjd Date: Sat, 2 Mar 2013 00:56:53 +0000 Subject: Update existing regression tests after Capsicum overhaul. --- .../security/cap_test/cap_test_capabilities.c | 447 ++++++++++++++++----- .../security/cap_test/cap_test_relative.c | 3 +- 2 files changed, 349 insertions(+), 101 deletions(-) diff --git a/tools/regression/security/cap_test/cap_test_capabilities.c b/tools/regression/security/cap_test/cap_test_capabilities.c index 450ad88..b6423f8 100644 --- a/tools/regression/security/cap_test/cap_test_capabilities.c +++ b/tools/regression/security/cap_test/cap_test_capabilities.c @@ -1,8 +1,12 @@ /*- * Copyright (c) 2009-2011 Robert N. M. Watson * Copyright (c) 2011 Jonathan Anderson + * Copyright (c) 2012 FreeBSD Foundation * All rights reserved. 
* + * Portions of this software were developed by Pawel Jakub Dawidek under + * sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -43,6 +47,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -60,14 +65,20 @@ __FBSDID("$FreeBSD$"); */ #define CHECK_RESULT(syscall, rights_needed, succeeded) do { \ if ((rights & (rights_needed)) == (rights_needed)) { \ - if (!(succeeded)) \ + if (succeeded) { \ + if (success == -1) \ + success = PASSED; \ + } else { \ SYSCALL_FAIL(syscall, "failed"); \ + } \ } else { \ - if (succeeded) \ + if (succeeded) { \ FAILX("%s:\tsucceeded when it shouldn't have" \ - " (rights 0x%jx)", #syscall, rights); \ - else if (errno != ENOTCAPABLE) \ + " (rights 0x%jx)", #syscall, \ + (uintmax_t)rights); \ + } else if (errno != ENOTCAPABLE) { \ SYSCALL_FAIL(syscall, "errno != ENOTCAPABLE"); \ + } \ } \ errno = 0; \ } while (0) @@ -79,8 +90,11 @@ __FBSDID("$FreeBSD$"); if ((rights & (rights_needed)) == (rights_needed)) { \ if (p == MAP_FAILED) \ SYSCALL_FAIL(mmap, "failed"); \ - else \ + else { \ (void)munmap(p, getpagesize()); \ + if (success == -1) \ + success = PASSED; \ + } \ } else { \ if (p != MAP_FAILED) { \ FAILX("%s:\tsucceeded when it shouldn't have" \ @@ -97,96 +111,200 @@ __FBSDID("$FreeBSD$"); * make sure only those rights work. 
*/ static int -try_file_ops(int fd, cap_rights_t rights) +try_file_ops(int filefd, int dirfd, cap_rights_t rights) { struct stat sb; struct statfs sf; - int fd_cap, fd_capcap; + cap_rights_t erights; + int fd_cap, fd_capcap, dfd_cap; ssize_t ssize, ssize2; off_t off; void *p; char ch; int ret, is_nfs; struct pollfd pollfd; - int success = PASSED; + int success = -1; - REQUIRE(fstatfs(fd, &sf)); - is_nfs = (strncmp("nfs", sf.f_fstypename, sizeof(sf.f_fstypename)) - == 0); + REQUIRE(fstatfs(filefd, &sf)); + is_nfs = (strcmp("nfs", sf.f_fstypename) == 0); - REQUIRE(fd_cap = cap_new(fd, rights)); + REQUIRE(fd_cap = cap_new(filefd, rights)); + CHECK(cap_getrights(fd_cap, &erights) == 0); + CHECK(rights == erights); REQUIRE(fd_capcap = cap_new(fd_cap, rights)); + CHECK(cap_getrights(fd_capcap, &erights) == 0); + CHECK(rights == erights); CHECK(fd_capcap != fd_cap); - - pollfd.fd = fd_cap; - pollfd.events = POLLIN | POLLERR | POLLHUP; - pollfd.revents = 0; + REQUIRE(dfd_cap = cap_new(dirfd, rights)); + CHECK(cap_getrights(dfd_cap, &erights) == 0); + CHECK(rights == erights); ssize = read(fd_cap, &ch, sizeof(ch)); - CHECK_RESULT(read, CAP_READ | CAP_SEEK, ssize >= 0); - - ssize = pread(fd_cap, &ch, sizeof(ch), 0); - ssize2 = pread(fd_cap, &ch, sizeof(ch), 0); - CHECK_RESULT(pread, CAP_READ, ssize >= 0); - CHECK(ssize == ssize2); + CHECK_RESULT(read, CAP_READ, ssize >= 0); ssize = write(fd_cap, &ch, sizeof(ch)); - CHECK_RESULT(write, CAP_WRITE | CAP_SEEK, ssize >= 0); - - ssize = pwrite(fd_cap, &ch, sizeof(ch), 0); - CHECK_RESULT(pwrite, CAP_WRITE, ssize >= 0); + CHECK_RESULT(write, CAP_WRITE, ssize >= 0); off = lseek(fd_cap, 0, SEEK_SET); CHECK_RESULT(lseek, CAP_SEEK, off >= 0); - /* - * Note: this is not expected to work over NFS. 
- */ - ret = fchflags(fd_cap, UF_NODUMP); - CHECK_RESULT(fchflags, CAP_FCHFLAGS, - (ret == 0) || (is_nfs && (errno == EOPNOTSUPP))); + ssize = pread(fd_cap, &ch, sizeof(ch), 0); + ssize2 = pread(fd_cap, &ch, sizeof(ch), 0); + CHECK_RESULT(pread, CAP_PREAD, ssize >= 0); + CHECK(ssize == ssize2); - ret = fstat(fd_cap, &sb); - CHECK_RESULT(fstat, CAP_FSTAT, ret == 0); + ssize = pwrite(fd_cap, &ch, sizeof(ch), 0); + CHECK_RESULT(pwrite, CAP_PWRITE, ssize >= 0); + + p = mmap(NULL, getpagesize(), PROT_NONE, MAP_SHARED, fd_cap, 0); + CHECK_MMAP_RESULT(CAP_MMAP); p = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, fd_cap, 0); - CHECK_MMAP_RESULT(CAP_MMAP | CAP_READ); + CHECK_MMAP_RESULT(CAP_MMAP_R); p = mmap(NULL, getpagesize(), PROT_WRITE, MAP_SHARED, fd_cap, 0); - CHECK_MMAP_RESULT(CAP_MMAP | CAP_WRITE); + CHECK_MMAP_RESULT(CAP_MMAP_W); p = mmap(NULL, getpagesize(), PROT_EXEC, MAP_SHARED, fd_cap, 0); - CHECK_MMAP_RESULT(CAP_MMAP | CAP_MAPEXEC); + CHECK_MMAP_RESULT(CAP_MMAP_X); p = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_SHARED, fd_cap, 0); - CHECK_MMAP_RESULT(CAP_MMAP | CAP_READ | CAP_WRITE); + CHECK_MMAP_RESULT(CAP_MMAP_RW); p = mmap(NULL, getpagesize(), PROT_READ | PROT_EXEC, MAP_SHARED, fd_cap, 0); - CHECK_MMAP_RESULT(CAP_MMAP | CAP_READ | CAP_MAPEXEC); + CHECK_MMAP_RESULT(CAP_MMAP_RX); p = mmap(NULL, getpagesize(), PROT_EXEC | PROT_WRITE, MAP_SHARED, fd_cap, 0); - CHECK_MMAP_RESULT(CAP_MMAP | CAP_MAPEXEC | CAP_WRITE); + CHECK_MMAP_RESULT(CAP_MMAP_WX); p = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED, fd_cap, 0); - CHECK_MMAP_RESULT(CAP_MMAP | CAP_READ | CAP_WRITE | CAP_MAPEXEC); + CHECK_MMAP_RESULT(CAP_MMAP_RWX); + + /* TODO: openat(O_APPEND) */ + ret = openat(dfd_cap, "cap_create", O_CREAT | O_RDONLY, 0600); + CHECK_RESULT(openat(O_CREATE | O_RDONLY), + CAP_CREATE | CAP_READ | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + CHECK(ret == -1 || unlinkat(dirfd, "cap_create", 0) == 0); + ret = openat(dfd_cap, 
"cap_create", O_CREAT | O_WRONLY, 0600); + CHECK_RESULT(openat(O_CREATE | O_WRONLY), + CAP_CREATE | CAP_WRITE | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + CHECK(ret == -1 || unlinkat(dirfd, "cap_create", 0) == 0); + ret = openat(dfd_cap, "cap_create", O_CREAT | O_RDWR, 0600); + CHECK_RESULT(openat(O_CREATE | O_RDWR), + CAP_CREATE | CAP_READ | CAP_WRITE | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + CHECK(ret == -1 || unlinkat(dirfd, "cap_create", 0) == 0); ret = fsync(fd_cap); CHECK_RESULT(fsync, CAP_FSYNC, ret == 0); + ret = openat(dirfd, "cap_fsync", O_CREAT, 0600); + CHECK(ret >= 0); + CHECK(close(ret) == 0); + ret = openat(dfd_cap, "cap_fsync", O_FSYNC | O_RDONLY, 0600); + CHECK_RESULT(openat(O_FSYNC | O_RDONLY), + CAP_FSYNC | CAP_READ | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + ret = openat(dfd_cap, "cap_fsync", O_FSYNC | O_WRONLY, 0600); + CHECK_RESULT(openat(O_FSYNC | O_WRONLY), + CAP_FSYNC | CAP_WRITE | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + ret = openat(dfd_cap, "cap_fsync", O_FSYNC | O_RDWR, 0600); + CHECK_RESULT(openat(O_FSYNC | O_RDWR), + CAP_FSYNC | CAP_READ | CAP_WRITE | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + ret = openat(dfd_cap, "cap_fsync", O_SYNC | O_RDONLY, 0600); + CHECK_RESULT(openat(O_SYNC | O_RDONLY), + CAP_FSYNC | CAP_READ | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + ret = openat(dfd_cap, "cap_fsync", O_SYNC | O_WRONLY, 0600); + CHECK_RESULT(openat(O_SYNC | O_WRONLY), + CAP_FSYNC | CAP_WRITE | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + ret = openat(dfd_cap, "cap_fsync", O_SYNC | O_RDWR, 0600); + CHECK_RESULT(openat(O_SYNC | O_RDWR), + CAP_FSYNC | CAP_READ | CAP_WRITE | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + CHECK(unlinkat(dirfd, "cap_fsync", 0) == 0); + + ret = ftruncate(fd_cap, 0); + CHECK_RESULT(ftruncate, CAP_FTRUNCATE, ret == 0); + + ret = openat(dirfd, 
"cap_ftruncate", O_CREAT, 0600); + CHECK(ret >= 0); + CHECK(close(ret) == 0); + ret = openat(dfd_cap, "cap_ftruncate", O_TRUNC | O_RDONLY); + CHECK_RESULT(openat(O_TRUNC | O_RDONLY), + CAP_FTRUNCATE | CAP_READ | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + ret = openat(dfd_cap, "cap_ftruncate", O_TRUNC | O_WRONLY); + CHECK_RESULT(openat(O_TRUNC | O_WRONLY), + CAP_FTRUNCATE | CAP_WRITE | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + ret = openat(dfd_cap, "cap_ftruncate", O_TRUNC | O_RDWR); + CHECK_RESULT(openat(O_TRUNC | O_RDWR), + CAP_FTRUNCATE | CAP_READ | CAP_WRITE | CAP_LOOKUP, ret >= 0); + CHECK(ret == -1 || close(ret) == 0); + CHECK(unlinkat(dirfd, "cap_ftruncate", 0) == 0); + + /* + * Note: this is not expected to work over NFS. + */ + ret = fchflags(fd_cap, UF_NODUMP); + CHECK_RESULT(fchflags, CAP_FCHFLAGS, + ret == 0 || (is_nfs && errno == EOPNOTSUPP)); + +#ifdef TODO /* No such syscalls yet. */ + ret = openat(dirfd, "cap_fchflagsat", O_CREAT, 0600); + CHECK(ret >= 0); + CHECK(close(ret) == 0); + ret = fchflagsat(dfd_cap, "cap_fchflagsat", UF_NODUMP, 0); + CHECK_RESULT(fchflagsat, CAP_FCHFLAGSAT | CAP_LOOKUP, ret == 0); + CHECK(unlinkat(dirfd, "cap_fchflagsat", 0) == 0); +#endif + ret = fchown(fd_cap, -1, -1); CHECK_RESULT(fchown, CAP_FCHOWN, ret == 0); + ret = openat(dirfd, "cap_fchownat", O_CREAT, 0600); + CHECK(ret >= 0); + CHECK(close(ret) == 0); + ret = fchownat(dfd_cap, "cap_fchownat", -1, -1, 0); + CHECK_RESULT(fchownat, CAP_FCHOWN | CAP_LOOKUP, ret == 0); + CHECK(unlinkat(dirfd, "cap_fchownat", 0) == 0); + ret = fchmod(fd_cap, 0644); CHECK_RESULT(fchmod, CAP_FCHMOD, ret == 0); + ret = openat(dirfd, "cap_fchmodat", O_CREAT, 0600); + CHECK(ret >= 0); + CHECK(close(ret) == 0); + ret = fchmodat(dfd_cap, "cap_fchmodat", 0600, 0); + CHECK_RESULT(fchmodat, CAP_FCHMOD | CAP_LOOKUP, ret == 0); + CHECK(unlinkat(dirfd, "cap_fchmodat", 0) == 0); + + ret = fcntl(fd_cap, F_GETFL); + CHECK_RESULT(fcntl(F_GETFL), CAP_FCNTL, ret 
>= 0); + ret = fcntl(fd_cap, F_SETFL, ret); + CHECK_RESULT(fcntl(F_SETFL), CAP_FCNTL, ret == 0); + /* XXX flock */ - ret = ftruncate(fd_cap, 0); - CHECK_RESULT(ftruncate, CAP_FTRUNCATE, ret == 0); + ret = fstat(fd_cap, &sb); + CHECK_RESULT(fstat, CAP_FSTAT, ret == 0); + + ret = openat(dirfd, "cap_fstatat", O_CREAT, 0600); + CHECK(ret >= 0); + CHECK(close(ret) == 0); + ret = fstatat(dfd_cap, "cap_fstatat", &sb, 0); + CHECK_RESULT(fstatat, CAP_FSTAT | CAP_LOOKUP, ret == 0); + CHECK(unlinkat(dirfd, "cap_fstatat", 0) == 0); ret = fstatfs(fd_cap, &sf); CHECK_RESULT(fstatfs, CAP_FSTATFS, ret == 0); @@ -197,6 +315,55 @@ try_file_ops(int fd, cap_rights_t rights) ret = futimes(fd_cap, NULL); CHECK_RESULT(futimes, CAP_FUTIMES, ret == 0); + ret = openat(dirfd, "cap_futimesat", O_CREAT, 0600); + CHECK(ret >= 0); + CHECK(close(ret) == 0); + ret = futimesat(dfd_cap, "cap_futimesat", NULL); + CHECK_RESULT(futimesat, CAP_FUTIMES | CAP_LOOKUP, ret == 0); + CHECK(unlinkat(dirfd, "cap_futimesat", 0) == 0); + + ret = openat(dirfd, "cap_linkat_src", O_CREAT, 0600); + CHECK(ret >= 0); + CHECK(close(ret) == 0); + ret = linkat(dirfd, "cap_linkat_src", dfd_cap, "cap_linkat_dst", 0); + CHECK_RESULT(linkat, CAP_LINKAT | CAP_LOOKUP, ret == 0); + CHECK(unlinkat(dirfd, "cap_linkat_src", 0) == 0); + CHECK(ret == -1 || unlinkat(dirfd, "cap_linkat_dst", 0) == 0); + + ret = mkdirat(dfd_cap, "cap_mkdirat", 0700); + CHECK_RESULT(mkdirat, CAP_MKDIRAT | CAP_LOOKUP, ret == 0); + CHECK(ret == -1 || unlinkat(dirfd, "cap_mkdirat", AT_REMOVEDIR) == 0); + + ret = mkfifoat(dfd_cap, "cap_mkfifoat", 0600); + CHECK_RESULT(mkfifoat, CAP_MKFIFOAT | CAP_LOOKUP, ret == 0); + CHECK(ret == -1 || unlinkat(dirfd, "cap_mkfifoat", 0) == 0); + + ret = mknodat(dfd_cap, "cap_mknodat", S_IFCHR | 0600, 0); + CHECK_RESULT(mknodat, CAP_MKNODAT | CAP_LOOKUP, ret == 0); + CHECK(ret == -1 || unlinkat(dirfd, "cap_mknodat", 0) == 0); + + /* TODO: renameat(2) */ + + ret = symlinkat("test", dfd_cap, "cap_symlinkat"); + 
CHECK_RESULT(symlinkat, CAP_SYMLINKAT | CAP_LOOKUP, ret == 0); + CHECK(ret == -1 || unlinkat(dirfd, "cap_symlinkat", 0) == 0); + + ret = openat(dirfd, "cap_unlinkat", O_CREAT, 0600); + CHECK(ret >= 0); + CHECK(close(ret) == 0); + ret = unlinkat(dfd_cap, "cap_unlinkat", 0); + CHECK_RESULT(unlinkat, CAP_UNLINKAT | CAP_LOOKUP, ret == 0); + CHECK(ret == 0 || unlinkat(dirfd, "cap_unlinkat", 0) == 0); + ret = mkdirat(dirfd, "cap_unlinkat", 0700); + CHECK(ret == 0); + ret = unlinkat(dfd_cap, "cap_unlinkat", AT_REMOVEDIR); + CHECK_RESULT(unlinkat, CAP_UNLINKAT | CAP_LOOKUP, ret == 0); + CHECK(ret == 0 || unlinkat(dirfd, "cap_unlinkat", AT_REMOVEDIR) == 0); + + pollfd.fd = fd_cap; + pollfd.events = POLLIN | POLLERR | POLLHUP; + pollfd.revents = 0; + ret = poll(&pollfd, 1, 0); if (rights & CAP_POLL_EVENT) CHECK((pollfd.revents & POLLNVAL) == 0); @@ -205,79 +372,159 @@ try_file_ops(int fd, cap_rights_t rights) /* XXX: select, kqueue */ - close (fd_cap); + close(fd_cap); + close(fd_capcap); + + if (success == -1) { + fprintf(stderr, "No tests for rights 0x%jx.\n", + (uintmax_t)rights); + success = FAILED; + } return (success); } -#define TRY(fd, rights) \ +#define TRY(rights) \ do { \ if (success == PASSED) \ - success = try_file_ops(fd, rights); \ + success = try_file_ops(filefd, dirfd, (rights)); \ else \ /* We've already failed, but try the test anyway. */ \ - try_file_ops(fd, rights); \ + try_file_ops(filefd, dirfd, (rights)); \ } while (0) +#define KEEP_ERRNO(...) 
do { \ + int _saved_errno = errno; \ + __VA_ARGS__; \ + errno = _saved_errno; \ +} while (0); + int test_capabilities(void) { - int fd; + int filefd, dirfd, tmpfd; int success = PASSED; - - fd = open("/tmp/cap_test_capabilities", O_RDWR | O_CREAT, 0644); - if (fd < 0) + char file[] = "/tmp/cap_test.XXXXXXXXXX"; + char dir[] = "/tmp/cap_test.XXXXXXXXXX"; + + filefd = mkstemp(file); + if (filefd < 0) + err(-1, "mkstemp"); + if (mkdtemp(dir) == NULL) { + KEEP_ERRNO(unlink(file)); + err(-1, "mkdtemp"); + } + dirfd = open(dir, O_RDONLY | O_DIRECTORY); + if (dirfd == -1) { + KEEP_ERRNO(unlink(file)); + KEEP_ERRNO(rmdir(dir)); err(-1, "open"); + } + tmpfd = open("/tmp", O_RDONLY | O_DIRECTORY); + if (tmpfd == -1) { + KEEP_ERRNO(unlink(file)); + KEEP_ERRNO(rmdir(dir)); + err(-1, "open"); + } - if (cap_enter() < 0) + if (cap_enter() == -1) { + KEEP_ERRNO(unlink(file)); + KEEP_ERRNO(rmdir(dir)); err(-1, "cap_enter"); - - /* XXX: Really want to try all combinations. */ - TRY(fd, CAP_READ); - TRY(fd, CAP_READ | CAP_SEEK); - TRY(fd, CAP_WRITE); - TRY(fd, CAP_WRITE | CAP_SEEK); - TRY(fd, CAP_READ | CAP_WRITE); - TRY(fd, CAP_READ | CAP_WRITE | CAP_SEEK); - TRY(fd, CAP_SEEK); - TRY(fd, CAP_FCHFLAGS); - TRY(fd, CAP_IOCTL); - TRY(fd, CAP_FSTAT); - TRY(fd, CAP_MMAP); - TRY(fd, CAP_MMAP | CAP_READ); - TRY(fd, CAP_MMAP | CAP_WRITE); - TRY(fd, CAP_MMAP | CAP_MAPEXEC); - TRY(fd, CAP_MMAP | CAP_READ | CAP_WRITE); - TRY(fd, CAP_MMAP | CAP_READ | CAP_MAPEXEC); - TRY(fd, CAP_MMAP | CAP_MAPEXEC | CAP_WRITE); - TRY(fd, CAP_MMAP | CAP_READ | CAP_WRITE | CAP_MAPEXEC); - TRY(fd, CAP_FCNTL); - TRY(fd, CAP_POST_EVENT); - TRY(fd, CAP_POLL_EVENT); - TRY(fd, CAP_FSYNC); - TRY(fd, CAP_FCHOWN); - TRY(fd, CAP_FCHMOD); - TRY(fd, CAP_FTRUNCATE); - TRY(fd, CAP_FLOCK); - TRY(fd, CAP_FSTATFS); - TRY(fd, CAP_FPATHCONF); - TRY(fd, CAP_FUTIMES); - TRY(fd, CAP_ACL_GET); - TRY(fd, CAP_ACL_SET); - TRY(fd, CAP_ACL_DELETE); - TRY(fd, CAP_ACL_CHECK); - TRY(fd, CAP_EXTATTR_GET); - TRY(fd, CAP_EXTATTR_SET); - TRY(fd, 
CAP_EXTATTR_DELETE); - TRY(fd, CAP_EXTATTR_LIST); - TRY(fd, CAP_MAC_GET); - TRY(fd, CAP_MAC_SET); - - /* - * Socket-specific. - */ - TRY(fd, CAP_GETPEERNAME); - TRY(fd, CAP_GETSOCKNAME); - TRY(fd, CAP_ACCEPT); + } + + TRY(CAP_READ); + TRY(CAP_WRITE); + TRY(CAP_SEEK); + TRY(CAP_PREAD); + TRY(CAP_PWRITE); + TRY(CAP_READ | CAP_WRITE); + TRY(CAP_PREAD | CAP_PWRITE); + TRY(CAP_MMAP); + TRY(CAP_MMAP_R); + TRY(CAP_MMAP_W); + TRY(CAP_MMAP_X); + TRY(CAP_MMAP_RW); + TRY(CAP_MMAP_RX); + TRY(CAP_MMAP_WX); + TRY(CAP_MMAP_RWX); + TRY(CAP_CREATE | CAP_READ | CAP_LOOKUP); + TRY(CAP_CREATE | CAP_WRITE | CAP_LOOKUP); + TRY(CAP_CREATE | CAP_READ | CAP_WRITE | CAP_LOOKUP); +#ifdef TODO + TRY(CAP_FEXECVE); +#endif + TRY(CAP_FSYNC); + TRY(CAP_FSYNC | CAP_READ | CAP_LOOKUP); + TRY(CAP_FSYNC | CAP_WRITE | CAP_LOOKUP); + TRY(CAP_FSYNC | CAP_READ | CAP_WRITE | CAP_LOOKUP); + TRY(CAP_FTRUNCATE); + TRY(CAP_FTRUNCATE | CAP_READ | CAP_LOOKUP); + TRY(CAP_FTRUNCATE | CAP_WRITE | CAP_LOOKUP); + TRY(CAP_FTRUNCATE | CAP_READ | CAP_WRITE | CAP_LOOKUP); +#ifdef TODO + TRY(CAP_FCHDIR); +#endif + TRY(CAP_FCHFLAGS); + TRY(CAP_FCHOWN); + TRY(CAP_FCHOWN | CAP_LOOKUP); + TRY(CAP_FCHMOD | CAP_LOOKUP); + TRY(CAP_FCNTL); +#ifdef TODO + TRY(CAP_FLOCK); +#endif + TRY(CAP_FPATHCONF); +#ifdef TODO + TRY(CAP_FSCK); +#endif + TRY(CAP_FSTAT | CAP_LOOKUP); + TRY(CAP_FSTATFS); + TRY(CAP_FUTIMES | CAP_LOOKUP); + TRY(CAP_LINKAT | CAP_LOOKUP); + TRY(CAP_MKDIRAT | CAP_LOOKUP); + TRY(CAP_MKFIFOAT | CAP_LOOKUP); + TRY(CAP_MKNODAT | CAP_LOOKUP); + TRY(CAP_SYMLINKAT | CAP_LOOKUP); + TRY(CAP_UNLINKAT | CAP_LOOKUP); + /* Rename needs CAP_RENAMEAT on source directory and CAP_LINKAT on destination directory. 
*/ + TRY(CAP_RENAMEAT | CAP_UNLINKAT | CAP_LOOKUP); +#ifdef TODO + TRY(CAP_LOOKUP); + TRY(CAP_EXTATTR_DELETE); + TRY(CAP_EXTATTR_GET); + TRY(CAP_EXTATTR_LIST); + TRY(CAP_EXTATTR_SET); + TRY(CAP_ACL_CHECK); + TRY(CAP_ACL_DELETE); + TRY(CAP_ACL_GET); + TRY(CAP_ACL_SET); + TRY(CAP_ACCEPT); + TRY(CAP_BIND); + TRY(CAP_CONNECT); + TRY(CAP_GETPEERNAME); + TRY(CAP_GETSOCKNAME); + TRY(CAP_GETSOCKOPT); + TRY(CAP_LISTEN); + TRY(CAP_PEELOFF); + TRY(CAP_RECV); + TRY(CAP_SEND); + TRY(CAP_SETSOCKOPT); + TRY(CAP_SHUTDOWN); + TRY(CAP_MAC_GET); + TRY(CAP_MAC_SET); + TRY(CAP_SEM_GETVALUE); + TRY(CAP_SEM_POST); + TRY(CAP_SEM_WAIT); + TRY(CAP_POST_EVENT); + TRY(CAP_POLL_EVENT); + TRY(CAP_IOCTL); + TRY(CAP_TTYHOOK); + TRY(CAP_PDGETPID); + TRY(CAP_PDWAIT); + TRY(CAP_PDKILL); +#endif + + (void)unlinkat(tmpfd, file + strlen("/tmp/"), 0); + (void)unlinkat(tmpfd, dir + strlen("/tmp/"), AT_REMOVEDIR); return (success); } diff --git a/tools/regression/security/cap_test/cap_test_relative.c b/tools/regression/security/cap_test/cap_test_relative.c index 1f62e84..27410eb 100644 --- a/tools/regression/security/cap_test/cap_test_relative.c +++ b/tools/regression/security/cap_test/cap_test_relative.c @@ -61,7 +61,8 @@ test_relative(void) cap_rights_t rights; REQUIRE(etc = open("/etc/", O_RDONLY)); - CHECK_SYSCALL_FAILS(EINVAL, cap_getrights, etc, &rights); + CHECK_SYSCALL_SUCCEEDS(cap_getrights, etc, &rights); + CHECK_RIGHTS(rights, CAP_ALL); MAKE_CAPABILITY(etc_cap, etc, CAP_READ); MAKE_CAPABILITY(etc_cap_ro, etc, CAP_READ | CAP_LOOKUP); -- cgit v1.1 From 746bd17f58d6db84d2ba3c0853c58ccee8dfe68c Mon Sep 17 00:00:00 2001 From: pjd Date: Sat, 2 Mar 2013 01:00:26 +0000 Subject: Add regression tests for the new Capsicum system calls. 
Sponsored by: The FreeBSD Foundation --- tools/regression/capsicum/syscalls/Makefile | 28 ++ .../capsicum/syscalls/cap_fcntls_limit.c | 540 +++++++++++++++++++++ tools/regression/capsicum/syscalls/cap_getmode.c | 167 +++++++ .../capsicum/syscalls/cap_ioctls_limit.c | 462 ++++++++++++++++++ tools/regression/capsicum/syscalls/misc.c | 128 +++++ tools/regression/capsicum/syscalls/misc.h | 62 +++ 6 files changed, 1387 insertions(+) create mode 100644 tools/regression/capsicum/syscalls/Makefile create mode 100644 tools/regression/capsicum/syscalls/cap_fcntls_limit.c create mode 100644 tools/regression/capsicum/syscalls/cap_getmode.c create mode 100644 tools/regression/capsicum/syscalls/cap_ioctls_limit.c create mode 100644 tools/regression/capsicum/syscalls/misc.c create mode 100644 tools/regression/capsicum/syscalls/misc.h diff --git a/tools/regression/capsicum/syscalls/Makefile b/tools/regression/capsicum/syscalls/Makefile new file mode 100644 index 0000000..5d34226 --- /dev/null +++ b/tools/regression/capsicum/syscalls/Makefile @@ -0,0 +1,28 @@ +# $FreeBSD$ + +SYSCALLS= cap_fcntls_limit cap_getmode cap_ioctls_limit + +CFLAGS= -O2 -pipe -std=gnu99 -fstack-protector +CFLAGS+= -Wsystem-headers -Werror -Wall -Wno-format-y2k -W -Wno-unused-parameter +CFLAGS+= -Wstrict-prototypes -Wmissing-prototypes -Wpointer-arith -Wreturn-type +CFLAGS+= -Wcast-qual -Wwrite-strings -Wswitch -Wshadow -Wunused-parameter +CFLAGS+= -Wcast-align -Wchar-subscripts -Winline -Wnested-externs -Wredundant-decls +CFLAGS+= -Wold-style-definition -Wno-pointer-sign + +all: ${SYSCALLS} ${SYSCALLS:=.t} + +.for SYSCALL in ${SYSCALLS} + +${SYSCALL}: ${SYSCALL}.c misc.c + ${CC} ${CFLAGS} ${@}.c misc.c -o $@ + +${SYSCALL}.t: ${SYSCALL} + @printf "#!/bin/sh\n\n%s/%s\n" ${.CURDIR} ${@:.t=} > $@ + +.endfor + +test: all + @prove -r ${.CURDIR} + +clean: + rm -f ${SYSCALLS} ${SYSCALLS:=.t} diff --git a/tools/regression/capsicum/syscalls/cap_fcntls_limit.c b/tools/regression/capsicum/syscalls/cap_fcntls_limit.c 
new file mode 100644 index 0000000..c97203d --- /dev/null +++ b/tools/regression/capsicum/syscalls/cap_fcntls_limit.c @@ -0,0 +1,540 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "misc.h" + +static void +fcntl_tests_0(int fd) +{ + uint32_t fcntlrights; + + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == CAP_FCNTL_ALL); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + CHECK(fcntl(fd, F_SETFL, O_NONBLOCK) == 0); + CHECK(fcntl(fd, F_GETFL) == (O_RDWR | O_NONBLOCK)); + CHECK(fcntl(fd, F_SETFL, 0) == 0); + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + + errno = 0; + CHECK(cap_fcntls_limit(fd, ~CAP_FCNTL_ALL) == -1); + CHECK(errno == EINVAL); + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) == 0); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == (CAP_FCNTL_GETFL | CAP_FCNTL_SETFL)); + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) == 0); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == (CAP_FCNTL_GETFL | CAP_FCNTL_SETFL)); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + CHECK(fcntl(fd, F_SETFL, O_NONBLOCK) == 0); + CHECK(fcntl(fd, F_GETFL) == (O_RDWR | O_NONBLOCK)); + CHECK(fcntl(fd, F_SETFL, 0) == 0); + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL) == 0); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == CAP_FCNTL_GETFL); + errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 
0); + CHECK(fcntlrights == CAP_FCNTL_GETFL); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + errno = 0; + CHECK(fcntl(fd, F_SETFL, O_NONBLOCK) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + errno = 0; + CHECK(fcntl(fd, F_SETFL, 0) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + + CHECK(cap_fcntls_limit(fd, 0) == 0); + fcntlrights = CAP_FCNTL_ALL; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = CAP_FCNTL_ALL; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = CAP_FCNTL_ALL; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + errno = 0; + CHECK(fcntl(fd, F_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_SETFL, O_NONBLOCK) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_SETFL, 0) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); +} + +static void +fcntl_tests_1(int fd) +{ + uint32_t fcntlrights; + + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL) == 0); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == CAP_FCNTL_GETFL); + + CHECK(cap_rights_limit(fd, CAP_ALL & ~CAP_FCNTL) == 0); + + fcntlrights = CAP_FCNTL_ALL; + CHECK(cap_fcntls_get(fd, &fcntlrights) 
== 0); + CHECK(fcntlrights == 0); + + errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = CAP_FCNTL_ALL; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = CAP_FCNTL_ALL; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + errno = 0; + CHECK(fcntl(fd, F_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_SETFL, O_NONBLOCK) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_SETFL, 0) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); +} + +static void +fcntl_tests_2(int fd) +{ + uint32_t fcntlrights; + + CHECK(cap_rights_limit(fd, CAP_ALL & ~CAP_FCNTL) == 0); + + fcntlrights = CAP_FCNTL_ALL; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + + errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = CAP_FCNTL_ALL; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = CAP_FCNTL_ALL; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + errno = 0; + CHECK(fcntl(fd, F_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_SETFL, O_NONBLOCK) == -1); + 
CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_SETFL, 0) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); +} + +static void +fcntl_tests_send_0(int sock) +{ + int fd; + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK(descriptor_send(sock, fd) == 0); + CHECK(close(fd) == 0); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) == 0); + CHECK(descriptor_send(sock, fd) == 0); + CHECK(close(fd) == 0); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL) == 0); + CHECK(descriptor_send(sock, fd) == 0); + CHECK(close(fd) == 0); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK(cap_fcntls_limit(fd, 0) == 0); + CHECK(descriptor_send(sock, fd) == 0); + CHECK(close(fd) == 0); +} + +static void +fcntl_tests_recv_0(int sock) +{ + uint32_t fcntlrights; + int fd; + + CHECK(descriptor_recv(sock, &fd) == 0); + + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == CAP_FCNTL_ALL); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + CHECK(fcntl(fd, F_SETFL, O_NONBLOCK) == 0); + CHECK(fcntl(fd, F_GETFL) == (O_RDWR | O_NONBLOCK)); + CHECK(fcntl(fd, F_SETFL, 0) == 0); + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + + CHECK(close(fd) == 0); + + CHECK(descriptor_recv(sock, &fd) == 0); + + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == (CAP_FCNTL_GETFL | CAP_FCNTL_SETFL)); + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) == 0); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == (CAP_FCNTL_GETFL | CAP_FCNTL_SETFL)); + + CHECK(fcntl(fd, F_GETFD) == 0); + 
CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + CHECK(fcntl(fd, F_SETFL, O_NONBLOCK) == 0); + CHECK(fcntl(fd, F_GETFL) == (O_RDWR | O_NONBLOCK)); + CHECK(fcntl(fd, F_SETFL, 0) == 0); + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + + CHECK(close(fd) == 0); + + CHECK(descriptor_recv(sock, &fd) == 0); + + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == CAP_FCNTL_GETFL); + errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == CAP_FCNTL_GETFL); + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL) == 0); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == CAP_FCNTL_GETFL); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + errno = 0; + CHECK(fcntl(fd, F_SETFL, O_NONBLOCK) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + errno = 0; + CHECK(fcntl(fd, F_SETFL, 0) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFL) == O_RDWR); + + CHECK(close(fd) == 0); + + CHECK(descriptor_recv(sock, &fd) == 0); + + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + 
errno = 0; + CHECK(cap_fcntls_limit(fd, CAP_FCNTL_SETFL) == -1); + CHECK(errno == ENOTCAPABLE); + fcntlrights = 0; + CHECK(cap_fcntls_get(fd, &fcntlrights) == 0); + CHECK(fcntlrights == 0); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + errno = 0; + CHECK(fcntl(fd, F_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_SETFL, O_NONBLOCK) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_SETFL, 0) == -1); + CHECK(errno == ENOTCAPABLE); + errno = 0; + CHECK(fcntl(fd, F_GETFL) == -1); + CHECK(errno == ENOTCAPABLE); + + CHECK(close(fd) == 0); +} + +int +main(void) +{ + int fd, pfd, sp[2]; + pid_t pid; + + printf("1..870\n"); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + fcntl_tests_0(fd); + CHECK(close(fd) == 0); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + fcntl_tests_1(fd); + CHECK(close(fd) == 0); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + fcntl_tests_2(fd); + CHECK(close(fd) == 0); + + /* Child inherits descriptor and operates on it first. */ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK((pid = fork()) >= 0); + if (pid == 0) { + fcntl_tests_0(fd); + CHECK(close(fd) == 0); + exit(0); + } else { + CHECK(waitpid(pid, NULL, 0) == pid); + fcntl_tests_0(fd); + } + CHECK(close(fd) == 0); + + /* Child inherits descriptor, but operates on it after parent. */ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK((pid = fork()) >= 0); + if (pid == 0) { + sleep(1); + fcntl_tests_0(fd); + CHECK(close(fd) == 0); + exit(0); + } else { + fcntl_tests_0(fd); + CHECK(waitpid(pid, NULL, 0) == pid); + } + CHECK(close(fd) == 0); + + /* Child inherits descriptor and operates on it first. 
*/ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK((pid = pdfork(&pfd, 0)) >= 0); + if (pid == 0) { + fcntl_tests_1(fd); + exit(0); + } else { + CHECK(pdwait(pfd) == 0); +/* + It fails with EBADF, which I believe is a bug. + CHECK(close(pfd) == 0); +*/ + fcntl_tests_1(fd); + } + CHECK(close(fd) == 0); + + /* Child inherits descriptor, but operates on it after parent. */ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK((pid = pdfork(&pfd, 0)) >= 0); + if (pid == 0) { + sleep(1); + fcntl_tests_1(fd); + exit(0); + } else { + fcntl_tests_1(fd); + CHECK(pdwait(pfd) == 0); +/* + It fails with EBADF, which I believe is a bug. + CHECK(close(pfd) == 0); +*/ + } + CHECK(close(fd) == 0); + + /* Child inherits descriptor and operates on it first. */ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK((pid = fork()) >= 0); + if (pid == 0) { + fcntl_tests_2(fd); + exit(0); + } else { + CHECK(waitpid(pid, NULL, 0) == pid); + fcntl_tests_2(fd); + } + CHECK(close(fd) == 0); + + /* Child inherits descriptor, but operates on it after parent. */ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK((pid = fork()) >= 0); + if (pid == 0) { + sleep(1); + fcntl_tests_2(fd); + exit(0); + } else { + fcntl_tests_2(fd); + CHECK(waitpid(pid, NULL, 0) == pid); + } + CHECK(close(fd) == 0); + + /* Send descriptors from parent to child. */ + CHECK(socketpair(AF_UNIX, SOCK_STREAM, 0, sp) == 0); + CHECK((pid = fork()) >= 0); + if (pid == 0) { + CHECK(close(sp[0]) == 0); + fcntl_tests_recv_0(sp[1]); + CHECK(close(sp[1]) == 0); + exit(0); + } else { + CHECK(close(sp[1]) == 0); + fcntl_tests_send_0(sp[0]); + CHECK(waitpid(pid, NULL, 0) == pid); + CHECK(close(sp[0]) == 0); + } + + /* Send descriptors from child to parent. 
*/ + CHECK(socketpair(AF_UNIX, SOCK_STREAM, 0, sp) == 0); + CHECK((pid = fork()) >= 0); + if (pid == 0) { + CHECK(close(sp[0]) == 0); + fcntl_tests_send_0(sp[1]); + CHECK(close(sp[1]) == 0); + exit(0); + } else { + CHECK(close(sp[1]) == 0); + fcntl_tests_recv_0(sp[0]); + CHECK(waitpid(pid, NULL, 0) == pid); + CHECK(close(sp[0]) == 0); + } + + exit(0); +} diff --git a/tools/regression/capsicum/syscalls/cap_getmode.c b/tools/regression/capsicum/syscalls/cap_getmode.c new file mode 100644 index 0000000..1a38202 --- /dev/null +++ b/tools/regression/capsicum/syscalls/cap_getmode.c @@ -0,0 +1,167 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "misc.h" + +int +main(void) +{ + unsigned int mode; + pid_t pid; + int pfd; + + printf("1..27\n"); + + mode = 666; + CHECK(cap_getmode(&mode) == 0); + /* If cap_getmode() succeeded mode should be modified. */ + CHECK(mode != 666); + /* We are not in capability mode. */ + CHECK(mode == 0); + + /* Expect EFAULT. */ + errno = 0; + CHECK(cap_getmode(NULL) == -1); + CHECK(errno == EFAULT); + errno = 0; + CHECK(cap_getmode((void *)(uintptr_t)0xdeadc0de) == -1); + CHECK(errno == EFAULT); + + /* If parent is not in capability mode, child after fork() also won't be. */ + pid = fork(); + switch (pid) { + case -1: + err(1, "fork() failed"); + case 0: + mode = 666; + CHECK(cap_getmode(&mode) == 0); + /* If cap_getmode() succeeded mode should be modified. */ + CHECK(mode != 666); + /* We are not in capability mode. */ + CHECK(mode == 0); + exit(0); + default: + if (waitpid(pid, NULL, 0) == -1) + err(1, "waitpid() failed"); + } + + /* If parent is not in capability mode, child after pdfork() also won't be. */ + pid = pdfork(&pfd, 0); + switch (pid) { + case -1: + err(1, "pdfork() failed"); + case 0: + mode = 666; + CHECK(cap_getmode(&mode) == 0); + /* If cap_getmode() succeeded mode should be modified. */ + CHECK(mode != 666); + /* We are not in capability mode. 
*/ + CHECK(mode == 0); + exit(0); + default: + if (pdwait(pfd) == -1) + err(1, "pdwait() failed"); + close(pfd); + } + + /* In capability mode... */ + + CHECK(cap_enter() == 0); + + mode = 666; + CHECK(cap_getmode(&mode) == 0); + /* If cap_getmode() succeeded mode should be modified. */ + CHECK(mode != 666); + /* We are in capability mode. */ + CHECK(mode == 1); + + /* Expect EFAULT. */ + errno = 0; + CHECK(cap_getmode(NULL) == -1); + CHECK(errno == EFAULT); + errno = 0; + CHECK(cap_getmode((void *)(uintptr_t)0xdeadc0de) == -1); + CHECK(errno == EFAULT); + + /* If parent is in capability mode, child after fork() also will be. */ + pid = fork(); + switch (pid) { + case -1: + err(1, "fork() failed"); + case 0: + mode = 666; + CHECK(cap_getmode(&mode) == 0); + /* If cap_getmode() succeeded mode should be modified. */ + CHECK(mode != 666); + /* We are in capability mode. */ + CHECK(mode == 1); + exit(0); + default: + /* + * wait(2) and friends are not permitted in the capability mode, + * so we can only just wait for a while. + */ + sleep(1); + } + + /* If parent is in capability mode, child after pdfork() also will be. */ + pid = pdfork(&pfd, 0); + switch (pid) { + case -1: + err(1, "pdfork() failed"); + case 0: + mode = 666; + CHECK(cap_getmode(&mode) == 0); + /* If cap_getmode() succeeded mode should be modified. */ + CHECK(mode != 666); + /* We are in capability mode. */ + CHECK(mode == 1); + exit(0); + default: + if (pdwait(pfd) == -1) + err(1, "pdwait() failed"); + close(pfd); + } + + exit(0); +} diff --git a/tools/regression/capsicum/syscalls/cap_ioctls_limit.c b/tools/regression/capsicum/syscalls/cap_ioctls_limit.c new file mode 100644 index 0000000..3a08fac --- /dev/null +++ b/tools/regression/capsicum/syscalls/cap_ioctls_limit.c @@ -0,0 +1,462 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "misc.h" + +static void +ioctl_tests_0(int fd) +{ + unsigned long cmds[2]; + + CHECK(cap_ioctls_get(fd, NULL, 0) == INT_MAX); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(ioctl(fd, FIOCLEX) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(ioctl(fd, FIONCLEX) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + cmds[0] = FIOCLEX; + cmds[1] = FIONCLEX; + CHECK(cap_ioctls_limit(fd, cmds, nitems(cmds)) == 0); + cmds[0] = cmds[1] = 0; + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == nitems(cmds)); + CHECK((cmds[0] == FIOCLEX && cmds[1] == FIONCLEX) || + (cmds[0] == FIONCLEX && cmds[1] == FIOCLEX)); + cmds[0] = FIOCLEX; + cmds[1] = FIONCLEX; + CHECK(cap_ioctls_limit(fd, cmds, nitems(cmds)) == 0); + cmds[0] = cmds[1] = 0; + CHECK(cap_ioctls_get(fd, cmds, 1) == nitems(cmds)); + CHECK(cmds[0] == FIOCLEX || cmds[0] == FIONCLEX); + CHECK(cmds[1] == 0); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(ioctl(fd, FIOCLEX) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(ioctl(fd, FIONCLEX) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + cmds[0] = FIOCLEX; + CHECK(cap_ioctls_limit(fd, cmds, 1) == 0); + cmds[0] = cmds[1] = 0; + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 1); + CHECK(cmds[0] == FIOCLEX); + cmds[0] = FIOCLEX; + cmds[1] = FIONCLEX; + errno = 0; + CHECK(cap_ioctls_limit(fd, cmds, nitems(cmds)) == -1); + CHECK(errno == ENOTCAPABLE); + cmds[0] = cmds[1] = 0; + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 1); + CHECK(cmds[0] == FIOCLEX); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(ioctl(fd, FIOCLEX) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + errno = 0; + CHECK(ioctl(fd, FIONCLEX) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(cap_ioctls_limit(fd, NULL, 0) == 0); + 
CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 0); + cmds[0] = FIOCLEX; + errno = 0; + CHECK(cap_ioctls_limit(fd, cmds, 1) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 0); + + CHECK(fcntl(fd, F_GETFD) == 0); + errno = 0; + CHECK(ioctl(fd, FIOCLEX) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + errno = 0; + CHECK(ioctl(fd, FIONCLEX) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); +} + +static void +ioctl_tests_1(int fd) +{ + unsigned long cmds[2]; + + cmds[0] = FIOCLEX; + CHECK(cap_ioctls_limit(fd, cmds, 1) == 0); + cmds[0] = cmds[1] = 0; + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 1); + CHECK(cmds[0] == FIOCLEX); + CHECK(cmds[1] == 0); + + CHECK(cap_rights_limit(fd, CAP_ALL & ~CAP_IOCTL) == 0); + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 0); + + cmds[0] = FIOCLEX; + cmds[1] = FIONCLEX; + errno = 0; + CHECK(cap_ioctls_limit(fd, cmds, nitems(cmds)) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 0); + cmds[0] = FIOCLEX; + errno = 0; + CHECK(cap_ioctls_limit(fd, cmds, 1) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 0); + + CHECK(fcntl(fd, F_GETFD) == 0); + errno = 0; + CHECK(ioctl(fd, FIOCLEX) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + errno = 0; + CHECK(ioctl(fd, FIONCLEX) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); +} + +static void +ioctl_tests_2(int fd) +{ + unsigned long cmds[2]; + + CHECK(cap_rights_limit(fd, CAP_ALL & ~CAP_IOCTL) == 0); + CHECK(cap_ioctls_get(fd, cmds, 
nitems(cmds)) == 0); + + cmds[0] = FIOCLEX; + cmds[1] = FIONCLEX; + errno = 0; + CHECK(cap_ioctls_limit(fd, cmds, nitems(cmds)) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 0); + cmds[0] = FIOCLEX; + errno = 0; + CHECK(cap_ioctls_limit(fd, cmds, 1) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 0); + + CHECK(fcntl(fd, F_GETFD) == 0); + errno = 0; + CHECK(ioctl(fd, FIOCLEX) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + errno = 0; + CHECK(ioctl(fd, FIONCLEX) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); +} + +static void +ioctl_tests_send_0(int sock) +{ + unsigned long cmds[2]; + int fd; + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK(descriptor_send(sock, fd) == 0); + CHECK(close(fd) == 0); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + cmds[0] = FIOCLEX; + cmds[1] = FIONCLEX; + CHECK(cap_ioctls_limit(fd, cmds, nitems(cmds)) == 0); + CHECK(descriptor_send(sock, fd) == 0); + CHECK(close(fd) == 0); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + cmds[0] = FIOCLEX; + CHECK(cap_ioctls_limit(fd, cmds, 1) == 0); + CHECK(descriptor_send(sock, fd) == 0); + CHECK(close(fd) == 0); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + CHECK(cap_ioctls_limit(fd, NULL, 0) == 0); + CHECK(descriptor_send(sock, fd) == 0); + CHECK(close(fd) == 0); +} + +static void +ioctl_tests_recv_0(int sock) +{ + unsigned long cmds[2]; + int fd; + + CHECK(descriptor_recv(sock, &fd) == 0); + + CHECK(cap_ioctls_get(fd, NULL, 0) == INT_MAX); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(ioctl(fd, FIOCLEX) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(ioctl(fd, FIONCLEX) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(close(fd) == 0); + + 
CHECK(descriptor_recv(sock, &fd) == 0); + + cmds[0] = cmds[1] = 0; + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == nitems(cmds)); + CHECK((cmds[0] == FIOCLEX && cmds[1] == FIONCLEX) || + (cmds[0] == FIONCLEX && cmds[1] == FIOCLEX)); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(ioctl(fd, FIOCLEX) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(ioctl(fd, FIONCLEX) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(close(fd) == 0); + + CHECK(descriptor_recv(sock, &fd) == 0); + + cmds[0] = cmds[1] = 0; + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 1); + CHECK(cmds[0] == FIOCLEX); + + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(ioctl(fd, FIOCLEX) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + errno = 0; + CHECK(ioctl(fd, FIONCLEX) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(close(fd) == 0); + + CHECK(descriptor_recv(sock, &fd) == 0); + + CHECK(cap_ioctls_get(fd, cmds, nitems(cmds)) == 0); + + CHECK(fcntl(fd, F_GETFD) == 0); + errno = 0; + CHECK(ioctl(fd, FIOCLEX) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFD) == 0); + CHECK(fcntl(fd, F_SETFD, FD_CLOEXEC) == 0); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + errno = 0; + CHECK(ioctl(fd, FIONCLEX) == -1); + CHECK(errno == ENOTCAPABLE); + CHECK(fcntl(fd, F_GETFD) == FD_CLOEXEC); + CHECK(fcntl(fd, F_SETFD, 0) == 0); + CHECK(fcntl(fd, F_GETFD) == 0); + + CHECK(close(fd) == 0); +} + +int +main(void) +{ + int fd, pfd, sp[2]; + pid_t pid; + + printf("1..607\n"); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + ioctl_tests_0(fd); + CHECK(close(fd) == 0); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + ioctl_tests_1(fd); + CHECK(close(fd) == 0); + + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + ioctl_tests_2(fd); + CHECK(close(fd) == 0); + + /* Child inherits descriptor and operates on it first. 
*/ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + pid = fork(); + switch (pid) { + case -1: + err(1, "fork() failed"); + case 0: + ioctl_tests_0(fd); + CHECK(close(fd) == 0); + exit(0); + default: + if (waitpid(pid, NULL, 0) == -1) + err(1, "waitpid() failed"); + ioctl_tests_0(fd); + } + CHECK(close(fd) == 0); + + /* Child inherits descriptor, but operates on it after parent. */ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + pid = fork(); + switch (pid) { + case -1: + err(1, "fork() failed"); + case 0: + sleep(1); + ioctl_tests_0(fd); + CHECK(close(fd) == 0); + exit(0); + default: + ioctl_tests_0(fd); + if (waitpid(pid, NULL, 0) == -1) + err(1, "waitpid() failed"); + } + CHECK(close(fd) == 0); + + /* Child inherits descriptor and operates on it first. */ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + pid = pdfork(&pfd, 0); + switch (pid) { + case -1: + err(1, "pdfork() failed"); + case 0: + ioctl_tests_1(fd); + exit(0); + default: + if (pdwait(pfd) == -1) + err(1, "pdwait() failed"); + close(pfd); + ioctl_tests_1(fd); + } + CHECK(close(fd) == 0); + + /* Child inherits descriptor, but operates on it after parent. */ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + pid = pdfork(&pfd, 0); + switch (pid) { + case -1: + err(1, "pdfork() failed"); + case 0: + sleep(1); + ioctl_tests_1(fd); + exit(0); + default: + ioctl_tests_1(fd); + if (pdwait(pfd) == -1) + err(1, "pdwait() failed"); + close(pfd); + } + CHECK(close(fd) == 0); + + /* Child inherits descriptor and operates on it first. */ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + pid = fork(); + switch (pid) { + case -1: + err(1, "fork() failed"); + case 0: + ioctl_tests_2(fd); + exit(0); + default: + if (waitpid(pid, NULL, 0) == -1) + err(1, "waitpid() failed"); + ioctl_tests_2(fd); + } + CHECK(close(fd) == 0); + + /* Child inherits descriptor, but operates on it after parent. 
*/ + CHECK((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0); + pid = fork(); + switch (pid) { + case -1: + err(1, "fork() failed"); + case 0: + sleep(1); + ioctl_tests_2(fd); + exit(0); + default: + ioctl_tests_2(fd); + if (waitpid(pid, NULL, 0) == -1) + err(1, "waitpid() failed"); + } + CHECK(close(fd) == 0); + + /* Send descriptors from parent to child. */ + CHECK(socketpair(AF_UNIX, SOCK_STREAM, 0, sp) == 0); + CHECK((pid = fork()) >= 0); + if (pid == 0) { + CHECK(close(sp[0]) == 0); + ioctl_tests_recv_0(sp[1]); + CHECK(close(sp[1]) == 0); + exit(0); + } else { + CHECK(close(sp[1]) == 0); + ioctl_tests_send_0(sp[0]); + CHECK(waitpid(pid, NULL, 0) == pid); + CHECK(close(sp[0]) == 0); + } + + /* Send descriptors from child to parent. */ + CHECK(socketpair(AF_UNIX, SOCK_STREAM, 0, sp) == 0); + CHECK((pid = fork()) >= 0); + if (pid == 0) { + CHECK(close(sp[0]) == 0); + ioctl_tests_send_0(sp[1]); + CHECK(close(sp[1]) == 0); + exit(0); + } else { + CHECK(close(sp[1]) == 0); + ioctl_tests_recv_0(sp[0]); + CHECK(waitpid(pid, NULL, 0) == pid); + CHECK(close(sp[0]) == 0); + } + + exit(0); +} diff --git a/tools/regression/capsicum/syscalls/misc.c b/tools/regression/capsicum/syscalls/misc.c new file mode 100644 index 0000000..303a911 --- /dev/null +++ b/tools/regression/capsicum/syscalls/misc.c @@ -0,0 +1,128 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include +#include +#include +#include + +#include "misc.h" + +int +pdwait(int pfd) +{ + fd_set fdset; + + FD_ZERO(&fdset); + FD_SET(pfd, &fdset); + + return (select(pfd + 1, NULL, &fdset, NULL, NULL) == -1 ? 
-1 : 0); +} + +int +descriptor_send(int sock, int fd) +{ + unsigned char ctrl[CMSG_SPACE(sizeof(fd))]; + struct msghdr msg; + struct cmsghdr *cmsg; + + assert(sock >= 0); + assert(fd >= 0); + + bzero(&msg, sizeof(msg)); + bzero(&ctrl, sizeof(ctrl)); + + msg.msg_iov = NULL; + msg.msg_iovlen = 0; + msg.msg_control = ctrl; + msg.msg_controllen = sizeof(ctrl); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(fd)); + bcopy(&fd, CMSG_DATA(cmsg), sizeof(fd)); + + if (sendmsg(sock, &msg, 0) == -1) + return (errno); + + return (0); +} + +int +descriptor_recv(int sock, int *fdp) +{ + unsigned char ctrl[CMSG_SPACE(sizeof(*fdp))]; + struct msghdr msg; + struct cmsghdr *cmsg; + struct iovec iov; + int val; + + assert(sock >= 0); + assert(fdp != NULL); + + bzero(&msg, sizeof(msg)); + bzero(&ctrl, sizeof(ctrl)); + +#if 1 + /* + * This doesn't really make sense, as we don't plan to receive any + * data, but if no buffer is provided and recv(2) returns 0 without + * control message. Must be kernel bug. + */ + iov.iov_base = &val; + iov.iov_len = sizeof(val); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; +#else + msg.msg_iov = NULL; + msg.msg_iovlen = 0; +#endif + msg.msg_control = ctrl; + msg.msg_controllen = sizeof(ctrl); + + if (recvmsg(sock, &msg, 0) == -1) + return (errno); + + cmsg = CMSG_FIRSTHDR(&msg); + if (cmsg == NULL || cmsg->cmsg_level != SOL_SOCKET || + cmsg->cmsg_type != SCM_RIGHTS) { + return (EINVAL); + } + bcopy(CMSG_DATA(cmsg), fdp, sizeof(*fdp)); + + return (0); +} diff --git a/tools/regression/capsicum/syscalls/misc.h b/tools/regression/capsicum/syscalls/misc.h new file mode 100644 index 0000000..4968c3e --- /dev/null +++ b/tools/regression/capsicum/syscalls/misc.h @@ -0,0 +1,62 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _MISC_H_ +#define _MISC_H_ + +#define OK() do { \ + int _serrno = errno; \ + printf("ok # line %u\n", __LINE__); \ + fflush(stdout); \ + errno = _serrno; \ +} while (0) +#define NOK() do { \ + int _serrno = errno; \ + printf("not ok # line %u\n", __LINE__); \ + fflush(stdout); \ + errno = _serrno; \ +} while (0) +#define CHECK(cond) do { \ + if ((cond)) \ + OK(); \ + else \ + NOK(); \ +} while (0) + +/* + * This can be removed once pdwait4(2) is implemented. 
+ */ +int pdwait(int pfd); + +int descriptor_send(int sock, int fd); +int descriptor_recv(int sock, int *fdp); + +#endif /* !_MISC_H_ */ -- cgit v1.1 From 6bc019724d33ad4fee02e8b19c6e12745460e4ec Mon Sep 17 00:00:00 2001 From: andrew Date: Sat, 2 Mar 2013 02:19:04 +0000 Subject: Ensure the stack is correctly aligned before calling the first C function. --- sys/arm/arm/locore.S | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/arm/arm/locore.S b/sys/arm/arm/locore.S index 00a61e7..37e88fe 100644 --- a/sys/arm/arm/locore.S +++ b/sys/arm/arm/locore.S @@ -204,6 +204,7 @@ mmu_done: virt_done: mov r1, #20 /* loader info size is 20 bytes also second arg */ subs sp, sp, r1 /* allocate arm_boot_params struct on stack */ + bic sp, sp, #7 /* align stack to 8 bytes */ mov r0, sp /* loader info pointer is first arg */ str r1, [r0] /* Store length of loader info */ str r9, [r0, #4] /* Store r0 from boot loader */ -- cgit v1.1 From 6a195ffa935cec2598eac6237701f35b64fdeb32 Mon Sep 17 00:00:00 2001 From: andrew Date: Sat, 2 Mar 2013 03:23:14 +0000 Subject: Build the Raspberry Pi dtb file when building the kernel so we can copy it to the boot partition for U-Boot. --- sys/arm/conf/RPI-B | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/arm/conf/RPI-B b/sys/arm/conf/RPI-B index d9e8d38..4d840cb 100644 --- a/sys/arm/conf/RPI-B +++ b/sys/arm/conf/RPI-B @@ -117,4 +117,4 @@ options FDT # Note: DTB is normally loaded and modified by RPi boot loader, then # handed to kernel via U-Boot and ubldr. #options FDT_DTB_STATIC -#makeoptions FDT_DTS_FILE=bcm2835-rpi-b.dts +makeoptions FDT_DTS_FILE=bcm2835-rpi-b.dts -- cgit v1.1 From a94abfa9f501e5affb2adf41751ae19fe18a1b36 Mon Sep 17 00:00:00 2001 From: andrew Date: Sat, 2 Mar 2013 05:02:29 +0000 Subject: Move some virtual memory constants to the top of the file where they are on other architectures [1]. While here: - Remove an unused and commented out include. - Add a comment describing the file that other copies have. 
- Fix the style of the defines and add a comment on what each one is. Suggested by: [1] alc --- sys/arm/include/vmparam.h | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/sys/arm/include/vmparam.h b/sys/arm/include/vmparam.h index aec94f8..9765b34 100644 --- a/sys/arm/include/vmparam.h +++ b/sys/arm/include/vmparam.h @@ -34,9 +34,32 @@ #ifndef _MACHINE_VMPARAM_H_ #define _MACHINE_VMPARAM_H_ +/* + * Machine dependent constants for ARM. + */ + +/* + * Virtual memory related constants, all in bytes + */ +#ifndef MAXTSIZ +#define MAXTSIZ (64UL*1024*1024) /* max text size */ +#endif +#ifndef DFLDSIZ +#define DFLDSIZ (128UL*1024*1024) /* initial data size limit */ +#endif +#ifndef MAXDSIZ +#define MAXDSIZ (512UL*1024*1024) /* max data size */ +#endif +#ifndef DFLSSIZ +#define DFLSSIZ (2UL*1024*1024) /* initial stack size limit */ +#endif +#ifndef MAXSSIZ +#define MAXSSIZ (8UL*1024*1024) /* max stack size */ +#endif +#ifndef SGROWSIZ +#define SGROWSIZ (128UL*1024) /* amount to grow stack */ +#endif -/*#include -*/ /* * Address space constants */ @@ -153,25 +176,6 @@ VM_MIN_KERNEL_ADDRESS + 1) * 2 / 5) #endif -#ifndef MAXTSIZ -#define MAXTSIZ (64*1024*1024) -#endif -#ifndef DFLDSIZ -#define DFLDSIZ (128*1024*1024) -#endif -#ifndef MAXDSIZ -#define MAXDSIZ (512*1024*1024) -#endif -#ifndef DFLSSIZ -#define DFLSSIZ (2*1024*1024) -#endif -#ifndef MAXSSIZ -#define MAXSSIZ (8*1024*1024) -#endif -#ifndef SGROWSIZ -#define SGROWSIZ (128*1024) -#endif - #ifdef ARM_USE_SMALL_ALLOC #define UMA_MD_SMALL_ALLOC #endif /* ARM_USE_SMALL_ALLOC */ -- cgit v1.1 From 92a73b39ae592cec0fde94aed1f3755d1e1cd9b8 Mon Sep 17 00:00:00 2001 From: marcel Date: Sat, 2 Mar 2013 05:03:36 +0000 Subject: Fix nandfs support by providing the same crc32 function as is used in newfs_nandfs. In libstand we get crc32 from libz. The polynomial is not the same as used for nandfs, which is the crc32 used in the kernel. 
--- lib/libstand/nandfs.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/lib/libstand/nandfs.c b/lib/libstand/nandfs.c index d5fcb9d..2ddf8c3 100644 --- a/lib/libstand/nandfs.c +++ b/lib/libstand/nandfs.c @@ -125,6 +125,27 @@ struct fs_ops nandfs_fsops = { #define NINDIR(fs) ((fs)->nf_blocksize / sizeof(nandfs_daddr_t)) +/* from NetBSD's src/sys/net/if_ethersubr.c */ +static uint32_t +nandfs_crc32(uint32_t crc, const uint8_t *buf, size_t len) +{ + static const uint32_t crctab[] = { + 0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac, + 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, + 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, + 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c + }; + size_t i; + + crc = crc ^ ~0U; + for (i = 0; i < len; i++) { + crc ^= buf[i]; + crc = (crc >> 4) ^ crctab[crc & 0xf]; + crc = (crc >> 4) ^ crctab[crc & 0xf]; + } + return (crc ^ ~0U); +} + static int nandfs_check_fsdata_crc(struct nandfs_fsdata *fsdata) { @@ -138,7 +159,7 @@ nandfs_check_fsdata_crc(struct nandfs_fsdata *fsdata) /* Calculate */ fsdata->f_sum = (0); - comp_crc = crc32(0, (uint8_t *)fsdata, fsdata->f_bytes); + comp_crc = nandfs_crc32(0, (uint8_t *)fsdata, fsdata->f_bytes); /* Restore */ fsdata->f_sum = fsdata_crc; @@ -162,7 +183,7 @@ nandfs_check_superblock_crc(struct nandfs_fsdata *fsdata, /* Calculate */ super->s_sum = (0); - comp_crc = crc32(0, (uint8_t *)super, fsdata->f_sbbytes); + comp_crc = nandfs_crc32(0, (uint8_t *)super, fsdata->f_sbbytes); /* Restore */ super->s_sum = super_crc; -- cgit v1.1 From f6b8c4d98fdaa0c3e24a77d89260634284d158a4 Mon Sep 17 00:00:00 2001 From: marcel Date: Sat, 2 Mar 2013 05:07:51 +0000 Subject: Fix warnings (control reaches end of non-void function). 
--- lib/libstand/nandfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/libstand/nandfs.c b/lib/libstand/nandfs.c index 2ddf8c3..63d92d2 100644 --- a/lib/libstand/nandfs.c +++ b/lib/libstand/nandfs.c @@ -418,7 +418,7 @@ nandfs_open(const char *path, struct open_file *f) return (0); } -static int +static void nandfs_free_node(struct nandfs_node *node) { struct bmap_buf *bmap, *tmp; @@ -445,6 +445,7 @@ nandfs_close(struct open_file *f) nandfs_free_node(fs->nf_opened_node); free(fs->nf_sb); free(fs); + return (0); } static int -- cgit v1.1 From 4c98e6251b85ea65db8044a138e577427504ff41 Mon Sep 17 00:00:00 2001 From: marcel Date: Sat, 2 Mar 2013 05:28:55 +0000 Subject: Make this WARNS=9 clean on i386 w/ clang. --- lib/libstand/nandfs.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/lib/libstand/nandfs.c b/lib/libstand/nandfs.c index 63d92d2..713dc12 100644 --- a/lib/libstand/nandfs.c +++ b/lib/libstand/nandfs.c @@ -95,8 +95,7 @@ static off_t nandfs_seek(struct open_file *, off_t, int); static int nandfs_stat(struct open_file *, struct stat *); static int nandfs_readdir(struct open_file *, struct dirent *); -static int nandfs_buf_read(struct nandfs *, char **, size_t *); -static struct nandfs_node *nandfs_lookup_inode(struct nandfs *, nandfs_daddr_t); +static int nandfs_buf_read(struct nandfs *, void **, size_t *); static struct nandfs_node *nandfs_lookup_path(struct nandfs *, const char *); static int nandfs_read_inode(struct nandfs *, struct nandfs_node *, nandfs_lbn_t, u_int, void *, int); @@ -453,7 +452,7 @@ nandfs_read(struct open_file *f, void *addr, size_t size, size_t *resid) { struct nandfs *fs = (struct nandfs *)f->f_fsdata; size_t csize, buf_size; - uint8_t *buf; + void *buf; int error = 0; NANDFS_DEBUG("nandfs_read(file=%p, addr=%p, size=%d)\n", f, addr, size); @@ -462,7 +461,7 @@ nandfs_read(struct open_file *f, void *addr, size_t size, size_t *resid) if (fs->nf_offset >= 
fs->nf_opened_node->inode->i_size) break; - error = nandfs_buf_read(fs, (void *)&buf, &buf_size); + error = nandfs_buf_read(fs, &buf, &buf_size); if (error) break; @@ -539,7 +538,7 @@ nandfs_readdir(struct open_file *f, struct dirent *d) { struct nandfs *fs = f->f_fsdata; struct nandfs_dir_entry *dirent; - uint8_t *buf; + void *buf; size_t buf_size; NANDFS_DEBUG("nandfs_readdir(file=%p, dirent=%p)\n", f, d); @@ -550,7 +549,7 @@ nandfs_readdir(struct open_file *f, struct dirent *d) return (ENOENT); } - if (nandfs_buf_read(fs, (void *)&buf, &buf_size)) { + if (nandfs_buf_read(fs, &buf, &buf_size)) { NANDFS_DEBUG("nandfs_readdir(file=%p, dirent=%p)" "buf_read failed\n", f, d); return (EIO); @@ -568,7 +567,7 @@ nandfs_readdir(struct open_file *f, struct dirent *d) } static int -nandfs_buf_read(struct nandfs *fs, char **buf_p, size_t *size_p) +nandfs_buf_read(struct nandfs *fs, void **buf_p, size_t *size_p) { nandfs_daddr_t blknr, blkoff; @@ -634,8 +633,8 @@ nandfs_lookup_path(struct nandfs *fs, const char *path) struct nandfs_node *node; struct nandfs_dir_entry *dirent; char *namebuf; - uint64_t i, j, done, counter, pinode, inode; - int nlinks = 0, len, link_len, nameidx; + uint64_t i, done, pinode, inode; + int nlinks = 0, counter, len, link_len, nameidx; uint8_t *buffer, *orig; char *strp, *lpath; @@ -672,7 +671,8 @@ nandfs_lookup_path(struct nandfs *fs, const char *path) buffer = orig; done = counter = 0; while (1) { - dirent = (struct nandfs_dir_entry *)buffer; + dirent = + (struct nandfs_dir_entry *)(void *)buffer; NANDFS_DEBUG("%s: dirent.name = %s\n", __func__, dirent->name); NANDFS_DEBUG("%s: dirent.rec_len = %d\n", @@ -768,9 +768,9 @@ static int nandfs_read_inode(struct nandfs *fs, struct nandfs_node *node, nandfs_daddr_t blknr, u_int nblks, void *buf, int raw) { - int i; uint64_t *pblks; uint64_t *vblks; + u_int i; int error; pblks = malloc(nblks * sizeof(uint64_t)); @@ -799,7 +799,7 @@ nandfs_read_inode(struct nandfs *fs, struct nandfs_node *node, return 
(EIO); } - buf += fs->nf_blocksize; + buf = (void *)((uintptr_t)buf + fs->nf_blocksize); } free(pblks); @@ -881,8 +881,7 @@ nandfs_bmap_lookup(struct nandfs *fs, struct nandfs_node *node, { struct nandfs_inode *ino; nandfs_daddr_t ind_block_num; - uint64_t *map, *indir; - uint64_t idx0, idx1, vblk, tmp; + uint64_t *map; int idx; int level; @@ -1028,7 +1027,7 @@ ioread(struct open_file *f, off_t pos, void *buf, u_int length) err = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, pos, nsec * bsize, buffer, NULL); - memcpy(buf, buffer + off, length); + memcpy(buf, (void *)((uintptr_t)buffer + off), length); free(buffer); return (err); -- cgit v1.1 From 4c5c303d09338e8dee2f5b87db2a9c5e75f9a337 Mon Sep 17 00:00:00 2001 From: joel Date: Sat, 2 Mar 2013 06:55:55 +0000 Subject: mdoc: remove superfluous paragraph macro. --- lib/libc/sys/cap_fcntls_limit.2 | 1 - lib/libc/sys/cap_ioctls_limit.2 | 1 - lib/libc/sys/cap_rights_limit.2 | 1 - 3 files changed, 3 deletions(-) diff --git a/lib/libc/sys/cap_fcntls_limit.2 b/lib/libc/sys/cap_fcntls_limit.2 index 8fa7463..b1fca2c 100644 --- a/lib/libc/sys/cap_fcntls_limit.2 +++ b/lib/libc/sys/cap_fcntls_limit.2 @@ -120,7 +120,6 @@ argument points at an invalid address. Support for capabilities and capabilities mode was developed as part of the .Tn TrustedBSD Project. -.Pp .Sh AUTHORS This function was created by .An Pawel Jakub Dawidek Aq pawel@dawidek.net diff --git a/lib/libc/sys/cap_ioctls_limit.2 b/lib/libc/sys/cap_ioctls_limit.2 index 5eca18c..2c21211 100644 --- a/lib/libc/sys/cap_ioctls_limit.2 +++ b/lib/libc/sys/cap_ioctls_limit.2 @@ -151,7 +151,6 @@ argument points at invalid address. Support for capabilities and capabilities mode was developed as part of the .Tn TrustedBSD Project. 
-.Pp .Sh AUTHORS This function was created by .An Pawel Jakub Dawidek Aq pawel@dawidek.net diff --git a/lib/libc/sys/cap_rights_limit.2 b/lib/libc/sys/cap_rights_limit.2 index d8d8777..0c527aa 100644 --- a/lib/libc/sys/cap_rights_limit.2 +++ b/lib/libc/sys/cap_rights_limit.2 @@ -590,7 +590,6 @@ argument points at an invalid address. Support for capabilities and capabilities mode was developed as part of the .Tn TrustedBSD Project. -.Pp .Sh AUTHORS This function was created by .An Pawel Jakub Dawidek Aq pawel@dawidek.net -- cgit v1.1 From f77596bc521b671974f181500d0c32e3836d24ab Mon Sep 17 00:00:00 2001 From: adrian Date: Sat, 2 Mar 2013 08:12:41 +0000 Subject: Disable the ctl driver in GENERIC. It unfortunately steals a fair chunk of RAM at startup even if it's not actively used, which prevents FreeBSD VMs of 128MB from successfully booting and running. --- UPDATING | 7 +++++++ sys/amd64/conf/GENERIC | 2 +- sys/i386/conf/GENERIC | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/UPDATING b/UPDATING index 74da04d..50e6452 100644 --- a/UPDATING +++ b/UPDATING @@ -26,6 +26,13 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 10.x IS SLOW: disable the most expensive debugging functionality run "ln -s 'abort:false,junk:false' /etc/malloc.conf".) +20130301: + The ctl device has been disabled in GENERIC for i386 and amd64. + This was done due to the extra memory being allocated at system + initialisation time by the ctl driver which was only used if + a CAM target device was created. This makes a FreeBSD system + unusable on 128MB or less of RAM. + 20130208: A new compression method (lz4) has been merged to -HEAD. Please refer to zpool-features(7) for more information. 
diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC index 5819a0d..3988195 100644 --- a/sys/amd64/conf/GENERIC +++ b/sys/amd64/conf/GENERIC @@ -138,7 +138,7 @@ device sa # Sequential Access (tape etc) device cd # CD device pass # Passthrough device (direct ATA/SCSI access) device ses # Enclosure Services (SES and SAF-TE) -device ctl # CAM Target Layer +#device ctl # CAM Target Layer # RAID controllers interfaced to the SCSI subsystem device amr # AMI MegaRAID diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC index 47af43b..3a98ded 100644 --- a/sys/i386/conf/GENERIC +++ b/sys/i386/conf/GENERIC @@ -146,7 +146,7 @@ device sa # Sequential Access (tape etc) device cd # CD device pass # Passthrough device (direct ATA/SCSI access) device ses # Enclosure Services (SES and SAF-TE) -device ctl # CAM Target Layer +#device ctl # CAM Target Layer # RAID controllers interfaced to the SCSI subsystem device amr # AMI MegaRAID -- cgit v1.1 From 73650b4f3af1f7c8b2448569209fcdff427c08db Mon Sep 17 00:00:00 2001 From: pjd Date: Sat, 2 Mar 2013 09:58:47 +0000 Subject: If the target file already exists, check for the CAP_UNLINKAT capability right on the target directory descriptor, but only if this is renameat(2) and a real target directory descriptor is given (not AT_FDCWD). Without this fix regular rename(2) fails if the target file already exists. Reported by: Michael Butler Reported by: Larry Rosenman Sponsored by: The FreeBSD Foundation --- sys/kern/vfs_syscalls.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 787399a..4c1d97c 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -3556,13 +3556,16 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, goto out; } #ifdef CAPABILITIES - /* - * If the target already exists we require CAP_UNLINKAT - * from 'newfd'.
- */ - error = cap_check(tond.ni_filecaps.fc_rights, CAP_UNLINKAT); - if (error != 0) - goto out; + if (newfd != AT_FDCWD) { + /* + * If the target already exists we require CAP_UNLINKAT + * from 'newfd'. + */ + error = cap_check(tond.ni_filecaps.fc_rights, + CAP_UNLINKAT); + if (error != 0) + goto out; + } #endif } if (fvp == tdvp) { -- cgit v1.1 From 869c43b8d97f429d8c3f5ca0accb16b2c17d9416 Mon Sep 17 00:00:00 2001 From: jilles Date: Sat, 2 Mar 2013 12:42:23 +0000 Subject: nullfs: Improve f_flags in statfs(). Include some flags of the nullfs mount itself: MNT_RDONLY, MNT_NOEXEC, MNT_NOSUID, MNT_UNION, MNT_NOSYMFOLLOW. This allows userland code calling statfs() or fstatfs() to see these flags. In particular, this allows opendir() to detect that a -t nullfs -o union mount needs deduplication (otherwise at least . and .. are returned twice) and allows rtld to detect a -t nullfs -o noexec mount as noexec. Turn off the MNT_ROOTFS flag from the underlying filesystem because the nullfs mount is definitely not the root filesystem. Reviewed by: kib MFC after: 1 week --- sys/fs/nullfs/null_vfsops.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c index 3724e0a..02932bd 100644 --- a/sys/fs/nullfs/null_vfsops.c +++ b/sys/fs/nullfs/null_vfsops.c @@ -313,7 +313,8 @@ nullfs_statfs(mp, sbp) /* now copy across the "interesting" information and fake the rest */ sbp->f_type = mstat.f_type; - sbp->f_flags = mstat.f_flags; + sbp->f_flags = (sbp->f_flags & (MNT_RDONLY | MNT_NOEXEC | MNT_NOSUID | + MNT_UNION | MNT_NOSYMFOLLOW)) | (mstat.f_flags & ~MNT_ROOTFS); sbp->f_bsize = mstat.f_bsize; sbp->f_iosize = mstat.f_iosize; sbp->f_blocks = mstat.f_blocks; -- cgit v1.1 From b1d7b9754bbc645b03e7920d1a2b39ff3848afdc Mon Sep 17 00:00:00 2001 From: marius Date: Sat, 2 Mar 2013 13:04:58 +0000 Subject: Revert the part of r247600 which turned the overtemperature and power fail interrupt shutdown handlers into filters. 
Shutdown_nice(9) acquires a sleep lock, which filters shouldn't do. It also seems that kern_reboot(9) still may require Giant to be held. Submitted by: bde --- sys/sparc64/pci/psycho.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/sys/sparc64/pci/psycho.c b/sys/sparc64/pci/psycho.c index 8f4f23e..a6e1469 100644 --- a/sys/sparc64/pci/psycho.c +++ b/sys/sparc64/pci/psycho.c @@ -81,7 +81,7 @@ static const struct psycho_desc *psycho_find_desc(const struct psycho_desc *, const char *); static const struct psycho_desc *psycho_get_desc(device_t); static void psycho_set_intr(struct psycho_softc *, u_int, bus_addr_t, - driver_filter_t); + driver_filter_t, driver_intr_t); static int psycho_find_intrmap(struct psycho_softc *, u_int, bus_addr_t *, bus_addr_t *, u_long *); static void sabre_dmamap_sync(bus_dma_tag_t dt, bus_dmamap_t map, @@ -96,8 +96,8 @@ static driver_filter_t psycho_ue; static driver_filter_t psycho_ce; static driver_filter_t psycho_pci_bus; static driver_filter_t psycho_powerdebug; -static driver_filter_t psycho_powerdown; -static driver_filter_t psycho_overtemp; +static driver_intr_t psycho_powerdown; +static driver_intr_t psycho_overtemp; #ifdef PSYCHO_MAP_WAKEUP static driver_filter_t psycho_wakeup; #endif @@ -619,17 +619,17 @@ psycho_attach(device_t dev) * XXX Not all controllers have these, but installing them * is better than trying to sort through this mess.
*/ - psycho_set_intr(sc, 1, PSR_UE_INT_MAP, psycho_ue); - psycho_set_intr(sc, 2, PSR_CE_INT_MAP, psycho_ce); + psycho_set_intr(sc, 1, PSR_UE_INT_MAP, psycho_ue, NULL); + psycho_set_intr(sc, 2, PSR_CE_INT_MAP, psycho_ce, NULL); switch (psycho_powerfail) { case 0: break; case 2: psycho_set_intr(sc, 3, PSR_POWER_INT_MAP, - psycho_powerdebug); + psycho_powerdebug, NULL); break; default: - psycho_set_intr(sc, 3, PSR_POWER_INT_MAP, + psycho_set_intr(sc, 3, PSR_POWER_INT_MAP, NULL, psycho_powerdown); break; } @@ -643,7 +643,7 @@ psycho_attach(device_t dev) * The spare hardware interrupt is used for the * over-temperature interrupt. */ - psycho_set_intr(sc, 4, PSR_SPARE_INT_MAP, + psycho_set_intr(sc, 4, PSR_SPARE_INT_MAP, NULL, psycho_overtemp); #ifdef PSYCHO_MAP_WAKEUP /* @@ -651,7 +651,7 @@ psycho_attach(device_t dev) * now. */ psycho_set_intr(sc, 5, PSR_PWRMGT_INT_MAP, - psycho_wakeup); + psycho_wakeup, NULL); #endif /* PSYCHO_MAP_WAKEUP */ } } @@ -661,7 +661,7 @@ psycho_attach(device_t dev) * interrupt but they are also only used for PCI bus A. */ psycho_set_intr(sc, 0, sc->sc_half == 0 ? 
PSR_PCIAERR_INT_MAP : - PSR_PCIBERR_INT_MAP, psycho_pci_bus); + PSR_PCIBERR_INT_MAP, psycho_pci_bus, NULL); /* * Set the latency timer register as this isn't always done by the @@ -701,7 +701,7 @@ psycho_attach(device_t dev) static void psycho_set_intr(struct psycho_softc *sc, u_int index, bus_addr_t intrmap, - driver_filter_t handler) + driver_filter_t filt, driver_intr_t intr) { u_long vec; int rid; @@ -722,7 +722,7 @@ psycho_set_intr(struct psycho_softc *sc, u_int index, bus_addr_t intrmap, INTVEC(PSYCHO_READ8(sc, intrmap)) != vec || intr_vectors[vec].iv_ic != &psycho_ic || bus_setup_intr(sc->sc_dev, sc->sc_irq_res[index], - INTR_TYPE_MISC | INTR_BRIDGE, handler, NULL, sc, + INTR_TYPE_MISC | INTR_BRIDGE, filt, intr, sc, &sc->sc_ihand[index]) != 0) panic("%s: failed to set up interrupt %d", __func__, index); } @@ -858,32 +858,30 @@ psycho_powerdebug(void *arg __unused) return (FILTER_HANDLED); } -static int +static void psycho_powerdown(void *arg __unused) { static int shutdown; /* As the interrupt is cleared we may be called multiple times. */ if (shutdown != 0) - return (FILTER_HANDLED); + return; shutdown++; printf("Power Failure Detected: Shutting down NOW.\n"); shutdown_nice(RB_POWEROFF); - return (FILTER_HANDLED); } -static int +static void psycho_overtemp(void *arg __unused) { static int shutdown; /* As the interrupt is cleared we may be called multiple times. */ if (shutdown != 0) - return (FILTER_HANDLED); + return; shutdown++; printf("DANGER: OVER TEMPERATURE detected.\nShutting down NOW.\n"); shutdown_nice(RB_POWEROFF); - return (FILTER_HANDLED); } #ifdef PSYCHO_MAP_WAKEUP -- cgit v1.1 From ee248b021f7bf872e6ec7ad05e281873bfe22f7d Mon Sep 17 00:00:00 2001 From: marius Date: Sat, 2 Mar 2013 13:08:13 +0000 Subject: - Revert the part of r247601 which turned the overtemperature and power fail interrupt shutdown handlers into filters. Shutdown_nice(9) acquires a sleep lock, which filters shouldn't do. 
It also seems that kern_reboot(9) still may require Giant to be held. - Correct an incorrect argument to shutdown_nice(9). Submitted by: bde --- sys/sparc64/sbus/sbus.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/sys/sparc64/sbus/sbus.c b/sys/sparc64/sbus/sbus.c index 6c69e2d..580624f 100644 --- a/sys/sparc64/sbus/sbus.c +++ b/sys/sparc64/sbus/sbus.c @@ -152,8 +152,8 @@ static void sbus_intr_assign(void *); static void sbus_intr_clear(void *); static int sbus_find_intrmap(struct sbus_softc *, u_int, bus_addr_t *, bus_addr_t *); -static driver_filter_t sbus_overtemp; -static driver_filter_t sbus_pwrfail; +static driver_intr_t sbus_overtemp; +static driver_intr_t sbus_pwrfail; static int sbus_print_res(struct sbus_devinfo *); static device_method_t sbus_methods[] = { @@ -410,7 +410,7 @@ sbus_attach(device_t dev) INTVEC(SYSIO_READ8(sc, SBR_THERM_INT_MAP)) != vec || intr_vectors[vec].iv_ic != &sbus_ic || bus_setup_intr(dev, sc->sc_ot_ires, INTR_TYPE_MISC | INTR_BRIDGE, - sbus_overtemp, NULL, sc, &sc->sc_ot_ihand) != 0) + NULL, sbus_overtemp, sc, &sc->sc_ot_ihand) != 0) panic("%s: failed to set up temperature interrupt", __func__); i = 3; sc->sc_pf_ires = bus_alloc_resource_any(dev, SYS_RES_IRQ, &i, @@ -420,7 +420,7 @@ sbus_attach(device_t dev) INTVEC(SYSIO_READ8(sc, SBR_POWER_INT_MAP)) != vec || intr_vectors[vec].iv_ic != &sbus_ic || bus_setup_intr(dev, sc->sc_pf_ires, INTR_TYPE_MISC | INTR_BRIDGE, - sbus_pwrfail, NULL, sc, &sc->sc_pf_ihand) != 0) + NULL, sbus_pwrfail, sc, &sc->sc_pf_ihand) != 0) panic("%s: failed to set up power fail interrupt", __func__); /* Initialize the counter-timer. */ @@ -897,33 +897,31 @@ sbus_get_devinfo(device_t bus, device_t child) * This handles the interrupt and powers off the machine. * The same needs to be done to PCI controller drivers. */ -static int +static void sbus_overtemp(void *arg __unused) { static int shutdown; /* As the interrupt is cleared we may be called multiple times.
*/ if (shutdown != 0) - return (FILTER_HANDLED); + return; shutdown++; printf("DANGER: OVER TEMPERATURE detected\nShutting down NOW.\n"); shutdown_nice(RB_POWEROFF); - return (FILTER_HANDLED); } /* Try to shut down in time in case of power failure. */ -static int +static void sbus_pwrfail(void *arg __unused) { static int shutdown; /* As the interrupt is cleared we may be called multiple times. */ if (shutdown != 0) - return (FILTER_HANDLED); + return; shutdown++; printf("Power failure detected\nShutting down NOW.\n"); - shutdown_nice(FILTER_HANDLED); - return (FILTER_HANDLED); + shutdown_nice(RB_POWEROFF); } static int -- cgit v1.1 From 5d33901b24a0ca4496b12ca2299df4b62dfdfd35 Mon Sep 17 00:00:00 2001 From: attilio Date: Sat, 2 Mar 2013 14:19:08 +0000 Subject: Merge from vmc-playground branch: Rename the pv_entry_t iterator from pv_list to pv_next. Besides being more correct technically (as the name seems to suggest this is a list while it is an iterator), it will also be needed by vm_radix work to avoid a nameclash on macro expansions. 
Sponsored by: EMC / Isilon storage division Reviewed by: alc, jeff Tested by: flo, pho, jhb, davide --- sys/amd64/amd64/pmap.c | 54 +++++++++++++++++++++++----------------------- sys/amd64/include/pmap.h | 2 +- sys/i386/i386/pmap.c | 56 ++++++++++++++++++++++++------------------------ sys/i386/include/pmap.h | 2 +- 4 files changed, 57 insertions(+), 57 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index f2d8967..532bd36 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -2222,7 +2222,7 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) if ((tpte & PG_A) != 0) vm_page_aflag_set(m, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); @@ -2506,9 +2506,9 @@ pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) pv_entry_t pv; rw_assert(&pvh_global_lock, RA_LOCKED); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { - TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); break; } } @@ -2547,7 +2547,7 @@ pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found")); m = PHYS_TO_VM_PAGE(pa); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); /* Instantiate the remaining NPTEPG - 1 pv entries. 
*/ PV_STAT(atomic_add_long(&pv_entry_allocs, NPTEPG - 1)); va_last = va + NBPDR - PAGE_SIZE; @@ -2565,7 +2565,7 @@ pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, m++; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_pv_demote_pde: page %p is not managed", m)); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); if (va == va_last) goto out; } @@ -2613,7 +2613,7 @@ pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, pv = pmap_pvh_remove(&m->md, pmap, va); KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found")); pvh = pa_to_pvh(pa); - TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); /* Free the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { @@ -2654,7 +2654,7 @@ pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, if ((pv = get_pv_entry(pmap, NULL)) != NULL) { pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); return (TRUE); } else return (FALSE); @@ -2678,7 +2678,7 @@ pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); pvh = pa_to_pvh(pa); - TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); return (TRUE); } else return (FALSE); @@ -3157,7 +3157,7 @@ small_mappings: vm_page_dirty(m); pmap_unuse_pt(pmap, pv->pv_va, *pde, &free); pmap_invalidate_page(pmap, pv->pv_va); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } @@ -3602,7 +3602,7 @@ retry: pv = get_pv_entry(pmap, &lock); pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); if ((newpte & PG_RW) != 0) vm_page_aflag_set(m, PGA_WRITEABLE); } @@ -4295,7 
+4295,7 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; @@ -4306,7 +4306,7 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) } if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; @@ -4358,7 +4358,7 @@ pmap_pvh_wired_mappings(struct md_page *pvh, int count) pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte(pmap, pv->pv_va); @@ -4489,7 +4489,7 @@ pmap_remove_pages(pmap_t pmap) if ((tpte & PG_PS) != 0) { pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE); pvh = pa_to_pvh(tpte & PG_PS_FRAME); - TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) if ((mt->aflags & PGA_WRITEABLE) != 0 && @@ -4508,7 +4508,7 @@ pmap_remove_pages(pmap_t pmap) } } else { pmap_resident_count_dec(pmap, 1); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); if ((m->aflags & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { @@ -4583,7 +4583,7 @@ pmap_is_modified_pvh(struct md_page *pvh) rw_assert(&pvh_global_lock, RA_WLOCKED); rv = FALSE; - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte(pmap, pv->pv_va); @@ -4654,7 +4654,7 @@ pmap_is_referenced_pvh(struct md_page *pvh) rw_assert(&pvh_global_lock, RA_WLOCKED); rv = FALSE; - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + 
TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte(pmap, pv->pv_va); @@ -4695,7 +4695,7 @@ pmap_remove_write(vm_page_t m) if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); va = pv->pv_va; @@ -4705,7 +4705,7 @@ pmap_remove_write(vm_page_t m) PMAP_UNLOCK(pmap); } small_mappings: - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); @@ -4758,7 +4758,7 @@ pmap_ts_referenced(vm_page_t m) if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, pvn) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); va = pv->pv_va; @@ -4792,9 +4792,9 @@ small_mappings: if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pvf = pv; do { - pvn = TAILQ_NEXT(pv, pv_list); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + pvn = TAILQ_NEXT(pv, pv_next); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); @@ -4846,7 +4846,7 @@ pmap_clear_modify(vm_page_t m) if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); va = pv->pv_va; @@ -4878,7 +4878,7 @@ pmap_clear_modify(vm_page_t m) PMAP_UNLOCK(pmap); } small_mappings: - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, 
pv->pv_va); @@ -4915,7 +4915,7 @@ pmap_clear_reference(vm_page_t m) if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); va = pv->pv_va; @@ -4938,7 +4938,7 @@ pmap_clear_reference(vm_page_t m) PMAP_UNLOCK(pmap); } small_mappings: - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index 334d6c6..0fc8867 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -277,7 +277,7 @@ extern struct pmap kernel_pmap_store; */ typedef struct pv_entry { vm_offset_t pv_va; /* virtual address for mapping */ - TAILQ_ENTRY(pv_entry) pv_list; + TAILQ_ENTRY(pv_entry) pv_next; } *pv_entry_t; /* diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 3c47a45..9aaf1eb 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -2286,7 +2286,7 @@ pmap_pv_reclaim(pmap_t locked_pmap) vm_page_dirty(m); if ((tpte & PG_A) != 0) vm_page_aflag_set(m, PGA_REFERENCED); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); @@ -2491,9 +2491,9 @@ pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { - TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); break; } } @@ -2521,7 +2521,7 @@ pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv 
not found")); m = PHYS_TO_VM_PAGE(pa); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); /* Instantiate the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { @@ -2557,7 +2557,7 @@ pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) pv = pmap_pvh_remove(&m->md, pmap, va); KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found")); pvh = pa_to_pvh(pa); - TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); /* Free the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { @@ -2604,7 +2604,7 @@ pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) PMAP_LOCK_ASSERT(pmap, MA_OWNED); pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); } /* @@ -2620,7 +2620,7 @@ pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) if (pv_entry_count < pv_entry_high_water && (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); return (TRUE); } else return (FALSE); @@ -2640,7 +2640,7 @@ pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; pvh = pa_to_pvh(pa); - TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); return (TRUE); } else return (FALSE); @@ -3095,7 +3095,7 @@ small_mappings: vm_page_dirty(m); pmap_unuse_pt(pmap, pv->pv_va, &free); pmap_invalidate_page(pmap, pv->pv_va); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } @@ -3550,7 +3550,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, if (pv == NULL) pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, 
pv_list); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); pa |= PG_MANAGED; } else if (pv != NULL) free_pv_entry(pmap, pv); @@ -4258,7 +4258,7 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; rw_wlock(&pvh_global_lock); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; @@ -4269,7 +4269,7 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) } if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; @@ -4321,7 +4321,7 @@ pmap_pvh_wired_mappings(struct md_page *pvh, int count) rw_assert(&pvh_global_lock, RA_WLOCKED); sched_pin(); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); @@ -4448,7 +4448,7 @@ pmap_remove_pages(pmap_t pmap) if ((tpte & PG_PS) != 0) { pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; pvh = pa_to_pvh(tpte & PG_PS_FRAME); - TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) if (TAILQ_EMPTY(&mt->md.pv_list)) @@ -4466,7 +4466,7 @@ pmap_remove_pages(pmap_t pmap) } } else { pmap->pm_stats.resident_count--; - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); @@ -4536,7 +4536,7 @@ pmap_is_modified_pvh(struct md_page *pvh) rw_assert(&pvh_global_lock, RA_WLOCKED); rv = FALSE; sched_pin(); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, 
pv->pv_va); @@ -4609,7 +4609,7 @@ pmap_is_referenced_pvh(struct md_page *pvh) rw_assert(&pvh_global_lock, RA_WLOCKED); rv = FALSE; sched_pin(); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); @@ -4652,7 +4652,7 @@ pmap_remove_write(vm_page_t m) if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); @@ -4662,7 +4662,7 @@ pmap_remove_write(vm_page_t m) PMAP_UNLOCK(pmap); } small_mappings: - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); @@ -4721,7 +4721,7 @@ pmap_ts_referenced(vm_page_t m) sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, pvn) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); @@ -4755,9 +4755,9 @@ small_mappings: if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pvf = pv; do { - pvn = TAILQ_NEXT(pv, pv_list); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + pvn = TAILQ_NEXT(pv, pv_next); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); @@ -4811,7 +4811,7 @@ pmap_clear_modify(vm_page_t m) if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); @@ -4848,7 +4848,7 @@ pmap_clear_modify(vm_page_t m) PMAP_UNLOCK(pmap); 
} small_mappings: - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); @@ -4892,7 +4892,7 @@ pmap_clear_reference(vm_page_t m) if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); @@ -4915,7 +4915,7 @@ pmap_clear_reference(vm_page_t m) PMAP_UNLOCK(pmap); } small_mappings: - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); @@ -5426,7 +5426,7 @@ pmap_pvdump(vm_paddr_t pa) printf("pa %x", pa); m = PHYS_TO_VM_PAGE(pa); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va); pads(pmap); diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h index 7a827f8..1de47f2 100644 --- a/sys/i386/include/pmap.h +++ b/sys/i386/include/pmap.h @@ -468,7 +468,7 @@ extern struct pmap kernel_pmap_store; */ typedef struct pv_entry { vm_offset_t pv_va; /* virtual address for mapping */ - TAILQ_ENTRY(pv_entry) pv_list; + TAILQ_ENTRY(pv_entry) pv_next; } *pv_entry_t; /* -- cgit v1.1