diff options
71 files changed, 599 insertions, 994 deletions
diff --git a/arch/mips/kernel/irixsig.c b/arch/mips/kernel/irixsig.c index 08273a2..8150f07 100644 --- a/arch/mips/kernel/irixsig.c +++ b/arch/mips/kernel/irixsig.c @@ -603,7 +603,7 @@ repeat: /* move to end of parent's list to avoid starvation */ write_lock_irq(&tasklist_lock); remove_parent(p); - add_parent(p, p->parent); + add_parent(p); write_unlock_irq(&tasklist_lock); retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; if (retval) @@ -643,7 +643,7 @@ repeat: write_lock_irq(&tasklist_lock); remove_parent(p); p->parent = p->real_parent; - add_parent(p, p->parent); + add_parent(p); do_notify_parent(p, SIGCHLD); write_unlock_irq(&tasklist_lock); } else diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c index c8d8d0a..511116a 100644 --- a/arch/um/kernel/smp.c +++ b/arch/um/kernel/smp.c @@ -143,7 +143,6 @@ void smp_prepare_cpus(unsigned int maxcpus) idle = idle_thread(cpu); init_idle(idle, cpu); - unhash_process(idle); waittime = 200000000; while (waittime-- && !cpu_isset(cpu, cpu_callin_map)) diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 811dadb..0bfd1b6 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -1094,8 +1094,8 @@ static void do_tty_hangup(void *data) p->signal->tty = NULL; if (!p->signal->leader) continue; - send_group_sig_info(SIGHUP, SEND_SIG_PRIV, p); - send_group_sig_info(SIGCONT, SEND_SIG_PRIV, p); + group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p); + group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p); if (tty->pgrp > 0) p->signal->tty_old_pgrp = tty->pgrp; } while_each_task_pid(tty->session, PIDTYPE_SID, p); @@ -2672,7 +2672,7 @@ static void __do_SAK(void *arg) tty_hangup(tty); #else struct tty_struct *tty = arg; - struct task_struct *p; + struct task_struct *g, *p; int session; int i; struct file *filp; @@ -2693,8 +2693,18 @@ static void __do_SAK(void *arg) tty->driver->flush_buffer(tty); read_lock(&tasklist_lock); + /* Kill the entire session */ do_each_task_pid(session, PIDTYPE_SID, p) { - if (p->signal->tty == tty || session > 0) { + printk(KERN_NOTICE "SAK: killed process %d" + " (%s): p->signal->session==tty->session\n", + p->pid, p->comm); + send_sig(SIGKILL, p, 1); + } while_each_task_pid(session, PIDTYPE_SID, p); + /* Now kill any processes that happen to have the + * tty open. + */ + do_each_thread(g, p) { + if (p->signal->tty == tty) { printk(KERN_NOTICE "SAK: killed process %d" " (%s): p->signal->session==tty->session\n", p->pid, p->comm); @@ -2721,7 +2731,7 @@ static void __do_SAK(void *arg) rcu_read_unlock(); } task_unlock(p); - } while_each_task_pid(session, PIDTYPE_SID, p); + } while_each_thread(g, p); read_unlock(&tasklist_lock); #endif } diff --git a/drivers/ieee1394/dv1394.c b/drivers/ieee1394/dv1394.c index efeaa94..85c2d4c 100644 --- a/drivers/ieee1394/dv1394.c +++ b/drivers/ieee1394/dv1394.c @@ -73,7 +73,7 @@ - fix all XXX showstoppers - disable IR/IT DMA interrupts on shutdown - flush pci writes to the card by issuing a read - - devfs and character device dispatching (* needs testing with Linux 2.2.x) + - character device dispatching - switch over to the new kernel DMA API (pci_map_*()) (* needs testing on platforms with IOMMU!) - keep all video_cards in a list (for open() via chardev), set file->private_data = video - dv1394_poll should indicate POLLIN when receiving buffers are available @@ -1096,7 +1096,6 @@ static int do_dv1394_init_default(struct video_card *video) init.api_version = DV1394_API_VERSION; init.n_frames = DV1394_MAX_FRAMES / 4; - /* the following are now set via devfs */ init.channel = video->channel; init.format = video->pal_or_ntsc; init.cip_n = video->cip_n; @@ -1791,8 +1790,6 @@ static int dv1394_open(struct inode *inode, struct file *file) { struct video_card *video = NULL; - /* if the device was opened through devfs, then file->private_data - has already been set to video by devfs */ if (file->private_data) { video = (struct video_card*) file->private_data; @@ -2211,7 +2208,7 @@ static int dv1394_init(struct ti_ohci *ohci, enum pal_or_ntsc format, enum modes video = kzalloc(sizeof(*video), GFP_KERNEL); if (!video) { printk(KERN_ERR "dv1394: cannot allocate video_card\n"); - goto err; + return -1; } video->ohci = ohci; @@ -2266,37 +2263,14 @@ static int dv1394_init(struct ti_ohci *ohci, enum pal_or_ntsc format, enum modes list_add_tail(&video->list, &dv1394_cards); spin_unlock_irqrestore(&dv1394_cards_lock, flags); - if (devfs_mk_cdev(MKDEV(IEEE1394_MAJOR, - IEEE1394_MINOR_BLOCK_DV1394*16 + video->id), - S_IFCHR|S_IRUGO|S_IWUGO, - "ieee1394/dv/host%d/%s/%s", - (video->id>>2), - (video->pal_or_ntsc == DV1394_NTSC ? "NTSC" : "PAL"), - (video->mode == MODE_RECEIVE ? "in" : "out")) < 0) - goto err_free; - debug_printk("dv1394: dv1394_init() OK on ID %d\n", video->id); - return 0; - - err_free: - kfree(video); - err: - return -1; } static void dv1394_un_init(struct video_card *video) { - char buf[32]; - /* obviously nobody has the driver open at this point */ do_dv1394_shutdown(video, 1); - snprintf(buf, sizeof(buf), "dv/host%d/%s/%s", (video->id >> 2), - (video->pal_or_ntsc == DV1394_NTSC ? "NTSC" : "PAL"), - (video->mode == MODE_RECEIVE ? "in" : "out") - ); - - devfs_remove("ieee1394/%s", buf); kfree(video); } @@ -2333,9 +2307,6 @@ static void dv1394_remove_host (struct hpsb_host *host) class_device_destroy(hpsb_protocol_class, MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_DV1394 * 16 + (id<<2))); - devfs_remove("ieee1394/dv/host%d/NTSC", id); - devfs_remove("ieee1394/dv/host%d/PAL", id); - devfs_remove("ieee1394/dv/host%d", id); } static void dv1394_add_host (struct hpsb_host *host) @@ -2352,9 +2323,6 @@ static void dv1394_add_host (struct hpsb_host *host) class_device_create(hpsb_protocol_class, NULL, MKDEV( IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_DV1394 * 16 + (id<<2)), NULL, "dv1394-%d", id); - devfs_mk_dir("ieee1394/dv/host%d", id); - devfs_mk_dir("ieee1394/dv/host%d/NTSC", id); - devfs_mk_dir("ieee1394/dv/host%d/PAL", id); dv1394_init(ohci, DV1394_NTSC, MODE_RECEIVE); dv1394_init(ohci, DV1394_NTSC, MODE_TRANSMIT); @@ -2611,10 +2579,8 @@ MODULE_LICENSE("GPL"); static void __exit dv1394_exit_module(void) { hpsb_unregister_protocol(&dv1394_driver); - hpsb_unregister_highlevel(&dv1394_highlevel); cdev_del(&dv1394_cdev); - devfs_remove("ieee1394/dv"); } static int __init dv1394_init_module(void) @@ -2630,15 +2596,12 @@ static int __init dv1394_init_module(void) return ret; } - devfs_mk_dir("ieee1394/dv"); - hpsb_register_highlevel(&dv1394_highlevel); ret = hpsb_register_protocol(&dv1394_driver); if (ret) { printk(KERN_ERR "dv1394: failed to register protocol\n"); hpsb_unregister_highlevel(&dv1394_highlevel); - devfs_remove("ieee1394/dv"); cdev_del(&dv1394_cdev); return ret; } diff --git a/drivers/ieee1394/ieee1394_core.c b/drivers/ieee1394/ieee1394_core.c index 25ef5a8..be6854e 100644 --- a/drivers/ieee1394/ieee1394_core.c +++ b/drivers/ieee1394/ieee1394_core.c @@ -58,7 +58,7 @@ MODULE_PARM_DESC(disable_nodemgr, "Disable nodemgr functionality."); /* Disable Isochronous Resource Manager functionality */ int hpsb_disable_irm = 0; -module_param_named(disable_irm, hpsb_disable_irm, bool, 0); +module_param_named(disable_irm, hpsb_disable_irm, bool, 0444); MODULE_PARM_DESC(disable_irm, "Disable Isochronous Resource Manager functionality."); @@ -1078,17 +1078,10 @@ static int __init ieee1394_init(void) goto exit_release_kernel_thread; } - /* actually this is a non-fatal error */ - ret = devfs_mk_dir("ieee1394"); - if (ret < 0) { - HPSB_ERR("unable to make devfs dir for device major %d!\n", IEEE1394_MAJOR); - goto release_chrdev; - } - ret = bus_register(&ieee1394_bus_type); if (ret < 0) { HPSB_INFO("bus register failed"); - goto release_devfs; + goto release_chrdev; } for (i = 0; fw_bus_attrs[i]; i++) { @@ -1099,7 +1092,7 @@ static int __init ieee1394_init(void) fw_bus_attrs[i--]); } bus_unregister(&ieee1394_bus_type); - goto release_devfs; + goto release_chrdev; } } @@ -1152,8 +1145,6 @@ release_all_bus: for (i = 0; fw_bus_attrs[i]; i++) bus_remove_file(&ieee1394_bus_type, fw_bus_attrs[i]); bus_unregister(&ieee1394_bus_type); -release_devfs: - devfs_remove("ieee1394"); release_chrdev: unregister_chrdev_region(IEEE1394_CORE_DEV, 256); exit_release_kernel_thread: @@ -1191,7 +1182,6 @@ static void __exit ieee1394_cleanup(void) hpsb_cleanup_config_roms(); unregister_chrdev_region(IEEE1394_CORE_DEV, 256); - devfs_remove("ieee1394"); } module_init(ieee1394_init); diff --git a/drivers/ieee1394/ieee1394_core.h b/drivers/ieee1394/ieee1394_core.h index b354660..e7b55e8 100644 --- a/drivers/ieee1394/ieee1394_core.h +++ b/drivers/ieee1394/ieee1394_core.h @@ -3,7 +3,6 @@ #define _IEEE1394_CORE_H #include <linux/slab.h> -#include <linux/devfs_fs_kernel.h> #include <asm/atomic.h> #include <asm/semaphore.h> #include "hosts.h" @@ -202,14 +201,12 @@ void hpsb_packet_received(struct hpsb_host *host, quadlet_t *data, size_t size, #define IEEE1394_MINOR_BLOCK_RAW1394 0 #define IEEE1394_MINOR_BLOCK_VIDEO1394 1 #define IEEE1394_MINOR_BLOCK_DV1394 2 -#define IEEE1394_MINOR_BLOCK_AMDTP 3 #define IEEE1394_MINOR_BLOCK_EXPERIMENTAL 15 #define IEEE1394_CORE_DEV MKDEV(IEEE1394_MAJOR, 0) #define IEEE1394_RAW1394_DEV MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_RAW1394 * 16) #define IEEE1394_VIDEO1394_DEV MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_VIDEO1394 * 16) #define IEEE1394_DV1394_DEV MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_DV1394 * 16) -#define IEEE1394_AMDTP_DEV MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_AMDTP * 16) #define IEEE1394_EXPERIMENTAL_DEV MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_EXPERIMENTAL * 16) /* return the index (within a minor number block) of a file */ diff --git a/drivers/ieee1394/ohci1394.c b/drivers/ieee1394/ohci1394.c index 314f355..1922287 100644 --- a/drivers/ieee1394/ohci1394.c +++ b/drivers/ieee1394/ohci1394.c @@ -544,12 +544,19 @@ static void ohci_initialize(struct ti_ohci *ohci) /* Initialize IR Legacy DMA channel mask */ ohci->ir_legacy_channels = 0; - /* - * Accept AT requests from all nodes. This probably - * will have to be controlled from the subsystem - * on a per node basis. - */ - reg_write(ohci,OHCI1394_AsReqFilterHiSet, 0x80000000); + /* Accept AR requests from all nodes */ + reg_write(ohci, OHCI1394_AsReqFilterHiSet, 0x80000000); + + /* Set the address range of the physical response unit. + * Most controllers do not implement it as a writable register though. + * They will keep a hardwired offset of 0x00010000 and show 0x0 as + * register content. + * To actually enable physical responses is the job of our interrupt + * handler which programs the physical request filter. */ + reg_write(ohci, OHCI1394_PhyUpperBound, 0xffff0000); + + DBGMSG("physUpperBoundOffset=%08x", + reg_read(ohci, OHCI1394_PhyUpperBound)); /* Specify AT retries */ reg_write(ohci, OHCI1394_ATRetries, @@ -572,6 +579,7 @@ static void ohci_initialize(struct ti_ohci *ohci) OHCI1394_reqTxComplete | OHCI1394_isochRx | OHCI1394_isochTx | + OHCI1394_postedWriteErr | OHCI1394_cycleInconsistent); /* Enable link */ @@ -2374,7 +2382,10 @@ static irqreturn_t ohci_irq_handler(int irq, void *dev_id, event &= ~OHCI1394_unrecoverableError; } - + if (event & OHCI1394_postedWriteErr) { + PRINT(KERN_ERR, "physical posted write error"); + /* no recovery strategy yet, had to involve protocol drivers */ + } if (event & OHCI1394_cycleInconsistent) { /* We subscribe to the cycleInconsistent event only to * clear the corresponding event bit... otherwise, @@ -2382,7 +2393,6 @@ static irqreturn_t ohci_irq_handler(int irq, void *dev_id, DBGMSG("OHCI1394_cycleInconsistent"); event &= ~OHCI1394_cycleInconsistent; } - if (event & OHCI1394_busReset) { /* The busReset event bit can't be cleared during the * selfID phase, so we disable busReset interrupts, to @@ -2426,7 +2436,6 @@ static irqreturn_t ohci_irq_handler(int irq, void *dev_id, } event &= ~OHCI1394_busReset; } - if (event & OHCI1394_reqTxComplete) { struct dma_trm_ctx *d = &ohci->at_req_context; DBGMSG("Got reqTxComplete interrupt " @@ -2514,26 +2523,20 @@ static irqreturn_t ohci_irq_handler(int irq, void *dev_id, reg_write(ohci, OHCI1394_IntMaskSet, OHCI1394_busReset); spin_unlock_irqrestore(&ohci->event_lock, flags); - /* Accept Physical requests from all nodes. */ - reg_write(ohci,OHCI1394_AsReqFilterHiSet, 0xffffffff); - reg_write(ohci,OHCI1394_AsReqFilterLoSet, 0xffffffff); - /* Turn on phys dma reception. * * TODO: Enable some sort of filtering management. */ if (phys_dma) { - reg_write(ohci,OHCI1394_PhyReqFilterHiSet, 0xffffffff); - reg_write(ohci,OHCI1394_PhyReqFilterLoSet, 0xffffffff); - reg_write(ohci,OHCI1394_PhyUpperBound, 0xffff0000); - } else { - reg_write(ohci,OHCI1394_PhyReqFilterHiSet, 0x00000000); - reg_write(ohci,OHCI1394_PhyReqFilterLoSet, 0x00000000); + reg_write(ohci, OHCI1394_PhyReqFilterHiSet, + 0xffffffff); + reg_write(ohci, OHCI1394_PhyReqFilterLoSet, + 0xffffffff); } DBGMSG("PhyReqFilter=%08x%08x", - reg_read(ohci,OHCI1394_PhyReqFilterHiSet), - reg_read(ohci,OHCI1394_PhyReqFilterLoSet)); + reg_read(ohci, OHCI1394_PhyReqFilterHiSet), + reg_read(ohci, OHCI1394_PhyReqFilterLoSet)); hpsb_selfid_complete(host, phyid, isroot); } else @@ -3259,8 +3262,8 @@ static int __devinit ohci1394_pci_probe(struct pci_dev *dev, * fail to report the right length. Anyway, the ohci spec * clearly says it's 2kb, so this shouldn't be a problem. */ ohci_base = pci_resource_start(dev, 0); - if (pci_resource_len(dev, 0) != OHCI1394_REGISTER_SIZE) - PRINT(KERN_WARNING, "Unexpected PCI resource length of %lx!", + if (pci_resource_len(dev, 0) < OHCI1394_REGISTER_SIZE) + PRINT(KERN_WARNING, "PCI resource length of %lx too small!", pci_resource_len(dev, 0)); /* Seems PCMCIA handles this internally. Not sure why. Seems diff --git a/drivers/ieee1394/raw1394.c b/drivers/ieee1394/raw1394.c index 19f26c5..f7de546 100644 --- a/drivers/ieee1394/raw1394.c +++ b/drivers/ieee1394/raw1394.c @@ -41,7 +41,6 @@ #include <linux/cdev.h> #include <asm/uaccess.h> #include <asm/atomic.h> -#include <linux/devfs_fs_kernel.h> #include <linux/compat.h> #include "csr1212.h" @@ -2999,9 +2998,6 @@ static int __init init_raw1394(void) goto out_unreg; } - devfs_mk_cdev(MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_RAW1394 * 16), - S_IFCHR | S_IRUSR | S_IWUSR, RAW1394_DEVICE_NAME); - cdev_init(&raw1394_cdev, &raw1394_fops); raw1394_cdev.owner = THIS_MODULE; kobject_set_name(&raw1394_cdev.kobj, RAW1394_DEVICE_NAME); @@ -3023,7 +3019,6 @@ static int __init init_raw1394(void) goto out; out_dev: - devfs_remove(RAW1394_DEVICE_NAME); class_device_destroy(hpsb_protocol_class, MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_RAW1394 * 16)); @@ -3039,7 +3034,6 @@ static void __exit cleanup_raw1394(void) MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_RAW1394 * 16)); cdev_del(&raw1394_cdev); - devfs_remove(RAW1394_DEVICE_NAME); hpsb_unregister_highlevel(&raw1394_highlevel); hpsb_unregister_protocol(&raw1394_driver); } diff --git a/drivers/ieee1394/sbp2.c b/drivers/ieee1394/sbp2.c index eca92eb..2c765ca 100644 --- a/drivers/ieee1394/sbp2.c +++ b/drivers/ieee1394/sbp2.c @@ -214,6 +214,7 @@ static u32 global_outstanding_dmas = 0; #endif #define SBP2_ERR(fmt, args...) HPSB_ERR("sbp2: "fmt, ## args) +#define SBP2_DEBUG_ENTER() SBP2_DEBUG("%s", __FUNCTION__) /* * Globals @@ -535,7 +536,7 @@ static struct sbp2_command_info *sbp2util_allocate_command_orb( command->Current_SCpnt = Current_SCpnt; list_add_tail(&command->list, &scsi_id->sbp2_command_orb_inuse); } else { - SBP2_ERR("sbp2util_allocate_command_orb - No orbs available!"); + SBP2_ERR("%s: no orbs available", __FUNCTION__); } spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags); return command; @@ -549,7 +550,7 @@ static void sbp2util_free_command_dma(struct sbp2_command_info *command) struct hpsb_host *host; if (!scsi_id) { - printk(KERN_ERR "%s: scsi_id == NULL\n", __FUNCTION__); + SBP2_ERR("%s: scsi_id == NULL", __FUNCTION__); return; } @@ -610,7 +611,7 @@ static int sbp2_probe(struct device *dev) struct unit_directory *ud; struct scsi_id_instance_data *scsi_id; - SBP2_DEBUG("sbp2_probe"); + SBP2_DEBUG_ENTER(); ud = container_of(dev, struct unit_directory, device); @@ -635,7 +636,7 @@ static int sbp2_remove(struct device *dev) struct scsi_id_instance_data *scsi_id; struct scsi_device *sdev; - SBP2_DEBUG("sbp2_remove"); + SBP2_DEBUG_ENTER(); ud = container_of(dev, struct unit_directory, device); scsi_id = ud->device.driver_data; @@ -667,7 +668,7 @@ static int sbp2_update(struct unit_directory *ud) { struct scsi_id_instance_data *scsi_id = ud->device.driver_data; - SBP2_DEBUG("sbp2_update"); + SBP2_DEBUG_ENTER(); if (sbp2_reconnect_device(scsi_id)) { @@ -715,7 +716,7 @@ static struct scsi_id_instance_data *sbp2_alloc_device(struct unit_directory *ud struct Scsi_Host *scsi_host = NULL; struct scsi_id_instance_data *scsi_id = NULL; - SBP2_DEBUG("sbp2_alloc_device"); + SBP2_DEBUG_ENTER(); scsi_id = kzalloc(sizeof(*scsi_id), GFP_KERNEL); if (!scsi_id) { @@ -749,12 +750,22 @@ static struct scsi_id_instance_data *sbp2_alloc_device(struct unit_directory *ud #ifdef CONFIG_IEEE1394_SBP2_PHYS_DMA /* Handle data movement if physical dma is not - * enabled/supportedon host controller */ - hpsb_register_addrspace(&sbp2_highlevel, ud->ne->host, &sbp2_physdma_ops, - 0x0ULL, 0xfffffffcULL); + * enabled or not supported on host controller */ + if (!hpsb_register_addrspace(&sbp2_highlevel, ud->ne->host, + &sbp2_physdma_ops, + 0x0ULL, 0xfffffffcULL)) { + SBP2_ERR("failed to register lower 4GB address range"); + goto failed_alloc; + } #endif } + /* Prevent unloading of the 1394 host */ + if (!try_module_get(hi->host->driver->owner)) { + SBP2_ERR("failed to get a reference on 1394 host driver"); + goto failed_alloc; + } + scsi_id->hi = hi; list_add_tail(&scsi_id->scsi_list, &hi->scsi_ids); @@ -816,7 +827,7 @@ static int sbp2_start_device(struct scsi_id_instance_data *scsi_id) struct sbp2scsi_host_info *hi = scsi_id->hi; int error; - SBP2_DEBUG("sbp2_start_device"); + SBP2_DEBUG_ENTER(); /* Login FIFO DMA */ scsi_id->login_response = @@ -891,7 +902,6 @@ static int sbp2_start_device(struct scsi_id_instance_data *scsi_id) * allows someone else to login instead. One second makes sense. */ msleep_interruptible(1000); if (signal_pending(current)) { - SBP2_WARN("aborting sbp2_start_device due to event"); sbp2_remove_device(scsi_id); return -EINTR; } @@ -944,7 +954,7 @@ static void sbp2_remove_device(struct scsi_id_instance_data *scsi_id) { struct sbp2scsi_host_info *hi; - SBP2_DEBUG("sbp2_remove_device"); + SBP2_DEBUG_ENTER(); if (!scsi_id) return; @@ -1015,6 +1025,9 @@ static void sbp2_remove_device(struct scsi_id_instance_data *scsi_id) scsi_id->ud->device.driver_data = NULL; + if (hi) + module_put(hi->host->driver->owner); + SBP2_DEBUG("SBP-2 device removed, SCSI ID = %d", scsi_id->ud->id); kfree(scsi_id); @@ -1073,23 +1086,20 @@ static int sbp2_query_logins(struct scsi_id_instance_data *scsi_id) int max_logins; int active_logins; - SBP2_DEBUG("sbp2_query_logins"); + SBP2_DEBUG_ENTER(); scsi_id->query_logins_orb->reserved1 = 0x0; scsi_id->query_logins_orb->reserved2 = 0x0; scsi_id->query_logins_orb->query_response_lo = scsi_id->query_logins_response_dma; scsi_id->query_logins_orb->query_response_hi = ORB_SET_NODE_ID(hi->host->node_id); - SBP2_DEBUG("sbp2_query_logins: query_response_hi/lo initialized"); scsi_id->query_logins_orb->lun_misc = ORB_SET_FUNCTION(SBP2_QUERY_LOGINS_REQUEST); scsi_id->query_logins_orb->lun_misc |= ORB_SET_NOTIFY(1); scsi_id->query_logins_orb->lun_misc |= ORB_SET_LUN(scsi_id->sbp2_lun); - SBP2_DEBUG("sbp2_query_logins: lun_misc initialized"); scsi_id->query_logins_orb->reserved_resp_length = ORB_SET_QUERY_LOGINS_RESP_LENGTH(sizeof(struct sbp2_query_logins_response)); - SBP2_DEBUG("sbp2_query_logins: reserved_resp_length initialized"); scsi_id->query_logins_orb->status_fifo_hi = ORB_SET_STATUS_FIFO_HI(scsi_id->status_fifo_addr, hi->host->node_id); @@ -1098,25 +1108,19 @@ static int sbp2_query_logins(struct scsi_id_instance_data *scsi_id) sbp2util_cpu_to_be32_buffer(scsi_id->query_logins_orb, sizeof(struct sbp2_query_logins_orb)); - SBP2_DEBUG("sbp2_query_logins: orb byte-swapped"); - sbp2util_packet_dump(scsi_id->query_logins_orb, sizeof(struct sbp2_query_logins_orb), "sbp2 query logins orb", scsi_id->query_logins_orb_dma); memset(scsi_id->query_logins_response, 0, sizeof(struct sbp2_query_logins_response)); memset(&scsi_id->status_block, 0, sizeof(struct sbp2_status_block)); - SBP2_DEBUG("sbp2_query_logins: query_logins_response/status FIFO memset"); - data[0] = ORB_SET_NODE_ID(hi->host->node_id); data[1] = scsi_id->query_logins_orb_dma; sbp2util_cpu_to_be32_buffer(data, 8); atomic_set(&scsi_id->sbp2_login_complete, 0); - SBP2_DEBUG("sbp2_query_logins: prepared to write"); hpsb_node_write(scsi_id->ne, scsi_id->sbp2_management_agent_addr, data, 8); - SBP2_DEBUG("sbp2_query_logins: written"); if (sbp2util_down_timeout(&scsi_id->sbp2_login_complete, 2*HZ)) { SBP2_INFO("Error querying logins to SBP-2 device - timed out"); @@ -1165,10 +1169,10 @@ static int sbp2_login_device(struct scsi_id_instance_data *scsi_id) struct sbp2scsi_host_info *hi = scsi_id->hi; quadlet_t data[2]; - SBP2_DEBUG("sbp2_login_device"); + SBP2_DEBUG_ENTER(); if (!scsi_id->login_orb) { - SBP2_DEBUG("sbp2_login_device: login_orb not alloc'd!"); + SBP2_DEBUG("%s: login_orb not alloc'd!", __FUNCTION__); return -EIO; } @@ -1182,59 +1186,39 @@ static int sbp2_login_device(struct scsi_id_instance_data *scsi_id) /* Set-up login ORB, assume no password */ scsi_id->login_orb->password_hi = 0; scsi_id->login_orb->password_lo = 0; - SBP2_DEBUG("sbp2_login_device: password_hi/lo initialized"); scsi_id->login_orb->login_response_lo = scsi_id->login_response_dma; scsi_id->login_orb->login_response_hi = ORB_SET_NODE_ID(hi->host->node_id); - SBP2_DEBUG("sbp2_login_device: login_response_hi/lo initialized"); scsi_id->login_orb->lun_misc = ORB_SET_FUNCTION(SBP2_LOGIN_REQUEST); scsi_id->login_orb->lun_misc |= ORB_SET_RECONNECT(0); /* One second reconnect time */ scsi_id->login_orb->lun_misc |= ORB_SET_EXCLUSIVE(exclusive_login); /* Exclusive access to device */ scsi_id->login_orb->lun_misc |= ORB_SET_NOTIFY(1); /* Notify us of login complete */ scsi_id->login_orb->lun_misc |= ORB_SET_LUN(scsi_id->sbp2_lun); - SBP2_DEBUG("sbp2_login_device: lun_misc initialized"); scsi_id->login_orb->passwd_resp_lengths = ORB_SET_LOGIN_RESP_LENGTH(sizeof(struct sbp2_login_response)); - SBP2_DEBUG("sbp2_login_device: passwd_resp_lengths initialized"); scsi_id->login_orb->status_fifo_hi = ORB_SET_STATUS_FIFO_HI(scsi_id->status_fifo_addr, hi->host->node_id); scsi_id->login_orb->status_fifo_lo = ORB_SET_STATUS_FIFO_LO(scsi_id->status_fifo_addr); - /* - * Byte swap ORB if necessary - */ sbp2util_cpu_to_be32_buffer(scsi_id->login_orb, sizeof(struct sbp2_login_orb)); - SBP2_DEBUG("sbp2_login_device: orb byte-swapped"); - sbp2util_packet_dump(scsi_id->login_orb, sizeof(struct sbp2_login_orb), "sbp2 login orb", scsi_id->login_orb_dma); - /* - * Initialize login response and status fifo - */ memset(scsi_id->login_response, 0, sizeof(struct sbp2_login_response)); memset(&scsi_id->status_block, 0, sizeof(struct sbp2_status_block)); - SBP2_DEBUG("sbp2_login_device: login_response/status FIFO memset"); - - /* - * Ok, let's write to the target's management agent register - */ data[0] = ORB_SET_NODE_ID(hi->host->node_id); data[1] = scsi_id->login_orb_dma; sbp2util_cpu_to_be32_buffer(data, 8); atomic_set(&scsi_id->sbp2_login_complete, 0); - SBP2_DEBUG("sbp2_login_device: prepared to write to %08x", - (unsigned int)scsi_id->sbp2_management_agent_addr); hpsb_node_write(scsi_id->ne, scsi_id->sbp2_management_agent_addr, data, 8); - SBP2_DEBUG("sbp2_login_device: written"); /* * Wait for login status (up to 20 seconds)... @@ -1298,7 +1282,7 @@ static int sbp2_logout_device(struct scsi_id_instance_data *scsi_id) quadlet_t data[2]; int error; - SBP2_DEBUG("sbp2_logout_device"); + SBP2_DEBUG_ENTER(); /* * Set-up logout ORB @@ -1362,7 +1346,7 @@ static int sbp2_reconnect_device(struct scsi_id_instance_data *scsi_id) quadlet_t data[2]; int error; - SBP2_DEBUG("sbp2_reconnect_device"); + SBP2_DEBUG_ENTER(); /* * Set-up reconnect ORB @@ -1453,17 +1437,11 @@ static int sbp2_set_busy_timeout(struct scsi_id_instance_data *scsi_id) { quadlet_t data; - SBP2_DEBUG("sbp2_set_busy_timeout"); + SBP2_DEBUG_ENTER(); - /* - * Ok, let's write to the target's busy timeout register - */ data = cpu_to_be32(SBP2_BUSY_TIMEOUT_VALUE); - - if (hpsb_node_write(scsi_id->ne, SBP2_BUSY_TIMEOUT_ADDRESS, &data, 4)) { - SBP2_ERR("sbp2_set_busy_timeout error"); - } - + if (hpsb_node_write(scsi_id->ne, SBP2_BUSY_TIMEOUT_ADDRESS, &data, 4)) + SBP2_ERR("%s error", __FUNCTION__); return 0; } @@ -1482,7 +1460,7 @@ static void sbp2_parse_unit_directory(struct scsi_id_instance_data *scsi_id, firmware_revision, workarounds; int i; - SBP2_DEBUG("sbp2_parse_unit_directory"); + SBP2_DEBUG_ENTER(); management_agent_addr = 0x0; command_set_spec_id = 0x0; @@ -1615,7 +1593,7 @@ static int sbp2_max_speed_and_size(struct scsi_id_instance_data *scsi_id) { struct sbp2scsi_host_info *hi = scsi_id->hi; - SBP2_DEBUG("sbp2_max_speed_and_size"); + SBP2_DEBUG_ENTER(); /* Initial setting comes from the hosts speed map */ scsi_id->speed_code = @@ -1652,11 +1630,8 @@ static int sbp2_agent_reset(struct scsi_id_instance_data *scsi_id, int wait) u64 addr; int retval; - SBP2_DEBUG("sbp2_agent_reset"); + SBP2_DEBUG_ENTER(); - /* - * Ok, let's write to the target's management agent register - */ data = ntohl(SBP2_AGENT_RESET_DATA); addr = scsi_id->sbp2_command_block_agent_addr + SBP2_AGENT_RESET_OFFSET; @@ -2004,11 +1979,7 @@ static int sbp2_send_command(struct scsi_id_instance_data *scsi_id, unsigned int request_bufflen = SCpnt->request_bufflen; struct sbp2_command_info *command; - SBP2_DEBUG("sbp2_send_command"); -#if (CONFIG_IEEE1394_SBP2_DEBUG >= 2) || defined(CONFIG_IEEE1394_SBP2_PACKET_DUMP) - printk("[scsi command]\n "); - scsi_print_command(SCpnt); -#endif + SBP2_DEBUG_ENTER(); SBP2_DEBUG("SCSI transfer size = %x", request_bufflen); SBP2_DEBUG("SCSI s/g elements = %x", (unsigned int)SCpnt->use_sg); @@ -2048,7 +2019,7 @@ static int sbp2_send_command(struct scsi_id_instance_data *scsi_id, */ static unsigned int sbp2_status_to_sense_data(unchar *sbp2_status, unchar *sense_data) { - SBP2_DEBUG("sbp2_status_to_sense_data"); + SBP2_DEBUG_ENTER(); /* * Ok, it's pretty ugly... ;-) @@ -2082,7 +2053,7 @@ static void sbp2_check_sbp2_response(struct scsi_id_instance_data *scsi_id, { u8 *scsi_buf = SCpnt->request_buffer; - SBP2_DEBUG("sbp2_check_sbp2_response"); + SBP2_DEBUG_ENTER(); if (SCpnt->cmnd[0] == INQUIRY && (SCpnt->cmnd[1] & 3) == 0) { /* @@ -2113,7 +2084,7 @@ static int sbp2_handle_status_write(struct hpsb_host *host, int nodeid, int dest struct sbp2_command_info *command; unsigned long flags; - SBP2_DEBUG("sbp2_handle_status_write"); + SBP2_DEBUG_ENTER(); sbp2util_packet_dump(data, length, "sbp2 status write by device", (u32)addr); @@ -2260,7 +2231,10 @@ static int sbp2scsi_queuecommand(struct scsi_cmnd *SCpnt, struct sbp2scsi_host_info *hi; int result = DID_NO_CONNECT << 16; - SBP2_DEBUG("sbp2scsi_queuecommand"); + SBP2_DEBUG_ENTER(); +#if (CONFIG_IEEE1394_SBP2_DEBUG >= 2) || defined(CONFIG_IEEE1394_SBP2_PACKET_DUMP) + scsi_print_command(SCpnt); +#endif if (!sbp2util_node_is_available(scsi_id)) goto done; @@ -2338,7 +2312,7 @@ static void sbp2scsi_complete_all_commands(struct scsi_id_instance_data *scsi_id struct sbp2_command_info *command; unsigned long flags; - SBP2_DEBUG("sbp2scsi_complete_all_commands"); + SBP2_DEBUG_ENTER(); spin_lock_irqsave(&scsi_id->sbp2_command_orb_lock, flags); while (!list_empty(&scsi_id->sbp2_command_orb_inuse)) { @@ -2371,7 +2345,7 @@ static void sbp2scsi_complete_command(struct scsi_id_instance_data *scsi_id, u32 scsi_status, struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *)) { - SBP2_DEBUG("sbp2scsi_complete_command"); + SBP2_DEBUG_ENTER(); /* * Sanity @@ -2397,7 +2371,7 @@ static void sbp2scsi_complete_command(struct scsi_id_instance_data *scsi_id, */ switch (scsi_status) { case SBP2_SCSI_STATUS_GOOD: - SCpnt->result = DID_OK; + SCpnt->result = DID_OK << 16; break; case SBP2_SCSI_STATUS_BUSY: @@ -2407,16 +2381,11 @@ static void sbp2scsi_complete_command(struct scsi_id_instance_data *scsi_id, case SBP2_SCSI_STATUS_CHECK_CONDITION: SBP2_DEBUG("SBP2_SCSI_STATUS_CHECK_CONDITION"); - SCpnt->result = CHECK_CONDITION << 1; - - /* - * Debug stuff - */ + SCpnt->result = CHECK_CONDITION << 1 | DID_OK << 16; #if CONFIG_IEEE1394_SBP2_DEBUG >= 1 scsi_print_command(SCpnt); - scsi_print_sense("bh", SCpnt); + scsi_print_sense(SBP2_DEVICE_NAME, SCpnt); #endif - break; case SBP2_SCSI_STATUS_SELECTION_TIMEOUT: @@ -2441,7 +2410,7 @@ static void sbp2scsi_complete_command(struct scsi_id_instance_data *scsi_id, /* * Take care of any sbp2 response data mucking here (RBC stuff, etc.) */ - if (SCpnt->result == DID_OK) { + if (SCpnt->result == DID_OK << 16) { sbp2_check_sbp2_response(scsi_id, SCpnt); } @@ -2459,6 +2428,8 @@ static void sbp2scsi_complete_command(struct scsi_id_instance_data *scsi_id, * If a unit attention occurs, return busy status so it gets * retried... it could have happened because of a 1394 bus reset * or hot-plug... + * XXX DID_BUS_BUSY is actually a bad idea because it will defy + * the scsi layer's retry logic. */ #if 0 if ((scsi_status == SBP2_SCSI_STATUS_CHECK_CONDITION) && @@ -2624,7 +2595,7 @@ static int sbp2_module_init(void) { int ret; - SBP2_DEBUG("sbp2_module_init"); + SBP2_DEBUG_ENTER(); /* Module load debug option to force one command at a time (serializing I/O) */ if (serialize_io) { @@ -2652,7 +2623,7 @@ static int sbp2_module_init(void) static void __exit sbp2_module_exit(void) { - SBP2_DEBUG("sbp2_module_exit"); + SBP2_DEBUG_ENTER(); hpsb_unregister_protocol(&sbp2_driver); diff --git a/drivers/ieee1394/video1394.c b/drivers/ieee1394/video1394.c index 216dbbf..4e3bd62 100644 --- a/drivers/ieee1394/video1394.c +++ b/drivers/ieee1394/video1394.c @@ -42,7 +42,6 @@ #include <linux/poll.h> #include <linux/smp_lock.h> #include <linux/delay.h> -#include <linux/devfs_fs_kernel.h> #include <linux/bitops.h> #include <linux/types.h> #include <linux/vmalloc.h> @@ -1322,9 +1321,6 @@ static void video1394_add_host (struct hpsb_host *host) class_device_create(hpsb_protocol_class, NULL, MKDEV( IEEE1394_MAJOR, minor), NULL, "%s-%d", VIDEO1394_DRIVER_NAME, ohci->host->id); - devfs_mk_cdev(MKDEV(IEEE1394_MAJOR, minor), - S_IFCHR | S_IRUSR | S_IWUSR, - "%s/%d", VIDEO1394_DRIVER_NAME, ohci->host->id); } @@ -1332,12 +1328,9 @@ static void video1394_remove_host (struct hpsb_host *host) { struct ti_ohci *ohci = hpsb_get_hostinfo(&video1394_highlevel, host); - if (ohci) { + if (ohci) class_device_destroy(hpsb_protocol_class, MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_VIDEO1394 * 16 + ohci->host->id)); - devfs_remove("%s/%d", VIDEO1394_DRIVER_NAME, ohci->host->id); - } - return; } @@ -1478,12 +1471,8 @@ static long video1394_compat_ioctl(struct file *f, unsigned cmd, unsigned long a static void __exit video1394_exit_module (void) { hpsb_unregister_protocol(&video1394_driver); - hpsb_unregister_highlevel(&video1394_highlevel); - - devfs_remove(VIDEO1394_DRIVER_NAME); cdev_del(&video1394_cdev); - PRINT_G(KERN_INFO, "Removed " VIDEO1394_DRIVER_NAME " module"); } @@ -1500,15 +1489,12 @@ static int __init video1394_init_module (void) return ret; } - devfs_mk_dir(VIDEO1394_DRIVER_NAME); - hpsb_register_highlevel(&video1394_highlevel); ret = hpsb_register_protocol(&video1394_driver); if (ret) { PRINT_G(KERN_ERR, "video1394: failed to register protocol"); hpsb_unregister_highlevel(&video1394_highlevel); - devfs_remove(VIDEO1394_DRIVER_NAME); cdev_del(&video1394_cdev); return ret; } diff --git a/fs/direct-io.c b/fs/direct-io.c index 9d1d2aa..910a8ed 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -524,8 +524,6 @@ static int get_more_blocks(struct dio *dio) */ ret = dio->page_errors; if (ret == 0) { - map_bh->b_state = 0; - map_bh->b_size = 0; BUG_ON(dio->block_in_file >= dio->final_block_in_request); fs_startblk = dio->block_in_file >> dio->blkfactor; dio_count = dio->final_block_in_request - dio->block_in_file; @@ -534,6 +532,9 @@ static int get_more_blocks(struct dio *dio) if (dio_count & blkmask) fs_count++; + map_bh->b_state = 0; + map_bh->b_size = fs_count << dio->inode->i_blkbits; + create = dio->rw == WRITE; if (dio->lock_type == DIO_LOCKING) { if (dio->block_in_file < (i_size_read(dio->inode) >> @@ -542,13 +543,13 @@ static int get_more_blocks(struct dio *dio) } else if (dio->lock_type == DIO_NO_LOCKING) { create = 0; } + /* * For writes inside i_size we forbid block creations: only * overwrites are permitted. We fall back to buffered writes * at a higher level for inside-i_size block-instantiating * writes. */ - map_bh->b_size = fs_count << dio->blkbits; ret = (*dio->get_block)(dio->inode, fs_startblk, map_bh, create); } @@ -616,6 +616,15 @@ static int de_thread(struct task_struct *tsk) kmem_cache_free(sighand_cachep, newsighand); return -EAGAIN; } + + /* + * child_reaper ignores SIGKILL, change it now. + * Reparenting needs write_lock on tasklist_lock, + * so it is safe to do it under read_lock. + */ + if (unlikely(current->group_leader == child_reaper)) + child_reaper = current; + zap_other_threads(current); read_unlock(&tasklist_lock); @@ -699,22 +708,30 @@ static int de_thread(struct task_struct *tsk) remove_parent(current); remove_parent(leader); - switch_exec_pids(leader, current); + + /* Become a process group leader with the old leader's pid. + * Note: The old leader also uses thispid until release_task + * is called. Odd but simple and correct. + */ + detach_pid(current, PIDTYPE_PID); + current->pid = leader->pid; + attach_pid(current, PIDTYPE_PID, current->pid); + attach_pid(current, PIDTYPE_PGID, current->signal->pgrp); + attach_pid(current, PIDTYPE_SID, current->signal->session); + list_add_tail(¤t->tasks, &init_task.tasks); current->parent = current->real_parent = leader->real_parent; leader->parent = leader->real_parent = child_reaper; current->group_leader = current; leader->group_leader = leader; - add_parent(current, current->parent); - add_parent(leader, leader->parent); + add_parent(current); + add_parent(leader); if (ptrace) { current->ptrace = ptrace; __ptrace_link(current, parent); } - list_del(¤t->tasks); - list_add_tail(¤t->tasks, &init_task.tasks); current->exit_signal = SIGCHLD; BUG_ON(leader->exit_state != EXIT_ZOMBIE); @@ -751,7 +768,6 @@ no_thread_group: /* * Move our state over to newsighand and switch it in. */ - spin_lock_init(&newsighand->siglock); atomic_set(&newsighand->count, 1); memcpy(newsighand->action, oldsighand->action, sizeof(newsighand->action)); @@ -768,7 +784,7 @@ no_thread_group: write_unlock_irq(&tasklist_lock); if (atomic_dec_and_test(&oldsighand->count)) - sighand_free(oldsighand); + kmem_cache_free(sighand_cachep, oldsighand); } BUG_ON(!thread_group_leader(current)); diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h index 16b44c3..1b262b7 100644 --- a/fs/xfs/linux-2.6/mrlock.h +++ b/fs/xfs/linux-2.6/mrlock.h @@ -79,7 +79,7 @@ static inline void mrdemote(mrlock_t *mrp) * Debug-only routine, without some platform-specific asm code, we can * now only answer requests regarding whether we hold the lock for write * (reader state is outside our visibility, we only track writer state). - * Note: means !ismrlocked would give false positivies, so don't do that. + * Note: means !ismrlocked would give false positives, so don't do that. */ static inline int ismrlocked(mrlock_t *mrp, int type) { diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index c02f7c5..6cbbd16 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -372,7 +372,7 @@ static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh) * assumes that all buffers on the page are started at the same time. * * The fix is two passes across the ioend list - one to start writeback on the - * bufferheads, and then the second one submit them for I/O. + * buffer_heads, and then submit them for I/O on the second pass. */ STATIC void xfs_submit_ioend( @@ -699,7 +699,7 @@ xfs_convert_page( /* * page_dirty is initially a count of buffers on the page before - * EOF and is decrememted as we move each into a cleanable state. + * EOF and is decremented as we move each into a cleanable state. * * Derivation: * @@ -842,7 +842,7 @@ xfs_cluster_write( * page if possible. * The bh->b_state's cannot know if any of the blocks or which block for * that matter are dirty due to mmap writes, and therefore bh uptodate is - * only vaild if the page itself isn't completely uptodate. Some layers + * only valid if the page itself isn't completely uptodate. Some layers * may clear the page dirty flag prior to calling write page, under the * assumption the entire page will be written out; by not writing out the * whole page the page can be reused before all valid dirty data is @@ -892,7 +892,7 @@ xfs_page_state_convert( /* * page_dirty is initially a count of buffers on the page before - * EOF and is decrememted as we move each into a cleanable state. + * EOF and is decremented as we move each into a cleanable state. * * Derivation: * @@ -1223,10 +1223,9 @@ free_buffers: } STATIC int -__xfs_get_block( +__xfs_get_blocks( struct inode *inode, sector_t iblock, - unsigned long blocks, struct buffer_head *bh_result, int create, int direct, @@ -1236,22 +1235,17 @@ __xfs_get_block( xfs_iomap_t iomap; xfs_off_t offset; ssize_t size; - int retpbbm = 1; + int niomap = 1; int error; offset = (xfs_off_t)iblock << inode->i_blkbits; - if (blocks) - size = (ssize_t) min_t(xfs_off_t, LONG_MAX, - (xfs_off_t)blocks << inode->i_blkbits); - else - size = 1 << inode->i_blkbits; - + ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); + size = bh_result->b_size; VOP_BMAP(vp, offset, size, - create ? flags : BMAPI_READ, &iomap, &retpbbm, error); + create ? flags : BMAPI_READ, &iomap, &niomap, error); if (error) return -error; - - if (retpbbm == 0) + if (niomap == 0) return 0; if (iomap.iomap_bn != IOMAP_DADDR_NULL) { @@ -1271,12 +1265,16 @@ __xfs_get_block( } } - /* If this is a realtime file, data might be on a new device */ + /* + * If this is a realtime file, data may be on a different device. + * to that pointed to from the buffer_head b_bdev currently. + */ bh_result->b_bdev = iomap.iomap_target->bt_bdev; - /* If we previously allocated a block out beyond eof and - * we are now coming back to use it then we will need to - * flag it as new even if it has a disk address. + /* + * If we previously allocated a block out beyond eof and we are + * now coming back to use it then we will need to flag it as new + * even if it has a disk address. */ if (create && ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || @@ -1292,26 +1290,24 @@ __xfs_get_block( } } - if (blocks) { + if (direct || size > (1 << inode->i_blkbits)) { ASSERT(iomap.iomap_bsize - iomap.iomap_delta > 0); offset = min_t(xfs_off_t, - iomap.iomap_bsize - iomap.iomap_delta, - (xfs_off_t)blocks << inode->i_blkbits); - bh_result->b_size = (u32) min_t(xfs_off_t, UINT_MAX, offset); + iomap.iomap_bsize - iomap.iomap_delta, size); + bh_result->b_size = (ssize_t)min_t(xfs_off_t, LONG_MAX, offset); } return 0; } int -xfs_get_block( +xfs_get_blocks( struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { - return __xfs_get_block(inode, iblock, - bh_result->b_size >> inode->i_blkbits, + return __xfs_get_blocks(inode, iblock, bh_result, create, 0, BMAPI_WRITE); } @@ -1322,8 +1318,7 @@ xfs_get_blocks_direct( struct buffer_head *bh_result, int create) { - return __xfs_get_block(inode, iblock, - bh_result->b_size >> inode->i_blkbits, + return __xfs_get_blocks(inode, iblock, bh_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT); } @@ -1339,9 +1334,9 @@ xfs_end_io_direct( /* * Non-NULL private data means we need to issue a transaction to * convert a range from unwritten to written extents. This needs - * to happen from process contect but aio+dio I/O completion + * to happen from process context but aio+dio I/O completion * happens from irq context so we need to defer it to a workqueue. - * This is not nessecary for synchronous direct I/O, but we do + * This is not necessary for synchronous direct I/O, but we do * it anyway to keep the code uniform and simpler. * * The core direct I/O code might be changed to always call the @@ -1358,7 +1353,7 @@ xfs_end_io_direct( } /* - * blockdev_direct_IO can return an error even afer the I/O + * blockdev_direct_IO can return an error even after the I/O * completion handler was called. Thus we need to protect * against double-freeing. */ @@ -1405,7 +1400,7 @@ xfs_vm_prepare_write( unsigned int from, unsigned int to) { - return block_prepare_write(page, from, to, xfs_get_block); + return block_prepare_write(page, from, to, xfs_get_blocks); } STATIC sector_t @@ -1422,7 +1417,7 @@ xfs_vm_bmap( VOP_RWLOCK(vp, VRWLOCK_READ); VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error); VOP_RWUNLOCK(vp, VRWLOCK_READ); - return generic_block_bmap(mapping, block, xfs_get_block); + return generic_block_bmap(mapping, block, xfs_get_blocks); } STATIC int @@ -1430,7 +1425,7 @@ xfs_vm_readpage( struct file *unused, struct page *page) { - return mpage_readpage(page, xfs_get_block); + return mpage_readpage(page, xfs_get_blocks); } STATIC int @@ -1440,7 +1435,7 @@ xfs_vm_readpages( struct list_head *pages, unsigned nr_pages) { - return mpage_readpages(mapping, pages, nr_pages, xfs_get_block); + return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); } STATIC void diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index 795699f..6071654 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -41,6 +41,6 @@ typedef struct xfs_ioend { } xfs_ioend_t; extern struct address_space_operations xfs_address_space_operations; -extern int xfs_get_block(struct inode *, sector_t, struct buffer_head *, int); +extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); #endif /* __XFS_IOPS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h index e5b0559..e794ca4 100644 --- a/fs/xfs/linux-2.6/xfs_export.h +++ b/fs/xfs/linux-2.6/xfs_export.h @@ -54,7 +54,7 @@ * Note, the NFS filehandle also includes an fsid portion which * may have an inode number in it. That number is hardcoded to * 32bits and there is no way for XFS to intercept it. In - * practice this means when exporting an XFS filesytem with 64bit + * practice this means when exporting an XFS filesystem with 64bit * inodes you should either export the mountpoint (rather than * a subdirectory) or use the "fsid" export option. */ diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index b6321ab..251bfe4 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -72,7 +72,7 @@ xfs_ioctl32_flock( copy_in_user(&p->l_pid, &p32->l_pid, sizeof(u32)) || copy_in_user(&p->l_pad, &p32->l_pad, 4*sizeof(u32))) return -EFAULT; - + return (unsigned long)p; } @@ -107,11 +107,15 @@ xfs_ioctl32_bulkstat( #endif STATIC long -xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) +xfs_compat_ioctl( + int mode, + struct file *file, + unsigned cmd, + unsigned long arg) { + struct inode *inode = file->f_dentry->d_inode; + vnode_t *vp = vn_from_inode(inode); int error; - struct inode *inode = f->f_dentry->d_inode; - vnode_t *vp = vn_to_inode(inode); switch (cmd) { case XFS_IOC_DIOINFO: @@ -189,7 +193,7 @@ xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) return -ENOIOCTLCMD; } - VOP_IOCTL(vp, inode, f, mode, cmd, (void __user *)arg, error); + VOP_IOCTL(vp, inode, file, mode, cmd, (void __user *)arg, error); VMODIFY(vp); return error; @@ -197,18 +201,18 @@ xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg) long xfs_file_compat_ioctl( - struct file *f, + struct file *file, unsigned cmd, unsigned long arg) { - return xfs_compat_ioctl(0, f, cmd, arg); + return xfs_compat_ioctl(0, file, cmd, arg); } long xfs_file_compat_invis_ioctl( - struct file *f, + struct file *file, unsigned cmd, unsigned long arg) { - return xfs_compat_ioctl(IO_INVIS, f, cmd, arg); + return xfs_compat_ioctl(IO_INVIS, file, cmd, arg); } diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index af48743..1492373 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -708,7 +708,7 @@ STATIC void xfs_vn_truncate( struct inode *inode) { - block_truncate_page(inode->i_mapping, inode->i_size, xfs_get_block); + block_truncate_page(inode->i_mapping, inode->i_size, xfs_get_blocks); } STATIC int diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 0169360..84ddf18 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -681,7 +681,7 @@ start: eventsent = 1; /* - * The iolock was dropped and reaquired in XFS_SEND_DATA + * The iolock was dropped and reacquired in XFS_SEND_DATA * so we have to recheck the size when appending. * We will only "goto start;" once, since having sent the * event prevents another call to XFS_SEND_DATA, which is diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h index 8fed356..841200c 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.h +++ b/fs/xfs/linux-2.6/xfs_vfs.h @@ -92,7 +92,7 @@ typedef enum { #define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */ #define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ #define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ -#define SYNC_QUIESCE 0x0100 /* quiesce fileystem for a snapshot */ +#define SYNC_QUIESCE 0x0100 /* quiesce filesystem for a snapshot */ typedef int (*vfs_mount_t)(bhv_desc_t *, struct xfs_mount_args *, struct cred *); diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index e4e5f05..546f48a 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -221,7 +221,7 @@ xfs_qm_dqunpin_wait( * as possible. * * We must not be holding the AIL_LOCK at this point. Calling incore() to - * search the buffercache can be a time consuming thing, and AIL_LOCK is a + * search the buffer cache can be a time consuming thing, and AIL_LOCK is a * spinlock. */ STATIC void diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 1fb757e..73c1e5e8 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -289,7 +289,7 @@ xfs_qm_rele_quotafs_ref( /* * This is called at mount time from xfs_mountfs to initialize the quotainfo - * structure and start the global quotamanager (xfs_Gqm) if it hasn't done + * structure and start the global quota manager (xfs_Gqm) if it hasn't done * so already. Note that the superblock has not been read in yet. */ void @@ -807,7 +807,7 @@ xfs_qm_dqattach_one( * Given a udquot and gdquot, attach a ptr to the group dquot in the * udquot as a hint for future lookups. The idea sounds simple, but the * execution isn't, because the udquot might have a group dquot attached - * already and getting rid of that gets us into lock ordering contraints. + * already and getting rid of that gets us into lock ordering constraints. * The process is complicated more by the fact that the dquots may or may not * be locked on entry. */ @@ -1094,10 +1094,10 @@ xfs_qm_sync( } /* * If we can't grab the flush lock then if the caller - * really wanted us to give this our best shot, + * really wanted us to give this our best shot, so * see if we can give a push to the buffer before we wait * on the flush lock. At this point, we know that - * eventhough the dquot is being flushed, + * even though the dquot is being flushed, * it has (new) dirty data. */ xfs_qm_dqflock_pushbuf_wait(dqp); @@ -1491,7 +1491,7 @@ xfs_qm_reset_dqcounts( /* * Do a sanity check, and if needed, repair the dqblk. Don't * output any warnings because it's perfectly possible to - * find unitialized dquot blks. See comment in xfs_qm_dqcheck. + * find uninitialised dquot blks. See comment in xfs_qm_dqcheck. */ (void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR, "xfs_quotacheck"); @@ -1580,7 +1580,7 @@ xfs_qm_dqiterate( error = 0; /* - * This looks racey, but we can't keep an inode lock across a + * This looks racy, but we can't keep an inode lock across a * trans_reserve. But, this gets called during quotacheck, and that * happens only at mount time which is single threaded. */ @@ -1824,7 +1824,7 @@ xfs_qm_dqusage_adjust( * we have to start from the beginning anyway. * Once we're done, we'll log all the dquot bufs. * - * The *QUOTA_ON checks below may look pretty racey, but quotachecks + * The *QUOTA_ON checks below may look pretty racy, but quotachecks * and quotaoffs don't race. (Quotachecks happen at mount time only). */ if (XFS_IS_UQUOTA_ON(mp)) { diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 6768843..c55db46 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -912,7 +912,7 @@ xfs_qm_export_dquot( /* * Internally, we don't reset all the timers when quota enforcement - * gets turned off. No need to confuse the userlevel code, + * gets turned off. No need to confuse the user level code, * so return zeroes in that case. */ if (! XFS_IS_QUOTA_ENFORCED(mp)) { diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index 3290975..d8e131e 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c @@ -804,7 +804,7 @@ xfs_trans_reserve_quota_bydquots( } /* - * Didnt change anything critical, so, no need to log + * Didn't change anything critical, so, no need to log */ return (0); } diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 4ff0f4e..2539af3 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -395,7 +395,7 @@ xfs_acl_allow_set( * The access control process to determine the access permission: * if uid == file owner id, use the file owner bits. * if gid == file owner group id, use the file group bits. - * scan ACL for a maching user or group, and use matched entry + * scan ACL for a matching user or group, and use matched entry * permission. Use total permissions of all matching group entries, * until all acl entries are exhausted. The final permission produced * by matching acl entry or entries needs to be & with group permission. diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index a96e2ff..dc2361d 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -179,7 +179,7 @@ typedef struct xfs_perag { char pagf_init; /* this agf's entry is initialized */ char pagi_init; /* this agi's entry is initialized */ - char pagf_metadata; /* the agf is prefered to be metadata */ + char pagf_metadata; /* the agf is preferred to be metadata */ char pagi_inodeok; /* The agi is ok for inodes */ __uint8_t pagf_levels[XFS_BTNUM_AGF]; /* # of levels in bno & cnt btree */ diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index f4328e1..64ee07d 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -511,7 +511,7 @@ STATIC void xfs_alloc_trace_busy( char *name, /* function tag string */ char *str, /* additional string */ - xfs_mount_t *mp, /* file system mount poing */ + xfs_mount_t *mp, /* file system mount point */ xfs_agnumber_t agno, /* allocation group number */ xfs_agblock_t agbno, /* a.g. relative block number */ xfs_extlen_t len, /* length of extent */ @@ -1843,7 +1843,7 @@ xfs_alloc_fix_freelist( } else agbp = NULL; - /* If this is a metadata prefered pag and we are user data + /* If this is a metadata preferred pag and we are user data * then try somewhere else if we are not being asked to * try harder at this point */ @@ -2458,7 +2458,7 @@ error0: /* * AG Busy list management * The busy list contains block ranges that have been freed but whose - * transacations have not yet hit disk. If any block listed in a busy + * transactions have not yet hit disk. If any block listed in a busy * list is reused, the transaction that freed it must be forced to disk * before continuing to use the block. * diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 3546dea..2d1f892 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h @@ -68,7 +68,7 @@ typedef struct xfs_alloc_arg { xfs_alloctype_t otype; /* original allocation type */ char wasdel; /* set if allocation was prev delayed */ char wasfromfl; /* set if allocation is from freelist */ - char isfl; /* set if is freelist blocks - !actg */ + char isfl; /* set if is freelist blocks - !acctg */ char userdata; /* set if this is user data */ } xfs_alloc_arg_t; diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 093fac4..b6e1e02 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -294,7 +294,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen, xfs_trans_ihold(args.trans, dp); /* - * If the attribute list is non-existant or a shortform list, + * If the attribute list is non-existent or a shortform list, * upgrade it to a single-leaf-block attribute list. */ if ((dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) || @@ -1584,7 +1584,7 @@ out: * Fill in the disk block numbers in the state structure for the buffers * that are attached to the state structure. * This is done so that we can quickly reattach ourselves to those buffers - * after some set of transaction commit's has released these buffers. + * after some set of transaction commits have released these buffers. */ STATIC int xfs_attr_fillstate(xfs_da_state_t *state) @@ -1631,7 +1631,7 @@ xfs_attr_fillstate(xfs_da_state_t *state) /* * Reattach the buffers to the state structure based on the disk block * numbers stored in the state structure. - * This is done after some set of transaction commit's has released those + * This is done after some set of transaction commits have released those * buffers from our grip. */ STATIC int diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 7176827..9462be8 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -524,7 +524,7 @@ xfs_attr_shortform_compare(const void *a, const void *b) /* * Copy out entries of shortform attribute lists for attr_list(). - * Shortform atrtribute lists are not stored in hashval sorted order. + * Shortform attribute lists are not stored in hashval sorted order. * If the output buffer is not large enough to hold them all, then we * we have to calculate each entries' hashvalue and sort them before * we can begin returning them to the user. @@ -1541,7 +1541,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) /* * Check for the degenerate case of the block being empty. * If the block is empty, we'll simply delete it, no need to - * coalesce it with a sibling block. We choose (aribtrarily) + * coalesce it with a sibling block. We choose (arbitrarily) * to merge with the forward block unless it is NULL. */ if (count == 0) { diff --git a/fs/xfs/xfs_behavior.c b/fs/xfs/xfs_behavior.c index 9880ada..f4fe371 100644 --- a/fs/xfs/xfs_behavior.c +++ b/fs/xfs/xfs_behavior.c @@ -31,7 +31,7 @@ * The behavior chain is ordered based on the 'position' number which * lives in the first field of the ops vector (higher numbers first). * - * Attemps to insert duplicate ops result in an EINVAL return code. + * Attempts to insert duplicate ops result in an EINVAL return code. * Otherwise, return 0 to indicate success. */ int @@ -84,7 +84,7 @@ bhv_insert(bhv_head_t *bhp, bhv_desc_t *bdp) /* * Remove a behavior descriptor from a position in a behavior chain; - * the postition is guaranteed not to be the first position. + * the position is guaranteed not to be the first position. * Should only be called by the bhv_remove() macro. */ void diff --git a/fs/xfs/xfs_behavior.h b/fs/xfs/xfs_behavior.h index 2cd89bb..1d8ff10 100644 --- a/fs/xfs/xfs_behavior.h +++ b/fs/xfs/xfs_behavior.h @@ -39,7 +39,7 @@ * behaviors is synchronized with operations-in-progress (oip's) so that * the oip's always see a consistent view of the chain. * - * The term "interpostion" is used to refer to the act of inserting + * The term "interposition" is used to refer to the act of inserting * a behavior such that it interposes on (i.e., is inserted in front * of) a particular other behavior. A key example of this is when a * system implementing distributed single system image wishes to @@ -51,7 +51,7 @@ * * Behavior synchronization is logic which is necessary under certain * circumstances that there is no conflict between ongoing operations - * traversing the behavior chain and those dunamically modifying the + * traversing the behavior chain and those dynamically modifying the * behavior chain. Because behavior synchronization adds extra overhead * to virtual operation invocation, we want to restrict, as much as * we can, the requirement for this extra code, to those situations diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 2d702e4..d384e48 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -3467,113 +3467,6 @@ done: return error; } -xfs_bmbt_rec_t * /* pointer to found extent entry */ -xfs_bmap_do_search_extents( - xfs_bmbt_rec_t *base, /* base of extent list */ - xfs_extnum_t lastx, /* last extent index used */ - xfs_extnum_t nextents, /* number of file extents */ - xfs_fileoff_t bno, /* block number searched for */ - int *eofp, /* out: end of file found */ - xfs_extnum_t *lastxp, /* out: last extent index */ - xfs_bmbt_irec_t *gotp, /* out: extent entry found */ - xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ -{ - xfs_bmbt_rec_t *ep; /* extent list entry pointer */ - xfs_bmbt_irec_t got; /* extent list entry, decoded */ - int high; /* high index of binary search */ - int low; /* low index of binary search */ - - /* - * Initialize the extent entry structure to catch access to - * uninitialized br_startblock field. - */ - got.br_startoff = 0xffa5a5a5a5a5a5a5LL; - got.br_blockcount = 0xa55a5a5a5a5a5a5aLL; - got.br_state = XFS_EXT_INVALID; - -#if XFS_BIG_BLKNOS - got.br_startblock = 0xffffa5a5a5a5a5a5LL; -#else - got.br_startblock = 0xffffa5a5; -#endif - - if (lastx != NULLEXTNUM && lastx < nextents) - ep = base + lastx; - else - ep = NULL; - prevp->br_startoff = NULLFILEOFF; - if (ep && bno >= (got.br_startoff = xfs_bmbt_get_startoff(ep)) && - bno < got.br_startoff + - (got.br_blockcount = xfs_bmbt_get_blockcount(ep))) - *eofp = 0; - else if (ep && lastx < nextents - 1 && - bno >= (got.br_startoff = xfs_bmbt_get_startoff(ep + 1)) && - bno < got.br_startoff + - (got.br_blockcount = xfs_bmbt_get_blockcount(ep + 1))) { - lastx++; - ep++; - *eofp = 0; - } else if (nextents == 0) - *eofp = 1; - else if (bno == 0 && - (got.br_startoff = xfs_bmbt_get_startoff(base)) == 0) { - ep = base; - lastx = 0; - got.br_blockcount = xfs_bmbt_get_blockcount(ep); - *eofp = 0; - } else { - low = 0; - high = nextents - 1; - /* binary search the extents array */ - while (low <= high) { - XFS_STATS_INC(xs_cmp_exlist); - lastx = (low + high) >> 1; - ep = base + lastx; - got.br_startoff = xfs_bmbt_get_startoff(ep); - got.br_blockcount = xfs_bmbt_get_blockcount(ep); - if (bno < got.br_startoff) - high = lastx - 1; - else if (bno >= got.br_startoff + got.br_blockcount) - low = lastx + 1; - else { - got.br_startblock = xfs_bmbt_get_startblock(ep); - got.br_state = xfs_bmbt_get_state(ep); - *eofp = 0; - *lastxp = lastx; - *gotp = got; - return ep; - } - } - if (bno >= got.br_startoff + got.br_blockcount) { - lastx++; - if (lastx == nextents) { - *eofp = 1; - got.br_startblock = xfs_bmbt_get_startblock(ep); - got.br_state = xfs_bmbt_get_state(ep); - *prevp = got; - ep = NULL; - } else { - *eofp = 0; - xfs_bmbt_get_all(ep, prevp); - ep++; - got.br_startoff = xfs_bmbt_get_startoff(ep); - got.br_blockcount = xfs_bmbt_get_blockcount(ep); - } - } else { - *eofp = 0; - if (ep > base) - xfs_bmbt_get_all(ep - 1, prevp); - } - } - if (ep) { - got.br_startblock = xfs_bmbt_get_startblock(ep); - got.br_state = xfs_bmbt_get_state(ep); - } - *lastxp = lastx; - *gotp = got; - return ep; -} - /* * Search the extent records for the entry containing block bno. * If bno lies in a hole, point to the next entry. If bno lies diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 011ccaa..f83399c 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -362,14 +362,6 @@ xfs_bmbt_rec_t * xfs_bmap_search_multi_extents(struct xfs_ifork *, xfs_fileoff_t, int *, xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *); -/* - * Search an extent list for the extent which includes block - * bno. - */ -xfs_bmbt_rec_t *xfs_bmap_do_search_extents(xfs_bmbt_rec_t *, - xfs_extnum_t, xfs_extnum_t, xfs_fileoff_t, int *, - xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *); - #endif /* __KERNEL__ */ #endif /* __XFS_BMAP_H__ */ diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 07e2324..5fed156 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -98,12 +98,12 @@ xfs_buf_item_flush_log_debug( } /* - * This function is called to verify that our caller's have logged + * This function is called to verify that our callers have logged * all the bytes that they changed. * * It does this by comparing the original copy of the buffer stored in * the buf log item's bli_orig array to the current copy of the buffer - * and ensuring that all bytes which miscompare are set in the bli_logged + * and ensuring that all bytes which mismatch are set in the bli_logged * array of the buf log item. */ STATIC void diff --git a/fs/xfs/xfs_cap.h b/fs/xfs/xfs_cap.h index 433ec53..d0035c6e 100644 --- a/fs/xfs/xfs_cap.h +++ b/fs/xfs/xfs_cap.h @@ -38,7 +38,7 @@ typedef struct xfs_cap_set { /* * For Linux, we take the bitfields directly from capability.h * and no longer attempt to keep this attribute ondisk compatible - * with IRIX. Since this attribute is only set on exectuables, + * with IRIX. Since this attribute is only set on executables, * it just doesn't make much sense to try. We do use a different * named attribute though, to avoid confusion. */ diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 4bae3a7..8988b90 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -840,7 +840,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) /* * Check for the degenerate case of the block being empty. * If the block is empty, we'll simply delete it, no need to - * coalesce it with a sibling block. We choose (aribtrarily) + * coalesce it with a sibling block. We choose (arbitrarily) * to merge with the forward block unless it is NULL. */ if (count == 0) { diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index bd5cee6..972ded5 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -533,7 +533,7 @@ xfs_dir2_block_getdents( /* * Reached the end of the block. - * Set the offset to a nonexistent block 1 and return. + * Set the offset to a non-existent block 1 and return. */ *eofp = 1; diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 08648b18..0f5e2f2 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -515,7 +515,7 @@ xfs_dir2_leaf_addname( ASSERT(be32_to_cpu(leaf->ents[highstale].address) == XFS_DIR2_NULL_DATAPTR); /* - * Copy entries down to copver the stale entry + * Copy entries down to cover the stale entry * and make room for the new entry. */ if (highstale - index > 0) diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index af556f1..ac511ab 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -830,7 +830,7 @@ xfs_dir2_leafn_rebalance( state->inleaf = 1; blk2->index = 0; cmn_err(CE_ALERT, - "xfs_dir2_leafn_rebalance: picked the wrong leaf? reverting orignal leaf: " + "xfs_dir2_leafn_rebalance: picked the wrong leaf? reverting original leaf: " "blk1->index %d\n", blk1->index); } diff --git a/fs/xfs/xfs_dir_leaf.c b/fs/xfs/xfs_dir_leaf.c index ee88751..6d71186 100644 --- a/fs/xfs/xfs_dir_leaf.c +++ b/fs/xfs/xfs_dir_leaf.c @@ -1341,7 +1341,7 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action) /* * Check for the degenerate case of the block being empty. * If the block is empty, we'll simply delete it, no need to - * coalesce it with a sibling block. We choose (aribtrarily) + * coalesce it with a sibling block. We choose (arbitrarily) * to merge with the forward block unless it is NULL. */ if (count == 0) { diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 56caa88..dfa3527 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -477,7 +477,7 @@ xfs_fs_counts( * * xfs_reserve_blocks is called to set m_resblks * in the in-core mount table. The number of unused reserved blocks - * is kept in m_resbls_avail. + * is kept in m_resblks_avail. * * Reserve the requested number of blocks if available. Otherwise return * as many as possible to satisfy the request. The actual number diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 0024892..4eeb856 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -136,7 +136,7 @@ xfs_ialloc_ag_alloc( int ninodes; /* num inodes per buf */ xfs_agino_t thisino; /* current inode number, for loop */ int version; /* inode version number to use */ - int isaligned; /* inode allocation at stripe unit */ + int isaligned = 0; /* inode allocation at stripe unit */ /* boundary */ args.tp = tp; @@ -152,47 +152,75 @@ xfs_ialloc_ag_alloc( return XFS_ERROR(ENOSPC); args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp); /* - * Set the alignment for the allocation. - * If stripe alignment is turned on then align at stripe unit - * boundary. - * If the cluster size is smaller than a filesystem block - * then we're doing I/O for inodes in filesystem block size pieces, - * so don't need alignment anyway. - */ - isaligned = 0; - if (args.mp->m_sinoalign) { - ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); - args.alignment = args.mp->m_dalign; - isaligned = 1; - } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) && - args.mp->m_sb.sb_inoalignmt >= - XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp))) - args.alignment = args.mp->m_sb.sb_inoalignmt; - else - args.alignment = 1; + * First try to allocate inodes contiguous with the last-allocated + * chunk of inodes. If the filesystem is striped, this will fill + * an entire stripe unit with inodes. + */ agi = XFS_BUF_TO_AGI(agbp); - /* - * Need to figure out where to allocate the inode blocks. - * Ideally they should be spaced out through the a.g. - * For now, just allocate blocks up front. - */ - args.agbno = be32_to_cpu(agi->agi_root); - args.fsbno = XFS_AGB_TO_FSB(args.mp, be32_to_cpu(agi->agi_seqno), - args.agbno); - /* - * Allocate a fixed-size extent of inodes. - */ - args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.mod = args.total = args.wasdel = args.isfl = args.userdata = - args.minalignslop = 0; - args.prod = 1; - /* - * Allow space for the inode btree to split. - */ - args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; - if ((error = xfs_alloc_vextent(&args))) - return error; + newino = be32_to_cpu(agi->agi_newino); + if(likely(newino != NULLAGINO)) { + args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + + XFS_IALLOC_BLOCKS(args.mp); + args.fsbno = XFS_AGB_TO_FSB(args.mp, + be32_to_cpu(agi->agi_seqno), args.agbno); + args.type = XFS_ALLOCTYPE_THIS_BNO; + args.mod = args.total = args.wasdel = args.isfl = + args.userdata = args.minalignslop = 0; + args.prod = 1; + args.alignment = 1; + /* + * Allow space for the inode btree to split. + */ + args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; + if ((error = xfs_alloc_vextent(&args))) + return error; + } else + args.fsbno = NULLFSBLOCK; + if (unlikely(args.fsbno == NULLFSBLOCK)) { + /* + * Set the alignment for the allocation. + * If stripe alignment is turned on then align at stripe unit + * boundary. + * If the cluster size is smaller than a filesystem block + * then we're doing I/O for inodes in filesystem block size + * pieces, so don't need alignment anyway. + */ + isaligned = 0; + if (args.mp->m_sinoalign) { + ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); + args.alignment = args.mp->m_dalign; + isaligned = 1; + } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) && + args.mp->m_sb.sb_inoalignmt >= + XFS_B_TO_FSBT(args.mp, + XFS_INODE_CLUSTER_SIZE(args.mp))) + args.alignment = args.mp->m_sb.sb_inoalignmt; + else + args.alignment = 1; + /* + * Need to figure out where to allocate the inode blocks. + * Ideally they should be spaced out through the a.g. + * For now, just allocate blocks up front. + */ + args.agbno = be32_to_cpu(agi->agi_root); + args.fsbno = XFS_AGB_TO_FSB(args.mp, + be32_to_cpu(agi->agi_seqno), args.agbno); + /* + * Allocate a fixed-size extent of inodes. + */ + args.type = XFS_ALLOCTYPE_NEAR_BNO; + args.mod = args.total = args.wasdel = args.isfl = + args.userdata = args.minalignslop = 0; + args.prod = 1; + /* + * Allow space for the inode btree to split. + */ + args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; + if ((error = xfs_alloc_vextent(&args))) + return error; + } + /* * If stripe alignment is turned on, then try again with cluster * alignment. @@ -1023,7 +1051,7 @@ xfs_difree( rec.ir_freecount++; /* - * When an inode cluster is free, it becomes elgible for removal + * When an inode cluster is free, it becomes eligible for removal */ if ((mp->m_flags & XFS_MOUNT_IDELETE) && (rec.ir_freecount == XFS_IALLOC_INODES(mp))) { diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 3ce35a6..bb33113 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -509,7 +509,7 @@ retry: } else { /* * If the inode is not fully constructed due to - * filehandle mistmatches wait for the inode to go + * filehandle mismatches wait for the inode to go * away and try again. * * iget_locked will call __wait_on_freeing_inode diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 88a517f..48146bd 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -160,7 +160,7 @@ xfs_inotobp( xfs_dinode_t *dip; /* - * Call the space managment code to find the location of the + * Call the space management code to find the location of the * inode on disk. */ imap.im_blkno = 0; @@ -837,7 +837,7 @@ xfs_dic2xflags( /* * Given a mount structure and an inode number, return a pointer - * to a newly allocated in-core inode coresponding to the given + * to a newly allocated in-core inode corresponding to the given * inode number. * * Initialize the inode's attributes and extent pointers if it @@ -2723,7 +2723,7 @@ xfs_ipin( /* * Decrement the pin count of the given inode, and wake up * anyone in xfs_iwait_unpin() if the count goes to 0. The - * inode must have been previoulsy pinned with a call to xfs_ipin(). + * inode must have been previously pinned with a call to xfs_ipin(). */ void xfs_iunpin( @@ -3690,7 +3690,7 @@ void xfs_iext_add( xfs_ifork_t *ifp, /* inode fork pointer */ xfs_extnum_t idx, /* index to begin adding exts */ - int ext_diff) /* nubmer of extents to add */ + int ext_diff) /* number of extents to add */ { int byte_diff; /* new bytes being added */ int new_size; /* size of extents after adding */ @@ -4038,7 +4038,7 @@ xfs_iext_remove_indirect( xfs_extnum_t ext_diff; /* extents to remove in current list */ xfs_extnum_t nex1; /* number of extents before idx */ xfs_extnum_t nex2; /* extents after idx + count */ - int nlists; /* entries in indirecton array */ + int nlists; /* entries in indirection array */ int page_idx = idx; /* index in target extent list */ ASSERT(ifp->if_flags & XFS_IFEXTIREC); @@ -4291,9 +4291,9 @@ xfs_iext_bno_to_ext( xfs_filblks_t blockcount = 0; /* number of blocks in extent */ xfs_bmbt_rec_t *ep = NULL; /* pointer to target extent */ xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ - int high; /* upper boundry in search */ + int high; /* upper boundary in search */ xfs_extnum_t idx = 0; /* index of target extent */ - int low; /* lower boundry in search */ + int low; /* lower boundary in search */ xfs_extnum_t nextents; /* number of file extents */ xfs_fileoff_t startoff = 0; /* start offset of extent */ diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 36aa1fc..7497a48 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -580,7 +580,7 @@ xfs_inode_item_unpin_remove( * been or is in the process of being flushed, then (ideally) we'd like to * see if the inode's buffer is still incore, and if so give it a nudge. * We delay doing so until the pushbuf routine, though, to avoid holding - * the AIL lock across a call to the blackhole which is the buffercache. + * the AIL lock across a call to the blackhole which is the buffer cache. * Also we don't want to sleep in any device strategy routines, which can happen * if we do the subsequent bawrite in here. */ diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 32247b6..94068d0 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -272,7 +272,7 @@ xfs_bulkstat( size_t statstruct_size, /* sizeof struct filling */ char __user *ubuffer, /* buffer with inode stats */ int flags, /* defined in xfs_itable.h */ - int *done) /* 1 if there're more stats to get */ + int *done) /* 1 if there are more stats to get */ { xfs_agblock_t agbno=0;/* allocation group block number */ xfs_buf_t *agbp; /* agi header buffer */ @@ -676,7 +676,7 @@ xfs_bulkstat_single( xfs_mount_t *mp, /* mount point for filesystem */ xfs_ino_t *lastinop, /* inode to return */ char __user *buffer, /* buffer with inode stats */ - int *done) /* 1 if there're more stats to get */ + int *done) /* 1 if there are more stats to get */ { int count; /* count value for bulkstat call */ int error; /* return value */ diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index 047d834..11eb4e1 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -60,7 +60,7 @@ xfs_bulkstat( size_t statstruct_size,/* sizeof struct that we're filling */ char __user *ubuffer,/* buffer with inode stats */ int flags, /* flag to control access method */ - int *done); /* 1 if there're more stats to get */ + int *done); /* 1 if there are more stats to get */ int xfs_bulkstat_single( diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 9176995..32e841d 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -59,7 +59,7 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, int num_bblks); STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); -STATIC void xlog_unalloc_log(xlog_t *log); +STATIC void xlog_dealloc_log(xlog_t *log); STATIC int xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[], int nentries, xfs_log_ticket_t tic, xfs_lsn_t *start_lsn, @@ -304,7 +304,7 @@ xfs_log_done(xfs_mount_t *mp, if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 || (flags & XFS_LOG_REL_PERM_RESERV)) { /* - * Release ticket if not permanent reservation or a specifc + * Release ticket if not permanent reservation or a specific * request has been made to release a permanent reservation. */ xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); @@ -511,7 +511,7 @@ xfs_log_mount(xfs_mount_t *mp, vfsp->vfs_flag |= VFS_RDONLY; if (error) { cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error); - xlog_unalloc_log(mp->m_log); + xlog_dealloc_log(mp->m_log); return error; } } @@ -667,7 +667,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) * * Go through the motions of sync'ing and releasing * the iclog, even though no I/O will actually happen, - * we need to wait for other log I/O's that may already + * we need to wait for other log I/Os that may already * be in progress. Do this as a separate section of * code so we'll know if we ever get stuck here that * we're in this odd situation of trying to unmount @@ -704,7 +704,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) void xfs_log_unmount_dealloc(xfs_mount_t *mp) { - xlog_unalloc_log(mp->m_log); + xlog_dealloc_log(mp->m_log); } /* @@ -1492,7 +1492,7 @@ xlog_sync(xlog_t *log, ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); - /* account for internal log which does't start at block #0 */ + /* account for internal log which doesn't start at block #0 */ XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); XFS_BUF_WRITE(bp); if ((error = XFS_bwrite(bp))) { @@ -1506,10 +1506,10 @@ xlog_sync(xlog_t *log, /* - * Unallocate a log structure + * Deallocate a log structure */ void -xlog_unalloc_log(xlog_t *log) +xlog_dealloc_log(xlog_t *log) { xlog_in_core_t *iclog, *next_iclog; xlog_ticket_t *tic, *next_tic; @@ -1539,7 +1539,7 @@ xlog_unalloc_log(xlog_t *log) if ((log->l_ticket_cnt != log->l_ticket_tcnt) && !XLOG_FORCED_SHUTDOWN(log)) { xfs_fs_cmn_err(CE_WARN, log->l_mp, - "xlog_unalloc_log: (cnt: %d, total: %d)", + "xlog_dealloc_log: (cnt: %d, total: %d)", log->l_ticket_cnt, log->l_ticket_tcnt); /* ASSERT(log->l_ticket_cnt == log->l_ticket_tcnt); */ @@ -1562,7 +1562,7 @@ xlog_unalloc_log(xlog_t *log) #endif log->l_mp->m_log = NULL; kmem_free(log, sizeof(xlog_t)); -} /* xlog_unalloc_log */ +} /* xlog_dealloc_log */ /* * Update counters atomically now that memcpy is done. @@ -2829,7 +2829,7 @@ xlog_state_release_iclog(xlog_t *log, /* * We let the log lock go, so it's possible that we hit a log I/O - * error or someother SHUTDOWN condition that marks the iclog + * error or some other SHUTDOWN condition that marks the iclog * as XLOG_STATE_IOERROR before the bwrite. However, we know that * this iclog has consistent data, so we ignore IOERROR * flags after this point. diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 4b2ac88..eacb3d4 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -27,7 +27,7 @@ #ifdef __KERNEL__ /* - * By comparing each compnent, we don't have to worry about extra + * By comparing each component, we don't have to worry about extra * endian issues in treating two 32 bit numbers as one 64 bit number */ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index add13f5..1f0016b 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -583,7 +583,7 @@ xlog_find_head( * x | x ... | x - 1 | x * Another case that fits this picture would be * x | x + 1 | x ... | x - * In this case the head really is somwhere at the end of the + * In this case the head really is somewhere at the end of the * log, as one of the latest writes at the beginning was * incomplete. * One more case is @@ -2799,7 +2799,7 @@ xlog_recover_do_trans( * we don't need to worry about the block number being * truncated in > 1 TB buffers because in user-land, * we're now n32 or 64-bit so xfs_daddr_t is 64-bits so - * the blkno's will get through the user-mode buffer + * the blknos will get through the user-mode buffer * cache properly. The only bad case is o32 kernels * where xfs_daddr_t is 32-bits but mount will warn us * off a > 1 TB filesystem before we get here. diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 20e8abc..72e7e78 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -393,7 +393,7 @@ xfs_initialize_perag( break; } - /* This ag is prefered for inodes */ + /* This ag is preferred for inodes */ pag = &mp->m_perag[index]; pag->pagi_inodeok = 1; if (index < max_metadata) @@ -1728,7 +1728,7 @@ xfs_mount_log_sbunit( * We cannot use the hotcpu_register() function because it does * not allow notifier instances. We need a notifier per filesystem * as we need to be able to identify the filesystem to balance - * the counters out. This is acheived by having a notifier block + * the counters out. This is achieved by having a notifier block * embedded in the xfs_mount_t and doing pointer magic to get the * mount pointer from the notifier block address. */ diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index ebd7396..66cbee7 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -379,7 +379,7 @@ typedef struct xfs_mount { #endif int m_dalign; /* stripe unit */ int m_swidth; /* stripe width */ - int m_sinoalign; /* stripe unit inode alignmnt */ + int m_sinoalign; /* stripe unit inode alignment */ int m_attr_magicpct;/* 37% of the blocksize */ int m_dir_magicpct; /* 37% of the dir blocksize */ __uint8_t m_mk_sharedro; /* mark shared ro on unmount */ diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 82a08ba..4f6a034 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -31,7 +31,7 @@ typedef __uint32_t xfs_dqid_t; /* - * Eventhough users may not have quota limits occupying all 64-bits, + * Even though users may not have quota limits occupying all 64-bits, * they may need 64-bit accounting. Hence, 64-bit quota-counters, * and quota-limits. This is a waste in the common case, but hey ... */ @@ -246,7 +246,7 @@ typedef struct xfs_qoff_logformat { #ifdef __KERNEL__ /* * This check is done typically without holding the inode lock; - * that may seem racey, but it is harmless in the context that it is used. + * that may seem racy, but it is harmless in the context that it is used. * The inode cannot go inactive as long a reference is kept, and * therefore if dquot(s) were attached, they'll stay consistent. * If, for example, the ownership of the inode changes while diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 2918956..8d056ce 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -490,7 +490,7 @@ xfs_trans_mod_sb( case XFS_TRANS_SB_RES_FREXTENTS: /* * The allocation has already been applied to the - * in-core superblocks's counter. This should only + * in-core superblock's counter. This should only * be applied to the on-disk superblock. */ ASSERT(delta < 0); @@ -611,7 +611,7 @@ xfs_trans_apply_sb_deltas( if (whole) /* - * Log the whole thing, the fields are discontiguous. + * Log the whole thing, the fields are noncontiguous. */ xfs_trans_log_buf(tp, bp, 0, sizeof(xfs_sb_t) - 1); else @@ -669,7 +669,7 @@ xfs_trans_unreserve_and_mod_sb( /* * Apply any superblock modifications to the in-core version. * The t_res_fdblocks_delta and t_res_frextents_delta fields are - * explicity NOT applied to the in-core superblock. + * explicitly NOT applied to the in-core superblock. * The idea is that that has already been done. */ if (tp->t_flags & XFS_TRANS_SB_DIRTY) { diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index e48befa4..100d9a4 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -354,7 +354,7 @@ typedef struct xfs_trans { xfs_lsn_t t_commit_lsn; /* log seq num of end of * transaction. */ struct xfs_mount *t_mountp; /* ptr to fs mount struct */ - struct xfs_dquot_acct *t_dqinfo; /* accting info for dquots */ + struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */ xfs_trans_callback_t t_callback; /* transaction callback */ void *t_callarg; /* callback arg */ unsigned int t_flags; /* misc flags */ diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index e341409..7c5894d 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -272,7 +272,7 @@ xfs_trans_log_inode( * This is to coordinate with the xfs_iflush() and xfs_iflush_done() * routines in the eventual clearing of the ilf_fields bits. * See the big comment in xfs_iflush() for an explanation of - * this coorination mechanism. + * this coordination mechanism. */ flags |= ip->i_itemp->ili_last_fields; ip->i_itemp->ili_format.ilf_fields |= flags; diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index d4ec4df..504d2a8 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -880,10 +880,10 @@ xfs_statvfs( * determine if they should be flushed sync, async, or * delwri. * SYNC_CLOSE - This flag is passed when the system is being - * unmounted. We should sync and invalidate everthing. + * unmounted. We should sync and invalidate everything. * SYNC_FSDATA - This indicates that the caller would like to make * sure the superblock is safe on disk. We can ensure - * this by simply makeing sure the log gets flushed + * this by simply making sure the log gets flushed * if SYNC_BDFLUSH is set, and by actually writing it * out otherwise. * @@ -908,7 +908,7 @@ xfs_sync( * * This routine supports all of the flags defined for the generic VFS_SYNC * interface as explained above under xfs_sync. In the interests of not - * changing interfaces within the 6.5 family, additional internallly- + * changing interfaces within the 6.5 family, additional internally- * required functions are specified within a separate xflags parameter, * only available by calling this routine. * @@ -1090,7 +1090,7 @@ xfs_sync_inodes( * If this is just vfs_sync() or pflushd() calling * then we can skip inodes for which it looks like * there is nothing to do. Since we don't have the - * inode locked this is racey, but these are periodic + * inode locked this is racy, but these are periodic * calls so it doesn't matter. For the others we want * to know for sure, so we at least try to lock them. */ @@ -1429,7 +1429,7 @@ xfs_sync_inodes( * * This routine supports all of the flags defined for the generic VFS_SYNC * interface as explained above under xfs_sync. In the interests of not - * changing interfaces within the 6.5 family, additional internallly- + * changing interfaces within the 6.5 family, additional internally- * required functions are specified within a separate xflags parameter, * only available by calling this routine. * diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 0f0a64e..de49601 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -848,7 +848,7 @@ xfs_setattr( * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. * This is slightly sub-optimal in that truncates require - * two sync transactions instead of one for wsync filesytems. + * two sync transactions instead of one for wsync filesystems. * One for the truncate and one for the timestamps since we * don't want to change the timestamps unless we're sure the * truncate worked. Truncates are less than 1% of the laddis @@ -1170,7 +1170,7 @@ xfs_fsync( /* * If this inode is on the RT dev we need to flush that - * cache aswell. + * cache as well. */ if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp); @@ -1380,7 +1380,7 @@ xfs_inactive_symlink_rmt( */ ntp = xfs_trans_dup(tp); /* - * Commit the transaction containing extent freeing and EFD's. + * Commit the transaction containing extent freeing and EFDs. * If we get an error on the commit here or on the reserve below, * we need to unlock the inode since the new transaction doesn't * have the inode attached. @@ -2023,7 +2023,7 @@ xfs_create( XFS_QM_DQRELE(mp, gdqp); /* - * Propogate the fact that the vnode changed after the + * Propagate the fact that the vnode changed after the * xfs_inode locks have been released. */ VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_TRUNCATED, 3); @@ -2370,7 +2370,7 @@ xfs_remove( * for a log reservation. Since we'll have to wait for the * inactive code to complete before returning from xfs_iget, * we need to make sure that we don't have log space reserved - * when we call xfs_iget. Instead we get an unlocked referece + * when we call xfs_iget. Instead we get an unlocked reference * to the inode before getting our log reservation. */ error = xfs_get_dir_entry(dentry, &ip); @@ -3020,7 +3020,7 @@ xfs_rmdir( * for a log reservation. Since we'll have to wait for the * inactive code to complete before returning from xfs_iget, * we need to make sure that we don't have log space reserved - * when we call xfs_iget. Instead we get an unlocked referece + * when we call xfs_iget. Instead we get an unlocked reference * to the inode before getting our log reservation. */ error = xfs_get_dir_entry(dentry, &cdp); diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 92146f3..41ecbb8 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -62,6 +62,8 @@ .posix_timers = LIST_HEAD_INIT(sig.posix_timers), \ .cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \ .rlim = INIT_RLIMITS, \ + .pgrp = 1, \ + .session = 1, \ } #define INIT_SIGHAND(sighand) { \ diff --git a/include/linux/pid.h b/include/linux/pid.h index 5b2fcb1..5b9082c 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -4,7 +4,6 @@ enum pid_type { PIDTYPE_PID, - PIDTYPE_TGID, PIDTYPE_PGID, PIDTYPE_SID, PIDTYPE_MAX @@ -38,7 +37,6 @@ extern struct pid *FASTCALL(find_pid(enum pid_type, int)); extern int alloc_pidmap(void); extern void FASTCALL(free_pidmap(int)); -extern void switch_exec_pids(struct task_struct *leader, struct task_struct *thread); #define do_each_task_pid(who, type, task) \ if ((task = find_task_by_pid_type(type, who))) { \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 20b4f03..d04186d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -355,16 +355,8 @@ struct sighand_struct { atomic_t count; struct k_sigaction action[_NSIG]; spinlock_t siglock; - struct rcu_head rcu; }; -extern void sighand_free_cb(struct rcu_head *rhp); - -static inline void sighand_free(struct sighand_struct *sp) -{ - call_rcu(&sp->rcu, sighand_free_cb); -} - /* * NOTE! "signal_struct" does not have it's own * locking, because a shared signal_struct always @@ -760,6 +752,7 @@ struct task_struct { /* PID/PID hash table linkage. */ struct pid pids[PIDTYPE_MAX]; + struct list_head thread_group; struct completion *vfork_done; /* for vfork() */ int __user *set_child_tid; /* CLONE_CHILD_SETTID */ @@ -1101,7 +1094,6 @@ extern void force_sig_specific(int, struct task_struct *); extern int send_sig(int, struct task_struct *, int); extern void zap_other_threads(struct task_struct *p); extern int kill_pg(pid_t, int, int); -extern int kill_sl(pid_t, int, int); extern int kill_proc(pid_t, int, int); extern struct sigqueue *sigqueue_alloc(void); extern void sigqueue_free(struct sigqueue *); @@ -1158,10 +1150,8 @@ extern void flush_thread(void); extern void exit_thread(void); extern void exit_files(struct task_struct *); -extern void exit_signal(struct task_struct *); -extern void __exit_signal(struct task_struct *); -extern void exit_sighand(struct task_struct *); -extern void __exit_sighand(struct task_struct *); +extern void __cleanup_signal(struct signal_struct *); +extern void __cleanup_sighand(struct sighand_struct *); extern void exit_itimers(struct signal_struct *); extern NORET_TYPE void do_group_exit(int); @@ -1185,19 +1175,7 @@ extern void wait_task_inactive(task_t * p); #endif #define remove_parent(p) list_del_init(&(p)->sibling) -#define add_parent(p, parent) list_add_tail(&(p)->sibling,&(parent)->children) - -#define REMOVE_LINKS(p) do { \ - if (thread_group_leader(p)) \ - list_del_init(&(p)->tasks); \ - remove_parent(p); \ - } while (0) - -#define SET_LINKS(p) do { \ - if (thread_group_leader(p)) \ - list_add_tail(&(p)->tasks,&init_task.tasks); \ - add_parent(p, (p)->parent); \ - } while (0) +#define add_parent(p) list_add_tail(&(p)->sibling,&(p)->parent->children) #define next_task(p) list_entry((p)->tasks.next, struct task_struct, tasks) #define prev_task(p) list_entry((p)->tasks.prev, struct task_struct, tasks) @@ -1215,20 +1193,22 @@ extern void wait_task_inactive(task_t * p); #define while_each_thread(g, t) \ while ((t = next_thread(t)) != g) -extern task_t * FASTCALL(next_thread(const task_t *p)); - #define thread_group_leader(p) (p->pid == p->tgid) +static inline task_t *next_thread(task_t *p) +{ + return list_entry(rcu_dereference(p->thread_group.next), + task_t, thread_group); +} + static inline int thread_group_empty(task_t *p) { - return list_empty(&p->pids[PIDTYPE_TGID].pid_list); + return list_empty(&p->thread_group); } #define delay_group_leader(p) \ (thread_group_leader(p) && !thread_group_empty(p)) -extern void unhash_process(struct task_struct *p); - /* * Protects ->fs, ->files, ->mm, ->ptrace, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. Also @@ -1248,6 +1228,15 @@ static inline void task_unlock(struct task_struct *p) spin_unlock(&p->alloc_lock); } +extern struct sighand_struct *lock_task_sighand(struct task_struct *tsk, + unsigned long *flags); + +static inline void unlock_task_sighand(struct task_struct *tsk, + unsigned long *flags) +{ + spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); +} + #ifndef __HAVE_THREAD_FUNCTIONS #define task_thread_info(task) (task)->thread_info diff --git a/include/linux/signal.h b/include/linux/signal.h index b7d0935..162a8fd 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -249,6 +249,8 @@ static inline void init_sigpending(struct sigpending *sig) INIT_LIST_HEAD(&sig->list); } +extern void flush_sigqueue(struct sigpending *queue); + /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ static inline int valid_signal(unsigned long sig) { diff --git a/include/linux/slab.h b/include/linux/slab.h index 15e1d97..3af03b1 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -210,7 +210,6 @@ extern kmem_cache_t *names_cachep; extern kmem_cache_t *files_cachep; extern kmem_cache_t *filp_cachep; extern kmem_cache_t *fs_cachep; -extern kmem_cache_t *signal_cachep; extern kmem_cache_t *sighand_cachep; extern kmem_cache_t *bio_cachep; diff --git a/kernel/exit.c b/kernel/exit.c index a8c7efc..bc0ec67 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -29,6 +29,7 @@ #include <linux/cpuset.h> #include <linux/syscalls.h> #include <linux/signal.h> +#include <linux/posix-timers.h> #include <linux/cn_proc.h> #include <linux/mutex.h> #include <linux/futex.h> @@ -50,15 +51,80 @@ static void __unhash_process(struct task_struct *p) { nr_threads--; detach_pid(p, PIDTYPE_PID); - detach_pid(p, PIDTYPE_TGID); if (thread_group_leader(p)) { detach_pid(p, PIDTYPE_PGID); detach_pid(p, PIDTYPE_SID); - if (p->pid) - __get_cpu_var(process_counts)--; + + list_del_init(&p->tasks); + __get_cpu_var(process_counts)--; + } + list_del_rcu(&p->thread_group); + remove_parent(p); +} + +/* + * This function expects the tasklist_lock write-locked. + */ +static void __exit_signal(struct task_struct *tsk) +{ + struct signal_struct *sig = tsk->signal; + struct sighand_struct *sighand; + + BUG_ON(!sig); + BUG_ON(!atomic_read(&sig->count)); + + rcu_read_lock(); + sighand = rcu_dereference(tsk->sighand); + spin_lock(&sighand->siglock); + + posix_cpu_timers_exit(tsk); + if (atomic_dec_and_test(&sig->count)) + posix_cpu_timers_exit_group(tsk); + else { + /* + * If there is any task waiting for the group exit + * then notify it: + */ + if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) { + wake_up_process(sig->group_exit_task); + sig->group_exit_task = NULL; + } + if (tsk == sig->curr_target) + sig->curr_target = next_thread(tsk); + /* + * Accumulate here the counters for all threads but the + * group leader as they die, so they can be added into + * the process-wide totals when those are taken. + * The group leader stays around as a zombie as long + * as there are other threads. When it gets reaped, + * the exit.c code will add its counts into these totals. + * We won't ever get here for the group leader, since it + * will have been the last reference on the signal_struct. + */ + sig->utime = cputime_add(sig->utime, tsk->utime); + sig->stime = cputime_add(sig->stime, tsk->stime); + sig->min_flt += tsk->min_flt; + sig->maj_flt += tsk->maj_flt; + sig->nvcsw += tsk->nvcsw; + sig->nivcsw += tsk->nivcsw; + sig->sched_time += tsk->sched_time; + sig = NULL; /* Marker for below. */ } - REMOVE_LINKS(p); + __unhash_process(tsk); + + tsk->signal = NULL; + tsk->sighand = NULL; + spin_unlock(&sighand->siglock); + rcu_read_unlock(); + + __cleanup_sighand(sighand); + clear_tsk_thread_flag(tsk,TIF_SIGPENDING); + flush_sigqueue(&tsk->pending); + if (sig) { + flush_sigqueue(&sig->shared_pending); + __cleanup_signal(sig); + } } void release_task(struct task_struct * p) @@ -67,21 +133,14 @@ void release_task(struct task_struct * p) task_t *leader; struct dentry *proc_dentry; -repeat: +repeat: atomic_dec(&p->user->processes); spin_lock(&p->proc_lock); proc_dentry = proc_pid_unhash(p); write_lock_irq(&tasklist_lock); - if (unlikely(p->ptrace)) - __ptrace_unlink(p); + ptrace_unlink(p); BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); __exit_signal(p); - /* - * Note that the fastpath in sys_times depends on __exit_signal having - * updated the counters before a task is removed from the tasklist of - * the process by __unhash_process. - */ - __unhash_process(p); /* * If we are the last non-leader member of the thread @@ -116,21 +175,6 @@ repeat: goto repeat; } -/* we are using it only for SMP init */ - -void unhash_process(struct task_struct *p) -{ - struct dentry *proc_dentry; - - spin_lock(&p->proc_lock); - proc_dentry = proc_pid_unhash(p); - write_lock_irq(&tasklist_lock); - __unhash_process(p); - write_unlock_irq(&tasklist_lock); - spin_unlock(&p->proc_lock); - proc_pid_flush(proc_dentry); -} - /* * This checks not only the pgrp, but falls back on the pid if no * satisfactory pgrp is found. I dunno - gdb doesn't work correctly @@ -238,10 +282,10 @@ static void reparent_to_init(void) ptrace_unlink(current); /* Reparent to init */ - REMOVE_LINKS(current); + remove_parent(current); current->parent = child_reaper; current->real_parent = child_reaper; - SET_LINKS(current); + add_parent(current); /* Set the exit signal to SIGCHLD so we signal init on exit */ current->exit_signal = SIGCHLD; @@ -538,13 +582,13 @@ static void exit_mm(struct task_struct * tsk) mmput(mm); } -static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper) +static inline void choose_new_parent(task_t *p, task_t *reaper) { /* * Make sure we're not reparenting to ourselves and that * the parent is not a zombie. */ - BUG_ON(p == reaper || reaper->exit_state >= EXIT_ZOMBIE); + BUG_ON(p == reaper || reaper->exit_state); p->real_parent = reaper; } @@ -569,9 +613,9 @@ static void reparent_thread(task_t *p, task_t *father, int traced) * anyway, so let go of it. */ p->ptrace = 0; - list_del_init(&p->sibling); + remove_parent(p); p->parent = p->real_parent; - list_add_tail(&p->sibling, &p->parent->children); + add_parent(p); /* If we'd notified the old parent about this child's death, * also notify the new parent. @@ -645,7 +689,7 @@ static void forget_original_parent(struct task_struct * father, if (father == p->real_parent) { /* reparent with a reaper, real father it's us */ - choose_new_parent(p, reaper, child_reaper); + choose_new_parent(p, reaper); reparent_thread(p, father, 0); } else { /* reparent ptraced task to its real parent */ @@ -666,7 +710,7 @@ static void forget_original_parent(struct task_struct * father, } list_for_each_safe(_p, _n, &father->ptrace_children) { p = list_entry(_p,struct task_struct,ptrace_list); - choose_new_parent(p, reaper, child_reaper); + choose_new_parent(p, reaper); reparent_thread(p, father, 1); } } @@ -807,7 +851,7 @@ fastcall NORET_TYPE void do_exit(long code) panic("Aiee, killing interrupt handler!"); if (unlikely(!tsk->pid)) panic("Attempted to kill the idle task!"); - if (unlikely(tsk->pid == 1)) + if (unlikely(tsk == child_reaper)) panic("Attempted to kill init!"); if (unlikely(current->ptrace & PT_TRACE_EXIT)) { @@ -920,13 +964,6 @@ asmlinkage long sys_exit(int error_code) do_exit((error_code&0xff)<<8); } -task_t fastcall *next_thread(const task_t *p) -{ - return pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID); -} - -EXPORT_SYMBOL(next_thread); - /* * Take down every thread in the group. This is called by fatal signals * as well as by sys_exit_group (below). @@ -941,7 +978,6 @@ do_group_exit(int exit_code) else if (!thread_group_empty(current)) { struct signal_struct *const sig = current->signal; struct sighand_struct *const sighand = current->sighand; - read_lock(&tasklist_lock); spin_lock_irq(&sighand->siglock); if (sig->flags & SIGNAL_GROUP_EXIT) /* Another thread got here before we took the lock. */ @@ -951,7 +987,6 @@ do_group_exit(int exit_code) zap_other_threads(current); } spin_unlock_irq(&sighand->siglock); - read_unlock(&tasklist_lock); } do_exit(exit_code); @@ -1281,7 +1316,7 @@ bail_ref: /* move to end of parent's list to avoid starvation */ remove_parent(p); - add_parent(p, p->parent); + add_parent(p); write_unlock_irq(&tasklist_lock); diff --git a/kernel/fork.c b/kernel/fork.c index c49bd19..b3f7a1b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -84,7 +84,7 @@ static kmem_cache_t *task_struct_cachep; #endif /* SLAB cache for signal_struct structures (tsk->signal) */ -kmem_cache_t *signal_cachep; +static kmem_cache_t *signal_cachep; /* SLAB cache for sighand_struct structures (tsk->sighand) */ kmem_cache_t *sighand_cachep; @@ -786,14 +786,6 @@ int unshare_files(void) EXPORT_SYMBOL(unshare_files); -void sighand_free_cb(struct rcu_head *rhp) -{ - struct sighand_struct *sp; - - sp = container_of(rhp, struct sighand_struct, rcu); - kmem_cache_free(sighand_cachep, sp); -} - static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk) { struct sighand_struct *sig; @@ -806,12 +798,17 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t rcu_assign_pointer(tsk->sighand, sig); if (!sig) return -ENOMEM; - spin_lock_init(&sig->siglock); atomic_set(&sig->count, 1); memcpy(sig->action, current->sighand->action, sizeof(sig->action)); return 0; } +void __cleanup_sighand(struct sighand_struct *sighand) +{ + if (atomic_dec_and_test(&sighand->count)) + kmem_cache_free(sighand_cachep, sighand); +} + static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk) { struct signal_struct *sig; @@ -881,6 +878,22 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts return 0; } +void __cleanup_signal(struct signal_struct *sig) +{ + exit_thread_group_keys(sig); + kmem_cache_free(signal_cachep, sig); +} + +static inline void cleanup_signal(struct task_struct *tsk) +{ + struct signal_struct *sig = tsk->signal; + + atomic_dec(&sig->live); + + if (atomic_dec_and_test(&sig->count)) + __cleanup_signal(sig); +} + static inline void copy_flags(unsigned long clone_flags, struct task_struct *p) { unsigned long new_flags = p->flags; @@ -1095,6 +1108,7 @@ static task_t *copy_process(unsigned long clone_flags, * We dont wake it up yet. */ p->group_leader = p; + INIT_LIST_HEAD(&p->thread_group); INIT_LIST_HEAD(&p->ptrace_children); INIT_LIST_HEAD(&p->ptrace_list); @@ -1118,16 +1132,6 @@ static task_t *copy_process(unsigned long clone_flags, !cpu_online(task_cpu(p)))) set_task_cpu(p, smp_processor_id()); - /* - * Check for pending SIGKILL! The new thread should not be allowed - * to slip out of an OOM kill. (or normal SIGKILL.) - */ - if (sigismember(¤t->pending.signal, SIGKILL)) { - write_unlock_irq(&tasklist_lock); - retval = -EINTR; - goto bad_fork_cleanup_namespace; - } - /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) p->real_parent = current->real_parent; @@ -1136,6 +1140,23 @@ static task_t *copy_process(unsigned long clone_flags, p->parent = p->real_parent; spin_lock(¤t->sighand->siglock); + + /* + * Process group and session signals need to be delivered to just the + * parent before the fork or both the parent and the child after the + * fork. Restart if a signal comes in before we add the new process to + * it's process group. + * A fatal signal pending means that current will exit, so the new + * thread can't slip out of an OOM kill (or normal SIGKILL). + */ + recalc_sigpending(); + if (signal_pending(current)) { + spin_unlock(¤t->sighand->siglock); + write_unlock_irq(&tasklist_lock); + retval = -ERESTARTNOINTR; + goto bad_fork_cleanup_namespace; + } + if (clone_flags & CLONE_THREAD) { /* * Important: if an exit-all has been started then @@ -1148,17 +1169,9 @@ static task_t *copy_process(unsigned long clone_flags, retval = -EAGAIN; goto bad_fork_cleanup_namespace; } - p->group_leader = current->group_leader; - if (current->signal->group_stop_count > 0) { - /* - * There is an all-stop in progress for the group. - * We ourselves will stop as soon as we check signals. - * Make the new thread part of that group stop too. - */ - current->signal->group_stop_count++; - set_tsk_thread_flag(p, TIF_SIGPENDING); - } + p->group_leader = current->group_leader; + list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); if (!cputime_eq(current->signal->it_virt_expires, cputime_zero) || @@ -1181,23 +1194,25 @@ static task_t *copy_process(unsigned long clone_flags, */ p->ioprio = current->ioprio; - SET_LINKS(p); - if (unlikely(p->ptrace & PT_PTRACED)) - __ptrace_link(p, current->parent); - - if (thread_group_leader(p)) { - p->signal->tty = current->signal->tty; - p->signal->pgrp = process_group(current); - p->signal->session = current->signal->session; - attach_pid(p, PIDTYPE_PGID, process_group(p)); - attach_pid(p, PIDTYPE_SID, p->signal->session); - if (p->pid) + if (likely(p->pid)) { + add_parent(p); + if (unlikely(p->ptrace & PT_PTRACED)) + __ptrace_link(p, current->parent); + + if (thread_group_leader(p)) { + p->signal->tty = current->signal->tty; + p->signal->pgrp = process_group(current); + p->signal->session = current->signal->session; + attach_pid(p, PIDTYPE_PGID, process_group(p)); + attach_pid(p, PIDTYPE_SID, p->signal->session); + + list_add_tail(&p->tasks, &init_task.tasks); __get_cpu_var(process_counts)++; + } + attach_pid(p, PIDTYPE_PID, p->pid); + nr_threads++; } - attach_pid(p, PIDTYPE_TGID, p->tgid); - attach_pid(p, PIDTYPE_PID, p->pid); - nr_threads++; total_forks++; spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); @@ -1212,9 +1227,9 @@ bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: - exit_signal(p); + cleanup_signal(p); bad_fork_cleanup_sighand: - exit_sighand(p); + __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: @@ -1261,7 +1276,7 @@ task_t * __devinit fork_idle(int cpu) if (!task) return ERR_PTR(-ENOMEM); init_idle(task, cpu); - unhash_process(task); + return task; } @@ -1353,11 +1368,21 @@ long do_fork(unsigned long clone_flags, #define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif +static void sighand_ctor(void *data, kmem_cache_t *cachep, unsigned long flags) +{ + struct sighand_struct *sighand = data; + + if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) + spin_lock_init(&sighand->siglock); +} + void __init proc_caches_init(void) { sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU, + sighand_ctor, NULL); signal_cachep = kmem_cache_create("signal_cache", sizeof(struct signal_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); diff --git a/kernel/kmod.c b/kernel/kmod.c index 51a8920..20a997c 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -170,7 +170,7 @@ static int wait_for_helper(void *data) sa.sa.sa_handler = SIG_IGN; sa.sa.sa_flags = 0; siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD)); - do_sigaction(SIGCHLD, &sa, (struct k_sigaction *)0); + do_sigaction(SIGCHLD, &sa, NULL); allow_signal(SIGCHLD); pid = kernel_thread(____call_usermodehelper, sub_info, SIGCHLD); diff --git a/kernel/pid.c b/kernel/pid.c index 1acc072..a9f2dfd 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -218,36 +218,6 @@ task_t *find_task_by_pid_type(int type, int nr) EXPORT_SYMBOL(find_task_by_pid_type); /* - * This function switches the PIDs if a non-leader thread calls - * sys_execve() - this must be done without releasing the PID. - * (which a detach_pid() would eventually do.) - */ -void switch_exec_pids(task_t *leader, task_t *thread) -{ - __detach_pid(leader, PIDTYPE_PID); - __detach_pid(leader, PIDTYPE_TGID); - __detach_pid(leader, PIDTYPE_PGID); - __detach_pid(leader, PIDTYPE_SID); - - __detach_pid(thread, PIDTYPE_PID); - __detach_pid(thread, PIDTYPE_TGID); - - leader->pid = leader->tgid = thread->pid; - thread->pid = thread->tgid; - - attach_pid(thread, PIDTYPE_PID, thread->pid); - attach_pid(thread, PIDTYPE_TGID, thread->tgid); - attach_pid(thread, PIDTYPE_PGID, thread->signal->pgrp); - attach_pid(thread, PIDTYPE_SID, thread->signal->session); - list_add_tail(&thread->tasks, &init_task.tasks); - - attach_pid(leader, PIDTYPE_PID, leader->pid); - attach_pid(leader, PIDTYPE_TGID, leader->tgid); - attach_pid(leader, PIDTYPE_PGID, leader->signal->pgrp); - attach_pid(leader, PIDTYPE_SID, leader->signal->session); -} - -/* * The pid hash table is scaled according to the amount of memory in the * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or * more. @@ -277,16 +247,8 @@ void __init pidhash_init(void) void __init pidmap_init(void) { - int i; - pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL); + /* Reserve PID 0. We never call free_pidmap(0) */ set_bit(0, pidmap_array->page); atomic_dec(&pidmap_array->nr_free); - - /* - * Allocate PID 0, and hash it via all PID types: - */ - - for (i = 0; i < PIDTYPE_MAX; i++) - attach_pid(current, i, 0); } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index d95a72c..86a7f6c 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -35,9 +35,9 @@ void __ptrace_link(task_t *child, task_t *new_parent) if (child->parent == new_parent) return; list_add(&child->ptrace_list, &child->parent->ptrace_children); - REMOVE_LINKS(child); + remove_parent(child); child->parent = new_parent; - SET_LINKS(child); + add_parent(child); } /* @@ -77,9 +77,9 @@ void __ptrace_unlink(task_t *child) child->ptrace = 0; if (!list_empty(&child->ptrace_list)) { list_del_init(&child->ptrace_list); - REMOVE_LINKS(child); + remove_parent(child); child->parent = child->real_parent; - SET_LINKS(child); + add_parent(child); } ptrace_untrace(child); diff --git a/kernel/signal.c b/kernel/signal.c index 75f7341..4922928 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -22,7 +22,6 @@ #include <linux/security.h> #include <linux/syscalls.h> #include <linux/ptrace.h> -#include <linux/posix-timers.h> #include <linux/signal.h> #include <linux/audit.h> #include <linux/capability.h> @@ -147,6 +146,8 @@ static kmem_cache_t *sigqueue_cachep; #define sig_kernel_stop(sig) \ (((sig) < SIGRTMIN) && T(sig, SIG_KERNEL_STOP_MASK)) +#define sig_needs_tasklist(sig) ((sig) == SIGCONT) + #define sig_user_defined(t, signr) \ (((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_DFL) && \ ((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_IGN)) @@ -292,7 +293,7 @@ static void __sigqueue_free(struct sigqueue *q) kmem_cache_free(sigqueue_cachep, q); } -static void flush_sigqueue(struct sigpending *queue) +void flush_sigqueue(struct sigpending *queue) { struct sigqueue *q; @@ -307,9 +308,7 @@ static void flush_sigqueue(struct sigpending *queue) /* * Flush all pending signals for a task. */ - -void -flush_signals(struct task_struct *t) +void flush_signals(struct task_struct *t) { unsigned long flags; @@ -321,109 +320,6 @@ flush_signals(struct task_struct *t) } /* - * This function expects the tasklist_lock write-locked. - */ -void __exit_sighand(struct task_struct *tsk) -{ - struct sighand_struct * sighand = tsk->sighand; - - /* Ok, we're done with the signal handlers */ - tsk->sighand = NULL; - if (atomic_dec_and_test(&sighand->count)) - sighand_free(sighand); -} - -void exit_sighand(struct task_struct *tsk) -{ - write_lock_irq(&tasklist_lock); - rcu_read_lock(); - if (tsk->sighand != NULL) { - struct sighand_struct *sighand = rcu_dereference(tsk->sighand); - spin_lock(&sighand->siglock); - __exit_sighand(tsk); - spin_unlock(&sighand->siglock); - } - rcu_read_unlock(); - write_unlock_irq(&tasklist_lock); -} - -/* - * This function expects the tasklist_lock write-locked. - */ -void __exit_signal(struct task_struct *tsk) -{ - struct signal_struct * sig = tsk->signal; - struct sighand_struct * sighand; - - if (!sig) - BUG(); - if (!atomic_read(&sig->count)) - BUG(); - rcu_read_lock(); - sighand = rcu_dereference(tsk->sighand); - spin_lock(&sighand->siglock); - posix_cpu_timers_exit(tsk); - if (atomic_dec_and_test(&sig->count)) { - posix_cpu_timers_exit_group(tsk); - tsk->signal = NULL; - __exit_sighand(tsk); - spin_unlock(&sighand->siglock); - flush_sigqueue(&sig->shared_pending); - } else { - /* - * If there is any task waiting for the group exit - * then notify it: - */ - if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) { - wake_up_process(sig->group_exit_task); - sig->group_exit_task = NULL; - } - if (tsk == sig->curr_target) - sig->curr_target = next_thread(tsk); - tsk->signal = NULL; - /* - * Accumulate here the counters for all threads but the - * group leader as they die, so they can be added into - * the process-wide totals when those are taken. - * The group leader stays around as a zombie as long - * as there are other threads. When it gets reaped, - * the exit.c code will add its counts into these totals. - * We won't ever get here for the group leader, since it - * will have been the last reference on the signal_struct. - */ - sig->utime = cputime_add(sig->utime, tsk->utime); - sig->stime = cputime_add(sig->stime, tsk->stime); - sig->min_flt += tsk->min_flt; - sig->maj_flt += tsk->maj_flt; - sig->nvcsw += tsk->nvcsw; - sig->nivcsw += tsk->nivcsw; - sig->sched_time += tsk->sched_time; - __exit_sighand(tsk); - spin_unlock(&sighand->siglock); - sig = NULL; /* Marker for below. */ - } - rcu_read_unlock(); - clear_tsk_thread_flag(tsk,TIF_SIGPENDING); - flush_sigqueue(&tsk->pending); - if (sig) { - /* - * We are cleaning up the signal_struct here. - */ - exit_thread_group_keys(sig); - kmem_cache_free(signal_cachep, sig); - } -} - -void exit_signal(struct task_struct *tsk) -{ - atomic_dec(&tsk->signal->live); - - write_lock_irq(&tasklist_lock); - __exit_signal(tsk); - write_unlock_irq(&tasklist_lock); -} - -/* * Flush all handlers for a task. */ @@ -695,9 +591,7 @@ static int check_kill_permission(int sig, struct siginfo *info, } /* forward decl */ -static void do_notify_parent_cldstop(struct task_struct *tsk, - int to_self, - int why); +static void do_notify_parent_cldstop(struct task_struct *tsk, int why); /* * Handle magic process-wide effects of stop/continue signals. @@ -747,7 +641,7 @@ static void handle_stop_signal(int sig, struct task_struct *p) p->signal->group_stop_count = 0; p->signal->flags = SIGNAL_STOP_CONTINUED; spin_unlock(&p->sighand->siglock); - do_notify_parent_cldstop(p, (p->ptrace & PT_PTRACED), CLD_STOPPED); + do_notify_parent_cldstop(p, CLD_STOPPED); spin_lock(&p->sighand->siglock); } rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending); @@ -788,7 +682,7 @@ static void handle_stop_signal(int sig, struct task_struct *p) p->signal->flags = SIGNAL_STOP_CONTINUED; p->signal->group_exit_code = 0; spin_unlock(&p->sighand->siglock); - do_notify_parent_cldstop(p, (p->ptrace & PT_PTRACED), CLD_CONTINUED); + do_notify_parent_cldstop(p, CLD_CONTINUED); spin_lock(&p->sighand->siglock); } else { /* @@ -1120,27 +1014,37 @@ void zap_other_threads(struct task_struct *p) /* * Must be called under rcu_read_lock() or with tasklist_lock read-held. */ +struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags) +{ + struct sighand_struct *sighand; + + for (;;) { + sighand = rcu_dereference(tsk->sighand); + if (unlikely(sighand == NULL)) + break; + + spin_lock_irqsave(&sighand->siglock, *flags); + if (likely(sighand == tsk->sighand)) + break; + spin_unlock_irqrestore(&sighand->siglock, *flags); + } + + return sighand; +} + int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) { unsigned long flags; - struct sighand_struct *sp; int ret; -retry: ret = check_kill_permission(sig, info, p); - if (!ret && sig && (sp = rcu_dereference(p->sighand))) { - spin_lock_irqsave(&sp->siglock, flags); - if (p->sighand != sp) { - spin_unlock_irqrestore(&sp->siglock, flags); - goto retry; - } - if ((atomic_read(&sp->count) == 0) || - (atomic_read(&p->usage) == 0)) { - spin_unlock_irqrestore(&sp->siglock, flags); - return -ESRCH; + + if (!ret && sig) { + ret = -ESRCH; + if (lock_task_sighand(p, &flags)) { + ret = __group_send_sig_info(sig, info, p); + unlock_task_sighand(p, &flags); } - ret = __group_send_sig_info(sig, info, p); - spin_unlock_irqrestore(&sp->siglock, flags); } return ret; @@ -1189,7 +1093,7 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid) struct task_struct *p; rcu_read_lock(); - if (unlikely(sig_kernel_stop(sig) || sig == SIGCONT)) { + if (unlikely(sig_needs_tasklist(sig))) { read_lock(&tasklist_lock); acquired_tasklist_lock = 1; } @@ -1405,12 +1309,10 @@ void sigqueue_free(struct sigqueue *q) __sigqueue_free(q); } -int -send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) +int send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) { unsigned long flags; int ret = 0; - struct sighand_struct *sh; BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); @@ -1424,48 +1326,17 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) */ rcu_read_lock(); - if (unlikely(p->flags & PF_EXITING)) { + if (!likely(lock_task_sighand(p, &flags))) { ret = -1; goto out_err; } -retry: - sh = rcu_dereference(p->sighand); - - spin_lock_irqsave(&sh->siglock, flags); - if (p->sighand != sh) { - /* We raced with exec() in a multithreaded process... */ - spin_unlock_irqrestore(&sh->siglock, flags); - goto retry; - } - - /* - * We do the check here again to handle the following scenario: - * - * CPU 0 CPU 1 - * send_sigqueue - * check PF_EXITING - * interrupt exit code running - * __exit_signal - * lock sighand->siglock - * unlock sighand->siglock - * lock sh->siglock - * add(tsk->pending) flush_sigqueue(tsk->pending) - * - */ - - if (unlikely(p->flags & PF_EXITING)) { - ret = -1; - goto out; - } - if (unlikely(!list_empty(&q->list))) { /* * If an SI_TIMER entry is already queue just increment * the overrun count. */ - if (q->info.si_code != SI_TIMER) - BUG(); + BUG_ON(q->info.si_code != SI_TIMER); q->info.si_overrun++; goto out; } @@ -1481,7 +1352,7 @@ retry: signal_wake_up(p, sig == SIGKILL); out: - spin_unlock_irqrestore(&sh->siglock, flags); + unlock_task_sighand(p, &flags); out_err: rcu_read_unlock(); @@ -1613,14 +1484,14 @@ void do_notify_parent(struct task_struct *tsk, int sig) spin_unlock_irqrestore(&psig->siglock, flags); } -static void do_notify_parent_cldstop(struct task_struct *tsk, int to_self, int why) +static void do_notify_parent_cldstop(struct task_struct *tsk, int why) { struct siginfo info; unsigned long flags; struct task_struct *parent; struct sighand_struct *sighand; - if (to_self) + if (tsk->ptrace & PT_PTRACED) parent = tsk->parent; else { tsk = tsk->group_leader; @@ -1695,7 +1566,7 @@ static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info) !(current->ptrace & PT_ATTACHED)) && (likely(current->parent->signal != current->signal) || !unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))) { - do_notify_parent_cldstop(current, 1, CLD_TRAPPED); + do_notify_parent_cldstop(current, CLD_TRAPPED); read_unlock(&tasklist_lock); schedule(); } else { @@ -1744,25 +1615,17 @@ void ptrace_notify(int exit_code) static void finish_stop(int stop_count) { - int to_self; - /* * If there are no other threads in the group, or if there is * a group stop in progress and we are the last to stop, * report to the parent. When ptraced, every thread reports itself. */ - if (stop_count < 0 || (current->ptrace & PT_PTRACED)) - to_self = 1; - else if (stop_count == 0) - to_self = 0; - else - goto out; - - read_lock(&tasklist_lock); - do_notify_parent_cldstop(current, to_self, CLD_STOPPED); - read_unlock(&tasklist_lock); + if (stop_count == 0 || (current->ptrace & PT_PTRACED)) { + read_lock(&tasklist_lock); + do_notify_parent_cldstop(current, CLD_STOPPED); + read_unlock(&tasklist_lock); + } -out: schedule(); /* * Now we don't run again until continued. @@ -1776,12 +1639,10 @@ out: * Returns nonzero if we've actually stopped and released the siglock. * Returns zero if we didn't stop and still hold the siglock. */ -static int -do_signal_stop(int signr) +static int do_signal_stop(int signr) { struct signal_struct *sig = current->signal; - struct sighand_struct *sighand = current->sighand; - int stop_count = -1; + int stop_count; if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED)) return 0; @@ -1791,86 +1652,37 @@ do_signal_stop(int signr) * There is a group stop in progress. We don't need to * start another one. */ - signr = sig->group_exit_code; stop_count = --sig->group_stop_count; - current->exit_code = signr; - set_current_state(TASK_STOPPED); - if (stop_count == 0) - sig->flags = SIGNAL_STOP_STOPPED; - spin_unlock_irq(&sighand->siglock); - } - else if (thread_group_empty(current)) { - /* - * Lock must be held through transition to stopped state. - */ - current->exit_code = current->signal->group_exit_code = signr; - set_current_state(TASK_STOPPED); - sig->flags = SIGNAL_STOP_STOPPED; - spin_unlock_irq(&sighand->siglock); - } - else { + } else { /* * There is no group stop already in progress. - * We must initiate one now, but that requires - * dropping siglock to get both the tasklist lock - * and siglock again in the proper order. Note that - * this allows an intervening SIGCONT to be posted. - * We need to check for that and bail out if necessary. + * We must initiate one now. */ struct task_struct *t; - spin_unlock_irq(&sighand->siglock); - - /* signals can be posted during this window */ + sig->group_exit_code = signr; - read_lock(&tasklist_lock); - spin_lock_irq(&sighand->siglock); - - if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED)) { + stop_count = 0; + for (t = next_thread(current); t != current; t = next_thread(t)) /* - * Another stop or continue happened while we - * didn't have the lock. We can just swallow this - * signal now. If we raced with a SIGCONT, that - * should have just cleared it now. If we raced - * with another processor delivering a stop signal, - * then the SIGCONT that wakes us up should clear it. + * Setting state to TASK_STOPPED for a group + * stop is always done with the siglock held, + * so this check has no races. */ - read_unlock(&tasklist_lock); - return 0; - } - - if (sig->group_stop_count == 0) { - sig->group_exit_code = signr; - stop_count = 0; - for (t = next_thread(current); t != current; - t = next_thread(t)) - /* - * Setting state to TASK_STOPPED for a group - * stop is always done with the siglock held, - * so this check has no races. - */ - if (!t->exit_state && - !(t->state & (TASK_STOPPED|TASK_TRACED))) { - stop_count++; - signal_wake_up(t, 0); - } - sig->group_stop_count = stop_count; - } - else { - /* A race with another thread while unlocked. */ - signr = sig->group_exit_code; - stop_count = --sig->group_stop_count; - } - - current->exit_code = signr; - set_current_state(TASK_STOPPED); - if (stop_count == 0) - sig->flags = SIGNAL_STOP_STOPPED; - - spin_unlock_irq(&sighand->siglock); - read_unlock(&tasklist_lock); + if (!t->exit_state && + !(t->state & (TASK_STOPPED|TASK_TRACED))) { + stop_count++; + signal_wake_up(t, 0); + } + sig->group_stop_count = stop_count; } + if (stop_count == 0) + sig->flags = SIGNAL_STOP_STOPPED; + current->exit_code = sig->group_exit_code; + __set_current_state(TASK_STOPPED); + + spin_unlock_irq(¤t->sighand->siglock); finish_stop(stop_count); return 1; } @@ -1990,7 +1802,7 @@ relock: continue; /* Init gets no signals it doesn't want. */ - if (current->pid == 1) + if (current == child_reaper) continue; if (sig_kernel_stop(signr)) { @@ -2430,8 +2242,7 @@ sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo) return kill_proc_info(sig, &info, pid); } -int -do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) +int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) { struct k_sigaction *k; sigset_t mask; @@ -2457,6 +2268,7 @@ do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) if (act) { sigdelsetmask(&act->sa.sa_mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); + *k = *act; /* * POSIX 3.3.1.3: * "Setting a signal action to SIG_IGN for a signal that is @@ -2469,19 +2281,8 @@ do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) * be discarded, whether or not it is blocked" */ if (act->sa.sa_handler == SIG_IGN || - (act->sa.sa_handler == SIG_DFL && - sig_kernel_ignore(sig))) { - /* - * This is a fairly rare case, so we only take the - * tasklist_lock once we're sure we'll need it. - * Now we must do this little unlock and relock - * dance to maintain the lock hierarchy. - */ + (act->sa.sa_handler == SIG_DFL && sig_kernel_ignore(sig))) { struct task_struct *t = current; - spin_unlock_irq(&t->sighand->siglock); - read_lock(&tasklist_lock); - spin_lock_irq(&t->sighand->siglock); - *k = *act; sigemptyset(&mask); sigaddset(&mask, sig); rm_from_queue_full(&mask, &t->signal->shared_pending); @@ -2490,12 +2291,7 @@ do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) recalc_sigpending_tsk(t); t = next_thread(t); } while (t != current); - spin_unlock_irq(¤t->sighand->siglock); - read_unlock(&tasklist_lock); - return 0; } - - *k = *act; } spin_unlock_irq(¤t->sighand->siglock); diff --git a/kernel/sys.c b/kernel/sys.c index c93d37f..7ef7f60 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1202,69 +1202,24 @@ asmlinkage long sys_times(struct tms __user * tbuf) */ if (tbuf) { struct tms tmp; + struct task_struct *tsk = current; + struct task_struct *t; cputime_t utime, stime, cutime, cstime; -#ifdef CONFIG_SMP - if (thread_group_empty(current)) { - /* - * Single thread case without the use of any locks. - * - * We may race with release_task if two threads are - * executing. However, release task first adds up the - * counters (__exit_signal) before removing the task - * from the process tasklist (__unhash_process). - * __exit_signal also acquires and releases the - * siglock which results in the proper memory ordering - * so that the list modifications are always visible - * after the counters have been updated. - * - * If the counters have been updated by the second thread - * but the thread has not yet been removed from the list - * then the other branch will be executing which will - * block on tasklist_lock until the exit handling of the - * other task is finished. - * - * This also implies that the sighand->siglock cannot - * be held by another processor. So we can also - * skip acquiring that lock. - */ - utime = cputime_add(current->signal->utime, current->utime); - stime = cputime_add(current->signal->utime, current->stime); - cutime = current->signal->cutime; - cstime = current->signal->cstime; - } else -#endif - { + spin_lock_irq(&tsk->sighand->siglock); + utime = tsk->signal->utime; + stime = tsk->signal->stime; + t = tsk; + do { + utime = cputime_add(utime, t->utime); + stime = cputime_add(stime, t->stime); + t = next_thread(t); + } while (t != tsk); - /* Process with multiple threads */ - struct task_struct *tsk = current; - struct task_struct *t; + cutime = tsk->signal->cutime; + cstime = tsk->signal->cstime; + spin_unlock_irq(&tsk->sighand->siglock); - read_lock(&tasklist_lock); - utime = tsk->signal->utime; - stime = tsk->signal->stime; - t = tsk; - do { - utime = cputime_add(utime, t->utime); - stime = cputime_add(stime, t->stime); - t = next_thread(t); - } while (t != tsk); - - /* - * While we have tasklist_lock read-locked, no dying thread - * can be updating current->signal->[us]time. Instead, - * we got their counts included in the live thread loop. - * However, another thread can come in right now and - * do a wait call that updates current->signal->c[us]time. - * To make sure we always see that pair updated atomically, - * we take the siglock around fetching them. - */ - spin_lock_irq(&tsk->sighand->siglock); - cutime = tsk->signal->cutime; - cstime = tsk->signal->cstime; - spin_unlock_irq(&tsk->sighand->siglock); - read_unlock(&tasklist_lock); - } tmp.tms_utime = cputime_to_clock_t(utime); tmp.tms_stime = cputime_to_clock_t(stime); tmp.tms_cutime = cputime_to_clock_t(cutime); |