diff options
55 files changed, 1659 insertions, 577 deletions
diff --git a/Documentation/ABI/obsolete/devfs b/Documentation/ABI/removed/devfs index b8b8739..8195c4e 100644 --- a/Documentation/ABI/obsolete/devfs +++ b/Documentation/ABI/removed/devfs @@ -1,13 +1,12 @@ What: devfs -Date: July 2005 +Date: July 2005 (scheduled), finally removed in kernel v2.6.18 Contact: Greg Kroah-Hartman <gregkh@suse.de> Description: devfs has been unmaintained for a number of years, has unfixable races, contains a naming policy within the kernel that is against the LSB, and can be replaced by using udev. - The files fs/devfs/*, include/linux/devfs_fs*.h will be removed, + The files fs/devfs/*, include/linux/devfs_fs*.h were removed, along with the the assorted devfs function calls throughout the kernel tree. Users: - diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power new file mode 100644 index 0000000..d882f80 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-power @@ -0,0 +1,88 @@ +What: /sys/power/ +Date: August 2006 +Contact: Rafael J. Wysocki <rjw@sisk.pl> +Description: + The /sys/power directory will contain files that will + provide a unified interface to the power management + subsystem. + +What: /sys/power/state +Date: August 2006 +Contact: Rafael J. Wysocki <rjw@sisk.pl> +Description: + The /sys/power/state file controls the system power state. + Reading from this file returns what states are supported, + which is hard-coded to 'standby' (Power-On Suspend), 'mem' + (Suspend-to-RAM), and 'disk' (Suspend-to-Disk). + + Writing to this file one of these strings causes the system to + transition into that state. Please see the file + Documentation/power/states.txt for a description of each of + these states. + +What: /sys/power/disk +Date: August 2006 +Contact: Rafael J. Wysocki <rjw@sisk.pl> +Description: + The /sys/power/disk file controls the operating mode of the + suspend-to-disk mechanism. Reading from this file returns + the name of the method by which the system will be put to + sleep on the next suspend. There are four methods supported: + 'firmware' - means that the memory image will be saved to disk + by some firmware, in which case we also assume that the + firmware will handle the system suspend. + 'platform' - the memory image will be saved by the kernel and + the system will be put to sleep by the platform driver (e.g. + ACPI or other PM registers). + 'shutdown' - the memory image will be saved by the kernel and + the system will be powered off. + 'reboot' - the memory image will be saved by the kernel and + the system will be rebooted. + + The suspend-to-disk method may be chosen by writing to this + file one of the accepted strings: + + 'firmware' + 'platform' + 'shutdown' + 'reboot' + + It will only change to 'firmware' or 'platform' if the system + supports that. + +What: /sys/power/image_size +Date: August 2006 +Contact: Rafael J. Wysocki <rjw@sisk.pl> +Description: + The /sys/power/image_size file controls the size of the image + created by the suspend-to-disk mechanism. It can be written a + string representing a non-negative integer that will be used + as an upper limit of the image size, in bytes. The kernel's + suspend-to-disk code will do its best to ensure the image size + will not exceed this number. However, if it turns out to be + impossible, the kernel will try to suspend anyway using the + smallest image possible. In particular, if "0" is written to + this file, the suspend image will be as small as possible. + + Reading from this file will display the current image size + limit, which is set to 500 MB by default. + +What: /sys/power/pm_trace +Date: August 2006 +Contact: Rafael J. Wysocki <rjw@sisk.pl> +Description: + The /sys/power/pm_trace file controls the code which saves the + last PM event point in the RTC across reboots, so that you can + debug a machine that just hangs during suspend (or more + commonly, during resume). Namely, the RTC is only used to save + the last PM event point if this file contains '1'. Initially + it contains '0' which may be changed to '1' by writing a + string representing a nonzero integer into it. + + To use this debugging feature you should attempt to suspend + the machine, then reboot it and run + + dmesg -s 1000000 | grep 'hash matches' + + CAUTION: Using it will cause your machine's real-time (CMOS) + clock to be set to a random invalid time after a resume. diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 552507f..611acc3 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -6,6 +6,21 @@ be removed from this file. --------------------------- +What: /sys/devices/.../power/state + dev->power.power_state + dpm_runtime_{suspend,resume)() +When: July 2007 +Why: Broken design for runtime control over driver power states, confusing + driver-internal runtime power management with: mechanisms to support + system-wide sleep state transitions; event codes that distinguish + different phases of swsusp "sleep" transitions; and userspace policy + inputs. This framework was never widely used, and most attempts to + use it were broken. Drivers should instead be exposing domain-specific + interfaces either to kernel or to userspace. +Who: Pavel Machek <pavel@suse.cz> + +--------------------------- + What: RAW driver (CONFIG_RAW_DRIVER) When: December 2005 Why: declared obsolete since kernel 2.6.3 @@ -294,3 +309,15 @@ Why: The frame diverter is included in most distribution kernels, but is It is not clear if anyone is still using it. Who: Stephen Hemminger <shemminger@osdl.org> +--------------------------- + + +What: PHYSDEVPATH, PHYSDEVBUS, PHYSDEVDRIVER in the uevent environment +When: Oktober 2008 +Why: The stacking of class devices makes these values misleading and + inconsistent. + Class devices should not carry any of these properties, and bus + devices have SUBSYTEM and DRIVER as a replacement. +Who: Kay Sievers <kay.sievers@suse.de> + +--------------------------- diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index fba1e05..d0e79d5 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt @@ -1,208 +1,553 @@ +Most of the code in Linux is device drivers, so most of the Linux power +management code is also driver-specific. Most drivers will do very little; +others, especially for platforms with small batteries (like cell phones), +will do a lot. + +This writeup gives an overview of how drivers interact with system-wide +power management goals, emphasizing the models and interfaces that are +shared by everything that hooks up to the driver model core. Read it as +background for the domain-specific work you'd do with any specific driver. + + +Two Models for Device Power Management +====================================== +Drivers will use one or both of these models to put devices into low-power +states: + + System Sleep model: + Drivers can enter low power states as part of entering system-wide + low-power states like "suspend-to-ram", or (mostly for systems with + disks) "hibernate" (suspend-to-disk). + + This is something that device, bus, and class drivers collaborate on + by implementing various role-specific suspend and resume methods to + cleanly power down hardware and software subsystems, then reactivate + them without loss of data. + + Some drivers can manage hardware wakeup events, which make the system + leave that low-power state. This feature may be disabled using the + relevant /sys/devices/.../power/wakeup file; enabling it may cost some + power usage, but let the whole system enter low power states more often. + + Runtime Power Management model: + Drivers may also enter low power states while the system is running, + independently of other power management activity. Upstream drivers + will normally not know (or care) if the device is in some low power + state when issuing requests; the driver will auto-resume anything + that's needed when it gets a request. + + This doesn't have, or need much infrastructure; it's just something you + should do when writing your drivers. For example, clk_disable() unused + clocks as part of minimizing power drain for currently-unused hardware. + Of course, sometimes clusters of drivers will collaborate with each + other, which could involve task-specific power management. + +There's not a lot to be said about those low power states except that they +are very system-specific, and often device-specific. Also, that if enough +drivers put themselves into low power states (at "runtime"), the effect may be +the same as entering some system-wide low-power state (system sleep) ... and +that synergies exist, so that several drivers using runtime pm might put the +system into a state where even deeper power saving options are available. + +Most suspended devices will have quiesced all I/O: no more DMA or irqs, no +more data read or written, and requests from upstream drivers are no longer +accepted. A given bus or platform may have different requirements though. + +Examples of hardware wakeup events include an alarm from a real time clock, +network wake-on-LAN packets, keyboard or mouse activity, and media insertion +or removal (for PCMCIA, MMC/SD, USB, and so on). + + +Interfaces for Entering System Sleep States +=========================================== +Most of the programming interfaces a device driver needs to know about +relate to that first model: entering a system-wide low power state, +rather than just minimizing power consumption by one device. + + +Bus Driver Methods +------------------ +The core methods to suspend and resume devices reside in struct bus_type. +These are mostly of interest to people writing infrastructure for busses +like PCI or USB, or because they define the primitives that device drivers +may need to apply in domain-specific ways to their devices: -Device Power Management +struct bus_type { + ... + int (*suspend)(struct device *dev, pm_message_t state); + int (*suspend_late)(struct device *dev, pm_message_t state); + int (*resume_early)(struct device *dev); + int (*resume)(struct device *dev); +}; -Device power management encompasses two areas - the ability to save -state and transition a device to a low-power state when the system is -entering a low-power state; and the ability to transition a device to -a low-power state while the system is running (and independently of -any other power management activity). +Bus drivers implement those methods as appropriate for the hardware and +the drivers using it; PCI works differently from USB, and so on. Not many +people write bus drivers; most driver code is a "device driver" that +builds on top of bus-specific framework code. + +For more information on these driver calls, see the description later; +they are called in phases for every device, respecting the parent-child +sequencing in the driver model tree. Note that as this is being written, +only the suspend() and resume() are widely available; not many bus drivers +leverage all of those phases, or pass them down to lower driver levels. + + +/sys/devices/.../power/wakeup files +----------------------------------- +All devices in the driver model have two flags to control handling of +wakeup events, which are hardware signals that can force the device and/or +system out of a low power state. These are initialized by bus or device +driver code using device_init_wakeup(dev,can_wakeup). + +The "can_wakeup" flag just records whether the device (and its driver) can +physically support wakeup events. When that flag is clear, the sysfs +"wakeup" file is empty, and device_may_wakeup() returns false. + +For devices that can issue wakeup events, a separate flag controls whether +that device should try to use its wakeup mechanism. The initial value of +device_may_wakeup() will be true, so that the device's "wakeup" file holds +the value "enabled". Userspace can change that to "disabled" so that +device_may_wakeup() returns false; or change it back to "enabled" (so that +it returns true again). + + +EXAMPLE: PCI Device Driver Methods +----------------------------------- +PCI framework software calls these methods when the PCI device driver bound +to a device device has provided them: + +struct pci_driver { + ... + int (*suspend)(struct pci_device *pdev, pm_message_t state); + int (*suspend_late)(struct pci_device *pdev, pm_message_t state); + + int (*resume_early)(struct pci_device *pdev); + int (*resume)(struct pci_device *pdev); +}; +Drivers will implement those methods, and call PCI-specific procedures +like pci_set_power_state(), pci_enable_wake(), pci_save_state(), and +pci_restore_state() to manage PCI-specific mechanisms. (PCI config space +could be saved during driver probe, if it weren't for the fact that some +systems rely on userspace tweaking using setpci.) Devices are suspended +before their bridges enter low power states, and likewise bridges resume +before their devices. + + +Upper Layers of Driver Stacks +----------------------------- +Device drivers generally have at least two interfaces, and the methods +sketched above are the ones which apply to the lower level (nearer PCI, USB, +or other bus hardware). The network and block layers are examples of upper +level interfaces, as is a character device talking to userspace. + +Power management requests normally need to flow through those upper levels, +which often use domain-oriented requests like "blank that screen". In +some cases those upper levels will have power management intelligence that +relates to end-user activity, or other devices that work in cooperation. + +When those interfaces are structured using class interfaces, there is a +standard way to have the upper layer stop issuing requests to a given +class device (and restart later): + +struct class { + ... + int (*suspend)(struct device *dev, pm_message_t state); + int (*resume)(struct device *dev); +}; -Methods +Those calls are issued in specific phases of the process by which the +system enters a low power "suspend" state, or resumes from it. + + +Calling Drivers to Enter System Sleep States +============================================ +When the system enters a low power state, each device's driver is asked +to suspend the device by putting it into state compatible with the target +system state. That's usually some version of "off", but the details are +system-specific. Also, wakeup-enabled devices will usually stay partly +functional in order to wake the system. + +When the system leaves that low power state, the device's driver is asked +to resume it. The suspend and resume operations always go together, and +both are multi-phase operations. + +For simple drivers, suspend might quiesce the device using the class code +and then turn its hardware as "off" as possible with late_suspend. The +matching resume calls would then completely reinitialize the hardware +before reactivating its class I/O queues. + +More power-aware drivers drivers will use more than one device low power +state, either at runtime or during system sleep states, and might trigger +system wakeup events. + + +Call Sequence Guarantees +------------------------ +To ensure that bridges and similar links needed to talk to a device are +available when the device is suspended or resumed, the device tree is +walked in a bottom-up order to suspend devices. A top-down order is +used to resume those devices. + +The ordering of the device tree is defined by the order in which devices +get registered: a child can never be registered, probed or resumed before +its parent; and can't be removed or suspended after that parent. + +The policy is that the device tree should match hardware bus topology. +(Or at least the control bus, for devices which use multiple busses.) + + +Suspending Devices +------------------ +Suspending a given device is done in several phases. Suspending the +system always includes every phase, executing calls for every device +before the next phase begins. Not all busses or classes support all +these callbacks; and not all drivers use all the callbacks. + +The phases are seen by driver notifications issued in this order: + + 1 class.suspend(dev, message) is called after tasks are frozen, for + devices associated with a class that has such a method. This + method may sleep. + + Since I/O activity usually comes from such higher layers, this is + a good place to quiesce all drivers of a given type (and keep such + code out of those drivers). + + 2 bus.suspend(dev, message) is called next. This method may sleep, + and is often morphed into a device driver call with bus-specific + parameters and/or rules. + + This call should handle parts of device suspend logic that require + sleeping. It probably does work to quiesce the device which hasn't + been abstracted into class.suspend() or bus.suspend_late(). + + 3 bus.suspend_late(dev, message) is called with IRQs disabled, and + with only one CPU active. Until the bus.resume_early() phase + completes (see later), IRQs are not enabled again. This method + won't be exposed by all busses; for message based busses like USB, + I2C, or SPI, device interactions normally require IRQs. This bus + call may be morphed into a driver call with bus-specific parameters. + + This call might save low level hardware state that might otherwise + be lost in the upcoming low power state, and actually put the + device into a low power state ... so that in some cases the device + may stay partly usable until this late. This "late" call may also + help when coping with hardware that behaves badly. + +The pm_message_t parameter is currently used to refine those semantics +(described later). + +At the end of those phases, drivers should normally have stopped all I/O +transactions (DMA, IRQs), saved enough state that they can re-initialize +or restore previous state (as needed by the hardware), and placed the +device into a low-power state. On many platforms they will also use +clk_disable() to gate off one or more clock sources; sometimes they will +also switch off power supplies, or reduce voltages. Drivers which have +runtime PM support may already have performed some or all of the steps +needed to prepare for the upcoming system sleep state. + +When any driver sees that its device_can_wakeup(dev), it should make sure +to use the relevant hardware signals to trigger a system wakeup event. +For example, enable_irq_wake() might identify GPIO signals hooked up to +a switch or other external hardware, and pci_enable_wake() does something +similar for PCI's PME# signal. + +If a driver (or bus, or class) fails it suspend method, the system won't +enter the desired low power state; it will resume all the devices it's +suspended so far. + +Note that drivers may need to perform different actions based on the target +system lowpower/sleep state. At this writing, there are only platform +specific APIs through which drivers could determine those target states. + + +Device Low Power (suspend) States +--------------------------------- +Device low-power states aren't very standard. One device might only handle +"on" and "off, while another might support a dozen different versions of +"on" (how many engines are active?), plus a state that gets back to "on" +faster than from a full "off". + +Some busses define rules about what different suspend states mean. PCI +gives one example: after the suspend sequence completes, a non-legacy +PCI device may not perform DMA or issue IRQs, and any wakeup events it +issues would be issued through the PME# bus signal. Plus, there are +several PCI-standard device states, some of which are optional. + +In contrast, integrated system-on-chip processors often use irqs as the +wakeup event sources (so drivers would call enable_irq_wake) and might +be able to treat DMA completion as a wakeup event (sometimes DMA can stay +active too, it'd only be the CPU and some peripherals that sleep). + +Some details here may be platform-specific. Systems may have devices that +can be fully active in certain sleep states, such as an LCD display that's +refreshed using DMA while most of the system is sleeping lightly ... and +its frame buffer might even be updated by a DSP or other non-Linux CPU while +the Linux control processor stays idle. + +Moreover, the specific actions taken may depend on the target system state. +One target system state might allow a given device to be very operational; +another might require a hard shut down with re-initialization on resume. +And two different target systems might use the same device in different +ways; the aforementioned LCD might be active in one product's "standby", +but a different product using the same SOC might work differently. + + +Meaning of pm_message_t.event +----------------------------- +Parameters to suspend calls include the device affected and a message of +type pm_message_t, which has one field: the event. If driver does not +recognize the event code, suspend calls may abort the request and return +a negative errno. However, most drivers will be fine if they implement +PM_EVENT_SUSPEND semantics for all messages. + +The event codes are used to refine the goal of suspending the device, and +mostly matter when creating or resuming system memory image snapshots, as +used with suspend-to-disk: + + PM_EVENT_SUSPEND -- quiesce the driver and put hardware into a low-power + state. When used with system sleep states like "suspend-to-RAM" or + "standby", the upcoming resume() call will often be able to rely on + state kept in hardware, or issue system wakeup events. When used + instead with suspend-to-disk, few devices support this capability; + most are completely powered off. + + PM_EVENT_FREEZE -- quiesce the driver, but don't necessarily change into + any low power mode. A system snapshot is about to be taken, often + followed by a call to the driver's resume() method. Neither wakeup + events nor DMA are allowed. + + PM_EVENT_PRETHAW -- quiesce the driver, knowing that the upcoming resume() + will restore a suspend-to-disk snapshot from a different kernel image. + Drivers that are smart enough to look at their hardware state during + resume() processing need that state to be correct ... a PRETHAW could + be used to invalidate that state (by resetting the device), like a + shutdown() invocation would before a kexec() or system halt. Other + drivers might handle this the same way as PM_EVENT_FREEZE. Neither + wakeup events nor DMA are allowed. + +To enter "standby" (ACPI S1) or "Suspend to RAM" (STR, ACPI S3) states, or +the similarly named APM states, only PM_EVENT_SUSPEND is used; for "Suspend +to Disk" (STD, hibernate, ACPI S4), all of those event codes are used. + +There's also PM_EVENT_ON, a value which never appears as a suspend event +but is sometimes used to record the "not suspended" device state. + + +Resuming Devices +---------------- +Resuming is done in multiple phases, much like suspending, with all +devices processing each phase's calls before the next phase begins. + +The phases are seen by driver notifications issued in this order: + + 1 bus.resume_early(dev) is called with IRQs disabled, and with + only one CPU active. As with bus.suspend_late(), this method + won't be supported on busses that require IRQs in order to + interact with devices. + + This reverses the effects of bus.suspend_late(). + + 2 bus.resume(dev) is called next. This may be morphed into a device + driver call with bus-specific parameters; implementations may sleep. + + This reverses the effects of bus.suspend(). + + 3 class.resume(dev) is called for devices associated with a class + that has such a method. Implementations may sleep. + + This reverses the effects of class.suspend(), and would usually + reactivate the device's I/O queue. + +At the end of those phases, drivers should normally be as functional as +they were before suspending: I/O can be performed using DMA and IRQs, and +the relevant clocks are gated on. The device need not be "fully on"; it +might be in a runtime lowpower/suspend state that acts as if it were. + +However, the details here may again be platform-specific. For example, +some systems support multiple "run" states, and the mode in effect at +the end of resume() might not be the one which preceded suspension. +That means availability of certain clocks or power supplies changed, +which could easily affect how a driver works. + + +Drivers need to be able to handle hardware which has been reset since the +suspend methods were called, for example by complete reinitialization. +This may be the hardest part, and the one most protected by NDA'd documents +and chip errata. It's simplest if the hardware state hasn't changed since +the suspend() was called, but that can't always be guaranteed. + +Drivers must also be prepared to notice that the device has been removed +while the system was powered off, whenever that's physically possible. +PCMCIA, MMC, USB, Firewire, SCSI, and even IDE are common examples of busses +where common Linux platforms will see such removal. Details of how drivers +will notice and handle such removals are currently bus-specific, and often +involve a separate thread. -The methods to suspend and resume devices reside in struct bus_type: -struct bus_type { - ... - int (*suspend)(struct device * dev, pm_message_t state); - int (*resume)(struct device * dev); -}; +Note that the bus-specific runtime PM wakeup mechanism can exist, and might +be defined to share some of the same driver code as for system wakeup. For +example, a bus-specific device driver's resume() method might be used there, +so it wouldn't only be called from bus.resume() during system-wide wakeup. +See bus-specific information about how runtime wakeup events are handled. -Each bus driver is responsible implementing these methods, translating -the call into a bus-specific request and forwarding the call to the -bus-specific drivers. For example, PCI drivers implement suspend() and -resume() methods in struct pci_driver. The PCI core is simply -responsible for translating the pointers to PCI-specific ones and -calling the low-level driver. - -This is done to a) ease transition to the new power management methods -and leverage the existing PM code in various bus drivers; b) allow -buses to implement generic and default PM routines for devices, and c) -make the flow of execution obvious to the reader. - - -System Power Management - -When the system enters a low-power state, the device tree is walked in -a depth-first fashion to transition each device into a low-power -state. The ordering of the device tree is guaranteed by the order in -which devices get registered - children are never registered before -their ancestors, and devices are placed at the back of the list when -registered. By walking the list in reverse order, we are guaranteed to -suspend devices in the proper order. - -Devices are suspended once with interrupts enabled. Drivers are -expected to stop I/O transactions, save device state, and place the -device into a low-power state. Drivers may sleep, allocate memory, -etc. at will. - -Some devices are broken and will inevitably have problems powering -down or disabling themselves with interrupts enabled. For these -special cases, they may return -EAGAIN. This will put the device on a -list to be taken care of later. When interrupts are disabled, before -we enter the low-power state, their drivers are called again to put -their device to sleep. - -On resume, the devices that returned -EAGAIN will be called to power -themselves back on with interrupts disabled. Once interrupts have been -re-enabled, the rest of the drivers will be called to resume their -devices. On resume, a driver is responsible for powering back on each -device, restoring state, and re-enabling I/O transactions for that -device. +System Devices +-------------- System devices follow a slightly different API, which can be found in include/linux/sysdev.h drivers/base/sys.c -System devices will only be suspended with interrupts disabled, and -after all other devices have been suspended. On resume, they will be -resumed before any other devices, and also with interrupts disabled. +System devices will only be suspended with interrupts disabled, and after +all other devices have been suspended. On resume, they will be resumed +before any other devices, and also with interrupts disabled. +That is, IRQs are disabled, the suspend_late() phase begins, then the +sysdev_driver.suspend() phase, and the system enters a sleep state. Then +the sysdev_driver.resume() phase begins, followed by the resume_early() +phase, after which IRQs are enabled. -Runtime Power Management - -Many devices are able to dynamically power down while the system is -still running. This feature is useful for devices that are not being -used, and can offer significant power savings on a running system. - -In each device's directory, there is a 'power' directory, which -contains at least a 'state' file. Reading from this file displays what -power state the device is currently in. Writing to this file initiates -a transition to the specified power state, which must be a decimal in -the range 1-3, inclusive; or 0 for 'On'. +Code to actually enter and exit the system-wide low power state sometimes +involves hardware details that are only known to the boot firmware, and +may leave a CPU running software (from SRAM or flash memory) that monitors +the system and manages its wakeup sequence. -The PM core will call the ->suspend() method in the bus_type object -that the device belongs to if the specified state is not 0, or -->resume() if it is. -Nothing will happen if the specified state is the same state the -device is currently in. - -If the device is already in a low-power state, and the specified state -is another, but different, low-power state, the ->resume() method will -first be called to power the device back on, then ->suspend() will be -called again with the new state. - -The driver is responsible for saving the working state of the device -and putting it into the low-power state specified. If this was -successful, it returns 0, and the device's power_state field is -updated. - -The driver must take care to know whether or not it is able to -properly resume the device, including all step of reinitialization -necessary. (This is the hardest part, and the one most protected by -NDA'd documents). - -The driver must also take care not to suspend a device that is -currently in use. It is their responsibility to provide their own -exclusion mechanisms. - -The runtime power transition happens with interrupts enabled. If a -device cannot support being powered down with interrupts, it may -return -EAGAIN (as it would during a system power management -transition), but it will _not_ be called again, and the transaction -will fail. - -There is currently no way to know what states a device or driver -supports a priori. This will change in the future. - -pm_message_t meaning - -pm_message_t has two fields. event ("major"), and flags. If driver -does not know event code, it aborts the request, returning error. Some -drivers may need to deal with special cases based on the actual type -of suspend operation being done at the system level. This is why -there are flags. - -Event codes are: - -ON -- no need to do anything except special cases like broken -HW. - -# NOTIFICATION -- pretty much same as ON? - -FREEZE -- stop DMA and interrupts, and be prepared to reinit HW from -scratch. That probably means stop accepting upstream requests, the -actual policy of what to do with them being specific to a given -driver. It's acceptable for a network driver to just drop packets -while a block driver is expected to block the queue so no request is -lost. (Use IDE as an example on how to do that). FREEZE requires no -power state change, and it's expected for drivers to be able to -quickly transition back to operating state. - -SUSPEND -- like FREEZE, but also put hardware into low-power state. If -there's need to distinguish several levels of sleep, additional flag -is probably best way to do that. - -Transitions are only from a resumed state to a suspended state, never -between 2 suspended states. (ON -> FREEZE or ON -> SUSPEND can happen, -FREEZE -> SUSPEND or SUSPEND -> FREEZE can not). - -All events are: - -[NOTE NOTE NOTE: If you are driver author, you should not care; you -should only look at event, and ignore flags.] - -#Prepare for suspend -- userland is still running but we are going to -#enter suspend state. This gives drivers chance to load firmware from -#disk and store it in memory, or do other activities taht require -#operating userland, ability to kmalloc GFP_KERNEL, etc... All of these -#are forbiden once the suspend dance is started.. event = ON, flags = -#PREPARE_TO_SUSPEND - -Apm standby -- prepare for APM event. Quiesce devices to make life -easier for APM BIOS. event = FREEZE, flags = APM_STANDBY - -Apm suspend -- same as APM_STANDBY, but it we should probably avoid -spinning down disks. event = FREEZE, flags = APM_SUSPEND - -System halt, reboot -- quiesce devices to make life easier for BIOS. event -= FREEZE, flags = SYSTEM_HALT or SYSTEM_REBOOT - -System shutdown -- at least disks need to be spun down, or data may be -lost. Quiesce devices, just to make life easier for BIOS. event = -FREEZE, flags = SYSTEM_SHUTDOWN - -Kexec -- turn off DMAs and put hardware into some state where new -kernel can take over. event = FREEZE, flags = KEXEC - -Powerdown at end of swsusp -- very similar to SYSTEM_SHUTDOWN, except wake -may need to be enabled on some devices. This actually has at least 3 -subtypes, system can reboot, enter S4 and enter S5 at the end of -swsusp. event = FREEZE, flags = SWSUSP and one of SYSTEM_REBOOT, -SYSTEM_SHUTDOWN, SYSTEM_S4 - -Suspend to ram -- put devices into low power state. event = SUSPEND, -flags = SUSPEND_TO_RAM - -Freeze for swsusp snapshot -- stop DMA and interrupts. No need to put -devices into low power mode, but you must be able to reinitialize -device from scratch in resume method. This has two flavors, its done -once on suspending kernel, once on resuming kernel. event = FREEZE, -flags = DURING_SUSPEND or DURING_RESUME - -Device detach requested from /sys -- deinitialize device; proably same as -SYSTEM_SHUTDOWN, I do not understand this one too much. probably event -= FREEZE, flags = DEV_DETACH. - -#These are not really events sent: -# -#System fully on -- device is working normally; this is probably never -#passed to suspend() method... event = ON, flags = 0 -# -#Ready after resume -- userland is now running, again. Time to free any -#memory you ate during prepare to suspend... event = ON, flags = -#READY_AFTER_RESUME -# +Runtime Power Management +======================== +Many devices are able to dynamically power down while the system is still +running. This feature is useful for devices that are not being used, and +can offer significant power savings on a running system. These devices +often support a range of runtime power states, which might use names such +as "off", "sleep", "idle", "active", and so on. Those states will in some +cases (like PCI) be partially constrained by a bus the device uses, and will +usually include hardware states that are also used in system sleep states. + +However, note that if a driver puts a device into a runtime low power state +and the system then goes into a system-wide sleep state, it normally ought +to resume into that runtime low power state rather than "full on". Such +distinctions would be part of the driver-internal state machine for that +hardware; the whole point of runtime power management is to be sure that +drivers are decoupled in that way from the state machine governing phases +of the system-wide power/sleep state transitions. + + +Power Saving Techniques +----------------------- +Normally runtime power management is handled by the drivers without specific +userspace or kernel intervention, by device-aware use of techniques like: + + Using information provided by other system layers + - stay deeply "off" except between open() and close() + - if transceiver/PHY indicates "nobody connected", stay "off" + - application protocols may include power commands or hints + + Using fewer CPU cycles + - using DMA instead of PIO + - removing timers, or making them lower frequency + - shortening "hot" code paths + - eliminating cache misses + - (sometimes) offloading work to device firmware + + Reducing other resource costs + - gating off unused clocks in software (or hardware) + - switching off unused power supplies + - eliminating (or delaying/merging) IRQs + - tuning DMA to use word and/or burst modes + + Using device-specific low power states + - using lower voltages + - avoiding needless DMA transfers + +Read your hardware documentation carefully to see the opportunities that +may be available. If you can, measure the actual power usage and check +it against the budget established for your project. + + +Examples: USB hosts, system timer, system CPU +---------------------------------------------- +USB host controllers make interesting, if complex, examples. In many cases +these have no work to do: no USB devices are connected, or all of them are +in the USB "suspend" state. Linux host controller drivers can then disable +periodic DMA transfers that would otherwise be a constant power drain on the +memory subsystem, and enter a suspend state. In power-aware controllers, +entering that suspend state may disable the clock used with USB signaling, +saving a certain amount of power. + +The controller will be woken from that state (with an IRQ) by changes to the +signal state on the data lines of a given port, for example by an existing +peripheral requesting "remote wakeup" or by plugging a new peripheral. The +same wakeup mechanism usually works from "standby" sleep states, and on some +systems also from "suspend to RAM" (or even "suspend to disk") states. +(Except that ACPI may be involved instead of normal IRQs, on some hardware.) + +System devices like timers and CPUs may have special roles in the platform +power management scheme. For example, system timers using a "dynamic tick" +approach don't just save CPU cycles (by eliminating needless timer IRQs), +but they may also open the door to using lower power CPU "idle" states that +cost more than a jiffie to enter and exit. On x86 systems these are states +like "C3"; note that periodic DMA transfers from a USB host controller will +also prevent entry to a C3 state, much like a periodic timer IRQ. + +That kind of runtime mechanism interaction is common. "System On Chip" (SOC) +processors often have low power idle modes that can't be entered unless +certain medium-speed clocks (often 12 or 48 MHz) are gated off. When the +drivers gate those clocks effectively, then the system idle task may be able +to use the lower power idle modes and thereby increase battery life. + +If the CPU can have a "cpufreq" driver, there also may be opportunities +to shift to lower voltage settings and reduce the power cost of executing +a given number of instructions. (Without voltage adjustment, it's rare +for cpufreq to save much power; the cost-per-instruction must go down.) + + +/sys/devices/.../power/state files +================================== +For now you can also test some of this functionality using sysfs. + + DEPRECATED: USE "power/state" ONLY FOR DRIVER TESTING, AND + AVOID USING dev->power.power_state IN DRIVERS. + + THESE WILL BE REMOVED. IF THE "power/state" FILE GETS REPLACED, + IT WILL BECOME SOMETHING COUPLED TO THE BUS OR DRIVER. + +In each device's directory, there is a 'power' directory, which contains +at least a 'state' file. The value of this field is effectively boolean, +PM_EVENT_ON or PM_EVENT_SUSPEND. + + * Reading from this file displays a value corresponding to + the power.power_state.event field. All nonzero values are + displayed as "2", corresponding to a low power state; zero + is displayed as "0", corresponding to normal operation. + + * Writing to this file initiates a transition using the + specified event code number; only '0', '2', and '3' are + accepted (without a newline); '2' and '3' are both + mapped to PM_EVENT_SUSPEND. + +On writes, the PM core relies on that recorded event code and the device/bus +capabilities to determine whether it uses a partial suspend() or resume() +sequence to change things so that the recorded event corresponds to the +numeric parameter. + + - If the bus requires the irqs-disabled suspend_late()/resume_early() + phases, writes fail because those operations are not supported here. + + - If the recorded value is the expected value, nothing is done. + + - If the recorded value is nonzero, the device is partially resumed, + using the bus.resume() and/or class.resume() methods. + + - If the target value is nonzero, the device is partially suspended, + using the class.suspend() and/or bus.suspend() methods and the + PM_EVENT_SUSPEND message. + +Drivers have no way to tell whether their suspend() and resume() calls +have come through the sysfs power/state file or as part of entering a +system sleep state, except that when accessed through sysfs the normal +parent/child sequencing rules are ignored. Drivers (such as bus, bridge, +or hub drivers) which expose child devices may need to enforce those rules +on their own. diff --git a/drivers/base/base.h b/drivers/base/base.h index c3b8dc9..d26644a 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -16,7 +16,7 @@ extern int cpu_dev_init(void); extern int attribute_container_init(void); extern int bus_add_device(struct device * dev); -extern void bus_attach_device(struct device * dev); +extern int bus_attach_device(struct device * dev); extern void bus_remove_device(struct device * dev); extern struct bus_type *get_bus(struct bus_type * bus); extern void put_bus(struct bus_type * bus); diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 2e954d0..12173d1 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -371,12 +371,20 @@ int bus_add_device(struct device * dev) if (bus) { pr_debug("bus %s: add device %s\n", bus->name, dev->bus_id); error = device_add_attrs(bus, dev); - if (!error) { - sysfs_create_link(&bus->devices.kobj, &dev->kobj, dev->bus_id); - sysfs_create_link(&dev->kobj, &dev->bus->subsys.kset.kobj, "subsystem"); - sysfs_create_link(&dev->kobj, &dev->bus->subsys.kset.kobj, "bus"); - } + if (error) + goto out; + error = sysfs_create_link(&bus->devices.kobj, + &dev->kobj, dev->bus_id); + if (error) + goto out; + error = sysfs_create_link(&dev->kobj, + &dev->bus->subsys.kset.kobj, "subsystem"); + if (error) + goto out; + error = sysfs_create_link(&dev->kobj, + &dev->bus->subsys.kset.kobj, "bus"); } +out: return error; } @@ -384,16 +392,24 @@ int bus_add_device(struct device * dev) * bus_attach_device - add device to bus * @dev: device tried to attach to a driver * + * - Add device to bus's list of devices. * - Try to attach to driver. */ -void bus_attach_device(struct device * dev) +int bus_attach_device(struct device * dev) { - struct bus_type * bus = dev->bus; + struct bus_type *bus = dev->bus; + int ret = 0; if (bus) { - device_attach(dev); - klist_add_tail(&dev->knode_bus, &bus->klist_devices); + dev->is_registered = 1; + ret = device_attach(dev); + if (ret >= 0) { + klist_add_tail(&dev->knode_bus, &bus->klist_devices); + ret = 0; + } else + dev->is_registered = 0; } + return ret; } /** @@ -412,7 +428,8 @@ void bus_remove_device(struct device * dev) sysfs_remove_link(&dev->kobj, "bus"); sysfs_remove_link(&dev->bus->devices.kobj, dev->bus_id); device_remove_attrs(dev->bus, dev); - klist_remove(&dev->knode_bus); + dev->is_registered = 0; + klist_del(&dev->knode_bus); pr_debug("bus %s: remove device %s\n", dev->bus->name, dev->bus_id); device_release_driver(dev); put_bus(dev->bus); @@ -455,10 +472,17 @@ static void driver_remove_attrs(struct bus_type * bus, struct device_driver * dr * Thanks to drivers making their tables __devinit, we can't allow manual * bind and unbind from userspace unless CONFIG_HOTPLUG is enabled. */ -static void add_bind_files(struct device_driver *drv) +static int __must_check add_bind_files(struct device_driver *drv) { - driver_create_file(drv, &driver_attr_unbind); - driver_create_file(drv, &driver_attr_bind); + int ret; + + ret = driver_create_file(drv, &driver_attr_unbind); + if (ret == 0) { + ret = driver_create_file(drv, &driver_attr_bind); + if (ret) + driver_remove_file(drv, &driver_attr_unbind); + } + return ret; } static void remove_bind_files(struct device_driver *drv) @@ -467,7 +491,7 @@ static void remove_bind_files(struct device_driver *drv) driver_remove_file(drv, &driver_attr_unbind); } #else -static inline void add_bind_files(struct device_driver *drv) {} +static inline int add_bind_files(struct device_driver *drv) { return 0; } static inline void remove_bind_files(struct device_driver *drv) {} #endif @@ -476,7 +500,7 @@ static inline void remove_bind_files(struct device_driver *drv) {} * @drv: driver. * */ -int bus_add_driver(struct device_driver * drv) +int bus_add_driver(struct device_driver *drv) { struct bus_type * bus = get_bus(drv->bus); int error = 0; @@ -484,27 +508,39 @@ int bus_add_driver(struct device_driver * drv) if (bus) { pr_debug("bus %s: add driver %s\n", bus->name, drv->name); error = kobject_set_name(&drv->kobj, "%s", drv->name); - if (error) { - put_bus(bus); - return error; - } + if (error) + goto out_put_bus; drv->kobj.kset = &bus->drivers; - if ((error = kobject_register(&drv->kobj))) { - put_bus(bus); - return error; - } + if ((error = kobject_register(&drv->kobj))) + goto out_put_bus; - driver_attach(drv); + error = driver_attach(drv); + if (error) + goto out_unregister; klist_add_tail(&drv->knode_bus, &bus->klist_drivers); module_add_driver(drv->owner, drv); - driver_add_attrs(bus, drv); - add_bind_files(drv); + error = driver_add_attrs(bus, drv); + if (error) { + /* How the hell do we get out of this pickle? Give up */ + printk(KERN_ERR "%s: driver_add_attrs(%s) failed\n", + __FUNCTION__, drv->name); + } + error = add_bind_files(drv); + if (error) { + /* Ditto */ + printk(KERN_ERR "%s: add_bind_files(%s) failed\n", + __FUNCTION__, drv->name); + } } return error; +out_unregister: + kobject_unregister(&drv->kobj); +out_put_bus: + put_bus(bus); + return error; } - /** * bus_remove_driver - delete driver from bus's knowledge. * @drv: driver. @@ -530,16 +566,21 @@ void bus_remove_driver(struct device_driver * drv) /* Helper for bus_rescan_devices's iter */ -static int bus_rescan_devices_helper(struct device *dev, void *data) +static int __must_check bus_rescan_devices_helper(struct device *dev, + void *data) { + int ret = 0; + if (!dev->driver) { if (dev->parent) /* Needed for USB */ down(&dev->parent->sem); - device_attach(dev); + ret = device_attach(dev); if (dev->parent) up(&dev->parent->sem); + if (ret > 0) + ret = 0; } - return 0; + return ret < 0 ? ret : 0; } /** @@ -550,9 +591,9 @@ static int bus_rescan_devices_helper(struct device *dev, void *data) * attached and rescan it against existing drivers to see if it matches * any by calling device_attach() for the unbound devices. */ -void bus_rescan_devices(struct bus_type * bus) +int bus_rescan_devices(struct bus_type * bus) { - bus_for_each_dev(bus, NULL, NULL, bus_rescan_devices_helper); + return bus_for_each_dev(bus, NULL, NULL, bus_rescan_devices_helper); } /** @@ -564,7 +605,7 @@ void bus_rescan_devices(struct bus_type * bus) * to use if probing criteria changed during a devices lifetime and * driver attachment should change accordingly. */ -void device_reprobe(struct device *dev) +int device_reprobe(struct device *dev) { if (dev->driver) { if (dev->parent) /* Needed for USB */ @@ -573,14 +614,14 @@ void device_reprobe(struct device *dev) if (dev->parent) up(&dev->parent->sem); } - - bus_rescan_devices_helper(dev, NULL); + return bus_rescan_devices_helper(dev, NULL); } EXPORT_SYMBOL_GPL(device_reprobe); -struct bus_type * get_bus(struct bus_type * bus) +struct bus_type *get_bus(struct bus_type *bus) { - return bus ? container_of(subsys_get(&bus->subsys), struct bus_type, subsys) : NULL; + return bus ? container_of(subsys_get(&bus->subsys), + struct bus_type, subsys) : NULL; } void put_bus(struct bus_type * bus) @@ -655,22 +696,6 @@ static void klist_devices_put(struct klist_node *n) put_device(dev); } -static void klist_drivers_get(struct klist_node *n) -{ - struct device_driver *drv = container_of(n, struct device_driver, - knode_bus); - - get_driver(drv); -} - -static void klist_drivers_put(struct klist_node *n) -{ - struct device_driver *drv = container_of(n, struct device_driver, - knode_bus); - - put_driver(drv); -} - /** * bus_register - register a bus with the system. * @bus: bus. @@ -706,7 +731,7 @@ int bus_register(struct bus_type * bus) goto bus_drivers_fail; klist_init(&bus->klist_devices, klist_devices_get, klist_devices_put); - klist_init(&bus->klist_drivers, klist_drivers_get, klist_drivers_put); + klist_init(&bus->klist_drivers, NULL, NULL); bus_add_attrs(bus); pr_debug("bus type '%s' registered\n", bus->name); diff --git a/drivers/base/class.c b/drivers/base/class.c index de89083..b06b0e2 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -19,6 +19,8 @@ #include <linux/slab.h> #include "base.h" +extern struct subsystem devices_subsys; + #define to_class_attr(_attr) container_of(_attr, struct class_attribute, attr) #define to_class(obj) container_of(obj, struct class, subsys.kset.kobj) @@ -197,7 +199,7 @@ static int class_device_create_uevent(struct class_device *class_dev, * Note, the pointer created here is to be destroyed when finished by * making a call to class_destroy(). */ -struct class *class_create(struct module *owner, char *name) +struct class *class_create(struct module *owner, const char *name) { struct class *cls; int retval; @@ -361,7 +363,7 @@ static int class_uevent(struct kset *kset, struct kobject *kobj, char **envp, pr_debug("%s - name = %s\n", __FUNCTION__, class_dev->class_id); if (class_dev->dev) { - /* add physical device, backing this device */ + /* add device, backing this class device (deprecated) */ struct device *dev = class_dev->dev; char *path = kobject_get_path(&dev->kobj, GFP_KERNEL); @@ -679,7 +681,8 @@ int class_device_register(struct class_device *class_dev) struct class_device *class_device_create(struct class *cls, struct class_device *parent, dev_t devt, - struct device *device, char *fmt, ...) + struct device *device, + const char *fmt, ...) { va_list args; struct class_device *class_dev = NULL; @@ -839,6 +842,7 @@ int class_interface_register(struct class_interface *class_intf) { struct class *parent; struct class_device *class_dev; + struct device *dev; if (!class_intf || !class_intf->class) return -ENODEV; @@ -853,6 +857,10 @@ int class_interface_register(struct class_interface *class_intf) list_for_each_entry(class_dev, &parent->children, node) class_intf->add(class_dev, class_intf); } + if (class_intf->add_dev) { + list_for_each_entry(dev, &parent->devices, node) + class_intf->add_dev(dev, class_intf); + } up(&parent->sem); return 0; @@ -862,6 +870,7 @@ void class_interface_unregister(struct class_interface *class_intf) { struct class * parent = class_intf->class; struct class_device *class_dev; + struct device *dev; if (!parent) return; @@ -872,12 +881,31 @@ void class_interface_unregister(struct class_interface *class_intf) list_for_each_entry(class_dev, &parent->children, node) class_intf->remove(class_dev, class_intf); } + if (class_intf->remove_dev) { + list_for_each_entry(dev, &parent->devices, node) + class_intf->remove_dev(dev, class_intf); + } up(&parent->sem); class_put(parent); } +int virtual_device_parent(struct device *dev) +{ + if (!dev->class) + return -ENODEV; + + if (!dev->class->virtual_dir) { + static struct kobject *virtual_dir = NULL; + + if (!virtual_dir) + virtual_dir = kobject_add_dir(&devices_subsys.kset.kobj, "virtual"); + dev->class->virtual_dir = kobject_add_dir(virtual_dir, dev->class->name); + } + dev->kobj.parent = dev->class->virtual_dir; + return 0; +} int __init classes_init(void) { diff --git a/drivers/base/core.c b/drivers/base/core.c index be6b5bc..b224bb4 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -3,6 +3,8 @@ * * Copyright (c) 2002-3 Patrick Mochel * Copyright (c) 2002-3 Open Source Development Labs + * Copyright (c) 2006 Greg Kroah-Hartman <gregkh@suse.de> + * Copyright (c) 2006 Novell, Inc. * * This file is released under the GPLv2 * @@ -92,6 +94,8 @@ static void device_release(struct kobject * kobj) if (dev->release) dev->release(dev); + else if (dev->class && dev->class->dev_release) + dev->class->dev_release(dev); else { printk(KERN_ERR "Device '%s' does not have a release() function, " "it is broken and must be fixed.\n", @@ -149,17 +153,21 @@ static int dev_uevent(struct kset *kset, struct kobject *kobj, char **envp, "MINOR=%u", MINOR(dev->devt)); } - /* add bus name of physical device */ + /* add bus name (same as SUBSYSTEM, deprecated) */ if (dev->bus) add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, "PHYSDEVBUS=%s", dev->bus->name); - /* add driver name of physical device */ - if (dev->driver) + /* add driver name (PHYSDEV* values are deprecated)*/ + if (dev->driver) { + add_uevent_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "DRIVER=%s", dev->driver->name); add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, "PHYSDEVDRIVER=%s", dev->driver->name); + } /* terminate, set to next free slot, shrink available space */ envp[i] = NULL; @@ -177,6 +185,15 @@ static int dev_uevent(struct kset *kset, struct kobject *kobj, char **envp, } } + if (dev->class && dev->class->dev_uevent) { + /* have the class specific function add its stuff */ + retval = dev->class->dev_uevent(dev, envp, num_envp, buffer, buffer_size); + if (retval) { + pr_debug("%s - dev_uevent() returned %d\n", + __FUNCTION__, retval); + } + } + return retval; } @@ -193,6 +210,72 @@ static ssize_t store_uevent(struct device *dev, struct device_attribute *attr, return count; } +static int device_add_groups(struct device *dev) +{ + int i; + int error = 0; + + if (dev->groups) { + for (i = 0; dev->groups[i]; i++) { + error = sysfs_create_group(&dev->kobj, dev->groups[i]); + if (error) { + while (--i >= 0) + sysfs_remove_group(&dev->kobj, dev->groups[i]); + goto out; + } + } + } +out: + return error; +} + +static void device_remove_groups(struct device *dev) +{ + int i; + if (dev->groups) { + for (i = 0; dev->groups[i]; i++) { + sysfs_remove_group(&dev->kobj, dev->groups[i]); + } + } +} + +static int device_add_attrs(struct device *dev) +{ + struct class *class = dev->class; + int error = 0; + int i; + + if (!class) + return 0; + + if (class->dev_attrs) { + for (i = 0; attr_name(class->dev_attrs[i]); i++) { + error = device_create_file(dev, &class->dev_attrs[i]); + if (error) + break; + } + } + if (error) + while (--i >= 0) + device_remove_file(dev, &class->dev_attrs[i]); + return error; +} + +static void device_remove_attrs(struct device *dev) +{ + struct class *class = dev->class; + int i; + + if (!class) + return; + + if (class->dev_attrs) { + for (i = 0; attr_name(class->dev_attrs[i]); i++) + device_remove_file(dev, &class->dev_attrs[i]); + } +} + + static ssize_t show_dev(struct device *dev, struct device_attribute *attr, char *buf) { @@ -236,6 +319,32 @@ void device_remove_file(struct device * dev, struct device_attribute * attr) } } +/** + * device_create_bin_file - create sysfs binary attribute file for device. + * @dev: device. + * @attr: device binary attribute descriptor. + */ +int device_create_bin_file(struct device *dev, struct bin_attribute *attr) +{ + int error = -EINVAL; + if (dev) + error = sysfs_create_bin_file(&dev->kobj, attr); + return error; +} +EXPORT_SYMBOL_GPL(device_create_bin_file); + +/** + * device_remove_bin_file - remove sysfs binary attribute file + * @dev: device. + * @attr: device binary attribute descriptor. + */ +void device_remove_bin_file(struct device *dev, struct bin_attribute *attr) +{ + if (dev) + sysfs_remove_bin_file(&dev->kobj, attr); +} +EXPORT_SYMBOL_GPL(device_remove_bin_file); + static void klist_children_get(struct klist_node *n) { struct device *dev = container_of(n, struct device, knode_parent); @@ -289,12 +398,20 @@ int device_add(struct device *dev) { struct device *parent = NULL; char *class_name = NULL; + struct class_interface *class_intf; int error = -EINVAL; dev = get_device(dev); if (!dev || !strlen(dev->bus_id)) goto Error; + /* if this is a class device, and has no parent, create one */ + if ((dev->class) && (dev->parent == NULL)) { + error = virtual_device_parent(dev); + if (error) + goto Error; + } + parent = get_device(dev->parent); pr_debug("DEV: registering device: ID = '%s'\n", dev->bus_id); @@ -307,6 +424,10 @@ int device_add(struct device *dev) if ((error = kobject_add(&dev->kobj))) goto Error; + /* notify platform of device entry */ + if (platform_notify) + platform_notify(dev); + dev->uevent_attr.attr.name = "uevent"; dev->uevent_attr.attr.mode = S_IWUSR; if (dev->driver) @@ -340,12 +461,17 @@ int device_add(struct device *dev) "subsystem"); sysfs_create_link(&dev->class->subsys.kset.kobj, &dev->kobj, dev->bus_id); - - sysfs_create_link(&dev->kobj, &dev->parent->kobj, "device"); - class_name = make_class_name(dev->class->name, &dev->kobj); - sysfs_create_link(&dev->parent->kobj, &dev->kobj, class_name); + if (parent) { + sysfs_create_link(&dev->kobj, &dev->parent->kobj, "device"); + class_name = make_class_name(dev->class->name, &dev->kobj); + sysfs_create_link(&dev->parent->kobj, &dev->kobj, class_name); + } } + if ((error = device_add_attrs(dev))) + goto AttrsError; + if ((error = device_add_groups(dev))) + goto GroupError; if ((error = device_pm_add(dev))) goto PMError; if ((error = bus_add_device(dev))) @@ -356,15 +482,16 @@ int device_add(struct device *dev) klist_add_tail(&dev->knode_parent, &parent->klist_children); if (dev->class) { - /* tie the class to the device */ down(&dev->class->sem); + /* tie the class to the device */ list_add_tail(&dev->node, &dev->class->devices); + + /* notify any interfaces that the device is here */ + list_for_each_entry(class_intf, &dev->class->interfaces, node) + if (class_intf->add_dev) + class_intf->add_dev(dev, class_intf); up(&dev->class->sem); } - - /* notify platform of device entry */ - if (platform_notify) - platform_notify(dev); Done: kfree(class_name); put_device(dev); @@ -372,6 +499,10 @@ int device_add(struct device *dev) BusError: device_pm_remove(dev); PMError: + device_remove_groups(dev); + GroupError: + device_remove_attrs(dev); + AttrsError: if (dev->devt_attr) { device_remove_file(dev, dev->devt_attr); kfree(dev->devt_attr); @@ -449,6 +580,7 @@ void device_del(struct device * dev) { struct device * parent = dev->parent; char *class_name = NULL; + struct class_interface *class_intf; if (parent) klist_del(&dev->knode_parent); @@ -458,14 +590,23 @@ void device_del(struct device * dev) sysfs_remove_link(&dev->kobj, "subsystem"); sysfs_remove_link(&dev->class->subsys.kset.kobj, dev->bus_id); class_name = make_class_name(dev->class->name, &dev->kobj); - sysfs_remove_link(&dev->kobj, "device"); - sysfs_remove_link(&dev->parent->kobj, class_name); + if (parent) { + sysfs_remove_link(&dev->kobj, "device"); + sysfs_remove_link(&dev->parent->kobj, class_name); + } kfree(class_name); down(&dev->class->sem); + /* notify any interfaces that the device is now gone */ + list_for_each_entry(class_intf, &dev->class->interfaces, node) + if (class_intf->remove_dev) + class_intf->remove_dev(dev, class_intf); + /* remove the device from the class list */ list_del_init(&dev->node); up(&dev->class->sem); } device_remove_file(dev, &dev->uevent_attr); + device_remove_groups(dev); + device_remove_attrs(dev); /* Notify the platform of the removal, in case they * need to do anything... @@ -579,7 +720,7 @@ static void device_create_release(struct device *dev) * been created with a call to class_create(). */ struct device *device_create(struct class *class, struct device *parent, - dev_t devt, char *fmt, ...) + dev_t devt, const char *fmt, ...) { va_list args; struct device *dev = NULL; @@ -587,10 +728,6 @@ struct device *device_create(struct class *class, struct device *parent, if (class == NULL || IS_ERR(class)) goto error; - if (parent == NULL) { - printk(KERN_WARNING "%s does not work yet for NULL parents\n", __FUNCTION__); - goto error; - } dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { @@ -644,3 +781,58 @@ void device_destroy(struct class *class, dev_t devt) device_unregister(dev); } EXPORT_SYMBOL_GPL(device_destroy); + +/** + * device_rename - renames a device + * @dev: the pointer to the struct device to be renamed + * @new_name: the new name of the device + */ +int device_rename(struct device *dev, char *new_name) +{ + char *old_class_name = NULL; + char *new_class_name = NULL; + char *old_symlink_name = NULL; + int error; + + dev = get_device(dev); + if (!dev) + return -EINVAL; + + pr_debug("DEVICE: renaming '%s' to '%s'\n", dev->bus_id, new_name); + + if ((dev->class) && (dev->parent)) + old_class_name = make_class_name(dev->class->name, &dev->kobj); + + if (dev->class) { + old_symlink_name = kmalloc(BUS_ID_SIZE, GFP_KERNEL); + if (!old_symlink_name) + return -ENOMEM; + strlcpy(old_symlink_name, dev->bus_id, BUS_ID_SIZE); + } + + strlcpy(dev->bus_id, new_name, BUS_ID_SIZE); + + error = kobject_rename(&dev->kobj, new_name); + + if (old_class_name) { + new_class_name = make_class_name(dev->class->name, &dev->kobj); + if (new_class_name) { + sysfs_create_link(&dev->parent->kobj, &dev->kobj, + new_class_name); + sysfs_remove_link(&dev->parent->kobj, old_class_name); + } + } + if (dev->class) { + sysfs_remove_link(&dev->class->subsys.kset.kobj, + old_symlink_name); + sysfs_create_link(&dev->class->subsys.kset.kobj, &dev->kobj, + dev->bus_id); + } + put_device(dev); + + kfree(old_class_name); + kfree(new_class_name); + kfree(old_symlink_name); + + return error; +} diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 889c711..b5f43c3 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -17,6 +17,7 @@ #include <linux/device.h> #include <linux/module.h> +#include <linux/kthread.h> #include "base.h" #include "power/power.h" @@ -38,66 +39,73 @@ * * This function must be called with @dev->sem held. */ -void device_bind_driver(struct device * dev) +int device_bind_driver(struct device *dev) { - if (klist_node_attached(&dev->knode_driver)) - return; + int ret; + + if (klist_node_attached(&dev->knode_driver)) { + printk(KERN_WARNING "%s: device %s already bound\n", + __FUNCTION__, kobject_name(&dev->kobj)); + return 0; + } pr_debug("bound device '%s' to driver '%s'\n", dev->bus_id, dev->driver->name); klist_add_tail(&dev->knode_driver, &dev->driver->klist_devices); - sysfs_create_link(&dev->driver->kobj, &dev->kobj, + ret = sysfs_create_link(&dev->driver->kobj, &dev->kobj, kobject_name(&dev->kobj)); - sysfs_create_link(&dev->kobj, &dev->driver->kobj, "driver"); + if (ret == 0) { + ret = sysfs_create_link(&dev->kobj, &dev->driver->kobj, + "driver"); + if (ret) + sysfs_remove_link(&dev->driver->kobj, + kobject_name(&dev->kobj)); + } + return ret; } -/** - * driver_probe_device - attempt to bind device & driver. - * @drv: driver. - * @dev: device. - * - * First, we call the bus's match function, if one present, which - * should compare the device IDs the driver supports with the - * device IDs of the device. Note we don't do this ourselves - * because we don't know the format of the ID structures, nor what - * is to be considered a match and what is not. - * - * This function returns 1 if a match is found, an error if one - * occurs (that is not -ENODEV or -ENXIO), and 0 otherwise. - * - * This function must be called with @dev->sem held. When called - * for a USB interface, @dev->parent->sem must be held as well. - */ -int driver_probe_device(struct device_driver * drv, struct device * dev) +struct stupid_thread_structure { + struct device_driver *drv; + struct device *dev; +}; + +static atomic_t probe_count = ATOMIC_INIT(0); +static int really_probe(void *void_data) { + struct stupid_thread_structure *data = void_data; + struct device_driver *drv = data->drv; + struct device *dev = data->dev; int ret = 0; - if (drv->bus->match && !drv->bus->match(dev, drv)) - goto Done; + atomic_inc(&probe_count); + pr_debug("%s: Probing driver %s with device %s\n", + drv->bus->name, drv->name, dev->bus_id); - pr_debug("%s: Matched Device %s with Driver %s\n", - drv->bus->name, dev->bus_id, drv->name); dev->driver = drv; if (dev->bus->probe) { ret = dev->bus->probe(dev); if (ret) { dev->driver = NULL; - goto ProbeFailed; + goto probe_failed; } } else if (drv->probe) { ret = drv->probe(dev); if (ret) { dev->driver = NULL; - goto ProbeFailed; + goto probe_failed; } } - device_bind_driver(dev); + if (device_bind_driver(dev)) { + printk(KERN_ERR "%s: device_bind_driver(%s) failed\n", + __FUNCTION__, dev->bus_id); + /* How does undo a ->probe? We're screwed. */ + } ret = 1; pr_debug("%s: Bound Device %s to Driver %s\n", drv->bus->name, dev->bus_id, drv->name); - goto Done; + goto done; - ProbeFailed: +probe_failed: if (ret == -ENODEV || ret == -ENXIO) { /* Driver matched, but didn't support device * or device not found. @@ -110,7 +118,71 @@ int driver_probe_device(struct device_driver * drv, struct device * dev) "%s: probe of %s failed with error %d\n", drv->name, dev->bus_id, ret); } - Done: +done: + kfree(data); + atomic_dec(&probe_count); + return ret; +} + +/** + * driver_probe_done + * Determine if the probe sequence is finished or not. + * + * Should somehow figure out how to use a semaphore, not an atomic variable... + */ +int driver_probe_done(void) +{ + pr_debug("%s: probe_count = %d\n", __FUNCTION__, + atomic_read(&probe_count)); + if (atomic_read(&probe_count)) + return -EBUSY; + return 0; +} + +/** + * driver_probe_device - attempt to bind device & driver together + * @drv: driver to bind a device to + * @dev: device to try to bind to the driver + * + * First, we call the bus's match function, if one present, which should + * compare the device IDs the driver supports with the device IDs of the + * device. Note we don't do this ourselves because we don't know the + * format of the ID structures, nor what is to be considered a match and + * what is not. + * + * This function returns 1 if a match is found, an error if one occurs + * (that is not -ENODEV or -ENXIO), and 0 otherwise. + * + * This function must be called with @dev->sem held. When called for a + * USB interface, @dev->parent->sem must be held as well. + */ +int driver_probe_device(struct device_driver * drv, struct device * dev) +{ + struct stupid_thread_structure *data; + struct task_struct *probe_task; + int ret = 0; + + if (!device_is_registered(dev)) + return -ENODEV; + if (drv->bus->match && !drv->bus->match(dev, drv)) + goto done; + + pr_debug("%s: Matched Device %s with Driver %s\n", + drv->bus->name, dev->bus_id, drv->name); + + data = kmalloc(sizeof(*data), GFP_KERNEL); + data->drv = drv; + data->dev = dev; + + if (drv->multithread_probe) { + probe_task = kthread_run(really_probe, data, + "probe-%s", dev->bus_id); + if (IS_ERR(probe_task)) + ret = PTR_ERR(probe_task); + } else + ret = really_probe(data); + +done: return ret; } @@ -139,8 +211,9 @@ int device_attach(struct device * dev) down(&dev->sem); if (dev->driver) { - device_bind_driver(dev); - ret = 1; + ret = device_bind_driver(dev); + if (ret == 0) + ret = 1; } else ret = bus_for_each_drv(dev->bus, NULL, dev, __device_attach); up(&dev->sem); @@ -182,9 +255,9 @@ static int __driver_attach(struct device * dev, void * data) * returns 0 and the @dev->driver is set, we've found a * compatible pair. */ -void driver_attach(struct device_driver * drv) +int driver_attach(struct device_driver * drv) { - bus_for_each_dev(drv->bus, NULL, drv, __driver_attach); + return bus_for_each_dev(drv->bus, NULL, drv, __driver_attach); } /** diff --git a/drivers/base/driver.c b/drivers/base/driver.c index 562600d..1214cbd 100644 --- a/drivers/base/driver.c +++ b/drivers/base/driver.c @@ -142,20 +142,6 @@ void put_driver(struct device_driver * drv) kobject_put(&drv->kobj); } -static void klist_devices_get(struct klist_node *n) -{ - struct device *dev = container_of(n, struct device, knode_driver); - - get_device(dev); -} - -static void klist_devices_put(struct klist_node *n) -{ - struct device *dev = container_of(n, struct device, knode_driver); - - put_device(dev); -} - /** * driver_register - register driver with bus * @drv: driver to register @@ -175,7 +161,7 @@ int driver_register(struct device_driver * drv) (drv->bus->shutdown && drv->shutdown)) { printk(KERN_WARNING "Driver '%s' needs updating - please use bus_type methods\n", drv->name); } - klist_init(&drv->klist_devices, klist_devices_get, klist_devices_put); + klist_init(&drv->klist_devices, NULL, NULL); init_completion(&drv->unloaded); return bus_add_driver(drv); } diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 2b8755d..940ce41 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -505,12 +505,36 @@ static int platform_match(struct device * dev, struct device_driver * drv) return (strncmp(pdev->name, drv->name, BUS_ID_SIZE) == 0); } -static int platform_suspend(struct device * dev, pm_message_t state) +static int platform_suspend(struct device *dev, pm_message_t mesg) { int ret = 0; if (dev->driver && dev->driver->suspend) - ret = dev->driver->suspend(dev, state); + ret = dev->driver->suspend(dev, mesg); + + return ret; +} + +static int platform_suspend_late(struct device *dev, pm_message_t mesg) +{ + struct platform_driver *drv = to_platform_driver(dev->driver); + struct platform_device *pdev = container_of(dev, struct platform_device, dev); + int ret = 0; + + if (dev->driver && drv->suspend_late) + ret = drv->suspend_late(pdev, mesg); + + return ret; +} + +static int platform_resume_early(struct device *dev) +{ + struct platform_driver *drv = to_platform_driver(dev->driver); + struct platform_device *pdev = container_of(dev, struct platform_device, dev); + int ret = 0; + + if (dev->driver && drv->resume_early) + ret = drv->resume_early(pdev); return ret; } @@ -531,6 +555,8 @@ struct bus_type platform_bus_type = { .match = platform_match, .uevent = platform_uevent, .suspend = platform_suspend, + .suspend_late = platform_suspend_late, + .resume_early = platform_resume_early, .resume = platform_resume, }; EXPORT_SYMBOL_GPL(platform_bus_type); diff --git a/drivers/base/power/resume.c b/drivers/base/power/resume.c index 826093e..020be36 100644 --- a/drivers/base/power/resume.c +++ b/drivers/base/power/resume.c @@ -38,13 +38,35 @@ int resume_device(struct device * dev) dev_dbg(dev,"resuming\n"); error = dev->bus->resume(dev); } + if (dev->class && dev->class->resume) { + dev_dbg(dev,"class resume\n"); + error = dev->class->resume(dev); + } up(&dev->sem); TRACE_RESUME(error); return error; } +static int resume_device_early(struct device * dev) +{ + int error = 0; + TRACE_DEVICE(dev); + TRACE_RESUME(0); + if (dev->bus && dev->bus->resume_early) { + dev_dbg(dev,"EARLY resume\n"); + error = dev->bus->resume_early(dev); + } + TRACE_RESUME(error); + return error; +} + +/* + * Resume the devices that have either not gone through + * the late suspend, or that did go through it but also + * went through the early resume + */ void dpm_resume(void) { down(&dpm_list_sem); @@ -74,6 +96,7 @@ void dpm_resume(void) void device_resume(void) { + might_sleep(); down(&dpm_sem); dpm_resume(); up(&dpm_sem); @@ -83,12 +106,12 @@ EXPORT_SYMBOL_GPL(device_resume); /** - * device_power_up_irq - Power on some devices. + * dpm_power_up - Power on some devices. * * Walk the dpm_off_irq list and power each device up. This * is used for devices that required they be powered down with - * interrupts disabled. As devices are powered on, they are moved to - * the dpm_suspended list. + * interrupts disabled. As devices are powered on, they are moved + * to the dpm_active list. * * Interrupts must be disabled when calling this. */ @@ -99,16 +122,14 @@ void dpm_power_up(void) struct list_head * entry = dpm_off_irq.next; struct device * dev = to_device(entry); - get_device(dev); - list_move_tail(entry, &dpm_active); - resume_device(dev); - put_device(dev); + list_move_tail(entry, &dpm_off); + resume_device_early(dev); } } /** - * device_pm_power_up - Turn on all devices that need special attention. + * device_power_up - Turn on all devices that need special attention. * * Power on system devices then devices that required we shut them down * with interrupts disabled. diff --git a/drivers/base/power/suspend.c b/drivers/base/power/suspend.c index 69509e0..ece136b 100644 --- a/drivers/base/power/suspend.c +++ b/drivers/base/power/suspend.c @@ -34,6 +34,7 @@ static inline char *suspend_verb(u32 event) switch (event) { case PM_EVENT_SUSPEND: return "suspend"; case PM_EVENT_FREEZE: return "freeze"; + case PM_EVENT_PRETHAW: return "prethaw"; default: return "(unknown suspend event)"; } } @@ -65,7 +66,19 @@ int suspend_device(struct device * dev, pm_message_t state) dev->power.prev_state = dev->power.power_state; - if (dev->bus && dev->bus->suspend && !dev->power.power_state.event) { + if (dev->class && dev->class->suspend && !dev->power.power_state.event) { + dev_dbg(dev, "class %s%s\n", + suspend_verb(state.event), + ((state.event == PM_EVENT_SUSPEND) + && device_may_wakeup(dev)) + ? ", may wakeup" + : "" + ); + error = dev->class->suspend(dev, state); + suspend_report_result(dev->class->suspend, error); + } + + if (!error && dev->bus && dev->bus->suspend && !dev->power.power_state.event) { dev_dbg(dev, "%s%s\n", suspend_verb(state.event), ((state.event == PM_EVENT_SUSPEND) @@ -81,15 +94,42 @@ int suspend_device(struct device * dev, pm_message_t state) } +/* + * This is called with interrupts off, only a single CPU + * running. We can't do down() on a semaphore (and we don't + * need the protection) + */ +static int suspend_device_late(struct device *dev, pm_message_t state) +{ + int error = 0; + + if (dev->bus && dev->bus->suspend_late && !dev->power.power_state.event) { + dev_dbg(dev, "LATE %s%s\n", + suspend_verb(state.event), + ((state.event == PM_EVENT_SUSPEND) + && device_may_wakeup(dev)) + ? ", may wakeup" + : "" + ); + error = dev->bus->suspend_late(dev, state); + suspend_report_result(dev->bus->suspend_late, error); + } + return error; +} + /** * device_suspend - Save state and stop all devices in system. * @state: Power state to put each device in. * * Walk the dpm_active list, call ->suspend() for each device, and move - * it to dpm_off. - * Check the return value for each. If it returns 0, then we move the - * the device to the dpm_off list. If it returns -EAGAIN, we move it to - * the dpm_off_irq list. If we get a different error, try and back out. + * it to the dpm_off list. + * + * (For historical reasons, if it returns -EAGAIN, that used to mean + * that the device would be called again with interrupts disabled. + * These days, we use the "suspend_late()" callback for that, so we + * print a warning and consider it an error). + * + * If we get a different error, try and back out. * * If we hit a failure with any of the devices, call device_resume() * above to bring the suspended devices back to life. @@ -100,6 +140,7 @@ int device_suspend(pm_message_t state) { int error = 0; + might_sleep(); down(&dpm_sem); down(&dpm_list_sem); while (!list_empty(&dpm_active) && error == 0) { @@ -115,39 +156,27 @@ int device_suspend(pm_message_t state) /* Check if the device got removed */ if (!list_empty(&dev->power.entry)) { - /* Move it to the dpm_off or dpm_off_irq list */ + /* Move it to the dpm_off list */ if (!error) list_move(&dev->power.entry, &dpm_off); - else if (error == -EAGAIN) { - list_move(&dev->power.entry, &dpm_off_irq); - error = 0; - } } if (error) printk(KERN_ERR "Could not suspend device %s: " - "error %d\n", kobject_name(&dev->kobj), error); + "error %d%s\n", + kobject_name(&dev->kobj), error, + error == -EAGAIN ? " (please convert to suspend_late)" : ""); put_device(dev); } up(&dpm_list_sem); - if (error) { - /* we failed... before resuming, bring back devices from - * dpm_off_irq list back to main dpm_off list, we do want - * to call resume() on them, in case they partially suspended - * despite returning -EAGAIN - */ - while (!list_empty(&dpm_off_irq)) { - struct list_head * entry = dpm_off_irq.next; - list_move(entry, &dpm_off); - } + if (error) dpm_resume(); - } + up(&dpm_sem); return error; } EXPORT_SYMBOL_GPL(device_suspend); - /** * device_power_down - Shut down special devices. * @state: Power state to enter. @@ -162,14 +191,17 @@ int device_power_down(pm_message_t state) int error = 0; struct device * dev; - list_for_each_entry_reverse(dev, &dpm_off_irq, power.entry) { - if ((error = suspend_device(dev, state))) - break; + while (!list_empty(&dpm_off)) { + struct list_head * entry = dpm_off.prev; + + dev = to_device(entry); + error = suspend_device_late(dev, state); + if (error) + goto Error; + list_move(&dev->power.entry, &dpm_off_irq); } - if (error) - goto Error; - if ((error = sysdev_suspend(state))) - goto Error; + + error = sysdev_suspend(state); Done: return error; Error: diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 40d7242..2d47517 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -7,22 +7,29 @@ #include "power.h" +#ifdef CONFIG_PM_SYSFS_DEPRECATED + /** * state - Control current power state of device * * show() returns the current power state of the device. '0' indicates - * the device is on. Other values (1-3) indicate the device is in a low + * the device is on. Other values (2) indicate the device is in some low * power state. * - * store() sets the current power state, which is an integer value - * between 0-3. If the device is on ('0'), and the value written is - * greater than 0, then the device is placed directly into the low-power - * state (via its driver's ->suspend() method). - * If the device is currently in a low-power state, and the value is 0, - * the device is powered back on (via the ->resume() method). - * If the device is in a low-power state, and a different low-power state - * is requested, the device is first resumed, then suspended into the new - * low-power state. + * store() sets the current power state, which is an integer valued + * 0, 2, or 3. Devices with bus.suspend_late(), or bus.resume_early() + * methods fail this operation; those methods couldn't be called. + * Otherwise, + * + * - If the recorded dev->power.power_state.event matches the + * target value, nothing is done. + * - If the recorded event code is nonzero, the device is reactivated + * by calling bus.resume() and/or class.resume(). + * - If the target value is nonzero, the device is suspended by + * calling class.suspend() and/or bus.suspend() with event code + * PM_EVENT_SUSPEND. + * + * This mechanism is DEPRECATED and should only be used for testing. */ static ssize_t state_show(struct device * dev, struct device_attribute *attr, char * buf) @@ -38,6 +45,10 @@ static ssize_t state_store(struct device * dev, struct device_attribute *attr, c pm_message_t state; int error = -EINVAL; + /* disallow incomplete suspend sequences */ + if (dev->bus && (dev->bus->suspend_late || dev->bus->resume_early)) + return error; + state.event = PM_EVENT_SUSPEND; /* Older apps expected to write "3" here - confused with PCI D3 */ if ((n == 1) && !strcmp(buf, "3")) @@ -57,6 +68,8 @@ static ssize_t state_store(struct device * dev, struct device_attribute *attr, c static DEVICE_ATTR(state, 0644, state_show, state_store); +#endif /* CONFIG_PM_SYSFS_DEPRECATED */ + /* * wakeup - Report/change current wakeup option for device * @@ -130,7 +143,9 @@ static DEVICE_ATTR(wakeup, 0644, wake_show, wake_store); static struct attribute * power_attrs[] = { +#ifdef CONFIG_PM_SYSFS_DEPRECATED &dev_attr_state.attr, +#endif &dev_attr_wakeup.attr, NULL, }; diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index defd4b4..9c8468d 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -1207,7 +1207,7 @@ int system_bus_clock (void) EXPORT_SYMBOL(system_bus_clock); -static int generic_ide_suspend(struct device *dev, pm_message_t state) +static int generic_ide_suspend(struct device *dev, pm_message_t mesg) { ide_drive_t *drive = dev->driver_data; struct request rq; @@ -1221,7 +1221,9 @@ static int generic_ide_suspend(struct device *dev, pm_message_t state) rq.special = &args; rq.end_io_data = &rqpm; rqpm.pm_step = ide_pm_state_start_suspend; - rqpm.pm_state = state.event; + if (mesg.event == PM_EVENT_PRETHAW) + mesg.event = PM_EVENT_FREEZE; + rqpm.pm_state = mesg.event; return ide_do_drive_cmd(drive, &rq, ide_wait); } diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c index 996c694..31ad79f 100644 --- a/drivers/ide/ppc/pmac.c +++ b/drivers/ide/ppc/pmac.c @@ -1369,15 +1369,16 @@ pmac_ide_macio_attach(struct macio_dev *mdev, const struct of_device_id *match) } static int -pmac_ide_macio_suspend(struct macio_dev *mdev, pm_message_t state) +pmac_ide_macio_suspend(struct macio_dev *mdev, pm_message_t mesg) { ide_hwif_t *hwif = (ide_hwif_t *)dev_get_drvdata(&mdev->ofdev.dev); int rc = 0; - if (state.event != mdev->ofdev.dev.power.power_state.event && state.event >= PM_EVENT_SUSPEND) { + if (mesg.event != mdev->ofdev.dev.power.power_state.event + && mesg.event == PM_EVENT_SUSPEND) { rc = pmac_ide_do_suspend(hwif); if (rc == 0) - mdev->ofdev.dev.power.power_state = state; + mdev->ofdev.dev.power.power_state = mesg; } return rc; @@ -1473,15 +1474,16 @@ pmac_ide_pci_attach(struct pci_dev *pdev, const struct pci_device_id *id) } static int -pmac_ide_pci_suspend(struct pci_dev *pdev, pm_message_t state) +pmac_ide_pci_suspend(struct pci_dev *pdev, pm_message_t mesg) { ide_hwif_t *hwif = (ide_hwif_t *)pci_get_drvdata(pdev); int rc = 0; - if (state.event != pdev->dev.power.power_state.event && state.event >= 2) { + if (mesg.event != pdev->dev.power.power_state.event + && mesg.event == PM_EVENT_SUSPEND) { rc = pmac_ide_do_suspend(hwif); if (rc == 0) - pdev->dev.power.power_state = state; + pdev->dev.power.power_state = mesg; } return rc; diff --git a/drivers/media/dvb/cinergyT2/cinergyT2.c b/drivers/media/dvb/cinergyT2/cinergyT2.c index 001c71b..410fa6d 100644 --- a/drivers/media/dvb/cinergyT2/cinergyT2.c +++ b/drivers/media/dvb/cinergyT2/cinergyT2.c @@ -981,7 +981,7 @@ static int cinergyt2_suspend (struct usb_interface *intf, pm_message_t state) if (cinergyt2->disconnect_pending || mutex_lock_interruptible(&cinergyt2->sem)) return -ERESTARTSYS; - if (state.event > PM_EVENT_ON) { + if (1) { struct cinergyt2 *cinergyt2 = usb_get_intfdata (intf); cinergyt2_suspend_rc(cinergyt2); diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 4d762fc..c27e782 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -17,6 +17,31 @@ config PCI_MSI If you don't know what to do here, say N. +config PCI_MULTITHREAD_PROBE + bool "PCI Multi-threaded probe (EXPERIMENTAL)" + depends on PCI && EXPERIMENTAL + help + Say Y here if you want the PCI core to spawn a new thread for + every PCI device that is probed. This can cause a huge + speedup in boot times on multiprocessor machines, and even a + smaller speedup on single processor machines. + + But it can also cause lots of bad things to happen. A number + of PCI drivers can not properly handle running in this way, + some will just not work properly at all, while others might + decide to blow up power supplies with a huge load all at once, + so use this option at your own risk. + + It is very unwise to use this option if you are not using a + boot process that can handle devices being created in any + order. A program that can create persistant block and network + device names (like udev) is a good idea if you wish to use + this option. + + Again, use this option at your own risk, you have been warned! + + When in doubt, say N. + config PCI_DEBUG bool "PCI Debugging" depends on PCI && DEBUG_KERNEL diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c index 317457d..d0a07d9 100644 --- a/drivers/pci/hotplug/acpiphp_ibm.c +++ b/drivers/pci/hotplug/acpiphp_ibm.c @@ -487,9 +487,7 @@ static void __exit ibm_acpiphp_exit(void) if (ACPI_FAILURE(status)) err("%s: Notification handler removal failed\n", __FUNCTION__); /* remove the /sys entries */ - if (sysfs_remove_bin_file(sysdir, &ibm_apci_table_attr)) - err("%s: removal of sysfs file apci_table failed\n", - __FUNCTION__); + sysfs_remove_bin_file(sysdir, &ibm_apci_table_attr); } module_init(ibm_acpiphp_init); diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 474e9cd..d8ace1f 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -17,6 +17,16 @@ * Registration of PCI drivers and handling of hot-pluggable devices. */ +/* multithreaded probe logic */ +static int pci_multithread_probe = +#ifdef CONFIG_PCI_MULTITHREAD_PROBE + 1; +#else + 0; +#endif +__module_param_call("", pci_multithread_probe, param_set_bool, param_get_bool, &pci_multithread_probe, 0644); + + /* * Dynamic device IDs are disabled for !CONFIG_HOTPLUG */ @@ -279,6 +289,18 @@ static int pci_device_suspend(struct device * dev, pm_message_t state) return i; } +static int pci_device_suspend_late(struct device * dev, pm_message_t state) +{ + struct pci_dev * pci_dev = to_pci_dev(dev); + struct pci_driver * drv = pci_dev->driver; + int i = 0; + + if (drv && drv->suspend_late) { + i = drv->suspend_late(pci_dev, state); + suspend_report_result(drv->suspend_late, i); + } + return i; +} /* * Default resume method for devices that have no driver provided resume, @@ -313,6 +335,17 @@ static int pci_device_resume(struct device * dev) return error; } +static int pci_device_resume_early(struct device * dev) +{ + int error = 0; + struct pci_dev * pci_dev = to_pci_dev(dev); + struct pci_driver * drv = pci_dev->driver; + + if (drv && drv->resume_early) + error = drv->resume_early(pci_dev); + return error; +} + static void pci_device_shutdown(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); @@ -385,6 +418,7 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner) drv->driver.bus = &pci_bus_type; drv->driver.owner = owner; drv->driver.kobj.ktype = &pci_driver_kobj_type; + drv->driver.multithread_probe = pci_multithread_probe; spin_lock_init(&drv->dynids.lock); INIT_LIST_HEAD(&drv->dynids.list); @@ -509,8 +543,10 @@ struct bus_type pci_bus_type = { .probe = pci_device_probe, .remove = pci_device_remove, .suspend = pci_device_suspend, - .shutdown = pci_device_shutdown, + .suspend_late = pci_device_suspend_late, + .resume_early = pci_device_resume_early, .resume = pci_device_resume, + .shutdown = pci_device_shutdown, .dev_attrs = pci_dev_attrs, }; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 9f79dd6..8ab0278 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -432,10 +432,12 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state) case PM_EVENT_ON: return PCI_D0; case PM_EVENT_FREEZE: + case PM_EVENT_PRETHAW: + /* REVISIT both freeze and pre-thaw "should" use D0 */ case PM_EVENT_SUSPEND: return PCI_D3hot; default: - printk("They asked me for state %d\n", state.event); + printk("Unrecognized suspend event %d\n", state.event); BUG(); } return PCI_D0; diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c index 592b52a..683fc7a 100644 --- a/drivers/scsi/mesh.c +++ b/drivers/scsi/mesh.c @@ -1756,16 +1756,23 @@ static void set_mesh_power(struct mesh_state *ms, int state) pmac_call_feature(PMAC_FTR_MESH_ENABLE, macio_get_of_node(ms->mdev), 0, 0); msleep(10); } -} +} #ifdef CONFIG_PM -static int mesh_suspend(struct macio_dev *mdev, pm_message_t state) +static int mesh_suspend(struct macio_dev *mdev, pm_message_t mesg) { struct mesh_state *ms = (struct mesh_state *)macio_get_drvdata(mdev); unsigned long flags; - if (state.event == mdev->ofdev.dev.power.power_state.event || state.event < 2) + switch (mesg.event) { + case PM_EVENT_SUSPEND: + case PM_EVENT_FREEZE: + break; + default: + return 0; + } + if (mesg.event == mdev->ofdev.dev.power.power_state.event) return 0; scsi_block_requests(ms->host); @@ -1780,7 +1787,7 @@ static int mesh_suspend(struct macio_dev *mdev, pm_message_t state) disable_irq(ms->meshintr); set_mesh_power(ms, 0); - mdev->ofdev.dev.power.power_state = state; + mdev->ofdev.dev.power.power_state = mesg; return 0; } diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index 5078fb3..fa36391 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -281,7 +281,7 @@ int usb_hcd_pci_suspend (struct pci_dev *dev, pm_message_t message) (void) usb_hcd_pci_resume (dev); } - } else { + } else if (hcd->state != HC_STATE_HALT) { dev_dbg (hcd->self.controller, "hcd state %d; not suspended\n", hcd->state); WARN_ON(1); diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index cadffac..6967ab7 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -238,6 +238,12 @@ static int ehci_pci_suspend(struct usb_hcd *hcd, pm_message_t message) writel (0, &ehci->regs->intr_enable); (void)readl(&ehci->regs->intr_enable); + /* make sure snapshot being resumed re-enumerates everything */ + if (message.event == PM_EVENT_PRETHAW) { + ehci_halt(ehci); + ehci_reset(ehci); + } + clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); bail: spin_unlock_irqrestore (&ehci->lock, flags); diff --git a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c index b268537..37e1228 100644 --- a/drivers/usb/host/ohci-pci.c +++ b/drivers/usb/host/ohci-pci.c @@ -135,6 +135,11 @@ static int ohci_pci_suspend (struct usb_hcd *hcd, pm_message_t message) } ohci_writel(ohci, OHCI_INTR_MIE, &ohci->regs->intrdisable); (void)ohci_readl(ohci, &ohci->regs->intrdisable); + + /* make sure snapshot being resumed re-enumerates everything */ + if (message.event == PM_EVENT_PRETHAW) + ohci_usb_reset(ohci); + clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); bail: spin_unlock_irqrestore (&ohci->lock, flags); diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c index fa34092..9de115d9 100644 --- a/drivers/usb/host/sl811-hcd.c +++ b/drivers/usb/host/sl811-hcd.c @@ -1783,10 +1783,15 @@ sl811h_suspend(struct platform_device *dev, pm_message_t state) struct sl811 *sl811 = hcd_to_sl811(hcd); int retval = 0; - if (state.event == PM_EVENT_FREEZE) + switch (state.event) { + case PM_EVENT_FREEZE: retval = sl811h_bus_suspend(hcd); - else if (state.event == PM_EVENT_SUSPEND) + break; + case PM_EVENT_SUSPEND: + case PM_EVENT_PRETHAW: /* explicitly discard hw state */ port_power(sl811, 0); + break; + } if (retval == 0) dev->dev.power.power_state = state; return retval; diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c index 4151f61..b7402ce 100644 --- a/drivers/usb/host/uhci-hcd.c +++ b/drivers/usb/host/uhci-hcd.c @@ -734,6 +734,10 @@ static int uhci_suspend(struct usb_hcd *hcd, pm_message_t message) /* FIXME: Enable non-PME# remote wakeup? */ + /* make sure snapshot being resumed re-enumerates everything */ + if (message.event == PM_EVENT_PRETHAW) + uhci_hc_died(uhci); + done_okay: clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); done: diff --git a/drivers/video/aty/radeon_pm.c b/drivers/video/aty/radeon_pm.c index e308ed2..365de5d 100644 --- a/drivers/video/aty/radeon_pm.c +++ b/drivers/video/aty/radeon_pm.c @@ -2621,25 +2621,28 @@ static int radeon_restore_pci_cfg(struct radeonfb_info *rinfo) } -int radeonfb_pci_suspend(struct pci_dev *pdev, pm_message_t state) +int radeonfb_pci_suspend(struct pci_dev *pdev, pm_message_t mesg) { struct fb_info *info = pci_get_drvdata(pdev); struct radeonfb_info *rinfo = info->par; int i; - if (state.event == pdev->dev.power.power_state.event) + if (mesg.event == pdev->dev.power.power_state.event) return 0; - printk(KERN_DEBUG "radeonfb (%s): suspending to state: %d...\n", - pci_name(pdev), state.event); + printk(KERN_DEBUG "radeonfb (%s): suspending for event: %d...\n", + pci_name(pdev), mesg.event); /* For suspend-to-disk, we cheat here. We don't suspend anything and * let fbcon continue drawing until we are all set. That shouldn't * really cause any problem at this point, provided that the wakeup * code knows that any state in memory may not match the HW */ - if (state.event == PM_EVENT_FREEZE) + switch (mesg.event) { + case PM_EVENT_FREEZE: /* about to take snapshot */ + case PM_EVENT_PRETHAW: /* before restoring snapshot */ goto done; + } acquire_console_sem(); @@ -2706,7 +2709,7 @@ int radeonfb_pci_suspend(struct pci_dev *pdev, pm_message_t state) release_console_sem(); done: - pdev->dev.power.power_state = state; + pdev->dev.power.power_state = mesg; return 0; } diff --git a/drivers/video/i810/i810_main.c b/drivers/video/i810/i810_main.c index a6ca02f..d42edac 100644 --- a/drivers/video/i810/i810_main.c +++ b/drivers/video/i810/i810_main.c @@ -1554,15 +1554,17 @@ static struct fb_ops i810fb_ops __devinitdata = { /*********************************************************************** * Power Management * ***********************************************************************/ -static int i810fb_suspend(struct pci_dev *dev, pm_message_t state) +static int i810fb_suspend(struct pci_dev *dev, pm_message_t mesg) { struct fb_info *info = pci_get_drvdata(dev); struct i810fb_par *par = info->par; - par->cur_state = state.event; + par->cur_state = mesg.event; - if (state.event == PM_EVENT_FREEZE) { - dev->dev.power.power_state = state; + switch (mesg.event) { + case PM_EVENT_FREEZE: + case PM_EVENT_PRETHAW: + dev->dev.power.power_state = mesg; return 0; } @@ -1578,7 +1580,7 @@ static int i810fb_suspend(struct pci_dev *dev, pm_message_t state) pci_save_state(dev); pci_disable_device(dev); - pci_set_power_state(dev, pci_choose_state(dev, state)); + pci_set_power_state(dev, pci_choose_state(dev, mesg)); release_console_sem(); return 0; diff --git a/drivers/video/nvidia/nvidia.c b/drivers/video/nvidia/nvidia.c index d4f8501..f8cd4c5 100644 --- a/drivers/video/nvidia/nvidia.c +++ b/drivers/video/nvidia/nvidia.c @@ -950,24 +950,25 @@ static struct fb_ops nvidia_fb_ops = { }; #ifdef CONFIG_PM -static int nvidiafb_suspend(struct pci_dev *dev, pm_message_t state) +static int nvidiafb_suspend(struct pci_dev *dev, pm_message_t mesg) { struct fb_info *info = pci_get_drvdata(dev); struct nvidia_par *par = info->par; + if (mesg.event == PM_EVENT_PRETHAW) + mesg.event = PM_EVENT_FREEZE; acquire_console_sem(); - par->pm_state = state.event; + par->pm_state = mesg.event; - if (state.event == PM_EVENT_FREEZE) { - dev->dev.power.power_state = state; - } else { + if (mesg.event == PM_EVENT_SUSPEND) { fb_set_suspend(info, 1); nvidiafb_blank(FB_BLANK_POWERDOWN, info); nvidia_write_regs(par, &par->SavedReg); pci_save_state(dev); pci_disable_device(dev); - pci_set_power_state(dev, pci_choose_state(dev, state)); + pci_set_power_state(dev, pci_choose_state(dev, mesg)); } + dev->dev.power.power_state = mesg; release_console_sem(); return 0; diff --git a/drivers/video/savage/savagefb_driver.c b/drivers/video/savage/savagefb_driver.c index 461e094..82b3dea 100644 --- a/drivers/video/savage/savagefb_driver.c +++ b/drivers/video/savage/savagefb_driver.c @@ -2323,24 +2323,24 @@ static void __devexit savagefb_remove(struct pci_dev *dev) } } -static int savagefb_suspend(struct pci_dev* dev, pm_message_t state) +static int savagefb_suspend(struct pci_dev *dev, pm_message_t mesg) { struct fb_info *info = pci_get_drvdata(dev); struct savagefb_par *par = info->par; DBG("savagefb_suspend"); - - par->pm_state = state.event; + if (mesg.event == PM_EVENT_PRETHAW) + mesg.event = PM_EVENT_FREEZE; + par->pm_state = mesg.event; + dev->dev.power.power_state = mesg; /* * For PM_EVENT_FREEZE, do not power down so the console * can remain active. */ - if (state.event == PM_EVENT_FREEZE) { - dev->dev.power.power_state = state; + if (mesg.event == PM_EVENT_FREEZE) return 0; - } acquire_console_sem(); fb_set_suspend(info, 1); @@ -2353,7 +2353,7 @@ static int savagefb_suspend(struct pci_dev* dev, pm_message_t state) savage_disable_mmio(par); pci_save_state(dev); pci_disable_device(dev); - pci_set_power_state(dev, pci_choose_state(dev, state)); + pci_set_power_state(dev, pci_choose_state(dev, mesg)); release_console_sem(); return 0; diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 39640fd..e4b4305 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -55,12 +55,11 @@ static u64 debugfs_u8_get(void *data) DEFINE_SIMPLE_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n"); /** - * debugfs_create_u8 - create a file in the debugfs filesystem that is used to read and write an unsigned 8 bit value. - * + * debugfs_create_u8 - create a debugfs file that is used to read and write an unsigned 8-bit value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a - * directory dentry if set. If this paramater is NULL, then the + * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. @@ -72,11 +71,11 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n"); * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, NULL will be returned. + * you are responsible here.) If an error occurs, %NULL will be returned. * - * If debugfs is not enabled in the kernel, the value -ENODEV will be + * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. It is not wise to check for this value, but rather, check for - * NULL or !NULL instead as to eliminate the need for #ifdef in the calling + * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling * code. */ struct dentry *debugfs_create_u8(const char *name, mode_t mode, @@ -97,12 +96,11 @@ static u64 debugfs_u16_get(void *data) DEFINE_SIMPLE_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n"); /** - * debugfs_create_u16 - create a file in the debugfs filesystem that is used to read and write an unsigned 16 bit value. - * + * debugfs_create_u16 - create a debugfs file that is used to read and write an unsigned 16-bit value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a - * directory dentry if set. If this paramater is NULL, then the + * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. @@ -114,11 +112,11 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n"); * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, NULL will be returned. + * you are responsible here.) If an error occurs, %NULL will be returned. * - * If debugfs is not enabled in the kernel, the value -ENODEV will be + * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. It is not wise to check for this value, but rather, check for - * NULL or !NULL instead as to eliminate the need for #ifdef in the calling + * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling * code. */ struct dentry *debugfs_create_u16(const char *name, mode_t mode, @@ -139,12 +137,11 @@ static u64 debugfs_u32_get(void *data) DEFINE_SIMPLE_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n"); /** - * debugfs_create_u32 - create a file in the debugfs filesystem that is used to read and write an unsigned 32 bit value. - * + * debugfs_create_u32 - create a debugfs file that is used to read and write an unsigned 32-bit value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a - * directory dentry if set. If this paramater is NULL, then the + * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. @@ -156,11 +153,11 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n"); * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, NULL will be returned. + * you are responsible here.) If an error occurs, %NULL will be returned. * - * If debugfs is not enabled in the kernel, the value -ENODEV will be + * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. It is not wise to check for this value, but rather, check for - * NULL or !NULL instead as to eliminate the need for #ifdef in the calling + * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling * code. */ struct dentry *debugfs_create_u32(const char *name, mode_t mode, @@ -219,12 +216,11 @@ static const struct file_operations fops_bool = { }; /** - * debugfs_create_bool - create a file in the debugfs filesystem that is used to read and write a boolean value. - * + * debugfs_create_bool - create a debugfs file that is used to read and write a boolean value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a - * directory dentry if set. If this paramater is NULL, then the + * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. @@ -236,11 +232,11 @@ static const struct file_operations fops_bool = { * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, NULL will be returned. + * you are responsible here.) If an error occurs, %NULL will be returned. * - * If debugfs is not enabled in the kernel, the value -ENODEV will be + * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. It is not wise to check for this value, but rather, check for - * NULL or !NULL instead as to eliminate the need for #ifdef in the calling + * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling * code. */ struct dentry *debugfs_create_bool(const char *name, mode_t mode, @@ -264,13 +260,11 @@ static struct file_operations fops_blob = { }; /** - * debugfs_create_blob - create a file in the debugfs filesystem that is - * used to read and write a binary blob. - * + * debugfs_create_blob - create a debugfs file that is used to read and write a binary blob * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a - * directory dentry if set. If this paramater is NULL, then the + * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @blob: a pointer to a struct debugfs_blob_wrapper which contains a pointer * to the blob data and the size of the data. @@ -282,11 +276,11 @@ static struct file_operations fops_blob = { * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, NULL will be returned. + * you are responsible here.) If an error occurs, %NULL will be returned. * - * If debugfs is not enabled in the kernel, the value -ENODEV will be + * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. It is not wise to check for this value, but rather, check for - * NULL or !NULL instead as to eliminate the need for #ifdef in the calling + * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling * code. */ struct dentry *debugfs_create_blob(const char *name, mode_t mode, diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index e8ae304..3ca268d 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -162,7 +162,6 @@ static int debugfs_create_by_name(const char *name, mode_t mode, /** * debugfs_create_file - create a file in the debugfs filesystem - * * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a @@ -182,11 +181,11 @@ static int debugfs_create_by_name(const char *name, mode_t mode, * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, NULL will be returned. + * you are responsible here.) If an error occurs, %NULL will be returned. * - * If debugfs is not enabled in the kernel, the value -ENODEV will be + * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. It is not wise to check for this value, but rather, check for - * NULL or !NULL instead as to eliminate the need for #ifdef in the calling + * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling * code. */ struct dentry *debugfs_create_file(const char *name, mode_t mode, @@ -221,7 +220,6 @@ EXPORT_SYMBOL_GPL(debugfs_create_file); /** * debugfs_create_dir - create a directory in the debugfs filesystem - * * @name: a pointer to a string containing the name of the directory to * create. * @parent: a pointer to the parent dentry for this file. This should be a @@ -233,11 +231,11 @@ EXPORT_SYMBOL_GPL(debugfs_create_file); * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, NULL will be returned. + * you are responsible here.) If an error occurs, %NULL will be returned. * - * If debugfs is not enabled in the kernel, the value -ENODEV will be + * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. It is not wise to check for this value, but rather, check for - * NULL or !NULL instead as to eliminate the need for #ifdef in the calling + * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling * code. */ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) @@ -250,7 +248,6 @@ EXPORT_SYMBOL_GPL(debugfs_create_dir); /** * debugfs_remove - removes a file or directory from the debugfs filesystem - * * @dentry: a pointer to a the dentry of the file or directory to be * removed. * diff --git a/fs/namespace.c b/fs/namespace.c index fa7ed6a9f..36d1808 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -17,6 +17,7 @@ #include <linux/acct.h> #include <linux/capability.h> #include <linux/module.h> +#include <linux/sysfs.h> #include <linux/seq_file.h> #include <linux/namespace.h> #include <linux/namei.h> @@ -28,15 +29,6 @@ extern int __init init_rootfs(void); -#ifdef CONFIG_SYSFS -extern int __init sysfs_init(void); -#else -static inline int sysfs_init(void) -{ - return 0; -} -#endif - /* spinlock for vfsmount related operations, inplace of dcache_lock */ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index c16a93c..98022e4 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -10,6 +10,7 @@ #include <linux/errno.h> #include <linux/fs.h> +#include <linux/kernel.h> #include <linux/kobject.h> #include <linux/module.h> #include <linux/slab.h> @@ -176,7 +177,6 @@ const struct file_operations bin_fops = { * sysfs_create_bin_file - create binary file for object. * @kobj: object. * @attr: attribute descriptor. - * */ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr) @@ -191,13 +191,16 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr) * sysfs_remove_bin_file - remove binary file for object. * @kobj: object. * @attr: attribute descriptor. - * */ -int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr) +void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr) { - sysfs_hash_and_remove(kobj->dentry,attr->attr.name); - return 0; + if (sysfs_hash_and_remove(kobj->dentry, attr->attr.name) < 0) { + printk(KERN_ERR "%s: " + "bad dentry or inode or no such file: \"%s\"\n", + __FUNCTION__, attr->attr.name); + dump_stack(); + } } EXPORT_SYMBOL_GPL(sysfs_create_bin_file); diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 61c4243..5f3d725 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -43,7 +43,7 @@ static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent * parent_sd, memset(sd, 0, sizeof(*sd)); atomic_set(&sd->s_count, 1); - atomic_set(&sd->s_event, 0); + atomic_set(&sd->s_event, 1); INIT_LIST_HEAD(&sd->s_children); list_add(&sd->s_sibling, &parent_sd->s_children); sd->s_element = element; diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 9889e54..fd7cd5f 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -12,6 +12,7 @@ #include <linux/namei.h> #include <linux/backing-dev.h> #include <linux/capability.h> +#include <linux/errno.h> #include "sysfs.h" extern struct super_block * sysfs_sb; @@ -234,17 +235,18 @@ void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent) } } -void sysfs_hash_and_remove(struct dentry * dir, const char * name) +int sysfs_hash_and_remove(struct dentry * dir, const char * name) { struct sysfs_dirent * sd; struct sysfs_dirent * parent_sd; + int found = 0; if (!dir) - return; + return -ENOENT; if (dir->d_inode == NULL) /* no inode means this hasn't been made visible yet */ - return; + return -ENOENT; parent_sd = dir->d_fsdata; mutex_lock(&dir->d_inode->i_mutex); @@ -255,8 +257,11 @@ void sysfs_hash_and_remove(struct dentry * dir, const char * name) list_del_init(&sd->s_sibling); sysfs_drop_dentry(sd, dir); sysfs_put(sd); + found = 1; break; } } mutex_unlock(&dir->d_inode->i_mutex); + + return found ? 0 : -ENOENT; } diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index d2eac3c..f50e3cc 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -3,6 +3,7 @@ */ #include <linux/fs.h> +#include <linux/mount.h> #include <linux/module.h> #include <linux/kobject.h> #include <linux/namei.h> @@ -82,10 +83,19 @@ exit1: */ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name) { - struct dentry * dentry = kobj->dentry; + struct dentry *dentry = NULL; int error = -EEXIST; - BUG_ON(!kobj || !kobj->dentry || !name); + BUG_ON(!name); + + if (!kobj) { + if (sysfs_mount && sysfs_mount->mnt_sb) + dentry = sysfs_mount->mnt_sb->s_root; + } else + dentry = kobj->dentry; + + if (!dentry) + return -EFAULT; mutex_lock(&dentry->d_inode->i_mutex); if (!sysfs_dirent_exist(dentry->d_fsdata, name)) diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 3651ffb..6f3d6bd 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -10,7 +10,7 @@ extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *, umode_t, int); extern int sysfs_add_file(struct dentry *, const struct attribute *, int); -extern void sysfs_hash_and_remove(struct dentry * dir, const char * name); +extern int sysfs_hash_and_remove(struct dentry * dir, const char * name); extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name); extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **); diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 9b4f110..060b961 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -99,6 +99,11 @@ extern void __chk_io_ptr(void __iomem *); #define __must_check #endif +#ifndef CONFIG_ENABLE_MUST_CHECK +#undef __must_check +#define __must_check +#endif + /* * Allow us to avoid 'defined but not used' warnings on functions and data, * as well as force them to be emitted to the assembly file. diff --git a/include/linux/device.h b/include/linux/device.h index 1e5f30d..662e6a1 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -15,6 +15,7 @@ #include <linux/kobject.h> #include <linux/klist.h> #include <linux/list.h> +#include <linux/compiler.h> #include <linux/types.h> #include <linux/module.h> #include <linux/pm.h> @@ -51,14 +52,17 @@ struct bus_type { int (*probe)(struct device * dev); int (*remove)(struct device * dev); void (*shutdown)(struct device * dev); - int (*suspend)(struct device * dev, pm_message_t state); - int (*resume)(struct device * dev); + + int (*suspend)(struct device * dev, pm_message_t state); + int (*suspend_late)(struct device * dev, pm_message_t state); + int (*resume_early)(struct device * dev); + int (*resume)(struct device * dev); }; -extern int bus_register(struct bus_type * bus); +extern int __must_check bus_register(struct bus_type * bus); extern void bus_unregister(struct bus_type * bus); -extern void bus_rescan_devices(struct bus_type * bus); +extern int __must_check bus_rescan_devices(struct bus_type * bus); /* iterator helpers for buses */ @@ -67,9 +71,9 @@ int bus_for_each_dev(struct bus_type * bus, struct device * start, void * data, struct device * bus_find_device(struct bus_type *bus, struct device *start, void *data, int (*match)(struct device *, void *)); -int bus_for_each_drv(struct bus_type * bus, struct device_driver * start, - void * data, int (*fn)(struct device_driver *, void *)); - +int __must_check bus_for_each_drv(struct bus_type *bus, + struct device_driver *start, void *data, + int (*fn)(struct device_driver *, void *)); /* driverfs interface for exporting bus attributes */ @@ -82,7 +86,8 @@ struct bus_attribute { #define BUS_ATTR(_name,_mode,_show,_store) \ struct bus_attribute bus_attr_##_name = __ATTR(_name,_mode,_show,_store) -extern int bus_create_file(struct bus_type *, struct bus_attribute *); +extern int __must_check bus_create_file(struct bus_type *, + struct bus_attribute *); extern void bus_remove_file(struct bus_type *, struct bus_attribute *); struct device_driver { @@ -101,16 +106,18 @@ struct device_driver { void (*shutdown) (struct device * dev); int (*suspend) (struct device * dev, pm_message_t state); int (*resume) (struct device * dev); + + unsigned int multithread_probe:1; }; -extern int driver_register(struct device_driver * drv); +extern int __must_check driver_register(struct device_driver * drv); extern void driver_unregister(struct device_driver * drv); extern struct device_driver * get_driver(struct device_driver * drv); extern void put_driver(struct device_driver * drv); extern struct device_driver *driver_find(const char *name, struct bus_type *bus); - +extern int driver_probe_done(void); /* driverfs interface for exporting driver attributes */ @@ -123,16 +130,17 @@ struct driver_attribute { #define DRIVER_ATTR(_name,_mode,_show,_store) \ struct driver_attribute driver_attr_##_name = __ATTR(_name,_mode,_show,_store) -extern int driver_create_file(struct device_driver *, struct driver_attribute *); +extern int __must_check driver_create_file(struct device_driver *, + struct driver_attribute *); extern void driver_remove_file(struct device_driver *, struct driver_attribute *); -extern int driver_for_each_device(struct device_driver * drv, struct device * start, - void * data, int (*fn)(struct device *, void *)); +extern int __must_check driver_for_each_device(struct device_driver * drv, + struct device *start, void *data, + int (*fn)(struct device *, void *)); struct device * driver_find_device(struct device_driver *drv, struct device *start, void *data, int (*match)(struct device *, void *)); - /* * device classes */ @@ -146,17 +154,26 @@ struct class { struct list_head interfaces; struct semaphore sem; /* locks both the children and interfaces lists */ + struct kobject *virtual_dir; + struct class_attribute * class_attrs; struct class_device_attribute * class_dev_attrs; + struct device_attribute * dev_attrs; int (*uevent)(struct class_device *dev, char **envp, int num_envp, char *buffer, int buffer_size); + int (*dev_uevent)(struct device *dev, char **envp, int num_envp, + char *buffer, int buffer_size); void (*release)(struct class_device *dev); void (*class_release)(struct class *class); + void (*dev_release)(struct device *dev); + + int (*suspend)(struct device *, pm_message_t state); + int (*resume)(struct device *); }; -extern int class_register(struct class *); +extern int __must_check class_register(struct class *); extern void class_unregister(struct class *); @@ -169,7 +186,8 @@ struct class_attribute { #define CLASS_ATTR(_name,_mode,_show,_store) \ struct class_attribute class_attr_##_name = __ATTR(_name,_mode,_show,_store) -extern int class_create_file(struct class *, const struct class_attribute *); +extern int __must_check class_create_file(struct class *, + const struct class_attribute *); extern void class_remove_file(struct class *, const struct class_attribute *); struct class_device_attribute { @@ -182,7 +200,7 @@ struct class_device_attribute { struct class_device_attribute class_device_attr_##_name = \ __ATTR(_name,_mode,_show,_store) -extern int class_device_create_file(struct class_device *, +extern int __must_check class_device_create_file(struct class_device *, const struct class_device_attribute *); /** @@ -242,10 +260,10 @@ class_set_devdata (struct class_device *dev, void *data) } -extern int class_device_register(struct class_device *); +extern int __must_check class_device_register(struct class_device *); extern void class_device_unregister(struct class_device *); extern void class_device_initialize(struct class_device *); -extern int class_device_add(struct class_device *); +extern int __must_check class_device_add(struct class_device *); extern void class_device_del(struct class_device *); extern int class_device_rename(struct class_device *, char *); @@ -255,7 +273,7 @@ extern void class_device_put(struct class_device *); extern void class_device_remove_file(struct class_device *, const struct class_device_attribute *); -extern int class_device_create_bin_file(struct class_device *, +extern int __must_check class_device_create_bin_file(struct class_device *, struct bin_attribute *); extern void class_device_remove_bin_file(struct class_device *, struct bin_attribute *); @@ -266,22 +284,23 @@ struct class_interface { int (*add) (struct class_device *, struct class_interface *); void (*remove) (struct class_device *, struct class_interface *); + int (*add_dev) (struct device *, struct class_interface *); + void (*remove_dev) (struct device *, struct class_interface *); }; -extern int class_interface_register(struct class_interface *); +extern int __must_check class_interface_register(struct class_interface *); extern void class_interface_unregister(struct class_interface *); -extern struct class *class_create(struct module *owner, char *name); +extern struct class *class_create(struct module *owner, const char *name); extern void class_destroy(struct class *cls); extern struct class_device *class_device_create(struct class *cls, struct class_device *parent, dev_t devt, struct device *device, - char *fmt, ...) + const char *fmt, ...) __attribute__((format(printf,5,6))); extern void class_device_destroy(struct class *cls, dev_t devt); - /* interface for exporting device attributes */ struct device_attribute { struct attribute attr; @@ -294,8 +313,13 @@ struct device_attribute { #define DEVICE_ATTR(_name,_mode,_show,_store) \ struct device_attribute dev_attr_##_name = __ATTR(_name,_mode,_show,_store) -extern int device_create_file(struct device *device, struct device_attribute * entry); +extern int __must_check device_create_file(struct device *device, + struct device_attribute * entry); extern void device_remove_file(struct device * dev, struct device_attribute * attr); +extern int __must_check device_create_bin_file(struct device *dev, + struct bin_attribute *attr); +extern void device_remove_bin_file(struct device *dev, + struct bin_attribute *attr); struct device { struct klist klist_children; struct klist_node knode_parent; /* node in sibling list */ @@ -305,6 +329,7 @@ struct device { struct kobject kobj; char bus_id[BUS_ID_SIZE]; /* position on parent bus */ + unsigned is_registered:1; struct device_attribute uevent_attr; struct device_attribute *devt_attr; @@ -338,6 +363,7 @@ struct device { struct list_head node; struct class *class; /* optional*/ dev_t devt; /* dev_t, creates the sysfs "dev" */ + struct attribute_group **groups; /* optional groups */ void (*release)(struct device * dev); }; @@ -356,38 +382,41 @@ dev_set_drvdata (struct device *dev, void *data) static inline int device_is_registered(struct device *dev) { - return klist_node_attached(&dev->knode_bus); + return dev->is_registered; } /* * High level routines for use by the bus drivers */ -extern int device_register(struct device * dev); +extern int __must_check device_register(struct device * dev); extern void device_unregister(struct device * dev); extern void device_initialize(struct device * dev); -extern int device_add(struct device * dev); +extern int __must_check device_add(struct device * dev); extern void device_del(struct device * dev); -extern int device_for_each_child(struct device *, void *, +extern int __must_check device_for_each_child(struct device *, void *, int (*fn)(struct device *, void *)); +extern int device_rename(struct device *dev, char *new_name); /* * Manual binding of a device to driver. See drivers/base/bus.c * for information on use. */ -extern void device_bind_driver(struct device * dev); +extern int __must_check device_bind_driver(struct device *dev); extern void device_release_driver(struct device * dev); -extern int device_attach(struct device * dev); -extern void driver_attach(struct device_driver * drv); -extern void device_reprobe(struct device *dev); +extern int __must_check device_attach(struct device * dev); +extern int __must_check driver_attach(struct device_driver *drv); +extern int __must_check device_reprobe(struct device *dev); /* * Easy functions for dynamically creating devices on the fly */ extern struct device *device_create(struct class *cls, struct device *parent, - dev_t devt, char *fmt, ...) + dev_t devt, const char *fmt, ...) __attribute__((format(printf,4,5))); extern void device_destroy(struct class *cls, dev_t devt); +extern int virtual_device_parent(struct device *dev); + /* * Platform "fixup" functions - allow the platform to have their say * about devices and actions that the general device layer doesn't @@ -412,7 +441,7 @@ extern void device_shutdown(void); /* drivers/base/firmware.c */ -extern int firmware_register(struct subsystem *); +extern int __must_check firmware_register(struct subsystem *); extern void firmware_unregister(struct subsystem *); /* debugging and troubleshooting/diagnostic helpers. */ diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 2d22932..bcd9cd1 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -20,6 +20,7 @@ #include <linux/types.h> #include <linux/list.h> #include <linux/sysfs.h> +#include <linux/compiler.h> #include <linux/spinlock.h> #include <linux/rwsem.h> #include <linux/kref.h> @@ -71,12 +72,12 @@ static inline const char * kobject_name(const struct kobject * kobj) extern void kobject_init(struct kobject *); extern void kobject_cleanup(struct kobject *); -extern int kobject_add(struct kobject *); +extern int __must_check kobject_add(struct kobject *); extern void kobject_del(struct kobject *); -extern int kobject_rename(struct kobject *, const char *new_name); +extern int __must_check kobject_rename(struct kobject *, const char *new_name); -extern int kobject_register(struct kobject *); +extern int __must_check kobject_register(struct kobject *); extern void kobject_unregister(struct kobject *); extern struct kobject * kobject_get(struct kobject *); @@ -128,8 +129,8 @@ struct kset { extern void kset_init(struct kset * k); -extern int kset_add(struct kset * k); -extern int kset_register(struct kset * k); +extern int __must_check kset_add(struct kset * k); +extern int __must_check kset_register(struct kset * k); extern void kset_unregister(struct kset * k); static inline struct kset * to_kset(struct kobject * kobj) @@ -239,7 +240,7 @@ extern struct subsystem hypervisor_subsys; (obj)->subsys.kset.kobj.kset = &(_subsys).kset extern void subsystem_init(struct subsystem *); -extern int subsystem_register(struct subsystem *); +extern int __must_check subsystem_register(struct subsystem *); extern void subsystem_unregister(struct subsystem *); static inline struct subsystem * subsys_get(struct subsystem * s) @@ -258,7 +259,8 @@ struct subsys_attribute { ssize_t (*store)(struct subsystem *, const char *, size_t); }; -extern int subsys_create_file(struct subsystem * , struct subsys_attribute *); +extern int __must_check subsys_create_file(struct subsystem * , + struct subsys_attribute *); #if defined(CONFIG_HOTPLUG) void kobject_uevent(struct kobject *kobj, enum kobject_action action); diff --git a/include/linux/pci.h b/include/linux/pci.h index 8565b81..3ec7255 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -49,6 +49,7 @@ #include <linux/types.h> #include <linux/ioport.h> #include <linux/list.h> +#include <linux/compiler.h> #include <linux/errno.h> #include <linux/device.h> @@ -346,6 +347,8 @@ struct pci_driver { int (*probe) (struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */ void (*remove) (struct pci_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */ int (*suspend) (struct pci_dev *dev, pm_message_t state); /* Device suspended */ + int (*suspend_late) (struct pci_dev *dev, pm_message_t state); + int (*resume_early) (struct pci_dev *dev); int (*resume) (struct pci_dev *dev); /* Device woken up */ int (*enable_wake) (struct pci_dev *dev, pci_power_t state, int enable); /* Enable wake event */ void (*shutdown) (struct pci_dev *dev); @@ -401,7 +404,7 @@ extern struct list_head pci_root_buses; /* list of all known PCI buses */ extern struct list_head pci_devices; /* list of all devices */ void pcibios_fixup_bus(struct pci_bus *); -int pcibios_enable_device(struct pci_dev *, int mask); +int __must_check pcibios_enable_device(struct pci_dev *, int mask); char *pcibios_setup (char *str); /* Used only when drivers/pci/setup.c is used */ @@ -488,19 +491,19 @@ static inline int pci_write_config_dword(struct pci_dev *dev, int where, u32 val return pci_bus_write_config_dword (dev->bus, dev->devfn, where, val); } -int pci_enable_device(struct pci_dev *dev); -int pci_enable_device_bars(struct pci_dev *dev, int mask); +int __must_check pci_enable_device(struct pci_dev *dev); +int __must_check pci_enable_device_bars(struct pci_dev *dev, int mask); void pci_disable_device(struct pci_dev *dev); void pci_set_master(struct pci_dev *dev); #define HAVE_PCI_SET_MWI -int pci_set_mwi(struct pci_dev *dev); +int __must_check pci_set_mwi(struct pci_dev *dev); void pci_clear_mwi(struct pci_dev *dev); void pci_intx(struct pci_dev *dev, int enable); int pci_set_dma_mask(struct pci_dev *dev, u64 mask); int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask); void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno); -int pci_assign_resource(struct pci_dev *dev, int i); -int pci_assign_resource_fixed(struct pci_dev *dev, int i); +int __must_check pci_assign_resource(struct pci_dev *dev, int i); +int __must_check pci_assign_resource_fixed(struct pci_dev *dev, int i); void pci_restore_bars(struct pci_dev *dev); /* ROM control related routines */ @@ -526,23 +529,24 @@ void pdev_sort_resources(struct pci_dev *, struct resource_list *); void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *), int (*)(struct pci_dev *, u8, u8)); #define HAVE_PCI_REQ_REGIONS 2 -int pci_request_regions(struct pci_dev *, const char *); +int __must_check pci_request_regions(struct pci_dev *, const char *); void pci_release_regions(struct pci_dev *); -int pci_request_region(struct pci_dev *, int, const char *); +int __must_check pci_request_region(struct pci_dev *, int, const char *); void pci_release_region(struct pci_dev *, int); /* drivers/pci/bus.c */ -int pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, - resource_size_t size, resource_size_t align, - resource_size_t min, unsigned int type_mask, - void (*alignf)(void *, struct resource *, - resource_size_t, resource_size_t), - void *alignf_data); +int __must_check pci_bus_alloc_resource(struct pci_bus *bus, + struct resource *res, resource_size_t size, + resource_size_t align, resource_size_t min, + unsigned int type_mask, + void (*alignf)(void *, struct resource *, + resource_size_t, resource_size_t), + void *alignf_data); void pci_enable_bridges(struct pci_bus *bus); /* Proper probing supporting hot-pluggable devices */ -int __pci_register_driver(struct pci_driver *, struct module *); -static inline int pci_register_driver(struct pci_driver *driver) +int __must_check __pci_register_driver(struct pci_driver *, struct module *); +static inline int __must_check pci_register_driver(struct pci_driver *driver) { return __pci_register_driver(driver, THIS_MODULE); } diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 782090c..29cd6de 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -49,6 +49,8 @@ struct platform_driver { int (*remove)(struct platform_device *); void (*shutdown)(struct platform_device *); int (*suspend)(struct platform_device *, pm_message_t state); + int (*suspend_late)(struct platform_device *, pm_message_t state); + int (*resume_early)(struct platform_device *); int (*resume)(struct platform_device *); struct device_driver driver; }; diff --git a/include/linux/pm.h b/include/linux/pm.h index 658c1b9..6b27e07 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -142,29 +142,61 @@ typedef struct pm_message { } pm_message_t; /* - * There are 4 important states driver can be in: - * ON -- driver is working - * FREEZE -- stop operations and apply whatever policy is applicable to a - * suspended driver of that class, freeze queues for block like IDE - * does, drop packets for ethernet, etc... stop DMA engine too etc... - * so a consistent image can be saved; but do not power any hardware - * down. - * SUSPEND - like FREEZE, but hardware is doing as much powersaving as - * possible. Roughly pci D3. + * Several driver power state transitions are externally visible, affecting + * the state of pending I/O queues and (for drivers that touch hardware) + * interrupts, wakeups, DMA, and other hardware state. There may also be + * internal transitions to various low power modes, which are transparent + * to the rest of the driver stack (such as a driver that's ON gating off + * clocks which are not in active use). * - * Unfortunately, current drivers only recognize numeric values 0 (ON) and 3 - * (SUSPEND). We'll need to fix the drivers. So yes, putting 3 to all different - * defines is intentional, and will go away as soon as drivers are fixed. Also - * note that typedef is neccessary, we'll probably want to switch to - * typedef struct pm_message_t { int event; int flags; } pm_message_t - * or something similar soon. + * One transition is triggered by resume(), after a suspend() call; the + * message is implicit: + * + * ON Driver starts working again, responding to hardware events + * and software requests. The hardware may have gone through + * a power-off reset, or it may have maintained state from the + * previous suspend() which the driver will rely on while + * resuming. On most platforms, there are no restrictions on + * availability of resources like clocks during resume(). + * + * Other transitions are triggered by messages sent using suspend(). All + * these transitions quiesce the driver, so that I/O queues are inactive. + * That commonly entails turning off IRQs and DMA; there may be rules + * about how to quiesce that are specific to the bus or the device's type. + * (For example, network drivers mark the link state.) Other details may + * differ according to the message: + * + * SUSPEND Quiesce, enter a low power device state appropriate for + * the upcoming system state (such as PCI_D3hot), and enable + * wakeup events as appropriate. + * + * FREEZE Quiesce operations so that a consistent image can be saved; + * but do NOT otherwise enter a low power device state, and do + * NOT emit system wakeup events. + * + * PRETHAW Quiesce as if for FREEZE; additionally, prepare for restoring + * the system from a snapshot taken after an earlier FREEZE. + * Some drivers will need to reset their hardware state instead + * of preserving it, to ensure that it's never mistaken for the + * state which that earlier snapshot had set up. + * + * A minimally power-aware driver treats all messages as SUSPEND, fully + * reinitializes its device during resume() -- whether or not it was reset + * during the suspend/resume cycle -- and can't issue wakeup events. + * + * More power-aware drivers may also use low power states at runtime as + * well as during system sleep states like PM_SUSPEND_STANDBY. They may + * be able to use wakeup events to exit from runtime low-power states, + * or from system low-power states such as standby or suspend-to-RAM. */ #define PM_EVENT_ON 0 #define PM_EVENT_FREEZE 1 #define PM_EVENT_SUSPEND 2 +#define PM_EVENT_PRETHAW 3 #define PMSG_FREEZE ((struct pm_message){ .event = PM_EVENT_FREEZE, }) +#define PMSG_PRETHAW ((struct pm_message){ .event = PM_EVENT_PRETHAW, }) #define PMSG_SUSPEND ((struct pm_message){ .event = PM_EVENT_SUSPEND, }) #define PMSG_ON ((struct pm_message){ .event = PM_EVENT_ON, }) @@ -190,6 +222,7 @@ extern void device_resume(void); extern suspend_disk_method_t pm_disk_mode; extern int device_suspend(pm_message_t state); +extern int device_prepare_suspend(pm_message_t state); #define device_set_wakeup_enable(dev,val) \ ((dev)->power.should_wakeup = !!(val)) diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 1ea5d3c..6d5c43d 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -10,6 +10,7 @@ #ifndef _SYSFS_H_ #define _SYSFS_H_ +#include <linux/compiler.h> #include <asm/atomic.h> struct kobject; @@ -86,40 +87,44 @@ struct sysfs_dirent { #ifdef CONFIG_SYSFS -extern int +extern int __must_check sysfs_create_dir(struct kobject *); extern void sysfs_remove_dir(struct kobject *); -extern int +extern int __must_check sysfs_rename_dir(struct kobject *, const char *new_name); -extern int +extern int __must_check sysfs_create_file(struct kobject *, const struct attribute *); -extern int +extern int __must_check sysfs_update_file(struct kobject *, const struct attribute *); -extern int +extern int __must_check sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode); extern void sysfs_remove_file(struct kobject *, const struct attribute *); -extern int +extern int __must_check sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name); extern void sysfs_remove_link(struct kobject *, const char * name); -int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr); -int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr); +int __must_check sysfs_create_bin_file(struct kobject *kobj, + struct bin_attribute *attr); +void sysfs_remove_bin_file(struct kobject *kobj, struct bin_attribute *attr); -int sysfs_create_group(struct kobject *, const struct attribute_group *); +int __must_check sysfs_create_group(struct kobject *, + const struct attribute_group *); void sysfs_remove_group(struct kobject *, const struct attribute_group *); void sysfs_notify(struct kobject * k, char *dir, char *attr); +extern int __must_check sysfs_init(void); + #else /* CONFIG_SYSFS */ static inline int sysfs_create_dir(struct kobject * k) @@ -191,6 +196,11 @@ static inline void sysfs_notify(struct kobject * k, char *dir, char *attr) { } +static inline int __must_check sysfs_init(void) +{ + return 0; +} + #endif /* CONFIG_SYSFS */ #endif /* _SYSFS_H_ */ diff --git a/include/media/v4l2-dev.h b/include/media/v4l2-dev.h index 810462f..bb495b7 100644 --- a/include/media/v4l2-dev.h +++ b/include/media/v4l2-dev.h @@ -341,7 +341,7 @@ extern int video_usercopy(struct inode *inode, struct file *file, extern struct video_device* video_devdata(struct file*); #define to_video_device(cd) container_of(cd, struct video_device, class_dev) -static inline int +static inline int __must_check video_device_create_file(struct video_device *vfd, struct class_device_attribute *attr) { diff --git a/init/do_mounts.c b/init/do_mounts.c index 94aeec7..b290aad 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -8,6 +8,7 @@ #include <linux/security.h> #include <linux/delay.h> #include <linux/mount.h> +#include <linux/device.h> #include <linux/nfs_fs.h> #include <linux/nfs_fs_sb.h> @@ -403,6 +404,10 @@ void __init prepare_namespace(void) ssleep(root_delay); } + /* wait for the known devices to complete their probing */ + while (driver_probe_done() != 0) + msleep(100); + md_run_setup(); if (saved_root_name[0]) { diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 4b6e2f1..825068c 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -64,6 +64,17 @@ config PM_TRACE CAUTION: this option will cause your machine's real-time clock to be set to an invalid time after a resume. +config PM_SYSFS_DEPRECATED + bool "Driver model /sys/devices/.../power/state files (DEPRECATED)" + depends on PM && SYSFS + default n + help + The driver model started out with a sysfs file intended to provide + a userspace hook for device power management. This feature has never + worked very well, except for limited testing purposes, and so it will + be removed. It's not clear that a generic mechanism could really + handle the wide variability of device power states; any replacements + are likely to be bus or driver specific. config SOFTWARE_SUSPEND bool "Software Suspend" diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 7c7b9b6..d722349 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -103,7 +103,7 @@ static void unprepare_processes(void) } /** - * pm_suspend_disk - The granpappy of power management. + * pm_suspend_disk - The granpappy of hibernation power management. * * If we're going through the firmware, then get it over with quickly. * @@ -212,7 +212,7 @@ static int software_resume(void) pr_debug("PM: Preparing devices for restore.\n"); - if ((error = device_suspend(PMSG_FREEZE))) { + if ((error = device_suspend(PMSG_PRETHAW))) { printk("Some devices failed to suspend\n"); swsusp_free(); goto Thaw; diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 8ef677e..0b66659 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -247,6 +247,9 @@ int swsusp_suspend(void) restore_processor_state(); Restore_highmem: restore_highmem(); + /* NOTE: device_power_up() is just a resume() for devices + * that suspended with irqs off ... no overall powerup. + */ device_power_up(); Enable_irqs: local_irq_enable(); @@ -256,8 +259,12 @@ Enable_irqs: int swsusp_resume(void) { int error; + local_irq_disable(); - if (device_power_down(PMSG_FREEZE)) + /* NOTE: device_power_down() is just a suspend() with irqs off; + * it has no special "power things down" semantics + */ + if (device_power_down(PMSG_PRETHAW)) printk(KERN_ERR "Some devices failed to power down, very bad\n"); /* We'll ignore saved state, but this gets preempt count (etc) right */ save_processor_state(); diff --git a/kernel/power/user.c b/kernel/power/user.c index 2e4499f..72825c8 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -196,7 +196,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, snapshot_free_unused_memory(&data->handle); down(&pm_sem); pm_prepare_console(); - error = device_suspend(PMSG_FREEZE); + error = device_suspend(PMSG_PRETHAW); if (!error) { error = swsusp_resume(); device_resume(); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 3f21cc7..2869307c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -8,6 +8,13 @@ config PRINTK_TIME operations. This is useful for identifying long delays in kernel startup. +config ENABLE_MUST_CHECK + bool "Enable __must_check logic" + default y + help + Enable the __must_check logic in the kernel build. Disable this to + suppress the "warning: ignoring return value of 'foo', declared with + attribute warn_unused_result" messages. config MAGIC_SYSRQ bool "Magic SysRq key" diff --git a/lib/klist.c b/lib/klist.c index 9c94f0b..120bd17 100644 --- a/lib/klist.c +++ b/lib/klist.c @@ -123,12 +123,10 @@ EXPORT_SYMBOL_GPL(klist_add_tail); static void klist_release(struct kref * kref) { struct klist_node * n = container_of(kref, struct klist_node, n_ref); - void (*put)(struct klist_node *) = n->n_klist->put; + list_del(&n->n_node); complete(&n->n_removed); n->n_klist = NULL; - if (put) - put(n); } static int klist_dec_and_del(struct klist_node * n) @@ -145,10 +143,14 @@ static int klist_dec_and_del(struct klist_node * n) void klist_del(struct klist_node * n) { struct klist * k = n->n_klist; + void (*put)(struct klist_node *) = k->put; spin_lock(&k->k_lock); - klist_dec_and_del(n); + if (!klist_dec_and_del(n)) + put = NULL; spin_unlock(&k->k_lock); + if (put) + put(n); } EXPORT_SYMBOL_GPL(klist_del); @@ -161,10 +163,7 @@ EXPORT_SYMBOL_GPL(klist_del); void klist_remove(struct klist_node * n) { - struct klist * k = n->n_klist; - spin_lock(&k->k_lock); - klist_dec_and_del(n); - spin_unlock(&k->k_lock); + klist_del(n); wait_for_completion(&n->n_removed); } @@ -260,12 +259,15 @@ static struct klist_node * to_klist_node(struct list_head * n) struct klist_node * klist_next(struct klist_iter * i) { struct list_head * next; + struct klist_node * lnode = i->i_cur; struct klist_node * knode = NULL; + void (*put)(struct klist_node *) = i->i_klist->put; spin_lock(&i->i_klist->k_lock); - if (i->i_cur) { - next = i->i_cur->n_node.next; - klist_dec_and_del(i->i_cur); + if (lnode) { + next = lnode->n_node.next; + if (!klist_dec_and_del(lnode)) + put = NULL; } else next = i->i_head->next; @@ -275,6 +277,8 @@ struct klist_node * klist_next(struct klist_iter * i) } i->i_cur = knode; spin_unlock(&i->i_klist->k_lock); + if (put && lnode) + put(lnode); return knode; } diff --git a/lib/kobject.c b/lib/kobject.c index 8e7c719..1699eb9 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -407,6 +407,7 @@ static struct kobj_type dir_ktype = { struct kobject *kobject_add_dir(struct kobject *parent, const char *name) { struct kobject *k; + int ret; if (!parent) return NULL; @@ -418,7 +419,13 @@ struct kobject *kobject_add_dir(struct kobject *parent, const char *name) k->parent = parent; k->ktype = &dir_ktype; kobject_set_name(k, name); - kobject_register(k); + ret = kobject_register(k); + if (ret < 0) { + printk(KERN_WARNING "kobject_add_dir: " + "kobject_register error: %d\n", ret); + kobject_del(k); + return NULL; + } return k; } |