diff options
Diffstat (limited to 'Documentation')
142 files changed, 4892 insertions, 743 deletions
diff --git a/Documentation/ABI/removed/devfs b/Documentation/ABI/removed/devfs index 8195c4e..8ffd28b 100644 --- a/Documentation/ABI/removed/devfs +++ b/Documentation/ABI/removed/devfs @@ -6,7 +6,7 @@ Description: races, contains a naming policy within the kernel that is against the LSB, and can be replaced by using udev. The files fs/devfs/*, include/linux/devfs_fs*.h were removed, - along with the the assorted devfs function calls throughout the + along with the assorted devfs function calls throughout the kernel tree. Users: diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle index 9069189..afc2867 100644 --- a/Documentation/CodingStyle +++ b/Documentation/CodingStyle @@ -160,6 +160,21 @@ supply of new-lines on your screen is not a renewable resource (think 25-line terminal screens here), you have more empty lines to put comments on. +Do not unnecessarily use braces where a single statement will do. + +if (condition) + action(); + +This does not apply if one branch of a conditional statement is a single +statement. Use braces in both branches. + +if (condition) { + do_this(); + do_that(); +} else { + otherwise(); +} + 3.1: Spaces Linux kernel style for use of spaces depends (mostly) on @@ -625,7 +640,7 @@ language. There appears to be a common misperception that gcc has a magic "make me faster" speedup option called "inline". While the use of inlines can be -appropriate (for example as a means of replacing macros, see Chapter 11), it +appropriate (for example as a means of replacing macros, see Chapter 12), it very often is not. Abundant use of the inline keyword leads to a much bigger kernel, which in turn slows the system as a whole down, due to a bigger icache footprint for the CPU and simply because there is less memory diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 867608a..6fd1646 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -41,8 +41,9 @@ psdocs: $(PS) PDF := $(patsubst %.xml, %.pdf, $(BOOKS)) pdfdocs: $(PDF) -HTML := $(patsubst %.xml, %.html, $(BOOKS)) +HTML := $(sort $(patsubst %.xml, %.html, $(BOOKS))) htmldocs: $(HTML) + $(call build_main_index) MAN := $(patsubst %.xml, %.9, $(BOOKS)) mandocs: $(MAN) @@ -132,10 +133,17 @@ quiet_cmd_db2pdf = PDF $@ %.pdf : %.xml $(call cmd,db2pdf) + +main_idx = Documentation/DocBook/index.html +build_main_index = rm -rf $(main_idx) && \ + echo '<h1>Linux Kernel HTML Documentation</h1>' >> $(main_idx) && \ + echo '<h2>Kernel Version: $(KERNELVERSION)</h2>' >> $(main_idx) && \ + cat $(HTML) >> $(main_idx) + quiet_cmd_db2html = HTML $@ cmd_db2html = xmlto xhtml $(XMLTOFLAGS) -o $(patsubst %.html,%,$@) $< && \ echo '<a HREF="$(patsubst %.html,%,$(notdir $@))/index.html"> \ - Goto $(patsubst %.html,%,$(notdir $@))</a><p>' > $@ + $(patsubst %.html,%,$(notdir $@))</a><p>' > $@ %.html: %.xml @(which xmlto > /dev/null 2>&1) || \ @@ -152,6 +160,7 @@ quiet_cmd_db2man = MAN $@ @(which xmlto > /dev/null 2>&1) || \ (echo "*** You need to install xmlto ***"; \ exit 1) + $(Q)mkdir -p $(obj)/man $(call cmd,db2man) @touch $@ @@ -212,11 +221,7 @@ clean-files := $(DOCBOOKS) \ $(patsubst %.xml, %.9, $(DOCBOOKS)) \ $(C-procfs-example) -clean-dirs := $(patsubst %.xml,%,$(DOCBOOKS)) - -#man put files in man subdir - traverse down -subdir- := man/ - +clean-dirs := $(patsubst %.xml,%,$(DOCBOOKS)) man # Declare the contents of the .PHONY variable as phony. We keep that # information in a variable se we can use it in if_changed and friends. diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index b61dfc7..38f88b6 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl @@ -84,6 +84,10 @@ X!Iinclude/linux/kobject.h !Ekernel/rcupdate.c </sect1> + <sect1><title>Device Resource Management</title> +!Edrivers/base/devres.c + </sect1> + </chapter> <chapter id="adt"> @@ -576,4 +580,67 @@ X!Idrivers/video/console/fonts.c !Edrivers/input/ff-core.c !Edrivers/input/ff-memless.c </chapter> + + <chapter id="spi"> + <title>Serial Peripheral Interface (SPI)</title> + <para> + SPI is the "Serial Peripheral Interface", widely used with + embedded systems because it is a simple and efficient + interface: basically a multiplexed shift register. + Its three signal wires hold a clock (SCK, often in the range + of 1-20 MHz), a "Master Out, Slave In" (MOSI) data line, and + a "Master In, Slave Out" (MISO) data line. + SPI is a full duplex protocol; for each bit shifted out the + MOSI line (one per clock) another is shifted in on the MISO line. + Those bits are assembled into words of various sizes on the + way to and from system memory. + An additional chipselect line is usually active-low (nCS); + four signals are normally used for each peripheral, plus + sometimes an interrupt. + </para> + <para> + The SPI bus facilities listed here provide a generalized + interface to declare SPI busses and devices, manage them + according to the standard Linux driver model, and perform + input/output operations. + At this time, only "master" side interfaces are supported, + where Linux talks to SPI peripherals and does not implement + such a peripheral itself. + (Interfaces to support implementing SPI slaves would + necessarily look different.) + </para> + <para> + The programming interface is structured around two kinds of driver, + and two kinds of device. + A "Controller Driver" abstracts the controller hardware, which may + be as simple as a set of GPIO pins or as complex as a pair of FIFOs + connected to dual DMA engines on the other side of the SPI shift + register (maximizing throughput). Such drivers bridge between + whatever bus they sit on (often the platform bus) and SPI, and + expose the SPI side of their device as a + <structname>struct spi_master</structname>. + SPI devices are children of that master, represented as a + <structname>struct spi_device</structname> and manufactured from + <structname>struct spi_board_info</structname> descriptors which + are usually provided by board-specific initialization code. + A <structname>struct spi_driver</structname> is called a + "Protocol Driver", and is bound to a spi_device using normal + driver model calls. + </para> + <para> + The I/O model is a set of queued messages. Protocol drivers + submit one or more <structname>struct spi_message</structname> + objects, which are processed and completed asynchronously. + (There are synchronous wrappers, however.) Messages are + built from one or more <structname>struct spi_transfer</structname> + objects, each of which wraps a full duplex SPI transfer. + A variety of protocol tweaking options are needed, because + different chips adopt very different policies for how they + use the bits transferred with SPI. + </para> +!Iinclude/linux/spi/spi.h +!Fdrivers/spi/spi.c spi_register_board_info +!Edrivers/spi/spi.c + </chapter> + </book> diff --git a/Documentation/DocBook/librs.tmpl b/Documentation/DocBook/librs.tmpl index 3ff39ba..94f2136 100644 --- a/Documentation/DocBook/librs.tmpl +++ b/Documentation/DocBook/librs.tmpl @@ -79,12 +79,12 @@ <chapter id="usage"> <title>Usage</title> <para> - This chapter provides examples how to use the library. + This chapter provides examples of how to use the library. </para> <sect1> <title>Initializing</title> <para> - The init function init_rs returns a pointer to a + The init function init_rs returns a pointer to an rs decoder structure, which holds the necessary information for encoding, decoding and error correction with the given polynomial. It either uses an existing @@ -98,10 +98,10 @@ static struct rs_control *rs_decoder; /* Symbolsize is 10 (bits) - * Primitve polynomial is x^10+x^3+1 + * Primitive polynomial is x^10+x^3+1 * first consecutive root is 0 - * primitve element to generate roots = 1 - * generator polinomial degree (number of roots) = 6 + * primitive element to generate roots = 1 + * generator polynomial degree (number of roots) = 6 */ rs_decoder = init_rs (10, 0x409, 0, 1, 6); </programlisting> @@ -116,12 +116,12 @@ rs_decoder = init_rs (10, 0x409, 0, 1, 6); </para> <para> The expanded data can be inverted on the fly by - providing a non zero inversion mask. The expanded data is + providing a non-zero inversion mask. The expanded data is XOR'ed with the mask. This is used e.g. for FLASH ECC, where the all 0xFF is inverted to an all 0x00. The Reed-Solomon code for all 0x00 is all 0x00. The code is inverted before storing to FLASH so it is 0xFF - too. This prevent's that reading from an erased FLASH + too. This prevents that reading from an erased FLASH results in ECC errors. </para> <para> @@ -273,7 +273,7 @@ free_rs(rs_decoder); May be used under the terms of the GNU General Public License (GPL) </programlisting> <para> - The wrapper functions and interfaces are written by Thomas Gleixner + The wrapper functions and interfaces are written by Thomas Gleixner. </para> <para> Many users have provided bugfixes, improvements and helping hands for testing. diff --git a/Documentation/DocBook/man/Makefile b/Documentation/DocBook/man/Makefile deleted file mode 100644 index 4fb7ea0..0000000 --- a/Documentation/DocBook/man/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -# Rules are put in Documentation/DocBook - -clean-files := *.9.gz *.sgml manpage.links manpage.refs diff --git a/Documentation/MSI-HOWTO.txt b/Documentation/MSI-HOWTO.txt index d389388..0d82407 100644 --- a/Documentation/MSI-HOWTO.txt +++ b/Documentation/MSI-HOWTO.txt @@ -480,8 +480,8 @@ The PCI stack provides 3 possible levels of MSI disabling: 6.1. Disabling MSI on a single device -Under some circumstances, it might be required to disable MSI on a -single device, It may be achived by either not calling pci_enable_msi() +Under some circumstances it might be required to disable MSI on a +single device. This may be achieved by either not calling pci_enable_msi() or all, or setting the pci_dev->no_msi flag before (most of the time in a quirk). @@ -492,7 +492,7 @@ being able to route MSI between busses. In this case, MSI have to be disabled on all devices behind this bridge. It is achieves by setting the PCI_BUS_FLAGS_NO_MSI flag in the pci_bus->bus_flags of the bridge subordinate bus. There is no need to set the same flag on bridges that -are below the broken brigde. When pci_enable_msi() is called to enable +are below the broken bridge. When pci_enable_msi() is called to enable MSI on a device, pci_msi_supported() takes care of checking the NO_MSI flag in all parent busses of the device. diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist index bd23dc0..6491b2c 100644 --- a/Documentation/SubmitChecklist +++ b/Documentation/SubmitChecklist @@ -80,3 +80,7 @@ kernel patches. 23: Tested after it has been merged into the -mm patchset to make sure that it still works with all of the other queued patches and various changes in the VM, VFS, and other subsystems. + +24: Avoid whitespace damage such as indenting with spaces or whitespace + at the end of lines. You can test this by feeding the patch to + "git apply --check --whitespace=error-all" diff --git a/Documentation/SubmittingDrivers b/Documentation/SubmittingDrivers index 58bead0..d7e2642 100644 --- a/Documentation/SubmittingDrivers +++ b/Documentation/SubmittingDrivers @@ -87,6 +87,21 @@ Clarity: It helps if anyone can see how to fix the driver. It helps driver that intentionally obfuscates how the hardware works it will go in the bitbucket. +PM support: Since Linux is used on many portable and desktop systems, your + driver is likely to be used on such a system and therefore it + should support basic power management by implementing, if + necessary, the .suspend and .resume methods used during the + system-wide suspend and resume transitions. You should verify + that your driver correctly handles the suspend and resume, but + if you are unable to ensure that, please at least define the + .suspend method returning the -ENOSYS ("Function not + implemented") error. You should also try to make sure that your + driver uses as little power as possible when it's not doing + anything. For the driver testing instructions see + Documentation/power/drivers-testing.txt and for a relatively + complete overview of the power management issues related to + drivers see Documentation/power/devices.txt . + Control: In general if there is active maintainance of a driver by the author then patches will be redirected to them unless they are totally obvious and without need of checking. diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index b0d0043..a417b25 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -363,7 +363,8 @@ area or subsystem of the kernel is being patched. The "summary phrase" in the email's Subject should concisely describe the patch which that email contains. The "summary phrase" should not be a filename. Do not use the same "summary -phrase" for every patch in a whole patch series. +phrase" for every patch in a whole patch series (where a "patch +series" is an ordered sequence of multiple, related patches). Bear in mind that the "summary phrase" of your email becomes a globally-unique identifier for that patch. It propagates diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c index e9126e7..71acc28 100644 --- a/Documentation/accounting/getdelays.c +++ b/Documentation/accounting/getdelays.c @@ -61,8 +61,6 @@ __u64 stime, utime; #define MAX_MSG_SIZE 1024 /* Maximum number of cpus expected to be specified in a cpumask */ #define MAX_CPUS 32 -/* Maximum length of pathname to log file */ -#define MAX_FILENAME 256 struct msgtemplate { struct nlmsghdr n; @@ -72,6 +70,16 @@ struct msgtemplate { char cpumask[100+6*MAX_CPUS]; +static void usage(void) +{ + fprintf(stderr, "getdelays [-dilv] [-w logfile] [-r bufsize] " + "[-m cpumask] [-t tgid] [-p pid]\n"); + fprintf(stderr, " -d: print delayacct stats\n"); + fprintf(stderr, " -i: print IO accounting (works only with -p)\n"); + fprintf(stderr, " -l: listen forever\n"); + fprintf(stderr, " -v: debug on\n"); +} + /* * Create a raw netlink socket and bind */ @@ -221,13 +229,13 @@ int main(int argc, char *argv[]) int count = 0; int write_file = 0; int maskset = 0; - char logfile[128]; + char *logfile = NULL; int loop = 0; struct msgtemplate msg; while (1) { - c = getopt(argc, argv, "diw:r:m:t:p:v:l"); + c = getopt(argc, argv, "diw:r:m:t:p:vl"); if (c < 0) break; @@ -241,7 +249,7 @@ int main(int argc, char *argv[]) print_io_accounting = 1; break; case 'w': - strncpy(logfile, optarg, MAX_FILENAME); + logfile = strdup(optarg); printf("write to file %s\n", logfile); write_file = 1; break; @@ -277,7 +285,7 @@ int main(int argc, char *argv[]) loop = 1; break; default: - printf("Unknown option %d\n", c); + usage(); exit(-1); } } diff --git a/Documentation/arm/Interrupts b/Documentation/arm/Interrupts index 72c93de..0d3dbf1 100644 --- a/Documentation/arm/Interrupts +++ b/Documentation/arm/Interrupts @@ -149,7 +149,7 @@ So, what's changed? 3. set_GPIO_IRQ_edge() is obsolete, and should be replaced by set_irq_type. -4. Direct access to SA1111 INTPOL is depreciated. Use set_irq_type instead. +4. Direct access to SA1111 INTPOL is deprecated. Use set_irq_type instead. 5. A handler is expected to perform any necessary acknowledgement of the parent IRQ via the correct chip specific function. For instance, if diff --git a/Documentation/arm/Samsung-S3C24XX/H1940.txt b/Documentation/arm/Samsung-S3C24XX/H1940.txt index d6b1de9..f4a7b22 100644 --- a/Documentation/arm/Samsung-S3C24XX/H1940.txt +++ b/Documentation/arm/Samsung-S3C24XX/H1940.txt @@ -23,7 +23,7 @@ Support http://handhelds.org/moin/moin.cgi/HpIpaqH1940 - Herbert Pötzl pages: + Herbert Pötzl pages: http://vserver.13thfloor.at/H1940/ @@ -32,7 +32,7 @@ Maintainers ----------- This project is being maintained and developed by a variety - of people, including Ben Dooks, Arnaud Patard, and Herbert Pötzl. + of people, including Ben Dooks, Arnaud Patard, and Herbert Pötzl. Thanks to the many others who have also provided support. diff --git a/Documentation/auxdisplay/cfag12864b b/Documentation/auxdisplay/cfag12864b index 3572b98..b714183 100644 --- a/Documentation/auxdisplay/cfag12864b +++ b/Documentation/auxdisplay/cfag12864b @@ -78,9 +78,9 @@ Select (17)------------------------------(16) Data / Instruction Ground (18)---[GND] [+5v]---(19) LED + Ground (19)---[GND] Ground (20)---[GND] E A Values: -Ground (21)---[GND] [GND]---[P1]---(18) Vee · R = Resistor = 22 ohm -Ground (22)---[GND] | · P1 = Preset = 10 Kohm -Ground (23)---[GND] ---- S ------( 3) V0 · P2 = Preset = 1 Kohm +Ground (21)---[GND] [GND]---[P1]---(18) Vee - R = Resistor = 22 ohm +Ground (22)---[GND] | - P1 = Preset = 10 Kohm +Ground (23)---[GND] ---- S ------( 3) V0 - P2 = Preset = 1 Kohm Ground (24)---[GND] | | Ground (25)---[GND] [GND]---[P2]---[R]---(20) LED - diff --git a/Documentation/binfmt_misc.txt b/Documentation/binfmt_misc.txt index d097f09ee..f609ebf 100644 --- a/Documentation/binfmt_misc.txt +++ b/Documentation/binfmt_misc.txt @@ -113,4 +113,4 @@ cause unexpected behaviour and can be a security hazard. There is a web page about binfmt_misc at http://www.tat.physik.uni-tuebingen.de/~rguenth/linux/binfmt_misc.html -Richard Günther <rguenth@tat.physik.uni-tuebingen.de> +Richard Günther <rguenth@tat.physik.uni-tuebingen.de> diff --git a/Documentation/blackfin/00-INDEX b/Documentation/blackfin/00-INDEX new file mode 100644 index 0000000..7cb3b35 --- /dev/null +++ b/Documentation/blackfin/00-INDEX @@ -0,0 +1,11 @@ +00-INDEX + - This file + +cache-lock.txt + - HOWTO for blackfin cache locking. + +cachefeatures.txt + - Supported cache features. + +Filesystems + - Requirements for mounting the root file system. diff --git a/Documentation/blackfin/Filesystems b/Documentation/blackfin/Filesystems new file mode 100644 index 0000000..51260a1 --- /dev/null +++ b/Documentation/blackfin/Filesystems @@ -0,0 +1,169 @@ +/* + * File: Documentation/blackfin/Filesystems + * Based on: + * Author: + * + * Created: + * Description: This file contains the simple DMA Implementation for Blackfin + * + * Rev: $Id: Filesystems 2384 2006-11-01 04:12:43Z magicyang $ + * + * Modified: + * Copyright 2004-2006 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + */ + + How to mount the root file system in uClinux/Blackfin + ----------------------------------------------------- + +1 Mounting EXT3 File system. + ------------------------ + + Creating an EXT3 File system for uClinux/Blackfin: + + +Please follow the steps to form the EXT3 File system and mount the same as root +file system. + +a Make an ext3 file system as large as you want the final root file + system. + + mkfs.ext3 /dev/ram0 <your-rootfs-size-in-1k-blocks> + +b Mount this Empty file system on a free directory as: + + mount -t ext3 /dev/ram0 ./test + where ./test is the empty directory. + +c Copy your root fs directory that you have so carefully made over. + + cp -af /tmp/my_final_rootfs_files/* ./test + + (For ex: cp -af uClinux-dist/romfs/* ./test) + +d If you have done everything right till now you should be able to see + the required "root" dir's (that's etc, root, bin, lib, sbin...) + +e Now unmount the file system + + umount ./test + +f Create the root file system image. + + dd if=/dev/ram0 bs=1k count=<your-rootfs-size-in-1k-blocks> \ + > ext3fs.img + + +Now you have to tell the kernel that will be mounting this file system as +rootfs. +So do a make menuconfig under kernel and select the Ext3 journaling file system +support under File system --> submenu. + + +2. Mounting EXT2 File system. + ------------------------- + +By default the ext2 file system image will be created if you invoke make from +the top uClinux-dist directory. + + +3. Mounting CRAMFS File System + ---------------------------- + +To create a CRAMFS file system image execute the command + + mkfs.cramfs ./test cramfs.img + + where ./test is the target directory. + + +4. Mounting ROMFS File System + -------------------------- + +To create a ROMFS file system image execute the command + + genromfs -v -V "ROMdisk" -f romfs.img -d ./test + + where ./test is the target directory + + +5. Mounting the JFFS2 Filesystem + ----------------------------- + +To create a compressed JFFS filesystem (JFFS2), please execute the command + + mkfs.jffs2 -d ./test -o jffs2.img + + where ./test is the target directory. + +However, please make sure the following is in your kernel config. + +/* + * RAM/ROM/Flash chip drivers + */ +#define CONFIG_MTD_CFI 1 +#define CONFIG_MTD_ROM 1 +/* + * Mapping drivers for chip access + */ +#define CONFIG_MTD_COMPLEX_MAPPINGS 1 +#define CONFIG_MTD_BF533 1 +#undef CONFIG_MTD_UCLINUX + +Through the u-boot boot loader, use the jffs2.img in the corresponding +partition made in linux-2.6.x/drivers/mtd/maps/bf533_flash.c. + +NOTE - Currently the Flash driver is available only for EZKIT. Watch out for a + STAMP driver soon. + + +6. Mounting the NFS File system + ----------------------------- + + For mounting the NFS please do the following in the kernel config. + + In Networking Support --> Networking options --> TCP/IP networking --> + IP: kernel level autoconfiguration + + Enable BOOTP Support. + + In Kernel hacking --> Compiled-in kernel boot parameter add the following + + root=/dev/nfs rw ip=bootp + + In File system --> Network File system, Enable + + NFS file system support --> NFSv3 client support + Root File system on NFS + + in uClibc menuconfig, do the following + In Networking Support + enable Remote Procedure Call (RPC) support + Full RPC Support + + On the Host side, ensure that /etc/dhcpd.conf looks something like this + + ddns-update-style ad-hoc; + allow bootp; + subnet 10.100.4.0 netmask 255.255.255.0 { + default-lease-time 122209600; + max-lease-time 31557600; + group { + host bf533 { + hardware ethernet 00:CF:52:49:C3:01; + fixed-address 10.100.4.50; + option root-path "/home/nfsmount"; + } + } + + ensure that /etc/exports looks something like this + /home/nfsmount *(rw,no_root_squash,no_all_squash) + + run the following commands as root (may differ depending on your + distribution) : + - service nfs start + - service portmap start + - service dhcpd start + - /usr/sbin/exportfs diff --git a/Documentation/blackfin/cache-lock.txt b/Documentation/blackfin/cache-lock.txt new file mode 100644 index 0000000..88ba1e6 --- /dev/null +++ b/Documentation/blackfin/cache-lock.txt @@ -0,0 +1,48 @@ +/* + * File: Documentation/blackfin/cache-lock.txt + * Based on: + * Author: + * + * Created: + * Description: This file contains the simple DMA Implementation for Blackfin + * + * Rev: $Id: cache-lock.txt 2384 2006-11-01 04:12:43Z magicyang $ + * + * Modified: + * Copyright 2004-2006 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + */ + +How to lock your code in cache in uClinux/blackfin +-------------------------------------------------- + +There are only a few steps required to lock your code into the cache. +Currently you can lock the code by Way. + +Below are the interface provided for locking the cache. + + +1. cache_grab_lock(int Ways); + +This function grab the lock for locking your code into the cache specified +by Ways. + + +2. cache_lock(int Ways); + +This function should be called after your critical code has been executed. +Once the critical code exits, the code is now loaded into the cache. This +function locks the code into the cache. + + +So, the example sequence will be: + + cache_grab_lock(WAY0_L); /* Grab the lock */ + + critical_code(); /* Execute the code of interest */ + + cache_lock(WAY0_L); /* Lock the cache */ + +Where WAY0_L signifies WAY0 locking. diff --git a/Documentation/blackfin/cachefeatures.txt b/Documentation/blackfin/cachefeatures.txt new file mode 100644 index 0000000..0fbec23 --- /dev/null +++ b/Documentation/blackfin/cachefeatures.txt @@ -0,0 +1,65 @@ +/* + * File: Documentation/blackfin/cachefeatures.txt + * Based on: + * Author: + * + * Created: + * Description: This file contains the simple DMA Implementation for Blackfin + * + * Rev: $Id: cachefeatures.txt 2384 2006-11-01 04:12:43Z magicyang $ + * + * Modified: + * Copyright 2004-2006 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + */ + + - Instruction and Data cache initialization. + icache_init(); + dcache_init(); + + - Instruction and Data cache Invalidation Routines, when flushing the + same is not required. + _icache_invalidate(); + _dcache_invalidate(); + + Also, for invalidating the entire instruction and data cache, the below + routines are provided (another method for invalidation, refer page no 267 and 287 of + ADSP-BF533 Hardware Reference manual) + + invalidate_entire_dcache(); + invalidate_entire_icache(); + + -External Flushing of Instruction and data cache routines. + + flush_instruction_cache(); + flush_data_cache(); + + - Internal Flushing of Instruction and Data Cache. + + icplb_flush(); + dcplb_flush(); + + - Locking the cache. + + cache_grab_lock(); + cache_lock(); + + Please refer linux-2.6.x/Documentation/blackfin/cache-lock.txt for how to + lock the cache. + + Locking the cache is optional feature. + + - Miscellaneous cache functions. + + flush_cache_all(); + flush_cache_mm(); + invalidate_dcache_range(); + flush_dcache_range(); + flush_dcache_page(); + flush_cache_range(); + flush_cache_page(); + invalidate_dcache_range(); + flush_page_to_ram(); + diff --git a/Documentation/block/ioprio.txt b/Documentation/block/ioprio.txt index 96ccf68..1b930ef 100644 --- a/Documentation/block/ioprio.txt +++ b/Documentation/block/ioprio.txt @@ -6,10 +6,10 @@ Intro ----- With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io -priorities is supported for reads on files. This enables users to io nice -processes or process groups, similar to what has been possible to cpu -scheduling for ages. This document mainly details the current possibilites -with cfq, other io schedulers do not support io priorities so far. +priorities are supported for reads on files. This enables users to io nice +processes or process groups, similar to what has been possible with cpu +scheduling for ages. This document mainly details the current possibilities +with cfq; other io schedulers do not support io priorities thus far. Scheduling classes ------------------ diff --git a/Documentation/cciss.txt b/Documentation/cciss.txt index f74affe..e65736c 100644 --- a/Documentation/cciss.txt +++ b/Documentation/cciss.txt @@ -22,14 +22,21 @@ This driver is known to work with the following cards: * SA E200i * SA E500 -If nodes are not already created in the /dev/cciss directory, run as root: +Detecting drive failures: +------------------------- -# cd /dev -# ./MAKEDEV cciss +To get the status of logical volumes and to detect physical drive +failures, you can use the cciss_vol_status program found here: +http://cciss.sourceforge.net/#cciss_utils Device Naming: -------------- +If nodes are not already created in the /dev/cciss directory, run as root: + +# cd /dev +# ./MAKEDEV cciss + You need some entries in /dev for the cciss device. The MAKEDEV script can make device nodes for you automatically. Currently the device setup is as follows: diff --git a/Documentation/cpu-freq/cpufreq-stats.txt b/Documentation/cpu-freq/cpufreq-stats.txt index 53d62c1..fc64749 100644 --- a/Documentation/cpu-freq/cpufreq-stats.txt +++ b/Documentation/cpu-freq/cpufreq-stats.txt @@ -17,7 +17,7 @@ Contents 1. Introduction -cpufreq-stats is a driver that provices CPU frequency statistics for each CPU. +cpufreq-stats is a driver that provides CPU frequency statistics for each CPU. These statistics are provided in /sysfs as a bunch of read_only interfaces. This interface (when configured) will appear in a separate directory under cpufreq in /sysfs (<sysfs root>/devices/system/cpu/cpuX/cpufreq/stats/) for each CPU. diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index cc60d29..b6d24c2 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt @@ -217,14 +217,17 @@ Q: What happens when a CPU is being logically offlined? A: The following happen, listed in no particular order :-) - A notification is sent to in-kernel registered modules by sending an event - CPU_DOWN_PREPARE + CPU_DOWN_PREPARE or CPU_DOWN_PREPARE_FROZEN, depending on whether or not the + CPU is being offlined while tasks are frozen due to a suspend operation in + progress - All process is migrated away from this outgoing CPU to a new CPU - All interrupts targeted to this CPU is migrated to a new CPU - timers/bottom half/task lets are also migrated to a new CPU - Once all services are migrated, kernel calls an arch specific routine __cpu_disable() to perform arch specific cleanup. - Once this is successful, an event for successful cleanup is sent by an event - CPU_DEAD. + CPU_DEAD (or CPU_DEAD_FROZEN if tasks are frozen due to a suspend while the + CPU is being offlined). "It is expected that each service cleans up when the CPU_DOWN_PREPARE notifier is called, when CPU_DEAD is called its expected there is nothing @@ -242,9 +245,11 @@ A: This is what you would need in your kernel code to receive notifications. switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: foobar_online_action(cpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: foobar_dead_action(cpu); break; } diff --git a/Documentation/crypto/api-intro.txt b/Documentation/crypto/api-intro.txt index 9b84b805..a2ac6d2 100644 --- a/Documentation/crypto/api-intro.txt +++ b/Documentation/crypto/api-intro.txt @@ -177,7 +177,7 @@ Portions of this API were derived from the following projects: and; Nettle (http://www.lysator.liu.se/~nisse/nettle/) - Niels Möller + Niels Möller Original developers of the crypto algorithms: @@ -200,8 +200,8 @@ SHA1 algorithm contributors: DES algorithm contributors: Raimar Falke - Gisle Sælensminde - Niels Möller + Gisle Sælensminde + Niels Möller Blowfish algorithm contributors: Herbert Valerio Riedel diff --git a/Documentation/device-mapper/delay.txt b/Documentation/device-mapper/delay.txt new file mode 100644 index 0000000..15adc55 --- /dev/null +++ b/Documentation/device-mapper/delay.txt @@ -0,0 +1,26 @@ +dm-delay +======== + +Device-Mapper's "delay" target delays reads and/or writes +and maps them to different devices. + +Parameters: + <device> <offset> <delay> [<write_device> <write_offset> <write_delay>] + +With separate write parameters, the first set is only used for reads. +Delays are specified in milliseconds. + +Example scripts +=============== +[[ +#!/bin/sh +# Create device delaying rw operation for 500ms +echo "0 `blockdev --getsize $1` delay $1 0 500" | dmsetup create delayed +]] + +[[ +#!/bin/sh +# Create device delaying only write operation for 500ms and +# splitting reads and writes to different devices $1 $2 +echo "0 `blockdev --getsize $1` delay $1 0 0 $2 0 500" | dmsetup create delayed +]] diff --git a/Documentation/dontdiff b/Documentation/dontdiff index 63c2d0c..64e9f6c 100644 --- a/Documentation/dontdiff +++ b/Documentation/dontdiff @@ -55,8 +55,8 @@ aic7*seq.h* aicasm aicdb.h* asm -asm-offsets.* -asm_offsets.* +asm-offsets.h +asm_offsets.h autoconf.h* bbootsect bin2c diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index 5163b85..6c8d8f2 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt @@ -182,7 +182,7 @@ For example, you can do something like the following. ... - devres_close_group(dev, my_midlayer_something); + devres_close_group(dev, my_midlayer_create_something); return 0; } diff --git a/Documentation/driver-model/platform.txt b/Documentation/driver-model/platform.txt index f7c9262..19c4a6e 100644 --- a/Documentation/driver-model/platform.txt +++ b/Documentation/driver-model/platform.txt @@ -16,7 +16,7 @@ host bridges to peripheral buses, and most controllers integrated into system-on-chip platforms. What they usually have in common is direct addressing from a CPU bus. Rarely, a platform_device will be connected through a segment of some other kind of bus; but its -registers will still be directly addressible. +registers will still be directly addressable. Platform devices are given a name, used in driver binding, and a list of resources such as addresses and IRQs. @@ -125,7 +125,7 @@ three different ways to find such a match: usually register later during booting, or by module loading. - Registering a driver using platform_driver_probe() works just like - using platform_driver_register(), except that the the driver won't + using platform_driver_register(), except that the driver won't be probed later if another device registers. (Which is OK, since this interface is only for use with non-hotpluggable devices.) diff --git a/Documentation/dvb/README.dvb-usb b/Documentation/dvb/README.dvb-usb index 46b78b7..bf2a9cd 100644 --- a/Documentation/dvb/README.dvb-usb +++ b/Documentation/dvb/README.dvb-usb @@ -228,5 +228,5 @@ Patches, comments and suggestions are very very welcome. Ulf Hermenau for helping me out with traditional chinese. - André Smoktun and Christian Frömmel for supporting me with + André Smoktun and Christian Frömmel for supporting me with hardware and listening to my problems very patiently. diff --git a/Documentation/dvb/contributors.txt b/Documentation/dvb/contributors.txt index 4c33cce..4865add 100644 --- a/Documentation/dvb/contributors.txt +++ b/Documentation/dvb/contributors.txt @@ -66,7 +66,7 @@ Michael Dreher <michael@5dot1.de> Andreas 'randy' Weinberger for the support of the Fujitsu-Siemens Activy budget DVB-S -Kenneth Aafløy <ke-aa@frisurf.no> +Kenneth Aafløy <ke-aa@frisurf.no> for adding support for Typhoon DVB-S budget card Ernst Peinlich <e.peinlich@inode.at> diff --git a/Documentation/fb/arkfb.txt b/Documentation/fb/arkfb.txt new file mode 100644 index 0000000..e8487a9 --- /dev/null +++ b/Documentation/fb/arkfb.txt @@ -0,0 +1,68 @@ + + arkfb - fbdev driver for ARK Logic chips + ======================================== + + +Supported Hardware +================== + + ARK 2000PV chip + ICS 5342 ramdac + + - only BIOS initialized VGA devices supported + - probably not working on big endian + + +Supported Features +================== + + * 4 bpp pseudocolor modes (with 18bit palette, two variants) + * 8 bpp pseudocolor mode (with 18bit palette) + * 16 bpp truecolor modes (RGB 555 and RGB 565) + * 24 bpp truecolor mode (RGB 888) + * 32 bpp truecolor mode (RGB 888) + * text mode (activated by bpp = 0) + * doublescan mode variant (not available in text mode) + * panning in both directions + * suspend/resume support + +Text mode is supported even in higher resolutions, but there is limitation to +lower pixclocks (i got maximum about 70 MHz, it is dependent on specific +hardware). This limitation is not enforced by driver. Text mode supports 8bit +wide fonts only (hardware limitation) and 16bit tall fonts (driver +limitation). Unfortunately character attributes (like color) in text mode are +broken for unknown reason, so its usefulness is limited. + +There are two 4 bpp modes. First mode (selected if nonstd == 0) is mode with +packed pixels, high nibble first. Second mode (selected if nonstd == 1) is mode +with interleaved planes (1 byte interleave), MSB first. Both modes support +8bit wide fonts only (driver limitation). + +Suspend/resume works on systems that initialize video card during resume and +if device is active (for example used by fbcon). + + +Missing Features +================ +(alias TODO list) + + * secondary (not initialized by BIOS) device support + * big endian support + * DPMS support + * MMIO support + * interlaced mode variant + * support for fontwidths != 8 in 4 bpp modes + * support for fontheight != 16 in text mode + * hardware cursor + * vsync synchronization + * feature connector support + * acceleration support (8514-like 2D) + + +Known bugs +========== + + * character attributes (and cursor) in text mode are broken + +-- +Ondrej Zajicek <santiago@crfreenet.org> diff --git a/Documentation/fb/aty128fb.txt b/Documentation/fb/aty128fb.txt index 069262f..b605204 100644 --- a/Documentation/fb/aty128fb.txt +++ b/Documentation/fb/aty128fb.txt @@ -54,8 +54,8 @@ Accepted options: noaccel - do not use acceleration engine. It is default. accel - use acceleration engine. Not finished. -vmode:x - chooses PowerMacintosh video mode <x>. Depreciated. -cmode:x - chooses PowerMacintosh colour mode <x>. Depreciated. +vmode:x - chooses PowerMacintosh video mode <x>. Deprecated. +cmode:x - chooses PowerMacintosh colour mode <x>. Deprecated. <XxX@X> - selects startup videomode. See modedb.txt for detailed explanation. Default is 640x480x8bpp. diff --git a/Documentation/fb/deferred_io.txt b/Documentation/fb/deferred_io.txt new file mode 100644 index 0000000..73cf9fb --- /dev/null +++ b/Documentation/fb/deferred_io.txt @@ -0,0 +1,75 @@ +Deferred IO +----------- + +Deferred IO is a way to delay and repurpose IO. It uses host memory as a +buffer and the MMU pagefault as a pretrigger for when to perform the device +IO. The following example may be a useful explaination of how one such setup +works: + +- userspace app like Xfbdev mmaps framebuffer +- deferred IO and driver sets up nopage and page_mkwrite handlers +- userspace app tries to write to mmaped vaddress +- we get pagefault and reach nopage handler +- nopage handler finds and returns physical page +- we get page_mkwrite where we add this page to a list +- schedule a workqueue task to be run after a delay +- app continues writing to that page with no additional cost. this is + the key benefit. +- the workqueue task comes in and mkcleans the pages on the list, then + completes the work associated with updating the framebuffer. this is + the real work talking to the device. +- app tries to write to the address (that has now been mkcleaned) +- get pagefault and the above sequence occurs again + +As can be seen from above, one benefit is roughly to allow bursty framebuffer +writes to occur at minimum cost. Then after some time when hopefully things +have gone quiet, we go and really update the framebuffer which would be +a relatively more expensive operation. + +For some types of nonvolatile high latency displays, the desired image is +the final image rather than the intermediate stages which is why it's okay +to not update for each write that is occuring. + +It may be the case that this is useful in other scenarios as well. Paul Mundt +has mentioned a case where it is beneficial to use the page count to decide +whether to coalesce and issue SG DMA or to do memory bursts. + +Another one may be if one has a device framebuffer that is in an usual format, +say diagonally shifting RGB, this may then be a mechanism for you to allow +apps to pretend to have a normal framebuffer but reswizzle for the device +framebuffer at vsync time based on the touched pagelist. + +How to use it: (for applications) +--------------------------------- +No changes needed. mmap the framebuffer like normal and just use it. + +How to use it: (for fbdev drivers) +---------------------------------- +The following example may be helpful. + +1. Setup your structure. Eg: + +static struct fb_deferred_io hecubafb_defio = { + .delay = HZ, + .deferred_io = hecubafb_dpy_deferred_io, +}; + +The delay is the minimum delay between when the page_mkwrite trigger occurs +and when the deferred_io callback is called. The deferred_io callback is +explained below. + +2. Setup your deferred IO callback. Eg: +static void hecubafb_dpy_deferred_io(struct fb_info *info, + struct list_head *pagelist) + +The deferred_io callback is where you would perform all your IO to the display +device. You receive the pagelist which is the list of pages that were written +to during the delay. You must not modify this list. This callback is called +from a workqueue. + +3. Call init + info->fbdefio = &hecubafb_defio; + fb_deferred_io_init(info); + +4. Call cleanup + fb_deferred_io_cleanup(info); diff --git a/Documentation/fb/framebuffer.txt b/Documentation/fb/framebuffer.txt index 610e780..b3e3a03 100644 --- a/Documentation/fb/framebuffer.txt +++ b/Documentation/fb/framebuffer.txt @@ -215,11 +215,11 @@ vertical retrace time is the sum of the upper margin, the lower margin and the vsync length. +----------+---------------------------------------------+----------+-------+ - | | ^ | | | + | | ↑ | | | | | |upper_margin | | | - | | ¥ | | | + | | ↓ | | | +----------###############################################----------+-------+ - | # ^ # | | + | # ↑ # | | | # | # | | | # | # | | | # | # | | @@ -238,15 +238,15 @@ vsync length. | # | # | | | # | # | | | # | # | | - | # ¥ # | | + | # ↓ # | | +----------###############################################----------+-------+ - | | ^ | | | + | | ↑ | | | | | |lower_margin | | | - | | ¥ | | | + | | ↓ | | | +----------+---------------------------------------------+----------+-------+ - | | ^ | | | + | | ↑ | | | | | |vsync_len | | | - | | ¥ | | | + | | ↓ | | | +----------+---------------------------------------------+----------+-------+ The frame buffer device expects all horizontal timings in number of dotclocks diff --git a/Documentation/fb/imacfb.txt b/Documentation/fb/imacfb.txt index 7590285..316ec9b 100644 --- a/Documentation/fb/imacfb.txt +++ b/Documentation/fb/imacfb.txt @@ -17,7 +17,7 @@ How to use it? ============== Imacfb does not have any kind of autodetection of your machine. -You have to add the fillowing kernel parameters in your elilo.conf: +You have to add the following kernel parameters in your elilo.conf: Macbook : video=imacfb:macbook MacMini : diff --git a/Documentation/fb/s3fb.txt b/Documentation/fb/s3fb.txt index 8a04c0d..2c97770 100644 --- a/Documentation/fb/s3fb.txt +++ b/Documentation/fb/s3fb.txt @@ -35,10 +35,12 @@ Supported Features * suspend/resume support * DPMS support -Text mode is supported even in higher resolutions, but there is limitation -to lower pixclocks (maximum between 50-60 MHz, depending on specific hardware). -This limitation is not enforced by driver. Text mode supports 8bit wide fonts -only (hardware limitation) and 16bit tall fonts (driver limitation). +Text mode is supported even in higher resolutions, but there is limitation to +lower pixclocks (maximum usually between 50-60 MHz, depending on specific +hardware, i get best results from plain S3 Trio32 card - about 75 MHz). This +limitation is not enforced by driver. Text mode supports 8bit wide fonts only +(hardware limitation) and 16bit tall fonts (driver limitation). Text mode +support is broken on S3 Trio64 V2/DX. There are two 4 bpp modes. First mode (selected if nonstd == 0) is mode with packed pixels, high nibble first. Second mode (selected if nonstd == 1) is mode @@ -73,6 +75,8 @@ Known bugs ========== * cursor disable in text mode doesn't work + * text mode broken on S3 Trio64 V2/DX + -- Ondrej Zajicek <santiago@crfreenet.org> diff --git a/Documentation/fb/sstfb.txt b/Documentation/fb/sstfb.txt index df27f5b..550ca77 100644 --- a/Documentation/fb/sstfb.txt +++ b/Documentation/fb/sstfb.txt @@ -2,9 +2,9 @@ Introduction This is a frame buffer device driver for 3dfx' Voodoo Graphics - (aka voodoo 1, aka sst1) and Voodoo² (aka Voodoo 2, aka CVG) based + (aka voodoo 1, aka sst1) and Voodoo² (aka Voodoo 2, aka CVG) based video boards. It's highly experimental code, but is guaranteed to work - on my computer, with my "Maxi Gamer 3D" and "Maxi Gamer 3d²" boards, + on my computer, with my "Maxi Gamer 3D" and "Maxi Gamer 3d²" boards, and with me "between chair and keyboard". Some people tested other combinations and it seems that it works. The main page is located at <http://sstfb.sourceforge.net>, and if diff --git a/Documentation/fb/vt8623fb.txt b/Documentation/fb/vt8623fb.txt new file mode 100644 index 0000000..f654576 --- /dev/null +++ b/Documentation/fb/vt8623fb.txt @@ -0,0 +1,64 @@ + + vt8623fb - fbdev driver for graphics core in VIA VT8623 chipset + =============================================================== + + +Supported Hardware +================== + + VIA VT8623 [CLE266] chipset and its graphics core + (known as CastleRock or Unichrome) + +I tested vt8623fb on VIA EPIA ML-6000 + + +Supported Features +================== + + * 4 bpp pseudocolor modes (with 18bit palette, two variants) + * 8 bpp pseudocolor mode (with 18bit palette) + * 16 bpp truecolor mode (RGB 565) + * 32 bpp truecolor mode (RGB 888) + * text mode (activated by bpp = 0) + * doublescan mode variant (not available in text mode) + * panning in both directions + * suspend/resume support + * DPMS support + +Text mode is supported even in higher resolutions, but there is limitation to +lower pixclocks (maximum about 100 MHz). This limitation is not enforced by +driver. Text mode supports 8bit wide fonts only (hardware limitation) and +16bit tall fonts (driver limitation). + +There are two 4 bpp modes. First mode (selected if nonstd == 0) is mode with +packed pixels, high nibble first. Second mode (selected if nonstd == 1) is mode +with interleaved planes (1 byte interleave), MSB first. Both modes support +8bit wide fonts only (driver limitation). + +Suspend/resume works on systems that initialize video card during resume and +if device is active (for example used by fbcon). + + +Missing Features +================ +(alias TODO list) + + * secondary (not initialized by BIOS) device support + * MMIO support + * interlaced mode variant + * support for fontwidths != 8 in 4 bpp modes + * support for fontheight != 16 in text mode + * hardware cursor + * video overlay support + * vsync synchronization + * acceleration support (8514-like 2D, busmaster transfers) + + +Known bugs +========== + + * cursor disable in text mode doesn't work + + +-- +Ondrej Zajicek <santiago@crfreenet.org> diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index d6d183f..c6322c7 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -6,6 +6,14 @@ be removed from this file. --------------------------- +What: MXSER +When: December 2007 +Why: Old mxser driver is obsoleted by the mxser_new. Give it some time yet + and remove it. +Who: Jiri Slaby <jirislaby@gmail.com> + +--------------------------- + What: V4L2 VIDIOC_G_MPEGCOMP and VIDIOC_S_MPEGCOMP When: October 2007 Why: Broken attempt to set MPEG compression parameters. These ioctls are @@ -51,6 +59,15 @@ Who: Dan Dennedy <dan@dennedy.org>, Stefan Richter <stefanr@s5r6.in-berlin.de> --------------------------- +What: old NCR53C9x driver +When: October 2007 +Why: Replaced by the much better esp_scsi driver. Actual low-level + driver can ported over almost trivially. +Who: David Miller <davem@davemloft.net> + Christoph Hellwig <hch@lst.de> + +--------------------------- + What: Video4Linux API 1 ioctls and video_decoder.h from Video devices. When: December 2006 Why: V4L1 AP1 was replaced by V4L2 API. during migration from 2.4 to 2.6 @@ -117,25 +134,6 @@ Who: Adrian Bunk <bunk@stusta.de> --------------------------- -What: pci_module_init(driver) -When: January 2007 -Why: Is replaced by pci_register_driver(pci_driver). -Who: Richard Knutsson <ricknu-0@student.ltu.se> and Greg Kroah-Hartman <gregkh@suse.de> - ---------------------------- - -What: Usage of invalid timevals in setitimer -When: March 2007 -Why: POSIX requires to validate timevals in the setitimer call. This - was never done by Linux. The invalid (e.g. negative timevals) were - silently converted to more or less random timeouts and intervals. - Until the removal a per boot limited number of warnings is printed - and the timevals are sanitized. - -Who: Thomas Gleixner <tglx@linutronix.de> - ---------------------------- - What: Unused EXPORT_SYMBOL/EXPORT_SYMBOL_GPL exports (temporary transition config option provided until then) The transition config option will also be removed at the same time. @@ -163,7 +161,7 @@ Who: Greg Kroah-Hartman <gregkh@suse.de> --------------------------- What: Interrupt only SA_* flags -When: Januar 2007 +When: September 2007 Why: The interrupt related SA_* flags are replaced by IRQF_* to move them out of the signal namespace. @@ -190,18 +188,10 @@ Who: Jean Delvare <khali@linux-fr.org> --------------------------- -What: i2c_adapter.dev - i2c_adapter.list +What: i2c_adapter.list When: July 2007 -Why: Superfluous, given i2c_adapter.class_dev: - * The "dev" was a stand-in for the physical device node that legacy - drivers would not have; but now it's almost always present. Any - remaining legacy drivers must upgrade (they now trigger warnings). - * The "list" duplicates class device children. - The delay in removing this is so upgraded lm_sensors and libsensors - can get deployed. (Removal causes minor changes in the sysfs layout, - notably the location of the adapter type name and parenting the i2c - client hardware directly from their controller.) +Why: Superfluous, this list duplicates the one maintained by the driver + core. Who: Jean Delvare <khali@linux-fr.org>, David Brownell <dbrownell@users.sourceforge.net> @@ -306,3 +296,35 @@ Why: Code was merged, then submitter immediately disappeared leaving Who: David S. Miller <davem@davemloft.net> --------------------------- + +What: read_dev_chars(), read_conf_data{,_lpm}() (s390 common I/O layer) +When: December 2007 +Why: These functions are a leftover from 2.4 times. They have several + problems: + - Duplication of checks that are done in the device driver's + interrupt handler + - common I/O layer can't do device specific error recovery + - device driver can't be notified for conditions happening during + execution of the function + Device drivers should issue the read device characteristics and read + configuration data ccws and do the appropriate error handling + themselves. +Who: Cornelia Huck <cornelia.huck@de.ibm.com> + +--------------------------- + +What: i2c-ixp2000, i2c-ixp4xx and scx200_i2c drivers +When: September 2007 +Why: Obsolete. The new i2c-gpio driver replaces all hardware-specific + I2C-over-GPIO drivers. +Who: Jean Delvare <khali@linux-fr.org> + +--------------------------- + +What: drivers depending on OSS_OBSOLETE +When: options in 2.6.23, code in 2.6.25 +Why: obsolete OSS drivers +Who: Adrian Bunk <bunk@stusta.de> + +--------------------------- + diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 28bfea7..d866551 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -15,6 +15,7 @@ prototypes: int (*d_delete)(struct dentry *); void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); + char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen); locking rules: none have BKL @@ -25,6 +26,7 @@ d_compare: no yes no no d_delete: yes no yes no d_release: no no no yes d_iput: no no no yes +d_dname: no no no no --------------------------- inode_operations --------------------------- prototypes: @@ -52,7 +54,7 @@ ata *); locking rules: all may block, none have BKL - i_sem(inode) + i_mutex(inode) lookup: yes create: yes link: yes (both) @@ -72,7 +74,7 @@ setxattr: yes getxattr: no listxattr: no removexattr: yes - Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_sem on + Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on victim. cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. ->truncate() is never called directly - it's a callback, not a @@ -459,7 +461,7 @@ doesn't take the BKL. ->read on directories probably must go away - we should just enforce -EISDIR in sys_read() and friends. -->fsync() has i_sem on inode. +->fsync() has i_mutex on inode. --------------------------- dquot_operations ------------------------------- prototypes: diff --git a/Documentation/filesystems/hpfs.txt b/Documentation/filesystems/hpfs.txt index 38aba03..fa45c3b 100644 --- a/Documentation/filesystems/hpfs.txt +++ b/Documentation/filesystems/hpfs.txt @@ -290,7 +290,7 @@ History 2.07 More fixes for Warp Server. Now it really works 2.08 Creating new files is not so slow on large disks An attempt to sync deleted file does not generate filesystem error -2.09 Fixed error on extremly fragmented files +2.09 Fixed error on extremely fragmented files vim: set textwidth=80: diff --git a/Documentation/filesystems/jfs.txt b/Documentation/filesystems/jfs.txt index bae1286..26ebde7 100644 --- a/Documentation/filesystems/jfs.txt +++ b/Documentation/filesystems/jfs.txt @@ -29,7 +29,13 @@ errors=continue Keep going on a filesystem error. errors=remount-ro Default. Remount the filesystem read-only on an error. errors=panic Panic and halt the machine if an error occurs. -Please send bugs, comments, cards and letters to shaggy@austin.ibm.com. +uid=value Override on-disk uid with specified value +gid=value Override on-disk gid with specified value +umask=value Override on-disk umask with specified octal value. For + directories, the execute bit will be set if the corresponding + read bit is set. + +Please send bugs, comments, cards and letters to shaggy@linux.vnet.ibm.com. The JFS mailing list can be subscribed to by using the link labeled "Mail list Subscribe" at our web page http://jfs.sourceforge.net/ diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt index 8177906..8ee10ec 100644 --- a/Documentation/filesystems/ntfs.txt +++ b/Documentation/filesystems/ntfs.txt @@ -349,7 +349,7 @@ end of the line. Note the "Should sync?" parameter "nosync" means that the two mirrors are already in sync which will be the case on a clean shutdown of Windows. If the mirrors are not clean, you can specify the "sync" option instead of "nosync" -and the Device-Mapper driver will then copy the entirey of the "Source Device" +and the Device-Mapper driver will then copy the entirety of the "Source Device" to the "Target Device" or if you specified multipled target devices to all of them. diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 7aaf09b..8756a07 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -122,21 +122,22 @@ subdirectory has the entries listed in Table 1-1. Table 1-1: Process specific entries in /proc .............................................................................. - File Content - cmdline Command line arguments - cpu Current and last cpu in which it was executed (2.4)(smp) - cwd Link to the current working directory - environ Values of environment variables - exe Link to the executable of this process - fd Directory, which contains all file descriptors - maps Memory maps to executables and library files (2.4) - mem Memory held by this process - root Link to the root directory of this process - stat Process status - statm Process memory status information - status Process status in human readable form - wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan - smaps Extension based on maps, presenting the rss size for each mapped file + File Content + clear_refs Clears page referenced bits shown in smaps output + cmdline Command line arguments + cpu Current and last cpu in which it was executed (2.4)(smp) + cwd Link to the current working directory + environ Values of environment variables + exe Link to the executable of this process + fd Directory, which contains all file descriptors + maps Memory maps to executables and library files (2.4) + mem Memory held by this process + root Link to the root directory of this process + stat Process status + statm Process memory status information + status Process status in human readable form + wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan + smaps Extension based on maps, the rss size for each mapped file .............................................................................. For example, to get the status information of a process, all you have to do is @@ -228,7 +229,7 @@ Table 1-3: Kernel info in /proc mounts Mounted filesystems net Networking info (see text) partitions Table of partitions known to the system - pci Depreciated info of PCI bus (new way -> /proc/bus/pci/, + pci Deprecated info of PCI bus (new way -> /proc/bus/pci/, decoupled by lspci (2.4) rtc Real time clock scsi SCSI info (see text) @@ -1137,6 +1138,13 @@ determine whether or not they are still functioning properly. Because the NMI watchdog shares registers with oprofile, by disabling the NMI watchdog, oprofile may have more registers to utilize. +maps_protect +------------ + +Enables/Disables the protection of the per-process proc entries "maps" and +"smaps". When enabled, the contents of these files are visible only to +readers that are allowed to ptrace() the given process. + 2.4 /proc/sys/vm - The virtual memory subsystem ----------------------------------------------- diff --git a/Documentation/filesystems/relay.txt b/Documentation/filesystems/relay.txt index 7fbb6ff..18d23f9 100644 --- a/Documentation/filesystems/relay.txt +++ b/Documentation/filesystems/relay.txt @@ -351,7 +351,7 @@ If the current buffer is full, i.e. all sub-buffers remain unconsumed, the callback returns 0 to indicate that the buffer switch should not occur yet, i.e. until the consumer has had a chance to read the current set of ready sub-buffers. For the relay_buf_full() function -to make sense, the consumer is reponsible for notifying the relay +to make sense, the consumer is responsible for notifying the relay interface when sub-buffers have been consumed via relay_subbufs_consumed(). Any subsequent attempts to write into the buffer will again invoke the subbuf_start() callback with the same diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt index 069cb10..fcc123f 100644 --- a/Documentation/filesystems/vfat.txt +++ b/Documentation/filesystems/vfat.txt @@ -57,6 +57,13 @@ nonumtail=<bool> -- When creating 8.3 aliases, normally the alias will currently exist in the directory, 'longfile.txt' will be the short alias instead of 'longfi~1.txt'. +usefree -- Use the "free clusters" value stored on FSINFO. It'll + be used to determine number of free clusters without + scanning disk. But it's not used by default, because + recent Windows don't update it correctly in some + case. If you are sure the "free clusters" on FSINFO is + correct, by this option you can avoid scanning disk. + quiet -- Stops printing certain warning messages. check=s|r|n -- Case sensitivity checking setting. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index ea271f2..a47cc81 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -827,7 +827,7 @@ This describes how a filesystem can overload the standard dentry operations. Dentries and the dcache are the domain of the VFS and the individual filesystem implementations. Device drivers have no business here. These methods may be set to NULL, as they are either optional or -the VFS uses a default. As of kernel 2.6.13, the following members are +the VFS uses a default. As of kernel 2.6.22, the following members are defined: struct dentry_operations { @@ -837,6 +837,7 @@ struct dentry_operations { int (*d_delete)(struct dentry *); void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); + char *(*d_dname)(struct dentry *, char *, int); }; d_revalidate: called when the VFS needs to revalidate a dentry. This @@ -859,6 +860,26 @@ struct dentry_operations { VFS calls iput(). If you define this method, you must call iput() yourself + d_dname: called when the pathname of a dentry should be generated. + Usefull for some pseudo filesystems (sockfs, pipefs, ...) to delay + pathname generation. (Instead of doing it when dentry is created, + its done only when the path is needed.). Real filesystems probably + dont want to use it, because their dentries are present in global + dcache hash, so their hash should be an invariant. As no lock is + held, d_dname() should not try to modify the dentry itself, unless + appropriate SMP safety is used. CAUTION : d_path() logic is quite + tricky. The correct way to return for example "Hello" is to put it + at the end of the buffer, and returns a pointer to the first char. + dynamic_dname() helper function is provided to take care of this. + +Example : + +static char *pipefs_dname(struct dentry *dent, char *buffer, int buflen) +{ + return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]", + dentry->d_inode->i_ino); +} + Each dentry has a pointer to its parent dentry, as well as a hash list of child dentries. Child dentries are basically like files in a directory. diff --git a/Documentation/filesystems/xip.txt b/Documentation/filesystems/xip.txt index 6c0cef1..3cc4010 100644 --- a/Documentation/filesystems/xip.txt +++ b/Documentation/filesystems/xip.txt @@ -19,7 +19,7 @@ completely. With execute-in-place, read&write type operations are performed directly from/to the memory backed storage device. For file mappings, the storage device itself is mapped directly into userspace. -This implementation was initialy written for shared memory segments between +This implementation was initially written for shared memory segments between different virtual machines on s390 hardware to allow multiple machines to share the same binaries and libraries. diff --git a/Documentation/fujitsu/frv/gdbstub.txt b/Documentation/fujitsu/frv/gdbstub.txt index 9304fb3..b92bfd9 100644 --- a/Documentation/fujitsu/frv/gdbstub.txt +++ b/Documentation/fujitsu/frv/gdbstub.txt @@ -126,5 +126,5 @@ GDB stub and the debugger: Furthermore, the GDB stub will intercept a number of exceptions automatically if they are caused by kernel execution. It will also intercept BUG() macro -invokation. +invocation. diff --git a/Documentation/hwmon/adm1026 b/Documentation/hwmon/adm1026 index 473c689..f4327db 100644 --- a/Documentation/hwmon/adm1026 +++ b/Documentation/hwmon/adm1026 @@ -80,7 +80,7 @@ temperature sensor inputs. Both the PWM output and the DAC output can be used to control fan speed. Usually only one of these two outputs will be used. Write the minimum PWM or DAC value to the appropriate control register. Then set the low temperature limit in the tmin values for each -temperature sensor. The range of control is fixed at 20 °C, and the +temperature sensor. The range of control is fixed at 20 °C, and the largest difference between current and tmin of the temperature sensors sets the control output. See the datasheet for several example circuits for controlling fan speed with the PWM and DAC outputs. The fan speed sensors diff --git a/Documentation/hwmon/coretemp b/Documentation/hwmon/coretemp new file mode 100644 index 0000000..870cda9 --- /dev/null +++ b/Documentation/hwmon/coretemp @@ -0,0 +1,36 @@ +Kernel driver coretemp +====================== + +Supported chips: + * All Intel Core family + Prefix: 'coretemp' + CPUID: family 0x6, models 0xe, 0xf + Datasheet: Intel 64 and IA-32 Architectures Software Developer's Manual + Volume 3A: System Programming Guide + +Author: Rudolf Marek + +Description +----------- + +This driver permits reading temperature sensor embedded inside Intel Core CPU. +Temperature is measured in degrees Celsius and measurement resolution is +1 degree C. Valid temperatures are from 0 to TjMax degrees C, because +the actual value of temperature register is in fact a delta from TjMax. + +Temperature known as TjMax is the maximum junction temperature of processor. +Intel defines this temperature as 85C or 100C. At this temperature, protection +mechanism will perform actions to forcibly cool down the processor. Alarm +may be raised, if the temperature grows enough (more than TjMax) to trigger +the Out-Of-Spec bit. Following table summarizes the exported sysfs files: + +temp1_input - Core temperature (in millidegrees Celsius). +temp1_crit - Maximum junction temperature (in millidegrees Celsius). +temp1_crit_alarm - Set when Out-of-spec bit is set, never clears. + Correct CPU operation is no longer guaranteed. +temp1_label - Contains string "Core X", where X is processor + number. + +The TjMax temperature is set to 85 degrees C if undocumented model specific +register (UMSR) 0xee has bit 30 set. If not the TjMax is 100 degrees C as +(sometimes) documented in processor datasheet. diff --git a/Documentation/hwmon/gl518sm b/Documentation/hwmon/gl518sm index ce08818..229f8b7 100644 --- a/Documentation/hwmon/gl518sm +++ b/Documentation/hwmon/gl518sm @@ -13,7 +13,7 @@ Supported chips: Authors: Frodo Looijaard <frodol@dds.nl>, - Kyösti Mälkki <kmalkki@cc.hut.fi> + Kyösti Mälkki <kmalkki@cc.hut.fi> Hong-Gunn Chew <hglinux@gunnet.org> Jean Delvare <khali@linux-fr.org> diff --git a/Documentation/hwmon/lm83 b/Documentation/hwmon/lm83 index f7aad14..a04d1fe 100644 --- a/Documentation/hwmon/lm83 +++ b/Documentation/hwmon/lm83 @@ -45,7 +45,7 @@ Unconfirmed motherboards: The LM82 is confirmed to have been found on most AMD Geode reference designs and test platforms. -The driver has been successfully tested by Magnus Forsström, who I'd +The driver has been successfully tested by Magnus Forsström, who I'd like to thank here. More testers will be of course welcome. The fact that the LM83 is only scarcely used can be easily explained. diff --git a/Documentation/hwmon/max6650 b/Documentation/hwmon/max6650 new file mode 100644 index 0000000..8be7beb --- /dev/null +++ b/Documentation/hwmon/max6650 @@ -0,0 +1,53 @@ +Kernel driver max6650 +===================== + +Supported chips: + * Maxim 6650 / 6651 + Prefix: 'max6650' + Addresses scanned: I2C 0x1b, 0x1f, 0x48, 0x4b + Datasheet: http://pdfserv.maxim-ic.com/en/ds/MAX6650-MAX6651.pdf + +Authors: + Hans J. Koch <hjk@linutronix.de> + John Morris <john.morris@spirentcom.com> + Claus Gindhart <claus.gindhart@kontron.com> + +Description +----------- + +This driver implements support for the Maxim 6650/6651 + +The 2 devices are very similar, but the Maxim 6550 has a reduced feature +set, e.g. only one fan-input, instead of 4 for the 6651. + +The driver is not able to distinguish between the 2 devices. + +The driver provides the following sensor accesses in sysfs: + +fan1_input ro fan tachometer speed in RPM +fan2_input ro " +fan3_input ro " +fan4_input ro " +fan1_target rw desired fan speed in RPM (closed loop mode only) +pwm1_enable rw regulator mode, 0=full on, 1=open loop, 2=closed loop +pwm1 rw relative speed (0-255), 255=max. speed. + Used in open loop mode only. +fan1_div rw sets the speed range the inputs can handle. Legal + values are 1, 2, 4, and 8. Use lower values for + faster fans. + +Module parameters +----------------- + +If your board has a BIOS that initializes the MAX6650/6651 correctly, you can +simply load your module without parameters. It won't touch the configuration +registers then. If your board BIOS doesn't initialize the chip, or you want +different settings, you can set the following parameters: + +voltage_12V: 5=5V fan, 12=12V fan, 0=don't change +prescaler: Possible values are 1,2,4,8,16, or 0 for don't change +clock: The clock frequency in Hz of the chip the driver should assume [254000] + +Please have a look at the MAX6650/6651 data sheet and make sure that you fully +understand the meaning of these parameters before you attempt to change them. + diff --git a/Documentation/hwmon/sis5595 b/Documentation/hwmon/sis5595 index b7ae36b..4f8877a 100644 --- a/Documentation/hwmon/sis5595 +++ b/Documentation/hwmon/sis5595 @@ -8,7 +8,7 @@ Supported chips: Datasheet: Publicly available at the Silicon Integrated Systems Corp. site. Authors: - Kyösti Mälkki <kmalkki@cc.hut.fi>, + Kyösti Mälkki <kmalkki@cc.hut.fi>, Mark D. Studebaker <mdsxyz123@yahoo.com>, Aurelien Jarno <aurelien@aurel32.net> 2.6 port diff --git a/Documentation/hwmon/smsc47m1 b/Documentation/hwmon/smsc47m1 index 04a1112..42c8431 100644 --- a/Documentation/hwmon/smsc47m1 +++ b/Documentation/hwmon/smsc47m1 @@ -14,6 +14,10 @@ Supported chips: http://www.smsc.com/main/datasheets/47m14x.pdf http://www.smsc.com/main/tools/discontinued/47m15x.pdf http://www.smsc.com/main/datasheets/47m192.pdf + * SMSC LPC47M292 + Addresses scanned: none, address read from Super I/O config space + Prefix: 'smsc47m2' + Datasheet: Not public * SMSC LPC47M997 Addresses scanned: none, address read from Super I/O config space Prefix: 'smsc47m1' @@ -32,9 +36,10 @@ Description The Standard Microsystems Corporation (SMSC) 47M1xx Super I/O chips contain monitoring and PWM control circuitry for two fans. -The 47M15x and 47M192 chips contain a full 'hardware monitoring block' -in addition to the fan monitoring and control. The hardware monitoring -block is not supported by the driver. +The LPC47M15x, LPC47M192 and LPC47M292 chips contain a full 'hardware +monitoring block' in addition to the fan monitoring and control. The +hardware monitoring block is not supported by this driver, use the +smsc47m192 driver for that. No documentation is available for the 47M997, but it has the same device ID as the 47M15x and 47M192 chips and seems to be compatible. diff --git a/Documentation/hwmon/smsc47m192 b/Documentation/hwmon/smsc47m192 index 45d6453..6d54ecb 100644 --- a/Documentation/hwmon/smsc47m192 +++ b/Documentation/hwmon/smsc47m192 @@ -2,12 +2,13 @@ Kernel driver smsc47m192 ======================== Supported chips: - * SMSC LPC47M192 and LPC47M997 + * SMSC LPC47M192, LPC47M15x, LPC47M292 and LPC47M997 Prefix: 'smsc47m192' Addresses scanned: I2C 0x2c - 0x2d Datasheet: The datasheet for LPC47M192 is publicly available from http://www.smsc.com/ - The LPC47M997 is compatible for hardware monitoring. + The LPC47M15x, LPC47M292 and LPC47M997 are compatible for + hardware monitoring. Author: Hartmut Rick <linux@rick.claranet.de> Special thanks to Jean Delvare for careful checking @@ -18,7 +19,7 @@ Description ----------- This driver implements support for the hardware sensor capabilities -of the SMSC LPC47M192 and LPC47M997 Super-I/O chips. +of the SMSC LPC47M192 and compatible Super-I/O chips. These chips support 3 temperature channels and 8 voltage inputs as well as CPU voltage VID input. diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface index d73d2e8..a9a18ad 100644 --- a/Documentation/hwmon/sysfs-interface +++ b/Documentation/hwmon/sysfs-interface @@ -152,6 +152,13 @@ fan[1-*]_div Fan divisor. Note that this is actually an internal clock divisor, which affects the measurable speed range, not the read value. +fan[1-*]_target + Desired fan speed + Unit: revolution/min (RPM) + RW + Only makes sense if the chip supports closed-loop fan speed + control based on the measured fan speed. + Also see the Alarms section for status flags associated with fans. diff --git a/Documentation/hwmon/via686a b/Documentation/hwmon/via686a index a936fb3..d651b25 100644 --- a/Documentation/hwmon/via686a +++ b/Documentation/hwmon/via686a @@ -8,7 +8,7 @@ Supported chips: Datasheet: On request through web form (http://www.via.com.tw/en/support/datasheets/) Authors: - Kyösti Mälkki <kmalkki@cc.hut.fi>, + Kyösti Mälkki <kmalkki@cc.hut.fi>, Mark D. Studebaker <mdsxyz123@yahoo.com> Bob Dougherty <bobd@stanford.edu> (Some conversion-factor data were contributed by diff --git a/Documentation/hwmon/w83792d b/Documentation/hwmon/w83792d index 8171c28..14a668e 100644 --- a/Documentation/hwmon/w83792d +++ b/Documentation/hwmon/w83792d @@ -107,7 +107,7 @@ Known problems: by CR[0x49h]. - The function of vid and vrm has not been finished, because I'm NOT very familiar with them. Adding support is welcome. - - The function of chassis open detection needs more tests. + - The function of chassis open detection needs more tests. - If you have ASUS server board and chip was not found: Then you will need to upgrade to latest (or beta) BIOS. If it does not help please contact us. diff --git a/Documentation/i2c/busses/i2c-i810 b/Documentation/i2c/busses/i2c-i810 index 83c3b97..778210e 100644 --- a/Documentation/i2c/busses/i2c-i810 +++ b/Documentation/i2c/busses/i2c-i810 @@ -7,7 +7,7 @@ Supported adapters: Authors: Frodo Looijaard <frodol@dds.nl>, Philip Edelbrock <phil@netroedge.com>, - Kyösti Mälkki <kmalkki@cc.hut.fi>, + Kyösti Mälkki <kmalkki@cc.hut.fi>, Ralph Metzler <rjkm@thp.uni-koeln.de>, Mark D. Studebaker <mdsxyz123@yahoo.com> diff --git a/Documentation/i2c/busses/i2c-nforce2 b/Documentation/i2c/busses/i2c-nforce2 index 7f61fbc..fae3495 100644 --- a/Documentation/i2c/busses/i2c-nforce2 +++ b/Documentation/i2c/busses/i2c-nforce2 @@ -9,6 +9,8 @@ Supported adapters: * nForce4 MCP-04 10de:0034 * nForce4 MCP51 10de:0264 * nForce4 MCP55 10de:0368 + * nForce4 MCP61 10de:03EB + * nForce4 MCP65 10de:0446 Datasheet: not publicly available, but seems to be similar to the AMD-8111 SMBus 2.0 adapter. diff --git a/Documentation/i2c/busses/i2c-sis96x b/Documentation/i2c/busses/i2c-sis96x index 08d7b2d..266481f 100644 --- a/Documentation/i2c/busses/i2c-sis96x +++ b/Documentation/i2c/busses/i2c-sis96x @@ -60,7 +60,7 @@ Mark D. Studebaker <mdsxyz123@yahoo.com> - design hints and bug fixes Alexander Maylsh <amalysh@web.de> - ditto, plus an important datasheet... almost the one I really wanted -Hans-Günter Lütke Uphues <hg_lu@t-online.de> +Hans-Günter Lütke Uphues <hg_lu@t-online.de> - patch for SiS735 Robert Zwerus <arzie@dds.nl> - testing for SiS645DX diff --git a/Documentation/i2c/busses/i2c-via b/Documentation/i2c/busses/i2c-via index 55edfe1..3438706 100644 --- a/Documentation/i2c/busses/i2c-via +++ b/Documentation/i2c/busses/i2c-via @@ -4,7 +4,7 @@ Supported adapters: * VIA Technologies, InC. VT82C586B Datasheet: Publicly available at the VIA website -Author: Kyösti Mälkki <kmalkki@cc.hut.fi> +Author: Kyösti Mälkki <kmalkki@cc.hut.fi> Description ----------- diff --git a/Documentation/i2c/busses/i2c-viapro b/Documentation/i2c/busses/i2c-viapro index 775f489..06b4be3 100644 --- a/Documentation/i2c/busses/i2c-viapro +++ b/Documentation/i2c/busses/i2c-viapro @@ -17,7 +17,7 @@ Supported adapters: Datasheet: available on request and under NDA from VIA Authors: - Kyösti Mälkki <kmalkki@cc.hut.fi>, + Kyösti Mälkki <kmalkki@cc.hut.fi>, Mark D. Studebaker <mdsxyz123@yahoo.com>, Jean Delvare <khali@linux-fr.org> diff --git a/Documentation/i2c/i2c-protocol b/Documentation/i2c/i2c-protocol index b4022c9..579b92d 100644 --- a/Documentation/i2c/i2c-protocol +++ b/Documentation/i2c/i2c-protocol @@ -68,7 +68,7 @@ We have found some I2C devices that needs the following modifications: Flags I2C_M_IGNORE_NAK Normally message is interrupted immediately if there is [NA] from the - client. Setting this flag treats any [NA] as [A], and all of + client. Setting this flag treats any [NA] as [A], and all of message is sent. These messages may still fail to SCL lo->hi timeout. diff --git a/Documentation/i2c/porting-clients b/Documentation/i2c/porting-clients index ca272b2..7bf82c0 100644 --- a/Documentation/i2c/porting-clients +++ b/Documentation/i2c/porting-clients @@ -1,4 +1,4 @@ -Revision 6, 2005-11-20 +Revision 7, 2007-04-19 Jean Delvare <khali@linux-fr.org> Greg KH <greg@kroah.com> @@ -20,6 +20,10 @@ yours for best results. Technical changes: +* [Driver type] Any driver that was relying on i2c-isa has to be + converted to a proper isa, platform or pci driver. This is not + covered by this guide. + * [Includes] Get rid of "version.h" and <linux/i2c-proc.h>. Includes typically look like that: #include <linux/module.h> @@ -27,12 +31,10 @@ Technical changes: #include <linux/slab.h> #include <linux/jiffies.h> #include <linux/i2c.h> - #include <linux/i2c-isa.h> /* for ISA drivers */ #include <linux/hwmon.h> /* for hardware monitoring drivers */ #include <linux/hwmon-sysfs.h> #include <linux/hwmon-vid.h> /* if you need VRM support */ #include <linux/err.h> /* for class registration */ - #include <asm/io.h> /* if you have I/O operations */ Please respect this inclusion order. Some extra headers may be required for a given driver (e.g. "lm75.h"). @@ -69,20 +71,16 @@ Technical changes: sensors mailing list <lm-sensors@lm-sensors.org> by providing a patch to the Documentation/hwmon/sysfs-interface file. -* [Attach] For I2C drivers, the attach function should make sure - that the adapter's class has I2C_CLASS_HWMON (or whatever class is - suitable for your driver), using the following construct: +* [Attach] The attach function should make sure that the adapter's + class has I2C_CLASS_HWMON (or whatever class is suitable for your + driver), using the following construct: if (!(adapter->class & I2C_CLASS_HWMON)) return 0; - ISA-only drivers of course don't need this. Call i2c_probe() instead of i2c_detect(). * [Detect] As mentioned earlier, the flags parameter is gone. The type_name and client_name strings are replaced by a single name string, which will be filled with a lowercase, short string. - In i2c-only drivers, drop the i2c_is_isa_adapter check, it's - useless. Same for isa-only drivers, as the test would always be - true. Only hybrid drivers (which are quite rare) still need it. The labels used for error paths are reduced to the number needed. It is advised that the labels are given descriptive names such as exit and exit_free. Don't forget to properly set err before diff --git a/Documentation/i2c/summary b/Documentation/i2c/summary index 41dde87..aea60bf 100644 --- a/Documentation/i2c/summary +++ b/Documentation/i2c/summary @@ -4,17 +4,23 @@ I2C and SMBus ============= I2C (pronounce: I squared C) is a protocol developed by Philips. It is a -slow two-wire protocol (10-400 kHz), but it suffices for many types of -devices. +slow two-wire protocol (variable speed, up to 400 kHz), with a high speed +extension (3.4 MHz). It provides an inexpensive bus for connecting many +types of devices with infrequent or low bandwidth communications needs. +I2C is widely used with embedded systems. Some systems use variants that +don't meet branding requirements, and so are not advertised as being I2C. -SMBus (System Management Bus) is a subset of the I2C protocol. Many -modern mainboards have a System Management Bus. There are a lot of -devices which can be connected to a SMBus; the most notable are modern -memory chips with EEPROM memories and chips for hardware monitoring. +SMBus (System Management Bus) is based on the I2C protocol, and is mostly +a subset of I2C protocols and signaling. Many I2C devices will work on an +SMBus, but some SMBus protocols add semantics beyond what is required to +achieve I2C branding. Modern PC mainboards rely on SMBus. The most common +devices connected through SMBus are RAM modules configured using I2C EEPROMs, +and hardware monitoring chips. -Because the SMBus is just a special case of the generalized I2C bus, we -can simulate the SMBus protocol on plain I2C busses. The reverse is -regretfully impossible. +Because the SMBus is mostly a subset of the generalized I2C bus, we can +use its protocols on many I2C systems. However, there are systems that don't +meet both SMBus and I2C electrical constraints; and others which can't +implement all the common SMBus protocol semantics or messages. Terminology @@ -29,6 +35,7 @@ When we talk about I2C, we use the following terms: An Algorithm driver contains general code that can be used for a whole class of I2C adapters. Each specific adapter driver depends on one algorithm driver. + A Driver driver (yes, this sounds ridiculous, sorry) contains the general code to access some type of device. Each detected device gets its own data in the Client structure. Usually, Driver and Client are more closely @@ -40,6 +47,10 @@ a separate Adapter and Algorithm driver), and drivers for your I2C devices in this package. See the lm_sensors project http://www.lm-sensors.nu for device drivers. +At this time, Linux only operates I2C (or SMBus) in master mode; you can't +use these APIs to make a Linux system behave as a slave/device, either to +speak a custom protocol or to emulate some other device. + Included Bus Drivers ==================== diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients index fbcff96..3d8d36b 100644 --- a/Documentation/i2c/writing-clients +++ b/Documentation/i2c/writing-clients @@ -1,5 +1,5 @@ This is a small guide for those who want to write kernel drivers for I2C -or SMBus devices. +or SMBus devices, using Linux as the protocol host/master (not slave). To set up a driver, you need to do several things. Some are optional, and some things can be done slightly or completely different. Use this as a @@ -29,8 +29,16 @@ static struct i2c_driver foo_driver = { .driver = { .name = "foo", }, + + /* iff driver uses driver model ("new style") binding model: */ + .probe = foo_probe, + .remove = foo_remove, + + /* else, driver uses "legacy" binding model: */ .attach_adapter = foo_attach_adapter, .detach_client = foo_detach_client, + + /* these may be used regardless of the driver binding model */ .shutdown = foo_shutdown, /* optional */ .suspend = foo_suspend, /* optional */ .resume = foo_resume, /* optional */ @@ -40,7 +48,8 @@ static struct i2c_driver foo_driver = { The name field is the driver name, and must not contain spaces. It should match the module name (if the driver can be compiled as a module), although you can use MODULE_ALIAS (passing "foo" in this example) to add -another name for the module. +another name for the module. If the driver name doesn't match the module +name, the module won't be automatically loaded (hotplug/coldplug). All other fields are for call-back functions which will be explained below. @@ -65,16 +74,13 @@ An example structure is below. struct foo_data { struct i2c_client client; - struct semaphore lock; /* For ISA access in `sensors' drivers. */ - int sysctl_id; /* To keep the /proc directory entry for - `sensors' drivers. */ enum chips type; /* To keep the chips type for `sensors' drivers. */ /* Because the i2c bus is slow, it is often useful to cache the read information of a chip for some time (for example, 1 or 2 seconds). It depends of course on the device whether this is really worthwhile or even sensible. */ - struct semaphore update_lock; /* When we are reading lots of information, + struct mutex update_lock; /* When we are reading lots of information, another process should not update the below information */ char valid; /* != 0 if the following fields are valid. */ @@ -95,8 +101,7 @@ some obscure clients). But we need generic reading and writing routines. I have found it useful to define foo_read and foo_write function for this. For some cases, it will be easier to call the i2c functions directly, but many chips have some kind of register-value idea that can easily -be encapsulated. Also, some chips have both ISA and I2C interfaces, and -it useful to abstract from this (only for `sensors' drivers). +be encapsulated. The below functions are simple examples, and should not be copied literally. @@ -119,28 +124,101 @@ literally. return i2c_smbus_write_word_data(client,reg,value); } -For sensors code, you may have to cope with ISA registers too. Something -like the below often works. Note the locking! - - int foo_read_value(struct i2c_client *client, u8 reg) - { - int res; - if (i2c_is_isa_client(client)) { - down(&(((struct foo_data *) (client->data)) -> lock)); - outb_p(reg,client->addr + FOO_ADDR_REG_OFFSET); - res = inb_p(client->addr + FOO_DATA_REG_OFFSET); - up(&(((struct foo_data *) (client->data)) -> lock)); - return res; - } else - return i2c_smbus_read_byte_data(client,reg); - } - -Writing is done the same way. - Probing and attaching ===================== +The Linux I2C stack was originally written to support access to hardware +monitoring chips on PC motherboards, and thus it embeds some assumptions +that are more appropriate to SMBus (and PCs) than to I2C. One of these +assumptions is that most adapters and devices drivers support the SMBUS_QUICK +protocol to probe device presence. Another is that devices and their drivers +can be sufficiently configured using only such probe primitives. + +As Linux and its I2C stack became more widely used in embedded systems +and complex components such as DVB adapters, those assumptions became more +problematic. Drivers for I2C devices that issue interrupts need more (and +different) configuration information, as do drivers handling chip variants +that can't be distinguished by protocol probing, or which need some board +specific information to operate correctly. + +Accordingly, the I2C stack now has two models for associating I2C devices +with their drivers: the original "legacy" model, and a newer one that's +fully compatible with the Linux 2.6 driver model. These models do not mix, +since the "legacy" model requires drivers to create "i2c_client" device +objects after SMBus style probing, while the Linux driver model expects +drivers to be given such device objects in their probe() routines. + + +Standard Driver Model Binding ("New Style") +------------------------------------------- + +System infrastructure, typically board-specific initialization code or +boot firmware, reports what I2C devices exist. For example, there may be +a table, in the kernel or from the boot loader, identifying I2C devices +and linking them to board-specific configuration information about IRQs +and other wiring artifacts, chip type, and so on. That could be used to +create i2c_client objects for each I2C device. + +I2C device drivers using this binding model work just like any other +kind of driver in Linux: they provide a probe() method to bind to +those devices, and a remove() method to unbind. + + static int foo_probe(struct i2c_client *client); + static int foo_remove(struct i2c_client *client); + +Remember that the i2c_driver does not create those client handles. The +handle may be used during foo_probe(). If foo_probe() reports success +(zero not a negative status code) it may save the handle and use it until +foo_remove() returns. That binding model is used by most Linux drivers. + +Drivers match devices when i2c_client.driver_name and the driver name are +the same; this approach is used in several other busses that don't have +device typing support in the hardware. The driver and module name should +match, so hotplug/coldplug mechanisms will modprobe the driver. + + +Device Creation (Standard driver model) +--------------------------------------- + +If you know for a fact that an I2C device is connected to a given I2C bus, +you can instantiate that device by simply filling an i2c_board_info +structure with the device address and driver name, and calling +i2c_new_device(). This will create the device, then the driver core will +take care of finding the right driver and will call its probe() method. +If a driver supports different device types, you can specify the type you +want using the type field. You can also specify an IRQ and platform data +if needed. + +Sometimes you know that a device is connected to a given I2C bus, but you +don't know the exact address it uses. This happens on TV adapters for +example, where the same driver supports dozens of slightly different +models, and I2C device addresses change from one model to the next. In +that case, you can use the i2c_new_probed_device() variant, which is +similar to i2c_new_device(), except that it takes an additional list of +possible I2C addresses to probe. A device is created for the first +responsive address in the list. If you expect more than one device to be +present in the address range, simply call i2c_new_probed_device() that +many times. + +The call to i2c_new_device() or i2c_new_probed_device() typically happens +in the I2C bus driver. You may want to save the returned i2c_client +reference for later use. + + +Device Deletion (Standard driver model) +--------------------------------------- + +Each I2C device which has been created using i2c_new_device() or +i2c_new_probed_device() can be unregistered by calling +i2c_unregister_device(). If you don't call it explicitly, it will be +called automatically before the underlying I2C bus itself is removed, as a +device can't survive its parent in the device driver model. + + +Legacy Driver Binding Model +--------------------------- + Most i2c devices can be present on several i2c addresses; for some this is determined in hardware (by soldering some chip pins to Vcc or Ground), for others this can be changed in software (by writing to specific client @@ -157,13 +235,9 @@ detection algorithm. You do not have to use this parameter interface; but don't try to use function i2c_probe() if you don't. -NOTE: If you want to write a `sensors' driver, the interface is slightly - different! See below. - - -Probing classes ---------------- +Probing classes (Legacy model) +------------------------------ All parameters are given as lists of unsigned 16-bit integers. Lists are terminated by I2C_CLIENT_END. @@ -210,8 +284,8 @@ Note that you *have* to call the defined variable `normal_i2c', without any prefix! -Attaching to an adapter ------------------------ +Attaching to an adapter (Legacy model) +-------------------------------------- Whenever a new adapter is inserted, or for all adapters if the driver is being registered, the callback attach_adapter() is called. Now is the @@ -237,17 +311,13 @@ them (unless a `force' parameter was used). In addition, addresses that are already in use (by some other registered client) are skipped. -The detect client function --------------------------- +The detect client function (Legacy model) +----------------------------------------- The detect client function is called by i2c_probe. The `kind' parameter contains -1 for a probed detection, 0 for a forced detection, or a positive number for a forced detection with a chip type forced. -Below, some things are only needed if this is a `sensors' driver. Those -parts are between /* SENSORS ONLY START */ and /* SENSORS ONLY END */ -markers. - Returning an error different from -ENODEV in a detect function will cause the detection to stop: other addresses and adapters won't be scanned. This should only be done on fatal or internal errors, such as a memory @@ -256,64 +326,20 @@ shortage or i2c_attach_client failing. For now, you can ignore the `flags' parameter. It is there for future use. int foo_detect_client(struct i2c_adapter *adapter, int address, - unsigned short flags, int kind) + int kind) { int err = 0; int i; - struct i2c_client *new_client; + struct i2c_client *client; struct foo_data *data; - const char *client_name = ""; /* For non-`sensors' drivers, put the real - name here! */ + const char *name = ""; /* Let's see whether this adapter can support what we need. - Please substitute the things you need here! - For `sensors' drivers, add `! is_isa &&' to the if statement */ + Please substitute the things you need here! */ if (!i2c_check_functionality(adapter,I2C_FUNC_SMBUS_WORD_DATA | I2C_FUNC_SMBUS_WRITE_BYTE)) goto ERROR0; - /* SENSORS ONLY START */ - const char *type_name = ""; - int is_isa = i2c_is_isa_adapter(adapter); - - /* Do this only if the chip can additionally be found on the ISA bus - (hybrid chip). */ - - if (is_isa) { - - /* Discard immediately if this ISA range is already used */ - /* FIXME: never use check_region(), only request_region() */ - if (check_region(address,FOO_EXTENT)) - goto ERROR0; - - /* Probe whether there is anything on this address. - Some example code is below, but you will have to adapt this - for your own driver */ - - if (kind < 0) /* Only if no force parameter was used */ { - /* We may need long timeouts at least for some chips. */ - #define REALLY_SLOW_IO - i = inb_p(address + 1); - if (inb_p(address + 2) != i) - goto ERROR0; - if (inb_p(address + 3) != i) - goto ERROR0; - if (inb_p(address + 7) != i) - goto ERROR0; - #undef REALLY_SLOW_IO - - /* Let's just hope nothing breaks here */ - i = inb_p(address + 5) & 0x7f; - outb_p(~i & 0x7f,address+5); - if ((inb_p(address + 5) & 0x7f) != (~i & 0x7f)) { - outb_p(i,address+5); - return 0; - } - } - } - - /* SENSORS ONLY END */ - /* OK. For now, we presume we have a valid client. We now create the client structure, even though we cannot fill it completely yet. But it allows us to access several i2c functions safely */ @@ -323,13 +349,12 @@ For now, you can ignore the `flags' parameter. It is there for future use. goto ERROR0; } - new_client = &data->client; - i2c_set_clientdata(new_client, data); + client = &data->client; + i2c_set_clientdata(client, data); - new_client->addr = address; - new_client->adapter = adapter; - new_client->driver = &foo_driver; - new_client->flags = 0; + client->addr = address; + client->adapter = adapter; + client->driver = &foo_driver; /* Now, we do the remaining detection. If no `force' parameter is used. */ @@ -337,19 +362,17 @@ For now, you can ignore the `flags' parameter. It is there for future use. parameter was used. */ if (kind < 0) { /* The below is of course bogus */ - if (foo_read(new_client,FOO_REG_GENERIC) != FOO_GENERIC_VALUE) + if (foo_read(client, FOO_REG_GENERIC) != FOO_GENERIC_VALUE) goto ERROR1; } - /* SENSORS ONLY START */ - /* Next, specific detection. This is especially important for `sensors' devices. */ /* Determine the chip type. Not needed if a `force_CHIPTYPE' parameter was used. */ if (kind <= 0) { - i = foo_read(new_client,FOO_REG_CHIPTYPE); + i = foo_read(client, FOO_REG_CHIPTYPE); if (i == FOO_TYPE_1) kind = chip1; /* As defined in the enum */ else if (i == FOO_TYPE_2) @@ -363,63 +386,31 @@ For now, you can ignore the `flags' parameter. It is there for future use. /* Now set the type and chip names */ if (kind == chip1) { - type_name = "chip1"; /* For /proc entry */ - client_name = "CHIP 1"; + name = "chip1"; } else if (kind == chip2) { - type_name = "chip2"; /* For /proc entry */ - client_name = "CHIP 2"; + name = "chip2"; } - /* Reserve the ISA region */ - if (is_isa) - request_region(address,FOO_EXTENT,type_name); - - /* SENSORS ONLY END */ - /* Fill in the remaining client fields. */ - strcpy(new_client->name,client_name); - - /* SENSORS ONLY BEGIN */ + strlcpy(client->name, name, I2C_NAME_SIZE); data->type = kind; - /* SENSORS ONLY END */ - - data->valid = 0; /* Only if you use this field */ - init_MUTEX(&data->update_lock); /* Only if you use this field */ + mutex_init(&data->update_lock); /* Only if you use this field */ /* Any other initializations in data must be done here too. */ - /* Tell the i2c layer a new client has arrived */ - if ((err = i2c_attach_client(new_client))) - goto ERROR3; - - /* SENSORS ONLY BEGIN */ - /* Register a new directory entry with module sensors. See below for - the `template' structure. */ - if ((i = i2c_register_entry(new_client, type_name, - foo_dir_table_template,THIS_MODULE)) < 0) { - err = i; - goto ERROR4; - } - data->sysctl_id = i; - - /* SENSORS ONLY END */ - /* This function can write default values to the client registers, if needed. */ - foo_init_client(new_client); + foo_init_client(client); + + /* Tell the i2c layer a new client has arrived */ + if ((err = i2c_attach_client(client))) + goto ERROR1; + return 0; /* OK, this is not exactly good programming practice, usually. But it is very code-efficient in this case. */ - ERROR4: - i2c_detach_client(new_client); - ERROR3: - ERROR2: - /* SENSORS ONLY START */ - if (is_isa) - release_region(address,FOO_EXTENT); - /* SENSORS ONLY END */ ERROR1: kfree(data); ERROR0: @@ -427,8 +418,8 @@ For now, you can ignore the `flags' parameter. It is there for future use. } -Removing the client -=================== +Removing the client (Legacy model) +================================== The detach_client call back function is called when a client should be removed. It may actually fail, but only when panicking. This code is @@ -436,22 +427,12 @@ much simpler than the attachment code, fortunately! int foo_detach_client(struct i2c_client *client) { - int err,i; - - /* SENSORS ONLY START */ - /* Deregister with the `i2c-proc' module. */ - i2c_deregister_entry(((struct lm78_data *)(client->data))->sysctl_id); - /* SENSORS ONLY END */ + int err; /* Try to detach the client from i2c space */ if ((err = i2c_detach_client(client))) return err; - /* HYBRID SENSORS CHIP ONLY START */ - if i2c_is_isa_client(client) - release_region(client->addr,LM78_EXTENT); - /* HYBRID SENSORS CHIP ONLY END */ - kfree(i2c_get_clientdata(client)); return 0; } @@ -464,45 +445,34 @@ When the kernel is booted, or when your foo driver module is inserted, you have to do some initializing. Fortunately, just attaching (registering) the driver module is usually enough. - /* Keep track of how far we got in the initialization process. If several - things have to initialized, and we fail halfway, only those things - have to be cleaned up! */ - static int __initdata foo_initialized = 0; - static int __init foo_init(void) { int res; - printk("foo version %s (%s)\n",FOO_VERSION,FOO_DATE); if ((res = i2c_add_driver(&foo_driver))) { printk("foo: Driver registration failed, module not inserted.\n"); - foo_cleanup(); return res; } - foo_initialized ++; return 0; } - void foo_cleanup(void) + static void __exit foo_cleanup(void) { - if (foo_initialized == 1) { - if ((res = i2c_del_driver(&foo_driver))) { - printk("foo: Driver registration failed, module not removed.\n"); - return; - } - foo_initialized --; - } + i2c_del_driver(&foo_driver); } /* Substitute your own name and email address */ MODULE_AUTHOR("Frodo Looijaard <frodol@dds.nl>" MODULE_DESCRIPTION("Driver for Barf Inc. Foo I2C devices"); + /* a few non-GPL license types are also allowed */ + MODULE_LICENSE("GPL"); + module_init(foo_init); module_exit(foo_cleanup); Note that some functions are marked by `__init', and some data structures -by `__init_data'. Hose functions and structures can be removed after +by `__initdata'. These functions and structures can be removed after kernel booting (or module loading) is completed. @@ -632,110 +602,7 @@ General purpose routines Below all general purpose routines are listed, that were not mentioned before. - /* This call returns a unique low identifier for each registered adapter, - * or -1 if the adapter was not registered. + /* This call returns a unique low identifier for each registered adapter. */ extern int i2c_adapter_id(struct i2c_adapter *adap); - -The sensors sysctl/proc interface -================================= - -This section only applies if you write `sensors' drivers. - -Each sensors driver creates a directory in /proc/sys/dev/sensors for each -registered client. The directory is called something like foo-i2c-4-65. -The sensors module helps you to do this as easily as possible. - -The template ------------- - -You will need to define a ctl_table template. This template will automatically -be copied to a newly allocated structure and filled in where necessary when -you call sensors_register_entry. - -First, I will give an example definition. - static ctl_table foo_dir_table_template[] = { - { FOO_SYSCTL_FUNC1, "func1", NULL, 0, 0644, NULL, &i2c_proc_real, - &i2c_sysctl_real,NULL,&foo_func }, - { FOO_SYSCTL_FUNC2, "func2", NULL, 0, 0644, NULL, &i2c_proc_real, - &i2c_sysctl_real,NULL,&foo_func }, - { FOO_SYSCTL_DATA, "data", NULL, 0, 0644, NULL, &i2c_proc_real, - &i2c_sysctl_real,NULL,&foo_data }, - { 0 } - }; - -In the above example, three entries are defined. They can either be -accessed through the /proc interface, in the /proc/sys/dev/sensors/* -directories, as files named func1, func2 and data, or alternatively -through the sysctl interface, in the appropriate table, with identifiers -FOO_SYSCTL_FUNC1, FOO_SYSCTL_FUNC2 and FOO_SYSCTL_DATA. - -The third, sixth and ninth parameters should always be NULL, and the -fourth should always be 0. The fifth is the mode of the /proc file; -0644 is safe, as the file will be owned by root:root. - -The seventh and eighth parameters should be &i2c_proc_real and -&i2c_sysctl_real if you want to export lists of reals (scaled -integers). You can also use your own function for them, as usual. -Finally, the last parameter is the call-back to gather the data -(see below) if you use the *_proc_real functions. - - -Gathering the data ------------------- - -The call back functions (foo_func and foo_data in the above example) -can be called in several ways; the operation parameter determines -what should be done: - - * If operation == SENSORS_PROC_REAL_INFO, you must return the - magnitude (scaling) in nrels_mag; - * If operation == SENSORS_PROC_REAL_READ, you must read information - from the chip and return it in results. The number of integers - to display should be put in nrels_mag; - * If operation == SENSORS_PROC_REAL_WRITE, you must write the - supplied information to the chip. nrels_mag will contain the number - of integers, results the integers themselves. - -The *_proc_real functions will display the elements as reals for the -/proc interface. If you set the magnitude to 2, and supply 345 for -SENSORS_PROC_REAL_READ, it would display 3.45; and if the user would -write 45.6 to the /proc file, it would be returned as 4560 for -SENSORS_PROC_REAL_WRITE. A magnitude may even be negative! - -An example function: - - /* FOO_FROM_REG and FOO_TO_REG translate between scaled values and - register values. Note the use of the read cache. */ - void foo_in(struct i2c_client *client, int operation, int ctl_name, - int *nrels_mag, long *results) - { - struct foo_data *data = client->data; - int nr = ctl_name - FOO_SYSCTL_FUNC1; /* reduce to 0 upwards */ - - if (operation == SENSORS_PROC_REAL_INFO) - *nrels_mag = 2; - else if (operation == SENSORS_PROC_REAL_READ) { - /* Update the readings cache (if necessary) */ - foo_update_client(client); - /* Get the readings from the cache */ - results[0] = FOO_FROM_REG(data->foo_func_base[nr]); - results[1] = FOO_FROM_REG(data->foo_func_more[nr]); - results[2] = FOO_FROM_REG(data->foo_func_readonly[nr]); - *nrels_mag = 2; - } else if (operation == SENSORS_PROC_REAL_WRITE) { - if (*nrels_mag >= 1) { - /* Update the cache */ - data->foo_base[nr] = FOO_TO_REG(results[0]); - /* Update the chip */ - foo_write_value(client,FOO_REG_FUNC_BASE(nr),data->foo_base[nr]); - } - if (*nrels_mag >= 2) { - /* Update the cache */ - data->foo_more[nr] = FOO_TO_REG(results[1]); - /* Update the chip */ - foo_write_value(client,FOO_REG_FUNC_MORE(nr),data->foo_more[nr]); - } - } - } diff --git a/Documentation/i2o/README b/Documentation/i2o/README index 9aa6ddb..0ebf58c 100644 --- a/Documentation/i2o/README +++ b/Documentation/i2o/README @@ -30,13 +30,13 @@ Juha Sievanen, University of Helsinki Finland Bug fixes Core code extensions -Auvo Häkkinen, University of Helsinki Finland +Auvo Häkkinen, University of Helsinki Finland LAN OSM code /Proc interface to LAN class Bug fixes Core code extensions -Taneli Vähäkangas, University of Helsinki Finland +Taneli Vähäkangas, University of Helsinki Finland Fixes to i2o_config CREDITS diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt index 38fe1f0..d01b7a2 100644 --- a/Documentation/i386/boot.txt +++ b/Documentation/i386/boot.txt @@ -2,7 +2,7 @@ ---------------------------- H. Peter Anvin <hpa@zytor.com> - Last update 2007-01-26 + Last update 2007-05-07 On the i386 platform, the Linux kernel uses a rather complicated boot convention. This has evolved partially due to historical aspects, as @@ -11,7 +11,7 @@ bootable image, the complicated PC memory model and due to changed expectations in the PC industry caused by the effective demise of real-mode DOS as a mainstream operating system. -Currently, four versions of the Linux/i386 boot protocol exist. +Currently, the following versions of the Linux/i386 boot protocol exist. Old kernels: zImage/Image support only. Some very early kernels may not even support a command line. @@ -35,9 +35,13 @@ Protocol 2.03: (Kernel 2.4.18-pre1) Explicitly makes the highest possible initrd address available to the bootloader. Protocol 2.04: (Kernel 2.6.14) Extend the syssize field to four bytes. + Protocol 2.05: (Kernel 2.6.20) Make protected mode kernel relocatable. Introduce relocatable_kernel and kernel_alignment fields. +Protocol 2.06: (Kernel 2.6.22) Added a field that contains the size of + the boot command line + **** MEMORY LAYOUT @@ -133,6 +137,8 @@ Offset Proto Name Meaning 022C/4 2.03+ initrd_addr_max Highest legal initrd address 0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel 0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not +0235/3 N/A pad2 Unused +0238/4 2.06+ cmdline_size Maximum size of the kernel command line (1) For backwards compatibility, if the setup_sects field contains 0, the real value is 4. @@ -177,9 +183,9 @@ filled out, however: a version number. Otherwise, enter 0xFF here. Assigned boot loader ids: - 0 LILO + 0 LILO (0x00 reserved for pre-2.00 bootloader) 1 Loadlin - 2 bootsect-loader + 2 bootsect-loader (0x20, all other values reserved) 3 SYSLINUX 4 EtherBoot 5 ELILO @@ -204,6 +210,9 @@ filled out, however: additional data (such as the kernel command line) moved in addition to the real-mode kernel itself. + The unit is bytes starting with the beginning of the boot + sector. + ramdisk_image, ramdisk_size: If your boot loader has loaded an initial ramdisk (initrd), set ramdisk_image to the 32-bit pointer to the ramdisk data @@ -233,6 +242,12 @@ filled out, however: if your ramdisk is exactly 131072 bytes long and this field is 0x37FFFFFF, you can start your ramdisk at 0x37FE0000.) + cmdline_size: + The maximum size of the command line without the terminating + zero. This means that the command line can contain at most + cmdline_size characters. With protocol version 2.05 and + earlier, the maximum size was 255. + **** THE KERNEL COMMAND LINE @@ -241,11 +256,10 @@ loader to communicate with the kernel. Some of its options are also relevant to the boot loader itself, see "special command line options" below. -The kernel command line is a null-terminated string currently up to -255 characters long, plus the final null. A string that is too long -will be automatically truncated by the kernel, a boot loader may allow -a longer command line to be passed to permit future kernels to extend -this limit. +The kernel command line is a null-terminated string. The maximum +length can be retrieved from the field cmdline_size. Before protocol +version 2.06, the maximum was 255 characters. A string that is too +long will be automatically truncated by the kernel. If the boot protocol version is 2.02 or later, the address of the kernel command line is given by the header field cmd_line_ptr (see @@ -267,14 +281,54 @@ command line is entered using the following protocol: field. +**** MEMORY LAYOUT OF THE REAL-MODE CODE + +The real-mode code requires a stack/heap to be set up, as well as +memory allocated for the kernel command line. This needs to be done +in the real-mode accessible memory in bottom megabyte. + +It should be noted that modern machines often have a sizable Extended +BIOS Data Area (EBDA). As a result, it is advisable to use as little +of the low megabyte as possible. + +Unfortunately, under the following circumstances the 0x90000 memory +segment has to be used: + + - When loading a zImage kernel ((loadflags & 0x01) == 0). + - When loading a 2.01 or earlier boot protocol kernel. + + -> For the 2.00 and 2.01 boot protocols, the real-mode code + can be loaded at another address, but it is internally + relocated to 0x90000. For the "old" protocol, the + real-mode code must be loaded at 0x90000. + +When loading at 0x90000, avoid using memory above 0x9a000. + +For boot protocol 2.02 or higher, the command line does not have to be +located in the same 64K segment as the real-mode setup code; it is +thus permitted to give the stack/heap the full 64K segment and locate +the command line above it. + +The kernel command line should not be located below the real-mode +code, nor should it be located in high memory. + + **** SAMPLE BOOT CONFIGURATION As a sample configuration, assume the following layout of the real -mode segment (this is a typical, and recommended layout): +mode segment: + + When loading below 0x90000, use the entire segment: - 0x0000-0x7FFF Real mode kernel - 0x8000-0x8FFF Stack and heap - 0x9000-0x90FF Kernel command line + 0x0000-0x7fff Real mode kernel + 0x8000-0xdfff Stack and heap + 0xe000-0xffff Kernel command line + + When loading at 0x90000 OR the protocol version is 2.01 or earlier: + + 0x0000-0x7fff Real mode kernel + 0x8000-0x97ff Stack and heap + 0x9800-0x9fff Kernel command line Such a boot loader should enter the following fields in the header: @@ -290,22 +344,33 @@ Such a boot loader should enter the following fields in the header: ramdisk_image = <initrd_address>; ramdisk_size = <initrd_size>; } + + if ( protocol >= 0x0202 && loadflags & 0x01 ) + heap_end = 0xe000; + else + heap_end = 0x9800; + if ( protocol >= 0x0201 ) { - heap_end_ptr = 0x9000 - 0x200; + heap_end_ptr = heap_end - 0x200; loadflags |= 0x80; /* CAN_USE_HEAP */ } + if ( protocol >= 0x0202 ) { - cmd_line_ptr = base_ptr + 0x9000; + cmd_line_ptr = base_ptr + heap_end; + strcpy(cmd_line_ptr, cmdline); } else { cmd_line_magic = 0xA33F; - cmd_line_offset = 0x9000; - setup_move_size = 0x9100; + cmd_line_offset = heap_end; + setup_move_size = heap_end + strlen(cmdline)+1; + strcpy(base_ptr+cmd_line_offset, cmdline); } } else { /* Very old kernel */ + heap_end = 0x9800; + cmd_line_magic = 0xA33F; - cmd_line_offset = 0x9000; + cmd_line_offset = heap_end; /* A very old kernel MUST have its real-mode code loaded at 0x90000 */ @@ -313,12 +378,11 @@ Such a boot loader should enter the following fields in the header: if ( base_ptr != 0x90000 ) { /* Copy the real-mode kernel */ memcpy(0x90000, base_ptr, (setup_sects+1)*512); - /* Copy the command line */ - memcpy(0x99000, base_ptr+0x9000, 256); - base_ptr = 0x90000; /* Relocated */ } + strcpy(0x90000+cmd_line_offset, cmdline); + /* It is recommended to clear memory up to the 32K mark */ memset(0x90000 + (setup_sects+1)*512, 0, (64-(setup_sects+1))*512); @@ -364,10 +428,11 @@ conflict with actual kernel options now or in the future. line is parsed. mem=<size> - <size> is an integer in C notation optionally followed by K, M - or G (meaning << 10, << 20 or << 30). This specifies the end - of memory to the kernel. This affects the possible placement - of an initrd, since an initrd should be placed near end of + <size> is an integer in C notation optionally followed by + (case insensitive) K, M, G, T, P or E (meaning << 10, << 20, + << 30, << 40, << 50 or << 60). This specifies the end of + memory to the kernel. This affects the possible placement of + an initrd, since an initrd should be placed near end of memory. Note that this is an option to *both* the kernel and the bootloader! @@ -417,7 +482,7 @@ In our example from above, we would do: /* Set up the real-mode kernel stack */ _SS = seg; - _SP = 0x9000; /* Load SP immediately after loading SS! */ + _SP = heap_end; _DS = _ES = _FS = _GS = seg; jmp_far(seg+0x20, 0); /* Run the kernel */ @@ -449,8 +514,9 @@ IMPORTANT: All the hooks are required to preserve %esp, %ebp, %esi and code32_start: A 32-bit flat-mode routine *jumped* to immediately after the transition to protected mode, but before the kernel is - uncompressed. No segments, except CS, are set up; you should - set them up to KERNEL_DS (0x18) yourself. + uncompressed. No segments, except CS, are guaranteed to be + set up (current kernels do, but older ones do not); you should + set them up to BOOT_DS (0x18) yourself. After completing your hook, you should jump to the address that was in this field before your boot loader overwrote it. diff --git a/Documentation/ia64/aliasing-test.c b/Documentation/ia64/aliasing-test.c new file mode 100644 index 0000000..3153167 --- /dev/null +++ b/Documentation/ia64/aliasing-test.c @@ -0,0 +1,247 @@ +/* + * Exercise /dev/mem mmap cases that have been troublesome in the past + * + * (c) Copyright 2007 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas <bjorn.helgaas@hp.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <sys/types.h> +#include <dirent.h> +#include <fcntl.h> +#include <fnmatch.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <unistd.h> + +int sum; + +int map_mem(char *path, off_t offset, size_t length, int touch) +{ + int fd, rc; + void *addr; + int *c; + + fd = open(path, O_RDWR); + if (fd == -1) { + perror(path); + return -1; + } + + addr = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, offset); + if (addr == MAP_FAILED) + return 1; + + if (touch) { + c = (int *) addr; + while (c < (int *) (offset + length)) + sum += *c++; + } + + rc = munmap(addr, length); + if (rc == -1) { + perror("munmap"); + return -1; + } + + close(fd); + return 0; +} + +int scan_sysfs(char *path, char *file, off_t offset, size_t length, int touch) +{ + struct dirent **namelist; + char *name, *path2; + int i, n, r, rc, result = 0; + struct stat buf; + + n = scandir(path, &namelist, 0, alphasort); + if (n < 0) { + perror("scandir"); + return -1; + } + + for (i = 0; i < n; i++) { + name = namelist[i]->d_name; + + if (fnmatch(".", name, 0) == 0) + goto skip; + if (fnmatch("..", name, 0) == 0) + goto skip; + + path2 = malloc(strlen(path) + strlen(name) + 3); + strcpy(path2, path); + strcat(path2, "/"); + strcat(path2, name); + + if (fnmatch(file, name, 0) == 0) { + rc = map_mem(path2, offset, length, touch); + if (rc == 0) + fprintf(stderr, "PASS: %s 0x%lx-0x%lx is %s\n", path2, offset, offset + length, touch ? "readable" : "mappable"); + else if (rc > 0) + fprintf(stderr, "PASS: %s 0x%lx-0x%lx not mappable\n", path2, offset, offset + length); + else { + fprintf(stderr, "FAIL: %s 0x%lx-0x%lx not accessible\n", path2, offset, offset + length); + return rc; + } + } else { + r = lstat(path2, &buf); + if (r == 0 && S_ISDIR(buf.st_mode)) { + rc = scan_sysfs(path2, file, offset, length, touch); + if (rc < 0) + return rc; + } + } + + result |= rc; + free(path2); + +skip: + free(namelist[i]); + } + free(namelist); + return rc; +} + +char buf[1024]; + +int read_rom(char *path) +{ + int fd, rc; + size_t size = 0; + + fd = open(path, O_RDWR); + if (fd == -1) { + perror(path); + return -1; + } + + rc = write(fd, "1", 2); + if (rc <= 0) { + perror("write"); + return -1; + } + + do { + rc = read(fd, buf, sizeof(buf)); + if (rc > 0) + size += rc; + } while (rc > 0); + + close(fd); + return size; +} + +int scan_rom(char *path, char *file) +{ + struct dirent **namelist; + char *name, *path2; + int i, n, r, rc, result = 0; + struct stat buf; + + n = scandir(path, &namelist, 0, alphasort); + if (n < 0) { + perror("scandir"); + return -1; + } + + for (i = 0; i < n; i++) { + name = namelist[i]->d_name; + + if (fnmatch(".", name, 0) == 0) + goto skip; + if (fnmatch("..", name, 0) == 0) + goto skip; + + path2 = malloc(strlen(path) + strlen(name) + 3); + strcpy(path2, path); + strcat(path2, "/"); + strcat(path2, name); + + if (fnmatch(file, name, 0) == 0) { + rc = read_rom(path2); + + /* + * It's OK if the ROM is unreadable. Maybe there + * is no ROM, or some other error ocurred. The + * important thing is that no MCA happened. + */ + if (rc > 0) + fprintf(stderr, "PASS: %s read %ld bytes\n", path2, rc); + else { + fprintf(stderr, "PASS: %s not readable\n", path2); + return rc; + } + } else { + r = lstat(path2, &buf); + if (r == 0 && S_ISDIR(buf.st_mode)) { + rc = scan_rom(path2, file); + if (rc < 0) + return rc; + } + } + + result |= rc; + free(path2); + +skip: + free(namelist[i]); + } + free(namelist); + return rc; +} + +main() +{ + int rc; + + if (map_mem("/dev/mem", 0, 0xA0000, 1) == 0) + fprintf(stderr, "PASS: /dev/mem 0x0-0xa0000 is readable\n"); + else + fprintf(stderr, "FAIL: /dev/mem 0x0-0xa0000 not accessible\n"); + + /* + * It's not safe to blindly read the VGA frame buffer. If you know + * how to poke the card the right way, it should respond, but it's + * not safe in general. Many machines, e.g., Intel chipsets, cover + * up a non-responding card by just returning -1, but others will + * report the failure as a machine check. + */ + if (map_mem("/dev/mem", 0xA0000, 0x20000, 0) == 0) + fprintf(stderr, "PASS: /dev/mem 0xa0000-0xc0000 is mappable\n"); + else + fprintf(stderr, "FAIL: /dev/mem 0xa0000-0xc0000 not accessible\n"); + + if (map_mem("/dev/mem", 0xC0000, 0x40000, 1) == 0) + fprintf(stderr, "PASS: /dev/mem 0xc0000-0x100000 is readable\n"); + else + fprintf(stderr, "FAIL: /dev/mem 0xc0000-0x100000 not accessible\n"); + + /* + * Often you can map all the individual pieces above (0-0xA0000, + * 0xA0000-0xC0000, and 0xC0000-0x100000), but can't map the whole + * thing at once. This is because the individual pieces use different + * attributes, and there's no single attribute supported over the + * whole region. + */ + rc = map_mem("/dev/mem", 0, 1024*1024, 0); + if (rc == 0) + fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 is mappable\n"); + else if (rc > 0) + fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 not mappable\n"); + else + fprintf(stderr, "FAIL: /dev/mem 0x0-0x100000 not accessible\n"); + + scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0, 0xA0000, 1); + scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0xA0000, 0x20000, 0); + scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0xC0000, 0x40000, 1); + scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0, 1024*1024, 0); + + scan_rom("/sys/devices", "rom"); +} diff --git a/Documentation/ia64/aliasing.txt b/Documentation/ia64/aliasing.txt index 38f9a52..9a431a7 100644 --- a/Documentation/ia64/aliasing.txt +++ b/Documentation/ia64/aliasing.txt @@ -112,16 +112,6 @@ POTENTIAL ATTRIBUTE ALIASING CASES The /dev/mem mmap constraints apply. - However, since this is for mapping legacy MMIO space, WB access - does not make sense. This matters on machines without legacy - VGA support: these machines may have WB memory for the entire - first megabyte (or even the entire first granule). - - On these machines, we could mmap legacy_mem as WB, which would - be safe in terms of attribute aliasing, but X has no way of - knowing that it is accessing regular memory, not a frame buffer, - so the kernel should fail the mmap rather than doing it with WB. - read/write of /dev/mem This uses copy_from_user(), which implicitly uses a kernel @@ -138,14 +128,20 @@ POTENTIAL ATTRIBUTE ALIASING CASES ioremap() - This returns a kernel identity mapping for use inside the - kernel. + This returns a mapping for use inside the kernel. If the region is in kern_memmap, we should use the attribute - specified there. Otherwise, if the EFI memory map reports that - the entire granule supports WB, we should use that (granules - that are partially reserved or occupied by firmware do not appear - in kern_memmap). Otherwise, we should use a UC mapping. + specified there. + + If the EFI memory map reports that the entire granule supports + WB, we should use that (granules that are partially reserved + or occupied by firmware do not appear in kern_memmap). + + If the granule contains non-WB memory, but we can cover the + region safely with kernel page table mappings, we can use + ioremap_page_range() as most other architectures do. + + Failing all of the above, we have to fall back to a UC mapping. PAST PROBLEM CASES @@ -158,7 +154,7 @@ PAST PROBLEM CASES succeed. It may create either WB or UC user mappings, depending on whether the region is in kern_memmap or the EFI memory map. - mmap of 0x0-0xA0000 /dev/mem by "hwinfo" on HP sx1000 with VGA enabled + mmap of 0x0-0x9FFFF /dev/mem by "hwinfo" on HP sx1000 with VGA enabled See https://bugzilla.novell.com/show_bug.cgi?id=140858. @@ -171,28 +167,25 @@ PAST PROBLEM CASES so it is safe to use WB mappings. The kernel VGA driver may ioremap the VGA frame buffer at 0xA0000, - which will use a granule-sized UC mapping covering 0-0xFFFFF. This - granule covers some WB-only memory, but since UC is non-speculative, - the processor will never generate an uncacheable reference to the - WB-only areas unless the driver explicitly touches them. + which uses a granule-sized UC mapping. This granule will cover some + WB-only memory, but since UC is non-speculative, the processor will + never generate an uncacheable reference to the WB-only areas unless + the driver explicitly touches them. mmap of 0x0-0xFFFFF legacy_mem by "X" - If the EFI memory map reports this entire range as WB, there - is no VGA MMIO hole, and the mmap should fail or be done with - a WB mapping. + If the EFI memory map reports that the entire range supports the + same attributes, we can allow the mmap (and we will prefer WB if + supported, as is the case with HP sx[12]000 machines with VGA + disabled). - There's no easy way for X to determine whether the 0xA0000-0xBFFFF - region is a frame buffer or just memory, so I think it's best to - just fail this mmap request rather than using a WB mapping. As - far as I know, there's no need to map legacy_mem with WB - mappings. + If EFI reports the range as partly WB and partly UC (as on sx[12]000 + machines with VGA enabled), we must fail the mmap because there's no + safe attribute to use. - Otherwise, a UC mapping of the entire region is probably safe. - The VGA hole means the region will not be in kern_memmap. The - HP sx1000 chipset doesn't support UC access to the memory surrounding - the VGA hole, but X doesn't need that area anyway and should not - reference it. + If EFI reports some of the range but not all (as on Intel firmware + that doesn't report the VGA frame buffer at all), we should fail the + mmap and force the user to map just the specific region of interest. mmap of 0xA0000-0xBFFFF legacy_mem by "X" on HP sx1000 with VGA disabled @@ -202,6 +195,16 @@ PAST PROBLEM CASES This is a special case of the previous case, and the mmap should fail for the same reason as above. + read of /sys/devices/.../rom + + For VGA devices, this may cause an ioremap() of 0xC0000. This + used to be done with a UC mapping, because the VGA frame buffer + at 0xA0000 prevents use of a WB granule. The UC mapping causes + an MCA on HP sx[12]000 chipsets. + + We should use WB page table mappings to avoid covering the VGA + frame buffer. + NOTES [1] SDM rev 2.2, vol 2, sec 4.4.1. diff --git a/Documentation/ia64/err_inject.txt b/Documentation/ia64/err_inject.txt new file mode 100644 index 0000000..6449a70 --- /dev/null +++ b/Documentation/ia64/err_inject.txt @@ -0,0 +1,1068 @@ + +IPF Machine Check (MC) error inject tool +======================================== + +IPF Machine Check (MC) error inject tool is used to inject MC +errors from Linux. The tool is a test bed for IPF MC work flow including +hardware correctable error handling, OS recoverable error handling, MC +event logging, etc. + +The tool includes two parts: a kernel driver and a user application +sample. The driver provides interface to PAL to inject error +and query error injection capabilities. The driver code is in +arch/ia64/kernel/err_inject.c. The application sample (shown below) +provides a combination of various errors and calls the driver's interface +(sysfs interface) to inject errors or query error injection capabilities. + +The tool can be used to test Intel IPF machine MC handling capabilities. +It's especially useful for people who can not access hardware MC injection +tool to inject error. It's also very useful to integrate with other +software test suits to do stressful testing on IPF. + +Below is a sample application as part of the whole tool. The sample +can be used as a working test tool. Or it can be expanded to include +more features. It also can be a integrated into a libary or other user +application to have more thorough test. + +The sample application takes err.conf as error configuation input. Gcc +compiles the code. After you install err_inject driver, you can run +this sample application to inject errors. + +Errata: Itanium 2 Processors Specification Update lists some errata against +the pal_mc_error_inject PAL procedure. The following err.conf has been tested +on latest Montecito PAL. + +err.conf: + +#This is configuration file for err_inject_tool. +#The format of the each line is: +#cpu, loop, interval, err_type_info, err_struct_info, err_data_buffer +#where +# cpu: logical cpu number the error will be inject in. +# loop: times the error will be injected. +# interval: In second. every so often one error is injected. +# err_type_info, err_struct_info: PAL parameters. +# +#Note: All values are hex w/o or w/ 0x prefix. + + +#On cpu2, inject only total 0x10 errors, interval 5 seconds +#corrected, data cache, hier-2, physical addr(assigned by tool code). +#working on Montecito latest PAL. +2, 10, 5, 4101, 95 + +#On cpu4, inject and consume total 0x10 errors, interval 5 seconds +#corrected, data cache, hier-2, physical addr(assigned by tool code). +#working on Montecito latest PAL. +4, 10, 5, 4109, 95 + +#On cpu15, inject and consume total 0x10 errors, interval 5 seconds +#recoverable, DTR0, hier-2. +#working on Montecito latest PAL. +0xf, 0x10, 5, 4249, 15 + +The sample application source code: + +err_injection_tool.c: + +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Copyright (C) 2006 Intel Co + * Fenghua Yu <fenghua.yu@intel.com> + * + */ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <sched.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <errno.h> +#include <time.h> +#include <sys/ipc.h> +#include <sys/sem.h> +#include <sys/wait.h> +#include <sys/mman.h> +#include <sys/shm.h> + +#define MAX_FN_SIZE 256 +#define MAX_BUF_SIZE 256 +#define DATA_BUF_SIZE 256 +#define NR_CPUS 512 +#define MAX_TASK_NUM 2048 +#define MIN_INTERVAL 5 // seconds +#define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte. +#define PARA_FIELD_NUM 5 +#define MASK_SIZE (NR_CPUS/64) +#define PATH_FORMAT "/sys/devices/system/cpu/cpu%d/err_inject/" + +int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask); + +int verbose; +#define vbprintf if (verbose) printf + +int log_info(int cpu, const char *fmt, ...) +{ + FILE *log; + char fn[MAX_FN_SIZE]; + char buf[MAX_BUF_SIZE]; + va_list args; + + sprintf(fn, "%d.log", cpu); + log=fopen(fn, "a+"); + if (log==NULL) { + perror("Error open:"); + return -1; + } + + va_start(args, fmt); + vprintf(fmt, args); + memset(buf, 0, MAX_BUF_SIZE); + vsprintf(buf, fmt, args); + va_end(args); + + fwrite(buf, sizeof(buf), 1, log); + fclose(log); + + return 0; +} + +typedef unsigned long u64; +typedef unsigned int u32; + +typedef union err_type_info_u { + struct { + u64 mode : 3, /* 0-2 */ + err_inj : 3, /* 3-5 */ + err_sev : 2, /* 6-7 */ + err_struct : 5, /* 8-12 */ + struct_hier : 3, /* 13-15 */ + reserved : 48; /* 16-63 */ + } err_type_info_u; + u64 err_type_info; +} err_type_info_t; + +typedef union err_struct_info_u { + struct { + u64 siv : 1, /* 0 */ + c_t : 2, /* 1-2 */ + cl_p : 3, /* 3-5 */ + cl_id : 3, /* 6-8 */ + cl_dp : 1, /* 9 */ + reserved1 : 22, /* 10-31 */ + tiv : 1, /* 32 */ + trigger : 4, /* 33-36 */ + trigger_pl : 3, /* 37-39 */ + reserved2 : 24; /* 40-63 */ + } err_struct_info_cache; + struct { + u64 siv : 1, /* 0 */ + tt : 2, /* 1-2 */ + tc_tr : 2, /* 3-4 */ + tr_slot : 8, /* 5-12 */ + reserved1 : 19, /* 13-31 */ + tiv : 1, /* 32 */ + trigger : 4, /* 33-36 */ + trigger_pl : 3, /* 37-39 */ + reserved2 : 24; /* 40-63 */ + } err_struct_info_tlb; + struct { + u64 siv : 1, /* 0 */ + regfile_id : 4, /* 1-4 */ + reg_num : 7, /* 5-11 */ + reserved1 : 20, /* 12-31 */ + tiv : 1, /* 32 */ + trigger : 4, /* 33-36 */ + trigger_pl : 3, /* 37-39 */ + reserved2 : 24; /* 40-63 */ + } err_struct_info_register; + struct { + u64 reserved; + } err_struct_info_bus_processor_interconnect; + u64 err_struct_info; +} err_struct_info_t; + +typedef union err_data_buffer_u { + struct { + u64 trigger_addr; /* 0-63 */ + u64 inj_addr; /* 64-127 */ + u64 way : 5, /* 128-132 */ + index : 20, /* 133-152 */ + : 39; /* 153-191 */ + } err_data_buffer_cache; + struct { + u64 trigger_addr; /* 0-63 */ + u64 inj_addr; /* 64-127 */ + u64 way : 5, /* 128-132 */ + index : 20, /* 133-152 */ + reserved : 39; /* 153-191 */ + } err_data_buffer_tlb; + struct { + u64 trigger_addr; /* 0-63 */ + } err_data_buffer_register; + struct { + u64 reserved; /* 0-63 */ + } err_data_buffer_bus_processor_interconnect; + u64 err_data_buffer[ERR_DATA_BUFFER_SIZE]; +} err_data_buffer_t; + +typedef union capabilities_u { + struct { + u64 i : 1, + d : 1, + rv : 1, + tag : 1, + data : 1, + mesi : 1, + dp : 1, + reserved1 : 3, + pa : 1, + va : 1, + wi : 1, + reserved2 : 20, + trigger : 1, + trigger_pl : 1, + reserved3 : 30; + } capabilities_cache; + struct { + u64 d : 1, + i : 1, + rv : 1, + tc : 1, + tr : 1, + reserved1 : 27, + trigger : 1, + trigger_pl : 1, + reserved2 : 30; + } capabilities_tlb; + struct { + u64 gr_b0 : 1, + gr_b1 : 1, + fr : 1, + br : 1, + pr : 1, + ar : 1, + cr : 1, + rr : 1, + pkr : 1, + dbr : 1, + ibr : 1, + pmc : 1, + pmd : 1, + reserved1 : 3, + regnum : 1, + reserved2 : 15, + trigger : 1, + trigger_pl : 1, + reserved3 : 30; + } capabilities_register; + struct { + u64 reserved; + } capabilities_bus_processor_interconnect; +} capabilities_t; + +typedef struct resources_s { + u64 ibr0 : 1, + ibr2 : 1, + ibr4 : 1, + ibr6 : 1, + dbr0 : 1, + dbr2 : 1, + dbr4 : 1, + dbr6 : 1, + reserved : 48; +} resources_t; + + +long get_page_size(void) +{ + long page_size=sysconf(_SC_PAGESIZE); + return page_size; +} + +#define PAGE_SIZE (get_page_size()==-1?0x4000:get_page_size()) +#define SHM_SIZE (2*PAGE_SIZE*NR_CPUS) +#define SHM_VA 0x2000000100000000 + +int shmid; +void *shmaddr; + +int create_shm(void) +{ + key_t key; + char fn[MAX_FN_SIZE]; + + /* cpu0 is always existing */ + sprintf(fn, PATH_FORMAT, 0); + if ((key = ftok(fn, 's')) == -1) { + perror("ftok"); + return -1; + } + + shmid = shmget(key, SHM_SIZE, 0644 | IPC_CREAT); + if (shmid == -1) { + if (errno==EEXIST) { + shmid = shmget(key, SHM_SIZE, 0); + if (shmid == -1) { + perror("shmget"); + return -1; + } + } + else { + perror("shmget"); + return -1; + } + } + vbprintf("shmid=%d", shmid); + + /* connect to the segment: */ + shmaddr = shmat(shmid, (void *)SHM_VA, 0); + if (shmaddr == (void*)-1) { + perror("shmat"); + return -1; + } + + memset(shmaddr, 0, SHM_SIZE); + mlock(shmaddr, SHM_SIZE); + + return 0; +} + +int free_shm() +{ + munlock(shmaddr, SHM_SIZE); + shmdt(shmaddr); + semctl(shmid, 0, IPC_RMID); + + return 0; +} + +#ifdef _SEM_SEMUN_UNDEFINED +union semun +{ + int val; + struct semid_ds *buf; + unsigned short int *array; + struct seminfo *__buf; +}; +#endif + +u32 mode=1; /* 1: physical mode; 2: virtual mode. */ +int one_lock=1; +key_t key[NR_CPUS]; +int semid[NR_CPUS]; + +int create_sem(int cpu) +{ + union semun arg; + char fn[MAX_FN_SIZE]; + int sid; + + sprintf(fn, PATH_FORMAT, cpu); + sprintf(fn, "%s/%s", fn, "err_type_info"); + if ((key[cpu] = ftok(fn, 'e')) == -1) { + perror("ftok"); + return -1; + } + + if (semid[cpu]!=0) + return 0; + + /* clear old semaphore */ + if ((sid = semget(key[cpu], 1, 0)) != -1) + semctl(sid, 0, IPC_RMID); + + /* get one semaphore */ + if ((semid[cpu] = semget(key[cpu], 1, IPC_CREAT | IPC_EXCL)) == -1) { + perror("semget"); + printf("Please remove semaphore with key=0x%lx, then run the tool.\n", + (u64)key[cpu]); + return -1; + } + + vbprintf("semid[%d]=0x%lx, key[%d]=%lx\n",cpu,(u64)semid[cpu],cpu, + (u64)key[cpu]); + /* initialize the semaphore to 1: */ + arg.val = 1; + if (semctl(semid[cpu], 0, SETVAL, arg) == -1) { + perror("semctl"); + return -1; + } + + return 0; +} + +static int lock(int cpu) +{ + struct sembuf lock; + + lock.sem_num = cpu; + lock.sem_op = 1; + semop(semid[cpu], &lock, 1); + + return 0; +} + +static int unlock(int cpu) +{ + struct sembuf unlock; + + unlock.sem_num = cpu; + unlock.sem_op = -1; + semop(semid[cpu], &unlock, 1); + + return 0; +} + +void free_sem(int cpu) +{ + semctl(semid[cpu], 0, IPC_RMID); +} + +int wr_multi(char *fn, unsigned long *data, int size) +{ + int fd; + char buf[MAX_BUF_SIZE]; + int ret; + + if (size==1) + sprintf(buf, "%lx", *data); + else if (size==3) + sprintf(buf, "%lx,%lx,%lx", data[0], data[1], data[2]); + else { + fprintf(stderr,"write to file with wrong size!\n"); + return -1; + } + + fd=open(fn, O_RDWR); + if (!fd) { + perror("Error:"); + return -1; + } + ret=write(fd, buf, sizeof(buf)); + close(fd); + return ret; +} + +int wr(char *fn, unsigned long data) +{ + return wr_multi(fn, &data, 1); +} + +int rd(char *fn, unsigned long *data) +{ + int fd; + char buf[MAX_BUF_SIZE]; + + fd=open(fn, O_RDONLY); + if (fd<0) { + perror("Error:"); + return -1; + } + read(fd, buf, MAX_BUF_SIZE); + *data=strtoul(buf, NULL, 16); + close(fd); + return 0; +} + +int rd_status(char *path, int *status) +{ + char fn[MAX_FN_SIZE]; + sprintf(fn, "%s/status", path); + if (rd(fn, (u64*)status)<0) { + perror("status reading error.\n"); + return -1; + } + + return 0; +} + +int rd_capabilities(char *path, u64 *capabilities) +{ + char fn[MAX_FN_SIZE]; + sprintf(fn, "%s/capabilities", path); + if (rd(fn, capabilities)<0) { + perror("capabilities reading error.\n"); + return -1; + } + + return 0; +} + +int rd_all(char *path) +{ + unsigned long err_type_info, err_struct_info, err_data_buffer; + int status; + unsigned long capabilities, resources; + char fn[MAX_FN_SIZE]; + + sprintf(fn, "%s/err_type_info", path); + if (rd(fn, &err_type_info)<0) { + perror("err_type_info reading error.\n"); + return -1; + } + printf("err_type_info=%lx\n", err_type_info); + + sprintf(fn, "%s/err_struct_info", path); + if (rd(fn, &err_struct_info)<0) { + perror("err_struct_info reading error.\n"); + return -1; + } + printf("err_struct_info=%lx\n", err_struct_info); + + sprintf(fn, "%s/err_data_buffer", path); + if (rd(fn, &err_data_buffer)<0) { + perror("err_data_buffer reading error.\n"); + return -1; + } + printf("err_data_buffer=%lx\n", err_data_buffer); + + sprintf(fn, "%s/status", path); + if (rd("status", (u64*)&status)<0) { + perror("status reading error.\n"); + return -1; + } + printf("status=%d\n", status); + + sprintf(fn, "%s/capabilities", path); + if (rd(fn,&capabilities)<0) { + perror("capabilities reading error.\n"); + return -1; + } + printf("capabilities=%lx\n", capabilities); + + sprintf(fn, "%s/resources", path); + if (rd(fn, &resources)<0) { + perror("resources reading error.\n"); + return -1; + } + printf("resources=%lx\n", resources); + + return 0; +} + +int query_capabilities(char *path, err_type_info_t err_type_info, + u64 *capabilities) +{ + char fn[MAX_FN_SIZE]; + err_struct_info_t err_struct_info; + err_data_buffer_t err_data_buffer; + + err_struct_info.err_struct_info=0; + memset(err_data_buffer.err_data_buffer, -1, ERR_DATA_BUFFER_SIZE*8); + + sprintf(fn, "%s/err_type_info", path); + wr(fn, err_type_info.err_type_info); + sprintf(fn, "%s/err_struct_info", path); + wr(fn, 0x0); + sprintf(fn, "%s/err_data_buffer", path); + wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE); + + // Fire pal_mc_error_inject procedure. + sprintf(fn, "%s/call_start", path); + wr(fn, mode); + + if (rd_capabilities(path, capabilities)<0) + return -1; + + return 0; +} + +int query_all_capabilities() +{ + int status; + err_type_info_t err_type_info; + int err_sev, err_struct, struct_hier; + int cap=0; + u64 capabilities; + char path[MAX_FN_SIZE]; + + err_type_info.err_type_info=0; // Initial + err_type_info.err_type_info_u.mode=0; // Query mode; + err_type_info.err_type_info_u.err_inj=0; + + printf("All capabilities implemented in pal_mc_error_inject:\n"); + sprintf(path, PATH_FORMAT ,0); + for (err_sev=0;err_sev<3;err_sev++) + for (err_struct=0;err_struct<5;err_struct++) + for (struct_hier=0;struct_hier<5;struct_hier++) + { + status=-1; + capabilities=0; + err_type_info.err_type_info_u.err_sev=err_sev; + err_type_info.err_type_info_u.err_struct=err_struct; + err_type_info.err_type_info_u.struct_hier=struct_hier; + + if (query_capabilities(path, err_type_info, &capabilities)<0) + continue; + + if (rd_status(path, &status)<0) + continue; + + if (status==0) { + cap=1; + printf("For err_sev=%d, err_struct=%d, struct_hier=%d: ", + err_sev, err_struct, struct_hier); + printf("capabilities 0x%lx\n", capabilities); + } + } + if (!cap) { + printf("No capabilities supported.\n"); + return 0; + } + + return 0; +} + +int err_inject(int cpu, char *path, err_type_info_t err_type_info, + err_struct_info_t err_struct_info, + err_data_buffer_t err_data_buffer) +{ + int status; + char fn[MAX_FN_SIZE]; + + log_info(cpu, "err_type_info=%lx, err_struct_info=%lx, ", + err_type_info.err_type_info, + err_struct_info.err_struct_info); + log_info(cpu,"err_data_buffer=[%lx,%lx,%lx]\n", + err_data_buffer.err_data_buffer[0], + err_data_buffer.err_data_buffer[1], + err_data_buffer.err_data_buffer[2]); + sprintf(fn, "%s/err_type_info", path); + wr(fn, err_type_info.err_type_info); + sprintf(fn, "%s/err_struct_info", path); + wr(fn, err_struct_info.err_struct_info); + sprintf(fn, "%s/err_data_buffer", path); + wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE); + + // Fire pal_mc_error_inject procedure. + sprintf(fn, "%s/call_start", path); + wr(fn,mode); + + if (rd_status(path, &status)<0) { + vbprintf("fail: read status\n"); + return -100; + } + + if (status!=0) { + log_info(cpu, "fail: status=%d\n", status); + return status; + } + + return status; +} + +static int construct_data_buf(char *path, err_type_info_t err_type_info, + err_struct_info_t err_struct_info, + err_data_buffer_t *err_data_buffer, + void *va1) +{ + char fn[MAX_FN_SIZE]; + u64 virt_addr=0, phys_addr=0; + + vbprintf("va1=%lx\n", (u64)va1); + memset(&err_data_buffer->err_data_buffer_cache, 0, ERR_DATA_BUFFER_SIZE*8); + + switch (err_type_info.err_type_info_u.err_struct) { + case 1: // Cache + switch (err_struct_info.err_struct_info_cache.cl_id) { + case 1: //Virtual addr + err_data_buffer->err_data_buffer_cache.inj_addr=(u64)va1; + break; + case 2: //Phys addr + sprintf(fn, "%s/virtual_to_phys", path); + virt_addr=(u64)va1; + if (wr(fn,virt_addr)<0) + return -1; + rd(fn, &phys_addr); + err_data_buffer->err_data_buffer_cache.inj_addr=phys_addr; + break; + default: + printf("Not supported cl_id\n"); + break; + } + break; + case 2: // TLB + break; + case 3: // Register file + break; + case 4: // Bus/system interconnect + default: + printf("Not supported err_struct\n"); + break; + } + + return 0; +} + +typedef struct { + u64 cpu; + u64 loop; + u64 interval; + u64 err_type_info; + u64 err_struct_info; + u64 err_data_buffer[ERR_DATA_BUFFER_SIZE]; +} parameters_t; + +parameters_t line_para; +int para; + +static int empty_data_buffer(u64 *err_data_buffer) +{ + int empty=1; + int i; + + for (i=0;i<ERR_DATA_BUFFER_SIZE; i++) + if (err_data_buffer[i]!=-1) + empty=0; + + return empty; +} + +int err_inj() +{ + err_type_info_t err_type_info; + err_struct_info_t err_struct_info; + err_data_buffer_t err_data_buffer; + int count; + FILE *fp; + unsigned long cpu, loop, interval, err_type_info_conf, err_struct_info_conf; + u64 err_data_buffer_conf[ERR_DATA_BUFFER_SIZE]; + int num; + int i; + char path[MAX_FN_SIZE]; + parameters_t parameters[MAX_TASK_NUM]={}; + pid_t child_pid[MAX_TASK_NUM]; + time_t current_time; + int status; + + if (!para) { + fp=fopen("err.conf", "r"); + if (fp==NULL) { + perror("Error open err.conf"); + return -1; + } + + num=0; + while (!feof(fp)) { + char buf[256]; + memset(buf,0,256); + fgets(buf, 256, fp); + count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n", + &cpu, &loop, &interval,&err_type_info_conf, + &err_struct_info_conf, + &err_data_buffer_conf[0], + &err_data_buffer_conf[1], + &err_data_buffer_conf[2]); + if (count!=PARA_FIELD_NUM+3) { + err_data_buffer_conf[0]=-1; + err_data_buffer_conf[1]=-1; + err_data_buffer_conf[2]=-1; + count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx\n", + &cpu, &loop, &interval,&err_type_info_conf, + &err_struct_info_conf); + if (count!=PARA_FIELD_NUM) + continue; + } + + parameters[num].cpu=cpu; + parameters[num].loop=loop; + parameters[num].interval= interval>MIN_INTERVAL + ?interval:MIN_INTERVAL; + parameters[num].err_type_info=err_type_info_conf; + parameters[num].err_struct_info=err_struct_info_conf; + memcpy(parameters[num++].err_data_buffer, + err_data_buffer_conf,ERR_DATA_BUFFER_SIZE*8) ; + + if (num>=MAX_TASK_NUM) + break; + } + } + else { + parameters[0].cpu=line_para.cpu; + parameters[0].loop=line_para.loop; + parameters[0].interval= line_para.interval>MIN_INTERVAL + ?line_para.interval:MIN_INTERVAL; + parameters[0].err_type_info=line_para.err_type_info; + parameters[0].err_struct_info=line_para.err_struct_info; + memcpy(parameters[0].err_data_buffer, + line_para.err_data_buffer,ERR_DATA_BUFFER_SIZE*8) ; + + num=1; + } + + /* Create semaphore: If one_lock, one semaphore for all processors. + Otherwise, one sempaphore for each processor. */ + if (one_lock) { + if (create_sem(0)) { + printf("Can not create semaphore...exit\n"); + free_sem(0); + return -1; + } + } + else { + for (i=0;i<num;i++) { + if (create_sem(parameters[i].cpu)) { + printf("Can not create semaphore for cpu%d...exit\n",i); + free_sem(parameters[num].cpu); + return -1; + } + } + } + + /* Create a shm segment which will be used to inject/consume errors on.*/ + if (create_shm()==-1) { + printf("Error to create shm...exit\n"); + return -1; + } + + for (i=0;i<num;i++) { + pid_t pid; + + current_time=time(NULL); + log_info(parameters[i].cpu, "\nBegine at %s", ctime(¤t_time)); + log_info(parameters[i].cpu, "Configurations:\n"); + log_info(parameters[i].cpu,"On cpu%ld: loop=%lx, interval=%lx(s)", + parameters[i].cpu, + parameters[i].loop, + parameters[i].interval); + log_info(parameters[i].cpu," err_type_info=%lx,err_struct_info=%lx\n", + parameters[i].err_type_info, + parameters[i].err_struct_info); + + sprintf(path, PATH_FORMAT, (int)parameters[i].cpu); + err_type_info.err_type_info=parameters[i].err_type_info; + err_struct_info.err_struct_info=parameters[i].err_struct_info; + memcpy(err_data_buffer.err_data_buffer, + parameters[i].err_data_buffer, + ERR_DATA_BUFFER_SIZE*8); + + pid=fork(); + if (pid==0) { + unsigned long mask[MASK_SIZE]; + int j, k; + + void *va1, *va2; + + /* Allocate two memory areas va1 and va2 in shm */ + va1=shmaddr+parameters[i].cpu*PAGE_SIZE; + va2=shmaddr+parameters[i].cpu*PAGE_SIZE+PAGE_SIZE; + + vbprintf("va1=%lx, va2=%lx\n", (u64)va1, (u64)va2); + memset(va1, 0x1, PAGE_SIZE); + memset(va2, 0x2, PAGE_SIZE); + + if (empty_data_buffer(err_data_buffer.err_data_buffer)) + /* If not specified yet, construct data buffer + * with va1 + */ + construct_data_buf(path, err_type_info, + err_struct_info, &err_data_buffer,va1); + + for (j=0;j<MASK_SIZE;j++) + mask[j]=0; + + cpu=parameters[i].cpu; + k = cpu%64; + j = cpu/64; + mask[j]=1<<k; + + if (sched_setaffinity(0, MASK_SIZE*8, mask)==-1) { + perror("Error sched_setaffinity:"); + return -1; + } + + for (j=0; j<parameters[i].loop; j++) { + log_info(parameters[i].cpu,"Injection "); + log_info(parameters[i].cpu,"on cpu%ld: #%d/%ld ", + + parameters[i].cpu,j+1, parameters[i].loop); + + /* Hold the lock */ + if (one_lock) + lock(0); + else + /* Hold lock on this cpu */ + lock(parameters[i].cpu); + + if ((status=err_inject(parameters[i].cpu, + path, err_type_info, + err_struct_info, err_data_buffer)) + ==0) { + /* consume the error for "inject only"*/ + memcpy(va2, va1, PAGE_SIZE); + memcpy(va1, va2, PAGE_SIZE); + log_info(parameters[i].cpu, + "successful\n"); + } + else { + log_info(parameters[i].cpu,"fail:"); + log_info(parameters[i].cpu, + "status=%d\n", status); + unlock(parameters[i].cpu); + break; + } + if (one_lock) + /* Release the lock */ + unlock(0); + /* Release lock on this cpu */ + else + unlock(parameters[i].cpu); + + if (j < parameters[i].loop-1) + sleep(parameters[i].interval); + } + current_time=time(NULL); + log_info(parameters[i].cpu, "Done at %s", ctime(¤t_time)); + return 0; + } + else if (pid<0) { + perror("Error fork:"); + continue; + } + child_pid[i]=pid; + } + for (i=0;i<num;i++) + waitpid(child_pid[i], NULL, 0); + + if (one_lock) + free_sem(0); + else + for (i=0;i<num;i++) + free_sem(parameters[i].cpu); + + printf("All done.\n"); + + return 0; +} + +void help() +{ + printf("err_inject_tool:\n"); + printf("\t-q: query all capabilities. default: off\n"); + printf("\t-m: procedure mode. 1: physical 2: virtual. default: 1\n"); + printf("\t-i: inject errors. default: off\n"); + printf("\t-l: one lock per cpu. default: one lock for all\n"); + printf("\t-e: error parameters:\n"); + printf("\t\tcpu,loop,interval,err_type_info,err_struct_info[,err_data_buffer[0],err_data_buffer[1],err_data_buffer[2]]\n"); + printf("\t\t cpu: logical cpu number the error will be inject in.\n"); + printf("\t\t loop: times the error will be injected.\n"); + printf("\t\t interval: In second. every so often one error is injected.\n"); + printf("\t\t err_type_info, err_struct_info: PAL parameters.\n"); + printf("\t\t err_data_buffer: PAL parameter. Optional. If not present,\n"); + printf("\t\t it's constructed by tool automatically. Be\n"); + printf("\t\t careful to provide err_data_buffer and make\n"); + printf("\t\t sure it's working with the environment.\n"); + printf("\t Note:no space between error parameters.\n"); + printf("\t default: Take error parameters from err.conf instead of command line.\n"); + printf("\t-v: verbose. default: off\n"); + printf("\t-h: help\n\n"); + printf("The tool will take err.conf file as "); + printf("input to inject single or multiple errors "); + printf("on one or multiple cpus in parallel.\n"); +} + +int main(int argc, char **argv) +{ + char c; + int do_err_inj=0; + int do_query_all=0; + int count; + u32 m; + + /* Default one lock for all cpu's */ + one_lock=1; + while ((c = getopt(argc, argv, "m:iqvhle:")) != EOF) + switch (c) { + case 'm': /* Procedure mode. 1: phys 2: virt */ + count=sscanf(optarg, "%x", &m); + if (count!=1 || (m!=1 && m!=2)) { + printf("Wrong mode number.\n"); + help(); + return -1; + } + mode=m; + break; + case 'i': /* Inject errors */ + do_err_inj=1; + break; + case 'q': /* Query */ + do_query_all=1; + break; + case 'v': /* Verbose */ + verbose=1; + break; + case 'l': /* One lock per cpu */ + one_lock=0; + break; + case 'e': /* error arguments */ + /* Take parameters: + * #cpu, loop, interval, err_type_info, err_struct_info[, err_data_buffer] + * err_data_buffer is optional. Recommend not to specify + * err_data_buffer. Better to use tool to generate it. + */ + count=sscanf(optarg, + "%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n", + &line_para.cpu, + &line_para.loop, + &line_para.interval, + &line_para.err_type_info, + &line_para.err_struct_info, + &line_para.err_data_buffer[0], + &line_para.err_data_buffer[1], + &line_para.err_data_buffer[2]); + if (count!=PARA_FIELD_NUM+3) { + line_para.err_data_buffer[0]=-1, + line_para.err_data_buffer[1]=-1, + line_para.err_data_buffer[2]=-1; + count=sscanf(optarg, "%lx, %lx, %lx, %lx, %lx\n", + &line_para.cpu, + &line_para.loop, + &line_para.interval, + &line_para.err_type_info, + &line_para.err_struct_info); + if (count!=PARA_FIELD_NUM) { + printf("Wrong error arguments.\n"); + help(); + return -1; + } + } + para=1; + break; + continue; + break; + case 'h': + help(); + return 0; + default: + break; + } + + if (do_query_all) + query_all_capabilities(); + if (do_err_inj) + err_inj(); + + if (!do_query_all && !do_err_inj) + help(); + + return 0; +} + diff --git a/Documentation/input/atarikbd.txt b/Documentation/input/atarikbd.txt index 668f4d0..ab05062 100644 --- a/Documentation/input/atarikbd.txt +++ b/Documentation/input/atarikbd.txt @@ -179,9 +179,9 @@ reporting mode for joystick 1, with both buttons being logically assigned to the mouse. After any joystick command, the ikbd assumes that joysticks are connected to both Joystick0 and Joystick1. Any mouse command (except MOUSE DISABLE) then causes port 0 to again be scanned as if it were a mouse, and -both buttons are logically connected to it. If a mouse diable command is +both buttons are logically connected to it. If a mouse disable command is received while port 0 is presumed to be a mouse, the button is logically -assigned to Joystick1 ( until the mouse is reenabled by another mouse command). +assigned to Joystick1 (until the mouse is reenabled by another mouse command). 9. ikbd Command Set diff --git a/Documentation/input/input-programming.txt b/Documentation/input/input-programming.txt index 180e0689..d9d5230 100644 --- a/Documentation/input/input-programming.txt +++ b/Documentation/input/input-programming.txt @@ -1,5 +1,3 @@ -$Id: input-programming.txt,v 1.4 2001/05/04 09:47:14 vojtech Exp $ - Programming input drivers ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -20,28 +18,51 @@ pressed or released a BUTTON_IRQ happens. The driver could look like: #include <asm/irq.h> #include <asm/io.h> +static struct input_dev *button_dev; + static void button_interrupt(int irq, void *dummy, struct pt_regs *fp) { - input_report_key(&button_dev, BTN_1, inb(BUTTON_PORT) & 1); - input_sync(&button_dev); + input_report_key(button_dev, BTN_1, inb(BUTTON_PORT) & 1); + input_sync(button_dev); } static int __init button_init(void) { + int error; + if (request_irq(BUTTON_IRQ, button_interrupt, 0, "button", NULL)) { printk(KERN_ERR "button.c: Can't allocate irq %d\n", button_irq); return -EBUSY; } - - button_dev.evbit[0] = BIT(EV_KEY); - button_dev.keybit[LONG(BTN_0)] = BIT(BTN_0); - - input_register_device(&button_dev); + + button_dev = input_allocate_device(); + if (!button_dev) { + printk(KERN_ERR "button.c: Not enough memory\n"); + error = -ENOMEM; + goto err_free_irq; + } + + button_dev->evbit[0] = BIT(EV_KEY); + button_dev->keybit[LONG(BTN_0)] = BIT(BTN_0); + + error = input_register_device(button_dev); + if (error) { + printk(KERN_ERR "button.c: Failed to register device\n"); + goto err_free_dev; + } + + return 0; + + err_free_dev: + input_free_device(button_dev); + err_free_irq: + free_irq(BUTTON_IRQ, button_interrupt); + return error; } static void __exit button_exit(void) { - input_unregister_device(&button_dev); + input_unregister_device(button_dev); free_irq(BUTTON_IRQ, button_interrupt); } @@ -58,17 +79,18 @@ In the _init function, which is called either upon module load or when booting the kernel, it grabs the required resources (it should also check for the presence of the device). -Then it sets the input bitfields. This way the device driver tells the other +Then it allocates a new input device structure with input_aloocate_device() +and sets up input bitfields. This way the device driver tells the other parts of the input systems what it is - what events can be generated or -accepted by this input device. Our example device can only generate EV_KEY type -events, and from those only BTN_0 event code. Thus we only set these two -bits. We could have used +accepted by this input device. Our example device can only generate EV_KEY +type events, and from those only BTN_0 event code. Thus we only set these +two bits. We could have used set_bit(EV_KEY, button_dev.evbit); set_bit(BTN_0, button_dev.keybit); as well, but with more than single bits the first approach tends to be -shorter. +shorter. Then the example driver registers the input device structure by calling @@ -76,16 +98,15 @@ Then the example driver registers the input device structure by calling This adds the button_dev structure to linked lists of the input driver and calls device handler modules _connect functions to tell them a new input -device has appeared. Because the _connect functions may call kmalloc(, -GFP_KERNEL), which can sleep, input_register_device() must not be called -from an interrupt or with a spinlock held. +device has appeared. input_register_device() may sleep and therefore must +not be called from an interrupt or with a spinlock held. While in use, the only used function of the driver is button_interrupt() which upon every interrupt from the button checks its state and reports it -via the +via the input_report_key() @@ -113,16 +134,10 @@ can use the open and close callback to know when it can stop polling or release the interrupt and when it must resume polling or grab the interrupt again. To do that, we would add this to our example driver: -int button_used = 0; - static int button_open(struct input_dev *dev) { - if (button_used++) - return 0; - if (request_irq(BUTTON_IRQ, button_interrupt, 0, "button", NULL)) { printk(KERN_ERR "button.c: Can't allocate irq %d\n", button_irq); - button_used--; return -EBUSY; } @@ -131,20 +146,21 @@ static int button_open(struct input_dev *dev) static void button_close(struct input_dev *dev) { - if (!--button_used) - free_irq(IRQ_AMIGA_VERTB, button_interrupt); + free_irq(IRQ_AMIGA_VERTB, button_interrupt); } static int __init button_init(void) { ... - button_dev.open = button_open; - button_dev.close = button_close; + button_dev->open = button_open; + button_dev->close = button_close; ... } -Note the button_used variable - we have to track how many times the open -function was called to know when exactly our device stops being used. +Note that input core keeps track of number of users for the device and +makes sure that dev->open() is called only when the first user connects +to the device and that dev->close() is called when the very last user +disconnects. Calls to both callbacks are serialized. The open() callback should return a 0 in case of success or any nonzero value in case of failure. The close() callback (which is void) must always succeed. @@ -175,7 +191,7 @@ set the corresponding bits and call the input_report_rel(struct input_dev *dev, int code, int value) -function. Events are generated only for nonzero value. +function. Events are generated only for nonzero value. However EV_ABS requires a little special care. Before calling input_register_device, you have to fill additional fields in the input_dev @@ -187,6 +203,10 @@ the ABS_X axis: button_dev.absfuzz[ABS_X] = 4; button_dev.absflat[ABS_X] = 8; +Or, you can just say: + + input_set_abs_params(button_dev, ABS_X, 0, 255, 4, 8); + This setting would be appropriate for a joystick X axis, with the minimum of 0, maximum of 255 (which the joystick *must* be able to reach, no problem if it sometimes reports more, but it must be able to always reach the min and @@ -197,14 +217,7 @@ If you don't need absfuzz and absflat, you can set them to zero, which mean that the thing is precise and always returns to exactly the center position (if it has any). -1.4 The void *private field -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This field in the input structure can be used to point to any private data -structures in the input device driver, in case the driver handles more than -one device. You'll need it in the open and close callbacks. - -1.5 NBITS(), LONG(), BIT() +1.4 NBITS(), LONG(), BIT() ~~~~~~~~~~~~~~~~~~~~~~~~~~ These three macros from input.h help some bitfield computations: @@ -213,13 +226,9 @@ These three macros from input.h help some bitfield computations: LONG(x) - returns the index in the array in longs for bit x BIT(x) - returns the index in a long for bit x -1.6 The number, id* and name fields +1.5 The id* and name fields ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The dev->number is assigned by the input system to the input device when it -is registered. It has no use except for identifying the device to the user -in system messages. - The dev->name should be set before registering the input device by the input device driver. It's a string like 'Generic button device' containing a user friendly name of the device. @@ -234,15 +243,25 @@ driver. The id and name fields can be passed to userland via the evdev interface. -1.7 The keycode, keycodemax, keycodesize fields +1.6 The keycode, keycodemax, keycodesize fields ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -These two fields will be used for any input devices that report their data -as scancodes. If not all scancodes can be known by autodetection, they may -need to be set by userland utilities. The keycode array then is an array -used to map from scancodes to input system keycodes. The keycode max will -contain the size of the array and keycodesize the size of each entry in it -(in bytes). +These three fields should be used by input devices that have dense keymaps. +The keycode is an array used to map from scancodes to input system keycodes. +The keycode max should contain the size of the array and keycodesize the +size of each entry in it (in bytes). + +Userspace can query and alter current scancode to keycode mappings using +EVIOCGKEYCODE and EVIOCSKEYCODE ioctls on corresponding evdev interface. +When a device has all 3 aforementioned fields filled in, the driver may +rely on kernel's default implementation of setting and querying keycode +mappings. + +1.7 dev->getkeycode() and dev->setkeycode() +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +getkeycode() and setkeycode() callbacks allow drivers to override default +keycode/keycodesize/keycodemax mapping mechanism provided by input core +and implement sparse keycode maps. 1.8 Key autorepeat ~~~~~~~~~~~~~~~~~~ @@ -266,7 +285,7 @@ direction - from the system to the input device driver. If your input device driver can handle these events, it has to set the respective bits in evbit, *and* also the callback routine: - button_dev.event = button_event; + button_dev->event = button_event; int button_event(struct input_dev *dev, unsigned int type, unsigned int code, int value); { diff --git a/Documentation/input/xpad.txt b/Documentation/input/xpad.txt index 5427bdf..aae0d40 100644 --- a/Documentation/input/xpad.txt +++ b/Documentation/input/xpad.txt @@ -65,15 +65,15 @@ of buttons, see section 0.3 - Unknown Controllers I've tested this with Stepmania, and it works quite well. -0.3 Unkown Controllers +0.3 Unknown Controllers ---------------------- -If you have an unkown xbox controller, it should work just fine with +If you have an unknown xbox controller, it should work just fine with the default settings. HOWEVER if you have an unknown dance pad not listed below, it will not work UNLESS you set "dpad_to_buttons" to 1 in the module configuration. -PLEASE if you have an unkown controller, email Dom <binary1230@yahoo.com> with +PLEASE, if you have an unknown controller, email Dom <binary1230@yahoo.com> with a dump from /proc/bus/usb and a description of the pad (manufacturer, country, whether it is a dance pad or normal controller) so that we can add your pad to the list of supported devices, ensuring that it will work out of the diff --git a/Documentation/ioctl-number.txt b/Documentation/ioctl-number.txt index 8f750c0..3de7d37 100644 --- a/Documentation/ioctl-number.txt +++ b/Documentation/ioctl-number.txt @@ -138,7 +138,8 @@ Code Seq# Include File Comments 'm' 00-1F net/irda/irmod.h conflict! 'n' 00-7F linux/ncp_fs.h 'n' E0-FF video/matrox.h matroxfb -'p' 00-3F linux/mc146818rtc.h +'p' 00-0F linux/phantom.h conflict! (OpenHaptics needs this) +'p' 00-3F linux/mc146818rtc.h conflict! 'p' 40-7F linux/nvram.h 'p' 80-9F user-space parport <mailto:tim@cyberelk.net> diff --git a/Documentation/isdn/CREDITS b/Documentation/isdn/CREDITS index e1b3023..7c17c83 100644 --- a/Documentation/isdn/CREDITS +++ b/Documentation/isdn/CREDITS @@ -2,7 +2,7 @@ I want to thank all who contributed to this project and especially to: (in alphabetical order) -Thomas Bogendörfer (tsbogend@bigbug.franken.de) +Thomas Bogendörfer (tsbogend@bigbug.franken.de) Tester, lots of bugfixes and hints. Alan Cox (alan@redhat.com) @@ -11,7 +11,7 @@ Alan Cox (alan@redhat.com) Henner Eisen (eis@baty.hanse.de) For X.25 implementation. -Volker Götz (volker@oops.franken.de) +Volker Götz (volker@oops.franken.de) For contribution of man-pages, the imontty-tool and a perfect maintaining of the mailing-list at hub-wue. diff --git a/Documentation/isdn/README b/Documentation/isdn/README index 7615952..6783437 100644 --- a/Documentation/isdn/README +++ b/Documentation/isdn/README @@ -402,7 +402,7 @@ README for the ISDN-subsystem the script tools/tcltk/isdnmon. You can add actions for line-status changes. See the comments at the beginning of the script for how to do that. There are other tty-based tools in the tools-subdirectory - contributed by Michael Knigge (imon), Volker Götz (imontty) and + contributed by Michael Knigge (imon), Volker Götz (imontty) and Andreas Kool (isdnmon). l) For initial testing, you can set the verbose-level to 2 (default: 0). diff --git a/Documentation/isdn/README.icn b/Documentation/isdn/README.icn index a5f55ea..13f833d 100644 --- a/Documentation/isdn/README.icn +++ b/Documentation/isdn/README.icn @@ -3,8 +3,8 @@ $Id: README.icn,v 1.7 2000/08/06 09:22:51 armin Exp $ You can get the ICN-ISDN-card from: Thinking Objects Software GmbH -Versbacher Röthe 159 -97078 Würzburg +Versbacher Röthe 159 +97078 Würzburg Tel: +49 931 2877950 Fax: +49 931 2877951 diff --git a/Documentation/java.txt b/Documentation/java.txt index c768dc6..3cce3fb 100644 --- a/Documentation/java.txt +++ b/Documentation/java.txt @@ -390,7 +390,7 @@ the execution bit, then just do originally by Brian A. Lantz, brian@lantz.com -heavily edited for binfmt_misc by Richard Günther +heavily edited for binfmt_misc by Richard Günther new scripts by Colin J. Watson <cjw44@cam.ac.uk> added executable Jar file support by Kurt Huwig <kurt@iku-netz.de> diff --git a/Documentation/kbuild/modules.txt b/Documentation/kbuild/modules.txt index 769ee05..1d247d5 100644 --- a/Documentation/kbuild/modules.txt +++ b/Documentation/kbuild/modules.txt @@ -249,7 +249,7 @@ following files: --> filename: Makefile KERNELDIR := /lib/modules/`uname -r`/build all:: - $(MAKE) -C $KERNELDIR M=`pwd` $@ + $(MAKE) -C $(KERNELDIR) M=`pwd` $@ # Module specific targets genbin: diff --git a/Documentation/kernel-docs.txt b/Documentation/kernel-docs.txt index c68dafe..d9e3b19 100644 --- a/Documentation/kernel-docs.txt +++ b/Documentation/kernel-docs.txt @@ -236,7 +236,7 @@ * Title: "Design and Implementation of the Second Extended Filesystem" - Author: Rémy Card, Theodore Ts'o, Stephen Tweedie. + Author: Rémy Card, Theodore Ts'o, Stephen Tweedie. URL: http://web.mit.edu/tytso/www/linux/ext2intro.html Keywords: ext2, linux fs history, inode, directory, link, devices, VFS, physical structure, performance, benchmarks, ext2fs library, diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 84c3bd0..09220a1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -64,6 +64,7 @@ parameter is applicable: GENERIC_TIME The generic timeofday code is enabled. NFS Appropriate NFS support is enabled. OSS OSS sound support is enabled. + PV_OPS A paravirtualized kernel PARIDE The ParIDE subsystem is enabled. PARISC The PA-RISC architecture is enabled. PCI PCI bus support is enabled. @@ -495,6 +496,30 @@ and is between 256 and 4096 characters. It is defined in the file Format: <area>[,<node>] See also Documentation/networking/decnet.txt. + default_blu= [VT] + Format: <blue0>,<blue1>,<blue2>,...,<blue15> + Change the default blue palette of the console. + This is a 16-member array composed of values + ranging from 0-255. + + default_grn= [VT] + Format: <green0>,<green1>,<green2>,...,<green15> + Change the default green palette of the console. + This is a 16-member array composed of values + ranging from 0-255. + + default_red= [VT] + Format: <red0>,<red1>,<red2>,...,<red15> + Change the default red palette of the console. + This is a 16-member array composed of values + ranging from 0-255. + + default_utf8= [VT] + Format=<0|1> + Set system-wide default UTF-8 mode for all tty's. + Default is 0 and by setting to 1, it enables UTF-8 + mode for all newly opened or allocated terminals. + dhash_entries= [KNL] Set number of hash buckets for dentry cache. @@ -695,8 +720,15 @@ and is between 256 and 4096 characters. It is defined in the file idebus= [HW] (E)IDE subsystem - VLB/PCI bus speed See Documentation/ide.txt. - idle= [HW] - Format: idle=poll or idle=halt + idle= [X86] + Format: idle=poll or idle=mwait + Poll forces a polling idle loop that can slightly improves the performance + of waking up a idle CPU, but will use a lot of power and make the system + run hot. Not recommended. + idle=mwait. On systems which support MONITOR/MWAIT but the kernel chose + to not use it because it doesn't save as much power as a normal idle + loop use the MONITOR/MWAIT idle loop anyways. Performance should be the same + as idle=poll. ignore_loglevel [KNL] Ignore loglevel setting - this will print /all/ @@ -722,14 +754,6 @@ and is between 256 and 4096 characters. It is defined in the file inport.irq= [HW] Inport (ATI XL and Microsoft) busmouse driver Format: <irq> - combined_mode= [HW] control which driver uses IDE ports in combined - mode: legacy IDE driver, libata, or both - (in the libata case, libata.atapi_enabled=1 may be - useful as well). Note that using the ide or libata - options may affect your device naming (e.g. by - changing hdc to sdb). - Format: combined (default), ide, or libata - inttest= [IA64] io7= [HW] IO7 for Marvel based alpha systems @@ -808,6 +832,11 @@ and is between 256 and 4096 characters. It is defined in the file lasi= [HW,SCSI] PARISC LASI driver for the 53c700 chip Format: addr:<io>,irq:<irq> + legacy_serial.force [HW,IA-32,X86-64] + Probe for COM ports at legacy addresses even + if PNPBIOS or ACPI should describe them. This + is for working around firmware defects. + llsc*= [IA64] See function print_params() in arch/ia64/sn/kernel/llsc4.c. @@ -1157,6 +1186,11 @@ and is between 256 and 4096 characters. It is defined in the file nomce [IA-32] Machine Check Exception + noreplace-paravirt [IA-32,PV_OPS] Don't patch paravirt_ops + + noreplace-smp [IA-32,SMP] Don't replace SMP instructions + with UP alternatives + noresidual [PPC] Don't use residual data on PReP machines. noresume [SWSUSP] Disables resume and restores original swap @@ -1562,6 +1596,20 @@ and is between 256 and 4096 characters. It is defined in the file smart2= [HW] Format: <io1>[,<io2>[,...,<io8>]] + smp-alt-once [IA-32,SMP] On a hotplug CPU system, only + attempt to substitute SMP alternatives once at boot. + + smsc-ircc2.nopnp [HW] Don't use PNP to discover SMC devices + smsc-ircc2.ircc_cfg= [HW] Device configuration I/O port + smsc-ircc2.ircc_sir= [HW] SIR base I/O port + smsc-ircc2.ircc_fir= [HW] FIR base I/O port + smsc-ircc2.ircc_irq= [HW] IRQ line + smsc-ircc2.ircc_dma= [HW] DMA channel + smsc-ircc2.ircc_transceiver= [HW] Transceiver type: + 0: Toshiba Satellite 1800 (GP data pin select) + 1: Fast pin select (default) + 2: ATC IRMode + snd-ad1816a= [HW,ALSA] snd-ad1848= [HW,ALSA] @@ -1820,6 +1868,7 @@ and is between 256 and 4096 characters. It is defined in the file [USBHID] The interval which mice are to be polled at. vdso= [IA-32,SH] + vdso=2: enable compat VDSO (default with COMPAT_VDSO) vdso=1: enable VDSO (default) vdso=0: disable VDSO mapping diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index d71faff..da5404ab 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -14,6 +14,7 @@ CONTENTS 8. Kprobes Example 9. Jprobes Example 10. Kretprobes Example +Appendix A: The kprobes debugfs interface 1. Concepts: Kprobes, Jprobes, Return Probes @@ -349,9 +350,12 @@ for instrumentation and error reporting.) If the number of times a function is called does not match the number of times it returns, registering a return probe on that function may -produce undesirable results. We have the do_exit() case covered. -do_execve() and do_fork() are not an issue. We're unaware of other -specific cases where this could be a problem. +produce undesirable results. In such a case, a line: +kretprobe BUG!: Processing kretprobe d000000000041aa8 @ c00000000004f48c +gets printed. With this information, one will be able to correlate the +exact instance of the kretprobe that caused the problem. We have the +do_exit() case covered. do_execve() and do_fork() are not an issue. +We're unaware of other specific cases where this could be a problem. If, upon entry to or exit from a function, the CPU is running on a stack other than that of the current task, registering a return @@ -614,3 +618,27 @@ http://www-106.ibm.com/developerworks/library/l-kprobes.html?ca=dgr-lnxw42Kprobe http://www.redhat.com/magazine/005mar05/features/kprobes/ http://www-users.cs.umn.edu/~boutcher/kprobes/ http://www.linuxsymposium.org/2006/linuxsymposium_procv2.pdf (pages 101-115) + + +Appendix A: The kprobes debugfs interface + +With recent kernels (> 2.6.20) the list of registered kprobes is visible +under the /debug/kprobes/ directory (assuming debugfs is mounted at /debug). + +/debug/kprobes/list: Lists all registered probes on the system + +c015d71a k vfs_read+0x0 +c011a316 j do_fork+0x0 +c03dedc5 r tcp_v4_rcv+0x0 + +The first column provides the kernel address where the probe is inserted. +The second column identifies the type of probe (k - kprobe, r - kretprobe +and j - jprobe), while the third column specifies the symbol+offset of +the probe. If the probed function belongs to a module, the module name +is also specified. + +/debug/kprobes/enabled: Turn kprobes ON/OFF + +Provides a knob to globally turn registered kprobes ON or OFF. By default, +all kprobes are enabled. By echoing "0" to this file, all registered probes +will be disarmed, till such time a "1" is echoed to this file. diff --git a/Documentation/laptop-mode.txt b/Documentation/laptop-mode.txt index 6f639e3..eeedee1 100644 --- a/Documentation/laptop-mode.txt +++ b/Documentation/laptop-mode.txt @@ -33,7 +33,7 @@ or anything. Simply install all the files included in this document, and laptop mode will automatically be started when you're on battery. For your convenience, a tarball containing an installer can be downloaded at: -http://www.xs4all.nl/~bsamwel/laptop_mode/tools/ +http://www.samwel.tk/laptop_mode/laptop_mode/ To configure laptop mode, you need to edit the configuration file, which is located in /etc/default/laptop-mode on Debian-based systems, or in diff --git a/Documentation/m68k/README.buddha b/Documentation/m68k/README.buddha index ef484a7..3ea9827 100644 --- a/Documentation/m68k/README.buddha +++ b/Documentation/m68k/README.buddha @@ -204,7 +204,7 @@ always shows a "no IRQ here" on the Buddha, and accesses to the third IDE port are going into data's Nirwana on the Buddha. - Jens Schönfeld february 19th, 1997 + Jens Schönfeld february 19th, 1997 updated may 27th, 1997 eMail: sysop@nostlgic.tng.oche.de diff --git a/Documentation/magic-number.txt b/Documentation/magic-number.txt index 0e740c8..bd450e7 100644 --- a/Documentation/magic-number.txt +++ b/Documentation/magic-number.txt @@ -129,7 +129,7 @@ SAVEKMSG_MAGIC1 0x53415645 savekmsg arch/*/amiga/config.c GDA_MAGIC 0x58464552 gda include/asm-mips64/sn/gda.h RED_MAGIC1 0x5a2cf071 (any) mm/slab.c STL_PORTMAGIC 0x5a7182c9 stlport include/linux/stallion.h -EEPROM_MAGIC_VALUE 0X5ab478d2 lanai_dev drivers/atm/lanai.c +EEPROM_MAGIC_VALUE 0x5ab478d2 lanai_dev drivers/atm/lanai.c HDLCDRV_MAGIC 0x5ac6e778 hdlcdrv_state include/linux/hdlcdrv.h EPCA_MAGIC 0x5c6df104 channel include/linux/epca.h PCXX_MAGIC 0x5c6df104 channel drivers/char/pcxx.h diff --git a/Documentation/md.txt b/Documentation/md.txt index 2202f5d..5818628 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt @@ -178,6 +178,21 @@ All md devices contain: The size should be at least PAGE_SIZE (4k) and should be a power of 2. This can only be set while assembling an array + layout + The "layout" for the array for the particular level. This is + simply a number that is interpretted differently by different + levels. It can be written while assembling an array. + + reshape_position + This is either "none" or a sector number within the devices of + the array where "reshape" is up to. If this is set, the three + attributes mentioned above (raid_disks, chunk_size, layout) can + potentially have 2 values, an old and a new value. If these + values differ, reading the attribute returns + new (old) + and writing will effect the 'new' value, leaving the 'old' + unchanged. + component_size For arrays with data redundancy (i.e. not raid0, linear, faulty, multipath), all components must be the same size - or at least @@ -193,11 +208,6 @@ All md devices contain: 1.2 (newer format in varying locations) or "none" indicating that the kernel isn't managing metadata at all. - layout - The "layout" for the array for the particular level. This is - simply a number that is interpretted differently by different - levels. It can be written while assembling an array. - resync_start The point at which resync should start. If no resync is needed, this will be a very large number. At array creation it will @@ -259,29 +269,6 @@ All md devices contain: like active, but no writes have been seen for a while (safe_mode_delay). - sync_speed_min - sync_speed_max - This are similar to /proc/sys/dev/raid/speed_limit_{min,max} - however they only apply to the particular array. - If no value has been written to these, of if the word 'system' - is written, then the system-wide value is used. If a value, - in kibibytes-per-second is written, then it is used. - When the files are read, they show the currently active value - followed by "(local)" or "(system)" depending on whether it is - a locally set or system-wide value. - - sync_completed - This shows the number of sectors that have been completed of - whatever the current sync_action is, followed by the number of - sectors in total that could need to be processed. The two - numbers are separated by a '/' thus effectively showing one - value, a fraction of the process that is complete. - - sync_speed - This shows the current actual speed, in K/sec, of the current - sync_action. It is averaged over the last 30 seconds. - - As component devices are added to an md array, they appear in the 'md' directory as new directories named dev-XXX @@ -412,6 +399,35 @@ also have Note that the numbers are 'bit' numbers, not 'block' numbers. They should be scaled by the bitmap_chunksize. + sync_speed_min + sync_speed_max + This are similar to /proc/sys/dev/raid/speed_limit_{min,max} + however they only apply to the particular array. + If no value has been written to these, of if the word 'system' + is written, then the system-wide value is used. If a value, + in kibibytes-per-second is written, then it is used. + When the files are read, they show the currently active value + followed by "(local)" or "(system)" depending on whether it is + a locally set or system-wide value. + + sync_completed + This shows the number of sectors that have been completed of + whatever the current sync_action is, followed by the number of + sectors in total that could need to be processed. The two + numbers are separated by a '/' thus effectively showing one + value, a fraction of the process that is complete. + + sync_speed + This shows the current actual speed, in K/sec, of the current + sync_action. It is averaged over the last 30 seconds. + + suspend_lo + suspend_hi + The two values, given as numbers of sectors, indicate a range + within the array where IO will be blocked. This is currently + only supported for raid4/5/6. + + Each active md device may also have attributes specific to the personality module that manages it. These are specific to the implementation of the module and could diff --git a/Documentation/netlabel/introduction.txt b/Documentation/netlabel/introduction.txt index a4ffba1..5ecd8d1 100644 --- a/Documentation/netlabel/introduction.txt +++ b/Documentation/netlabel/introduction.txt @@ -30,7 +30,7 @@ The communication layer exists to allow NetLabel configuration and monitoring from user space. The NetLabel communication layer uses a message based protocol built on top of the Generic NETLINK transport mechanism. The exact formatting of these NetLabel messages as well as the Generic NETLINK family -names can be found in the the 'net/netlabel/' directory as comments in the +names can be found in the 'net/netlabel/' directory as comments in the header files as well as in 'include/net/netlabel.h'. * Security Module API diff --git a/Documentation/networking/6pack.txt b/Documentation/networking/6pack.txt index 48ed2b7..d0777a1 100644 --- a/Documentation/networking/6pack.txt +++ b/Documentation/networking/6pack.txt @@ -1,6 +1,6 @@ This is the 6pack-mini-HOWTO, written by -Andreas Könsgen DG3KQ +Andreas Könsgen DG3KQ Internet: ajk@iehk.rwth-aachen.de AMPR-net: dg3kq@db0pra.ampr.org AX.25: dg3kq@db0ach.#nrw.deu.eu diff --git a/Documentation/networking/NAPI_HOWTO.txt b/Documentation/networking/NAPI_HOWTO.txt index fb8dc64..7907435 100644 --- a/Documentation/networking/NAPI_HOWTO.txt +++ b/Documentation/networking/NAPI_HOWTO.txt @@ -160,7 +160,7 @@ on current cpu. This primitive is called by dev->poll(), when it completes its work. The device cannot be out of poll list at this call, if it is then clearly it is a BUG(). You'll know ;-> -All these above nethods are used below. So keep reading for clarity. +All of the above methods are used below, so keep reading for clarity. Device driver changes to be made when porting NAPI ================================================== diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 5a232d9..db0cd51 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -13,7 +13,7 @@ You can find the latest version of this document at Please send me your comments to - Ulisses Alonso Camaró <uaca@i.hate.spam.alumni.uv.es> + Ulisses Alonso Camaró <uaca@i.hate.spam.alumni.uv.es> ------------------------------------------------------------------------------- + Why use PACKET_MMAP diff --git a/Documentation/networking/slicecom.hun b/Documentation/networking/slicecom.hun index 5acf191..bed2f04 100644 --- a/Documentation/networking/slicecom.hun +++ b/Documentation/networking/slicecom.hun @@ -1,7 +1,7 @@ SliceCOM adapter felhasznaloi dokumentacioja - 0.51 verziohoz -Bartók István <bartoki@itc.hu> +Bartók István <bartoki@itc.hu> Utolso modositas: Wed Aug 29 17:26:58 CEST 2001 ----------------------------------------------------------------- diff --git a/Documentation/networking/slicecom.txt b/Documentation/networking/slicecom.txt index 32d3b91..c82c0cf 100644 --- a/Documentation/networking/slicecom.txt +++ b/Documentation/networking/slicecom.txt @@ -1,9 +1,9 @@ SliceCOM adapter user's documentation - for the 0.51 driver version -Written by Bartók István <bartoki@itc.hu> +Written by Bartók István <bartoki@itc.hu> -English translation: Lakatos György <gyuri@itc.hu> +English translation: Lakatos György <gyuri@itc.hu> Mon Dec 11 15:28:42 CET 2000 Last modified: Wed Aug 29 17:25:37 CEST 2001 diff --git a/Documentation/networking/tms380tr.txt b/Documentation/networking/tms380tr.txt index c169a57..1f73e13 100644 --- a/Documentation/networking/tms380tr.txt +++ b/Documentation/networking/tms380tr.txt @@ -71,24 +71,24 @@ Below find attached the setting for the SK NET TR 4/16 ISA adapters CHAPTER 1 LOCATION OF DIP-SWITCH ============================================================== -UÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄ¿ -þUÄÄÄÄÄÄ¿ UÄÄÄÄÄ¿ UÄÄÄ¿ þ -þAÄÄÄÄÄÄU W1 AÄÄÄÄÄU UÄÄÄÄ¿ þ þ þ -þUÄÄÄÄÄÄ¿ þ þ þ þ UÄÄÅ¿ -þAÄÄÄÄÄÄU UÄÄÄÄÄÄÄÄÄÄÄ¿ AÄÄÄÄU þ þ þ þþ -þUÄÄÄÄÄÄ¿ þ þ UÄÄÄ¿ AÄÄÄU AÄÄÅU -þAÄÄÄÄÄÄU þ TMS380C26 þ þ þ þ -þUÄÄÄÄÄÄ¿ þ þ AÄÄÄU AÄ¿ -þAÄÄÄÄÄÄU þ þ þ þ -þ AÄÄÄÄÄÄÄÄÄÄÄU þ þ -þ þ þ -þ AÄU -þ þ -þ þ -þ þ -þ þ -AÄÄÄÄÄÄÄÄÄÄÄÄAÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄAÄÄAÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄAÄÄÄÄÄÄÄÄÄU - AÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄU AÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄU +UÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄ¿ +þUÄÄÄÄÄÄ¿ UÄÄÄÄÄ¿ UÄÄÄ¿ þ +þAÄÄÄÄÄÄU W1 AÄÄÄÄÄU UÄÄÄÄ¿ þ þ þ +þUÄÄÄÄÄÄ¿ þ þ þ þ UÄÄÅ¿ +þAÄÄÄÄÄÄU UÄÄÄÄÄÄÄÄÄÄÄ¿ AÄÄÄÄU þ þ þ þþ +þUÄÄÄÄÄÄ¿ þ þ UÄÄÄ¿ AÄÄÄU AÄÄÅU +þAÄÄÄÄÄÄU þ TMS380C26 þ þ þ þ +þUÄÄÄÄÄÄ¿ þ þ AÄÄÄU AÄ¿ +þAÄÄÄÄÄÄU þ þ þ þ +þ AÄÄÄÄÄÄÄÄÄÄÄU þ þ +þ þ þ +þ AÄU +þ þ +þ þ +þ þ +þ þ +AÄÄÄÄÄÄÄÄÄÄÄÄAÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄAÄÄAÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄAÄÄÄÄÄÄÄÄÄU + AÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄU AÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄU ============================================================== CHAPTER 2 DEFAULT SETTINGS @@ -108,9 +108,9 @@ AÄÄÄÄÄÄÄÄÄÄÄÄAÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄAÄÄAÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄAÄÄÄÄÄÄÄÄÄU CHAPTER 3 DIP SWITCH W1 DESCRIPTION ============================================================== - UÄÄÄAÄÄÄAÄÄÄAÄÄÄAÄÄÄAÄÄÄAÄÄÄAÄÄÄ¿ ON - þ 1 þ 2 þ 3 þ 4 þ 5 þ 6 þ 7 þ 8 þ - AÄÄÄAÄÄÄAÄÄÄAÄÄÄAÄÄÄAÄÄÄAÄÄÄAÄÄÄU OFF + UÄÄÄAÄÄÄAÄÄÄAÄÄÄAÄÄÄAÄÄÄAÄÄÄAÄÄÄ¿ ON + þ 1 þ 2 þ 3 þ 4 þ 5 þ 6 þ 7 þ 8 þ + AÄÄÄAÄÄÄAÄÄÄAÄÄÄAÄÄÄAÄÄÄAÄÄÄAÄÄÄU OFF |AD | BootROM Addr. | I/O | +-+-+-------+-------+-----+-----+ | | | diff --git a/Documentation/networking/udplite.txt b/Documentation/networking/udplite.txt index dd6f46b..6be09ba 100644 --- a/Documentation/networking/udplite.txt +++ b/Documentation/networking/udplite.txt @@ -139,7 +139,7 @@ 3) Disabling the Checksum Computation On both sender and receiver, checksumming will always be performed - and can not be disabled using SO_NO_CHECK. Thus + and cannot be disabled using SO_NO_CHECK. Thus setsockopt(sockfd, SOL_SOCKET, SO_NO_CHECK, ... ); diff --git a/Documentation/networking/wan-router.txt b/Documentation/networking/wan-router.txt index 07dd6d9..bc2ab41 100644 --- a/Documentation/networking/wan-router.txt +++ b/Documentation/networking/wan-router.txt @@ -335,7 +335,7 @@ REVISION HISTORY creating applications using BiSync streaming. -2.0.5 Aug 04, 1999 CHDLC initializatin bug fix. +2.0.5 Aug 04, 1999 CHDLC initialization bug fix. PPP interrupt driven driver: Fix to the PPP line hangup problem. New PPP firmware @@ -372,7 +372,7 @@ REVISION HISTORY o cfgft1 GUI csu/dsu configurator o wancfg GUI configuration file configurator. - o Architectual directory changes. + o Architectural directory changes. beta-2.1.4 Jul 2000 o Dynamic interface configuration: Network interfaces reflect the state diff --git a/Documentation/oops-tracing.txt b/Documentation/oops-tracing.txt index ea55ea8..7d5b60d 100644 --- a/Documentation/oops-tracing.txt +++ b/Documentation/oops-tracing.txt @@ -234,9 +234,6 @@ characters, each representing a particular tainted value. 6: 'B' if a page-release function has found a bad page reference or some unexpected page flags. - 7: 'U' if a user specifically requested that the Tainted flag be set, - ' ' otherwise. - 7: 'U' if a user or user application specifically requested that the Tainted flag be set, ' ' otherwise. diff --git a/Documentation/pci.txt b/Documentation/pci.txt index cdf2f3c..d38261b 100644 --- a/Documentation/pci.txt +++ b/Documentation/pci.txt @@ -124,10 +124,6 @@ initialization with a pointer to a structure describing the driver err_handler See Documentation/pci-error-recovery.txt - multithread_probe Enable multi-threaded probe/scan. Driver must - provide its own locking/syncronization for init - operations if this is enabled. - The ID table is an array of struct pci_device_id entries ending with an all-zero entry. Each entry consists of: @@ -163,9 +159,9 @@ echo "vendor device subvendor subdevice class class_mask driver_data" > \ /sys/bus/pci/drivers/{driver}/new_id All fields are passed in as hexadecimal values (no leading 0x). -Users need pass only as many fields as necessary: - o vendor, device, subvendor, and subdevice fields default - to PCI_ANY_ID (FFFFFFFF), +The vendor and device fields are mandatory, the others are optional. Users +need pass only as many optional fields as necessary: + o subvendor and subdevice fields default to PCI_ANY_ID (FFFFFFFF) o class and classmask fields default to 0 o driver_data defaults to 0UL. @@ -377,7 +373,7 @@ E.g. clearing pending interrupts. 3.6 Register IRQ handler ~~~~~~~~~~~~~~~~~~~~~~~~ -While calling request_irq() is the the last step described here, +While calling request_irq() is the last step described here, this is often just another intermediate step to initialize a device. This step can often be deferred until the device is opened for use. @@ -549,8 +545,6 @@ pci_find_slot() Find pci_dev corresponding to given bus and pci_set_power_state() Set PCI Power Management state (0=D0 ... 3=D3) pci_find_capability() Find specified capability in device's capability list. -pci_module_init() Inline helper function for ensuring correct - pci_driver initialization and error handling. pci_resource_start() Returns bus start address for a given PCI region pci_resource_end() Returns bus end address for a given PCI region pci_resource_len() Returns the byte length of a PCI region diff --git a/Documentation/pcieaer-howto.txt b/Documentation/pcieaer-howto.txt index 16c2512..d5da861 100644 --- a/Documentation/pcieaer-howto.txt +++ b/Documentation/pcieaer-howto.txt @@ -13,7 +13,7 @@ Reporting (AER) driver and provides information on how to use it, as well as how to enable the drivers of endpoint devices to conform with PCI Express AER driver. -1.2 Copyright © Intel Corporation 2006. +1.2 Copyright © Intel Corporation 2006. 1.3 What is the PCI Express AER Driver? diff --git a/Documentation/pcmcia/driver.txt b/Documentation/pcmcia/driver.txt new file mode 100644 index 0000000..0ac1679 --- /dev/null +++ b/Documentation/pcmcia/driver.txt @@ -0,0 +1,30 @@ +PCMCIA Driver +------------- + + +sysfs +----- + +New PCMCIA IDs may be added to a device driver pcmcia_device_id table at +runtime as shown below: + +echo "match_flags manf_id card_id func_id function device_no \ +prod_id_hash[0] prod_id_hash[1] prod_id_hash[2] prod_id_hash[3]" > \ +/sys/bus/pcmcia/drivers/{driver}/new_id + +All fields are passed in as hexadecimal values (no leading 0x). +The meaning is described in the PCMCIA specification, the match_flags is +a bitwise or-ed combination from PCMCIA_DEV_ID_MATCH_* constants +defined in include/linux/mod_devicetable.h. + +Once added, the driver probe routine will be invoked for any unclaimed +PCMCIA device listed in its (newly updated) pcmcia_device_id list. + +A common use-case is to add a new device according to the manufacturer ID +and the card ID (form the manf_id and card_id file in the device tree). +For this, just use: + +echo "0x3 manf_id card_id 0 0 0 0 0 0 0" > \ + /sys/bus/pcmcia/drivers/{driver}/new_id + +after loading the driver. diff --git a/Documentation/pnp.txt b/Documentation/pnp.txt index 28037aa..481faf5 100644 --- a/Documentation/pnp.txt +++ b/Documentation/pnp.txt @@ -140,7 +140,7 @@ Plug and Play but it is planned to be in the near future. Requirements for a Linux PnP protocol: 1.) the protocol must use EISA IDs 2.) the protocol must inform the PnP Layer of a devices current configuration -- the ability to set resources is optional but prefered. +- the ability to set resources is optional but preferred. The following are PnP protocol related functions: diff --git a/Documentation/power/basic-pm-debugging.txt b/Documentation/power/basic-pm-debugging.txt new file mode 100644 index 0000000..1a85e2b --- /dev/null +++ b/Documentation/power/basic-pm-debugging.txt @@ -0,0 +1,106 @@ +Debugging suspend and resume + (C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL + +1. Testing suspend to disk (STD) + +To verify that the STD works, you can try to suspend in the "reboot" mode: + +# echo reboot > /sys/power/disk +# echo disk > /sys/power/state + +and the system should suspend, reboot, resume and get back to the command prompt +where you have started the transition. If that happens, the STD is most likely +to work correctly, but you need to repeat the test at least a couple of times in +a row for confidence. This is necessary, because some problems only show up on +a second attempt at suspending and resuming the system. You should also test +the "platform" and "shutdown" modes of suspend: + +# echo platform > /sys/power/disk +# echo disk > /sys/power/state + +or + +# echo shutdown > /sys/power/disk +# echo disk > /sys/power/state + +in which cases you will have to press the power button to make the system +resume. If that does not work, you will need to identify what goes wrong. + +a) Test mode of STD + +To verify if there are any drivers that cause problems you can run the STD +in the test mode: + +# echo test > /sys/power/disk +# echo disk > /sys/power/state + +in which case the system should freeze tasks, suspend devices, disable nonboot +CPUs (if any), wait for 5 seconds, enable nonboot CPUs, resume devices, thaw +tasks and return to your command prompt. If that fails, most likely there is +a driver that fails to either suspend or resume (in the latter case the system +may hang or be unstable after the test, so please take that into consideration). +To find this driver, you can carry out a binary search according to the rules: +- if the test fails, unload a half of the drivers currently loaded and repeat +(that would probably involve rebooting the system, so always note what drivers +have been loaded before the test), +- if the test succeeds, load a half of the drivers you have unloaded most +recently and repeat. + +Once you have found the failing driver (there can be more than just one of +them), you have to unload it every time before the STD transition. In that case +please make sure to report the problem with the driver. + +It is also possible that a cycle can still fail after you have unloaded +all modules. In that case, you would want to look in your kernel configuration +for the drivers that can be compiled as modules (testing again with them as +modules), and possibly also try boot time options such as "noapic" or "noacpi". + +b) Testing minimal configuration + +If the test mode of STD works, you can boot the system with "init=/bin/bash" +and attempt to suspend in the "reboot", "shutdown" and "platform" modes. If +that does not work, there probably is a problem with a driver statically +compiled into the kernel and you can try to compile more drivers as modules, +so that they can be tested individually. Otherwise, there is a problem with a +modular driver and you can find it by loading a half of the modules you normally +use and binary searching in accordance with the algorithm: +- if there are n modules loaded and the attempt to suspend and resume fails, +unload n/2 of the modules and try again (that would probably involve rebooting +the system), +- if there are n modules loaded and the attempt to suspend and resume succeeds, +load n/2 modules more and try again. + +Again, if you find the offending module(s), it(they) must be unloaded every time +before the STD transition, and please report the problem with it(them). + +c) Advanced debugging + +In case the STD does not work on your system even in the minimal configuration +and compiling more drivers as modules is not practical or some modules cannot +be unloaded, you can use one of the more advanced debugging techniques to find +the problem. First, if there is a serial port in your box, you can set the +CONFIG_DISABLE_CONSOLE_SUSPEND kernel configuration option and try to log kernel +messages using the serial console. This may provide you with some information +about the reasons of the suspend (resume) failure. Alternatively, it may be +possible to use a FireWire port for debugging with firescope +(ftp://ftp.firstfloor.org/pub/ak/firescope/). On i386 it is also possible to +use the PM_TRACE mechanism documented in Documentation/s2ram.txt . + +2. Testing suspend to RAM (STR) + +To verify that the STR works, it is generally more convenient to use the s2ram +tool available from http://suspend.sf.net and documented at +http://en.opensuse.org/s2ram . However, before doing that it is recommended to +carry out the procedure described in section 1. + +Assume you have resolved the problems with the STD and you have found some +failing drivers. These drivers are also likely to fail during the STR or +during the resume, so it is better to unload them every time before the STR +transition. Now, you can follow the instructions at +http://en.opensuse.org/s2ram to test the system, but if it does not work +"out of the box", you may need to boot it with "init=/bin/bash" and test +s2ram in the minimal configuration. In that case, you may be able to search +for failing drivers by following the procedure analogous to the one described in +1b). If you find some failing drivers, you will have to unload them every time +before the STR transition (ie. before you run s2ram), and please report the +problems with them. diff --git a/Documentation/power/drivers-testing.txt b/Documentation/power/drivers-testing.txt new file mode 100644 index 0000000..33016c2 --- /dev/null +++ b/Documentation/power/drivers-testing.txt @@ -0,0 +1,42 @@ +Testing suspend and resume support in device drivers + (C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL + +1. Preparing the test system + +Unfortunately, to effectively test the support for the system-wide suspend and +resume transitions in a driver, it is necessary to suspend and resume a fully +functional system with this driver loaded. Moreover, that should be done +several times, preferably several times in a row, and separately for the suspend +to disk (STD) and the suspend to RAM (STR) transitions, because each of these +cases involves different ordering of operations and different interactions with +the machine's BIOS. + +Of course, for this purpose the test system has to be known to suspend and +resume without the driver being tested. Thus, if possible, you should first +resolve all suspend/resume-related problems in the test system before you start +testing the new driver. Please see Documents/power/basic-pm-debugging.txt for +more information about the debugging of suspend/resume functionality. + +2. Testing the driver + +Once you have resolved the suspend/resume-related problems with your test system +without the new driver, you are ready to test it: + +a) Build the driver as a module, load it and try the STD in the test mode (see: +Documents/power/basic-pm-debugging.txt, 1a)). + +b) Load the driver and attempt to suspend to disk in the "reboot", "shutdown" +and "platform" modes (see: Documents/power/basic-pm-debugging.txt, 1). + +c) Compile the driver directly into the kernel and try the STD in the test mode. + +d) Attempt to suspend to disk with the driver compiled directly into the kernel +in the "reboot", "shutdown" and "platform" modes. + +e) Attempt to suspend to RAM using the s2ram tool with the driver loaded (see: +Documents/power/basic-pm-debugging.txt, 2). As far as the STR tests are +concerned, it should not matter whether or not the driver is built as a module. + +Each of the above tests should be repeated several times and the STD tests +should be mixed with the STR tests. If any of them fails, the driver cannot be +regarded as suspend/resume-safe. diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt index 8c5b41b..fd5192a 100644 --- a/Documentation/power/interface.txt +++ b/Documentation/power/interface.txt @@ -34,8 +34,12 @@ for 5 seconds, resume devices, unfreeze tasks and enable nonboot CPUs. Then, we are able to look in the log messages and work out, for example, which code is being slow and which device drivers are misbehaving. -Reading from this file will display what the mode is currently set -to. Writing to this file will accept one of +Reading from this file will display all supported modes and the currently +selected one in brackets, for example + + [shutdown] reboot test testproc + +Writing to this file will accept one of 'platform' (only if the platform supports it) 'shutdown' diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.txt index b6a3cbf..e00b099 100644 --- a/Documentation/power/pci.txt +++ b/Documentation/power/pci.txt @@ -203,7 +203,7 @@ resume Usage: -if (dev->driver && dev->driver->suspend) +if (dev->driver && dev->driver->resume) dev->driver->resume(dev) The resume callback may be called from any power state, and is always meant to diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt index c55bd50..5b8d695 100644 --- a/Documentation/power/swsusp.txt +++ b/Documentation/power/swsusp.txt @@ -48,7 +48,7 @@ before suspend (it is limited to 500 MB by default). Article about goals and implementation of Software Suspend for Linux ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Author: G‚ábor Kuti +Author: G‚ábor Kuti Last revised: 2003-10-20 by Pavel Machek Idea and goals to achieve diff --git a/Documentation/power/userland-swsusp.txt b/Documentation/power/userland-swsusp.txt index 000556c..e00c6cf 100644 --- a/Documentation/power/userland-swsusp.txt +++ b/Documentation/power/userland-swsusp.txt @@ -93,21 +93,23 @@ SNAPSHOT_S2RAM - suspend to RAM; using this call causes the kernel to to resume the system from RAM if there's enough battery power or restore its state on the basis of the saved suspend image otherwise) -SNAPSHOT_PMOPS - enable the usage of the pmops->prepare, pmops->enter and - pmops->finish methods (the in-kernel swsusp knows these as the "platform - method") which are needed on many machines to (among others) speed up - the resume by letting the BIOS skip some steps or to let the system - recognise the correct state of the hardware after the resume (in - particular on many machines this ensures that unplugged AC - adapters get correctly detected and that kacpid does not run wild after - the resume). The last ioctl() argument can take one of the three - values, defined in kernel/power/power.h: +SNAPSHOT_PMOPS - enable the usage of the hibernation_ops->prepare, + hibernate_ops->enter and hibernation_ops->finish methods (the in-kernel + swsusp knows these as the "platform method") which are needed on many + machines to (among others) speed up the resume by letting the BIOS skip + some steps or to let the system recognise the correct state of the + hardware after the resume (in particular on many machines this ensures + that unplugged AC adapters get correctly detected and that kacpid does + not run wild after the resume). The last ioctl() argument can take one + of the three values, defined in kernel/power/power.h: PMOPS_PREPARE - make the kernel carry out the - pm_ops->prepare(PM_SUSPEND_DISK) operation + hibernation_ops->prepare() operation PMOPS_ENTER - make the kernel power off the system by calling - pm_ops->enter(PM_SUSPEND_DISK) + hibernation_ops->enter() PMOPS_FINISH - make the kernel carry out the - pm_ops->finish(PM_SUSPEND_DISK) operation + hibernation_ops->finish() operation + Note that the actual constants are misnamed because they surface + internal kernel implementation details that have changed. The device's read() operation can be used to transfer the snapshot image from the kernel. It has the following limitations: diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt index 033a3f3..b49ce16 100644 --- a/Documentation/powerpc/booting-without-of.txt +++ b/Documentation/powerpc/booting-without-of.txt @@ -1444,7 +1444,7 @@ platforms are moved over to use the flattened-device-tree model. Basically, it is a bus of devices, that could act more or less as a complete entity (UCC, USB etc ). All of them should be siblings on the "root" qe node, using the common properties from there. - The description below applies to the the qe of MPC8360 and + The description below applies to the qe of MPC8360 and more nodes and properties would be extended in the future. i) Root QE device @@ -1560,6 +1560,9 @@ platforms are moved over to use the flattened-device-tree model. network device. This is used by the bootwrapper to interpret MAC addresses passed by the firmware when no information other than indices is available to associate an address with a device. + - phy-connection-type : a string naming the controller/PHY interface type, + i.e., "mii" (default), "rmii", "gmii", "rgmii", "rgmii-id", "tbi", + or "rtbi". Example: ucc@2000 { @@ -1574,6 +1577,7 @@ platforms are moved over to use the flattened-device-tree model. rx-clock = "none"; tx-clock = "clk9"; phy-handle = <212000>; + phy-connection-type = "gmii"; pio-handle = <140001>; }; @@ -1629,7 +1633,7 @@ platforms are moved over to use the flattened-device-tree model. - assignment : function number of the pin according to the Pin Assignment tables in User Manual. Each pin can have up to 4 possible functions in QE and two options for CPM. - - has_irq : indicates if the pin is used as source of exteral + - has_irq : indicates if the pin is used as source of external interrupts. Example: diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt index 1ef6bb8..7c701b8 100644 --- a/Documentation/rtc.txt +++ b/Documentation/rtc.txt @@ -147,7 +147,7 @@ RTC class framework, but can't be supported by the older driver. * RTC_AIE_ON, RTC_AIE_OFF, RTC_ALM_SET, RTC_ALM_READ ... when the RTC is connected to an IRQ line, it can often issue an alarm IRQ up to - 24 hours in the future. + 24 hours in the future. (Use RTC_WKALM_* by preference.) * RTC_WKALM_SET, RTC_WKALM_RD ... RTCs that can issue alarms beyond the next 24 hours use a slightly more powerful API, which supports @@ -175,10 +175,7 @@ driver returns ENOIOCTLCMD. Some common examples: called with appropriate values. * RTC_ALM_SET, RTC_ALM_READ, RTC_WKALM_SET, RTC_WKALM_RD: the - set_alarm/read_alarm functions will be called. To differentiate - between the ALM and WKALM, check the larger fields of the rtc_wkalrm - struct (like tm_year). These will be set to -1 when using ALM and - will be set to proper values when using WKALM. + set_alarm/read_alarm functions will be called. * RTC_IRQP_SET, RTC_IRQP_READ: the irq_set_freq function will be called to set the frequency while the framework will handle the read for you diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt index 09939696..d30a281 100644 --- a/Documentation/s390/Debugging390.txt +++ b/Documentation/s390/Debugging390.txt @@ -2209,7 +2209,7 @@ Breakpoint 2 at 0x4d87a4: file top.c, line 2609. #3 0x5167e6 in readline_internal_char () at readline.c:454 #4 0x5168ee in readline_internal_charloop () at readline.c:507 #5 0x51692c in readline_internal () at readline.c:521 -#6 0x5164fe in readline (prompt=0x7ffff810 "\177ÿøx\177ÿ÷Ø\177ÿøxÀ") +#6 0x5164fe in readline (prompt=0x7ffff810 "\177ÂÿÂøx\177ÂÿÂ÷ÂØ\177ÂÿÂøxÂÀ") at readline.c:349 #7 0x4d7a8a in command_line_input (prrompt=0x564420 "(gdb) ", repeat=1, annotation_suffix=0x4d6b44 "prompt") at top.c:2091 diff --git a/Documentation/scsi/aacraid.txt b/Documentation/scsi/aacraid.txt index dc8e44fc..ce3cb42 100644 --- a/Documentation/scsi/aacraid.txt +++ b/Documentation/scsi/aacraid.txt @@ -37,7 +37,11 @@ Supported Cards/Chipsets 9005:0286:9005:029d Adaptec 2420SA (Intruder HP release) 9005:0286:9005:02ac Adaptec 1800 (Typhoon44) 9005:0285:9005:02b5 Adaptec 5445 (Voodoo44) + 9005:0285:15d9:02b5 SMC AOC-USAS-S4i + 9005:0285:15d9:02c9 SMC AOC-USAS-S4iR 9005:0285:9005:02b6 Adaptec 5805 (Voodoo80) + 9005:0285:15d9:02b6 SMC AOC-USAS-S8i + 9005:0285:15d9:02ca SMC AOC-USAS-S8iR 9005:0285:9005:02b7 Adaptec 5085 (Voodoo08) 9005:0285:9005:02bb Adaptec 3405 (Marauder40LP) 9005:0285:9005:02bc Adaptec 3805 (Marauder80LP) @@ -93,6 +97,9 @@ Supported Cards/Chipsets 9005:0286:9005:02ae (Aurora Lite ARK) 9005:0285:9005:02b0 (Sunrise Lake ARK) 9005:0285:9005:02b1 Adaptec (Voodoo 8 internal 8 external) + 9005:0285:108e:7aac SUN STK RAID REM (Voodoo44 Coyote) + 9005:0285:108e:0286 SUN STK RAID INT (Cougar) + 9005:0285:108e:0287 SUN STK RAID EXT (Prometheus) People ------------------------- diff --git a/Documentation/scsi/aha152x.txt b/Documentation/scsi/aha152x.txt index 2ce022c..29ce6d8 100644 --- a/Documentation/scsi/aha152x.txt +++ b/Documentation/scsi/aha152x.txt @@ -1,7 +1,7 @@ $Id: README.aha152x,v 1.2 1999/12/25 15:32:30 fischer Exp fischer $ Adaptec AHA-1520/1522 SCSI driver for Linux (aha152x) -Copyright 1993-1999 Jürgen Fischer <fischer@norbit.de> +Copyright 1993-1999 Jürgen Fischer <fischer@norbit.de> TC1550 patches by Luuk van Dijk (ldz@xs4all.nl) diff --git a/Documentation/scsi/aic7xxx.txt b/Documentation/scsi/aic7xxx.txt index 9b894f1..5f34d2b 100644 --- a/Documentation/scsi/aic7xxx.txt +++ b/Documentation/scsi/aic7xxx.txt @@ -40,7 +40,7 @@ The following information is available in this file: 2. Multi-function Twin Channel Device - Two controllers on one chip. 3. Command Channel Secondary DMA Engine - Allows scatter gather list and SCB prefetch. - 4. 64 Byte SCB Support - Allows disconnected, unttagged request table + 4. 64 Byte SCB Support - Allows disconnected, untagged request table for all possible target/lun combinations. 5. Block Move Instruction Support - Doubles the speed of certain sequencer operations. diff --git a/Documentation/scsi/aic7xxx_old.txt b/Documentation/scsi/aic7xxx_old.txt index 05667e7..7bd210a 100644 --- a/Documentation/scsi/aic7xxx_old.txt +++ b/Documentation/scsi/aic7xxx_old.txt @@ -356,7 +356,7 @@ linux-1.1.x and fairly stable since linux-1.2.x, and are also in FreeBSD or enable Tagged Command Queueing (TCQ) on specific devices. As of driver version 5.1.11, TCQ is now either on or off by default according to the setting you choose during the make config process. - In order to en/disable TCQ for certian devices at boot time, a user + In order to en/disable TCQ for certain devices at boot time, a user may use this boot param. The driver will then parse this message out and en/disable the specific device entries that are present based upon the value given. The param line is parsed in the following manner: diff --git a/Documentation/scsi/ncr53c8xx.txt b/Documentation/scsi/ncr53c8xx.txt index caf10b1..39d409a8 100644 --- a/Documentation/scsi/ncr53c8xx.txt +++ b/Documentation/scsi/ncr53c8xx.txt @@ -562,11 +562,6 @@ if only one has a flaw for some SCSI feature, you can disable the support by the driver of this feature at linux start-up and enable this feature after boot-up only for devices that support it safely. -CONFIG_SCSI_NCR53C8XX_PROFILE_SUPPORT (default answer: n) - This option must be set for profiling information to be gathered - and printed out through the proc file system. This features may - impact performances. - CONFIG_SCSI_NCR53C8XX_IOMAPPED (default answer: n) Answer "y" if you suspect your mother board to not allow memory mapped I/O. May slow down performance a little. This option is required by @@ -1265,7 +1260,7 @@ then the request of the IRQ obviously will not succeed for all the drivers. 15.1 Problem tracking Most SCSI problems are due to a non conformant SCSI bus or to buggy -devices. If infortunately you have SCSI problems, you can check the +devices. If unfortunately you have SCSI problems, you can check the following things: - SCSI bus cables diff --git a/Documentation/scsi/st.txt b/Documentation/scsi/st.txt index 3c12422..b7be95b 100644 --- a/Documentation/scsi/st.txt +++ b/Documentation/scsi/st.txt @@ -1,5 +1,5 @@ This file contains brief information about the SCSI tape driver. -The driver is currently maintained by Kai Mäkisara (email +The driver is currently maintained by Kai Mäkisara (email Kai.Makisara@kolumbus.fi) Last modified: Mon Mar 7 21:14:44 2005 by kai.makisara diff --git a/Documentation/scsi/sym53c8xx_2.txt b/Documentation/scsi/sym53c8xx_2.txt index 2c1745a..3d9f06b 100644 --- a/Documentation/scsi/sym53c8xx_2.txt +++ b/Documentation/scsi/sym53c8xx_2.txt @@ -587,7 +587,7 @@ devices, ... may cause a SCSI signal to be wrong when te driver reads it. 15.1 Problem tracking Most SCSI problems are due to a non conformant SCSI bus or too buggy -devices. If infortunately you have SCSI problems, you can check the +devices. If unfortunately you have SCSI problems, you can check the following things: - SCSI bus cables diff --git a/Documentation/scsi/tmscsim.txt b/Documentation/scsi/tmscsim.txt index 8b2168a..61c0531 100644 --- a/Documentation/scsi/tmscsim.txt +++ b/Documentation/scsi/tmscsim.txt @@ -426,7 +426,7 @@ Thanks to Linus Torvalds, Alan Cox, the FSF people, the XFree86 team and all the others for the wonderful OS and software. Thanks to C.L. Huang and Philip Giang (Tekram) for the initial driver release and support. -Thanks to Doug Ledford, Gérard Roudier for support with SCSI coding. +Thanks to Doug Ledford, Gérard Roudier for support with SCSI coding. Thanks to a lot of people (espec. Chiaki Ishikawa, Andreas Haumer, Hubert Tonneau) for intensively testing the driver (and even risking data loss doing this during early revisions). diff --git a/Documentation/sh/clk.txt b/Documentation/sh/clk.txt new file mode 100644 index 0000000..9aef710 --- /dev/null +++ b/Documentation/sh/clk.txt @@ -0,0 +1,32 @@ +Clock framework on SuperH architecture + +The framework on SH extends existing API by the function clk_set_rate_ex, +which prototype is as follows: + + clk_set_rate_ex (struct clk *clk, unsigned long rate, int algo_id) + +The algo_id parameter is used to specify algorithm used to recalculate clocks, +adjanced to clock, specified as first argument. It is assumed that algo_id==0 +means no changes to adjanced clock + +Internally, the clk_set_rate_ex forwards request to clk->ops->set_rate method, +if it is present in ops structure. The method should set the clock rate and adjust +all needed clocks according to the passed algo_id. +Exact values for algo_id are machine-dependend. For the sh7722, the following +values are defined: + + NO_CHANGE = 0, + IUS_N1_N1, /* I:U = N:1, U:Sh = N:1 */ + IUS_322, /* I:U:Sh = 3:2:2 */ + IUS_522, /* I:U:Sh = 5:2:2 */ + IUS_N11, /* I:U:Sh = N:1:1 */ + SB_N1, /* Sh:B = N:1 */ + SB3_N1, /* Sh:B3 = N:1 */ + SB3_32, /* Sh:B3 = 3:2 */ + SB3_43, /* Sh:B3 = 4:3 */ + SB3_54, /* Sh:B3 = 5:4 */ + BP_N1, /* B:P = N:1 */ + IP_N1 /* I:P = N:1 */ + +Each of these constants means relation between clocks that can be set via the FRQCR +register diff --git a/Documentation/sonypi.txt b/Documentation/sonypi.txt index c1237a92..4857acf 100644 --- a/Documentation/sonypi.txt +++ b/Documentation/sonypi.txt @@ -1,7 +1,7 @@ Sony Programmable I/O Control Device Driver Readme -------------------------------------------------- Copyright (C) 2001-2004 Stelian Pop <stelian@popies.net> - Copyright (C) 2001-2002 Alcôve <www.alcove.com> + Copyright (C) 2001-2002 Alcôve <www.alcove.com> Copyright (C) 2001 Michael Ashley <m.ashley@unsw.edu.au> Copyright (C) 2001 Junichi Morita <jun1m@mars.dti.ne.jp> Copyright (C) 2000 Takaya Kinjo <t-kinjo@tc4.so-net.ne.jp> diff --git a/Documentation/sound/oss/mwave b/Documentation/sound/oss/mwave index 858334bb..5fbcb16 100644 --- a/Documentation/sound/oss/mwave +++ b/Documentation/sound/oss/mwave @@ -163,7 +163,7 @@ OR the Default= line COULD be Default=SBPRO Reboot to Windows 95 and choose Linux. When booted, use sndconfig to configure -the sound modules and voilà - ThinkPad sound with Linux. +the sound modules and voilà - ThinkPad sound with Linux. Now the gotchas - you can either have CD sound OR Mixers but not both. That's a problem with the SB1.5 (CD sound) or SBPRO (Mixers) settings. No one knows why diff --git a/Documentation/spi/pxa2xx b/Documentation/spi/pxa2xx index f9717fe..215e3b8 100644 --- a/Documentation/spi/pxa2xx +++ b/Documentation/spi/pxa2xx @@ -62,7 +62,7 @@ static struct resource pxa_spi_nssp_resources[] = { static struct pxa2xx_spi_master pxa_nssp_master_info = { .ssp_type = PXA25x_NSSP, /* Type of SSP */ - .clock_enable = CKEN9_NSSP, /* NSSP Peripheral clock */ + .clock_enable = CKEN_NSSP, /* NSSP Peripheral clock */ .num_chipselect = 1, /* Matches the number of chips attached to NSSP */ .enable_dma = 1, /* Enables NSSP DMA */ }; diff --git a/Documentation/spi/spi-summary b/Documentation/spi/spi-summary index ecc7c9e..795fbb4 100644 --- a/Documentation/spi/spi-summary +++ b/Documentation/spi/spi-summary @@ -8,7 +8,7 @@ What is SPI? The "Serial Peripheral Interface" (SPI) is a synchronous four wire serial link used to connect microcontrollers to sensors, memory, and peripherals. -The three signal wires hold a clock (SCLK, often on the order of 10 MHz), +The three signal wires hold a clock (SCK, often on the order of 10 MHz), and parallel data lines with "Master Out, Slave In" (MOSI) or "Master In, Slave Out" (MISO) signals. (Other names are also used.) There are four clocking modes through which data is exchanged; mode-0 and mode-3 are most @@ -22,7 +22,7 @@ other signals, often including an interrupt to the master. Unlike serial busses like USB or SMBUS, even low level protocols for SPI slave functions are usually not interoperable between vendors -(except for cases like SPI memory chips). +(except for commodities like SPI memory chips). - SPI may be used for request/response style device protocols, as with touchscreen sensors and memory chips. @@ -77,8 +77,9 @@ cards without needing a special purpose MMC/SD/SDIO controller. How do these driver programming interfaces work? ------------------------------------------------ The <linux/spi/spi.h> header file includes kerneldoc, as does the -main source code, and you should certainly read that. This is just -an overview, so you get the big picture before the details. +main source code, and you should certainly read that chapter of the +kernel API document. This is just an overview, so you get the big +picture before those details. SPI requests always go into I/O queues. Requests for a given SPI device are always executed in FIFO order, and complete asynchronously through @@ -88,7 +89,7 @@ a command and then reading its response. There are two types of SPI driver, here called: - Controller drivers ... these are often built in to System-On-Chip + Controller drivers ... controllers may be built in to System-On-Chip processors, and often support both Master and Slave roles. These drivers touch hardware registers and may use DMA. Or they can be PIO bitbangers, needing just GPIO pins. @@ -108,18 +109,18 @@ those two types of driver. At this writing, Linux has no slave side programming interface. There is a minimal core of SPI programming interfaces, focussing on -using driver model to connect controller and protocol drivers using +using the driver model to connect controller and protocol drivers using device tables provided by board specific initialization code. SPI shows up in sysfs in several locations: - /sys/devices/.../CTLR/spiB.C ... spi_device for on bus "B", + /sys/devices/.../CTLR/spiB.C ... spi_device on bus "B", chipselect C, accessed through CTLR. /sys/devices/.../CTLR/spiB.C/modalias ... identifies the driver that should be used with this device (for hotplug/coldplug) /sys/bus/spi/devices/spiB.C ... symlink to the physical - spiB-C device + spiB.C device /sys/bus/spi/drivers/D ... driver for one or more spi*.* devices @@ -240,7 +241,7 @@ The board_info should provide enough information to let the system work without the chip's driver being loaded. The most troublesome aspect of that is likely the SPI_CS_HIGH bit in the spi_device.mode field, since sharing a bus with a device that interprets chipselect "backwards" is -not possible. +not possible until the infrastructure knows how to deselect it. Then your board initialization code would register that table with the SPI infrastructure, so that it's available later when the SPI master controller @@ -268,16 +269,14 @@ board info based on the board that was hotplugged. Of course, you'd later call at least spi_unregister_device() when that board is removed. When Linux includes support for MMC/SD/SDIO/DataFlash cards through SPI, those -configurations will also be dynamic. Fortunately, those devices all support -basic device identification probes, so that support should hotplug normally. +configurations will also be dynamic. Fortunately, such devices all support +basic device identification probes, so they should hotplug normally. How do I write an "SPI Protocol Driver"? ---------------------------------------- -All SPI drivers are currently kernel drivers. A userspace driver API -would just be another kernel driver, probably offering some lowlevel -access through aio_read(), aio_write(), and ioctl() calls and using the -standard userspace sysfs mechanisms to bind to a given SPI device. +Most SPI drivers are currently kernel drivers, but there's also support +for userspace drivers. Here we talk only about kernel drivers. SPI protocol drivers somewhat resemble platform device drivers: @@ -319,7 +318,8 @@ might look like this unless you're creating a class_device: As soon as it enters probe(), the driver may issue I/O requests to the SPI device using "struct spi_message". When remove() returns, -the driver guarantees that it won't submit any more such messages. +or after probe() fails, the driver guarantees that it won't submit +any more such messages. - An spi_message is a sequence of protocol operations, executed as one atomic sequence. SPI driver controls include: @@ -368,7 +368,8 @@ the driver guarantees that it won't submit any more such messages. Some drivers may need to modify spi_device characteristics like the transfer mode, wordsize, or clock rate. This is done with spi_setup(), which would normally be called from probe() before the first I/O is -done to the device. +done to the device. However, that can also be called at any time +that no message is pending for that device. While "spi_device" would be the bottom boundary of the driver, the upper boundaries might include sysfs (especially for sensor readings), @@ -445,11 +446,15 @@ SPI MASTER METHODS This sets up the device clock rate, SPI mode, and word sizes. Drivers may change the defaults provided by board_info, and then call spi_setup(spi) to invoke this routine. It may sleep. + Unless each SPI slave has its own configuration registers, don't + change them right away ... otherwise drivers could corrupt I/O + that's in progress for other SPI devices. master->transfer(struct spi_device *spi, struct spi_message *message) This must not sleep. Its responsibility is arrange that the - transfer happens and its complete() callback is issued; the two - will normally happen later, after other transfers complete. + transfer happens and its complete() callback is issued. The two + will normally happen later, after other transfers complete, and + if the controller is idle it will need to be kickstarted. master->cleanup(struct spi_device *spi) Your controller driver may use spi_device.controller_state to hold diff --git a/Documentation/spi/spidev b/Documentation/spi/spidev new file mode 100644 index 0000000..5c8e1b98 --- /dev/null +++ b/Documentation/spi/spidev @@ -0,0 +1,307 @@ +SPI devices have a limited userspace API, supporting basic half-duplex +read() and write() access to SPI slave devices. Using ioctl() requests, +full duplex transfers and device I/O configuration are also available. + + #include <fcntl.h> + #include <unistd.h> + #include <sys/ioctl.h> + #include <linux/types.h> + #include <linux/spi/spidev.h> + +Some reasons you might want to use this programming interface include: + + * Prototyping in an environment that's not crash-prone; stray pointers + in userspace won't normally bring down any Linux system. + + * Developing simple protocols used to talk to microcontrollers acting + as SPI slaves, which you may need to change quite often. + +Of course there are drivers that can never be written in userspace, because +they need to access kernel interfaces (such as IRQ handlers or other layers +of the driver stack) that are not accessible to userspace. + + +DEVICE CREATION, DRIVER BINDING +=============================== +The simplest way to arrange to use this driver is to just list it in the +spi_board_info for a device as the driver it should use: the "modalias" +entry is "spidev", matching the name of the driver exposing this API. +Set up the other device characteristics (bits per word, SPI clocking, +chipselect polarity, etc) as usual, so you won't always need to override +them later. + +(Sysfs also supports userspace driven binding/unbinding of drivers to +devices. That mechanism might be supported here in the future.) + +When you do that, the sysfs node for the SPI device will include a child +device node with a "dev" attribute that will be understood by udev or mdev. +(Larger systems will have "udev". Smaller ones may configure "mdev" into +busybox; it's less featureful, but often enough.) For a SPI device with +chipselect C on bus B, you should see: + + /dev/spidevB.C ... character special device, major number 153 with + a dynamically chosen minor device number. This is the node + that userspace programs will open, created by "udev" or "mdev". + + /sys/devices/.../spiB.C ... as usual, the SPI device node will + be a child of its SPI master controller. + + /sys/class/spidev/spidevB.C ... created when the "spidev" driver + binds to that device. (Directory or symlink, based on whether + or not you enabled the "deprecated sysfs files" Kconfig option.) + +Do not try to manage the /dev character device special file nodes by hand. +That's error prone, and you'd need to pay careful attention to system +security issues; udev/mdev should already be configured securely. + +If you unbind the "spidev" driver from that device, those two "spidev" nodes +(in sysfs and in /dev) should automatically be removed (respectively by the +kernel and by udev/mdev). You can unbind by removing the "spidev" driver +module, which will affect all devices using this driver. You can also unbind +by having kernel code remove the SPI device, probably by removing the driver +for its SPI controller (so its spi_master vanishes). + +Since this is a standard Linux device driver -- even though it just happens +to expose a low level API to userspace -- it can be associated with any number +of devices at a time. Just provide one spi_board_info record for each such +SPI device, and you'll get a /dev device node for each device. + + +BASIC CHARACTER DEVICE API +========================== +Normal open() and close() operations on /dev/spidevB.D files work as you +would expect. + +Standard read() and write() operations are obviously only half-duplex, and +the chipselect is deactivated between those operations. Full-duplex access, +and composite operation without chipselect de-activation, is available using +the SPI_IOC_MESSAGE(N) request. + +Several ioctl() requests let your driver read or override the device's current +settings for data transfer parameters: + + SPI_IOC_RD_MODE, SPI_IOC_WR_MODE ... pass a pointer to a byte which will + return (RD) or assign (WR) the SPI transfer mode. Use the constants + SPI_MODE_0..SPI_MODE_3; or if you prefer you can combine SPI_CPOL + (clock polarity, idle high iff this is set) or SPI_CPHA (clock phase, + sample on trailing edge iff this is set) flags. + + SPI_IOC_RD_LSB_FIRST, SPI_IOC_WR_LSB_FIRST ... pass a pointer to a byte + which will return (RD) or assign (WR) the bit justification used to + transfer SPI words. Zero indicates MSB-first; other values indicate + the less common LSB-first encoding. In both cases the specified value + is right-justified in each word, so that unused (TX) or undefined (RX) + bits are in the MSBs. + + SPI_IOC_RD_BITS_PER_WORD, SPI_IOC_WR_BITS_PER_WORD ... pass a pointer to + a byte which will return (RD) or assign (WR) the number of bits in + each SPI transfer word. The value zero signifies eight bits. + + SPI_IOC_RD_MAX_SPEED_HZ, SPI_IOC_WR_MAX_SPEED_HZ ... pass a pointer to a + u32 which will return (RD) or assign (WR) the maximum SPI transfer + speed, in Hz. The controller can't necessarily assign that specific + clock speed. + +NOTES: + + - At this time there is no async I/O support; everything is purely + synchronous. + + - There's currently no way to report the actual bit rate used to + shift data to/from a given device. + + - From userspace, you can't currently change the chip select polarity; + that could corrupt transfers to other devices sharing the SPI bus. + Each SPI device is deselected when it's not in active use, allowing + other drivers to talk to other devices. + + - There's a limit on the number of bytes each I/O request can transfer + to the SPI device. It defaults to one page, but that can be changed + using a module parameter. + + - Because SPI has no low-level transfer acknowledgement, you usually + won't see any I/O errors when talking to a non-existent device. + + +FULL DUPLEX CHARACTER DEVICE API +================================ + +See the sample program below for one example showing the use of the full +duplex programming interface. (Although it doesn't perform a full duplex +transfer.) The model is the same as that used in the kernel spi_sync() +request; the individual transfers offer the same capabilities as are +available to kernel drivers (except that it's not asynchronous). + +The example shows one half-duplex RPC-style request and response message. +These requests commonly require that the chip not be deselected between +the request and response. Several such requests could be chained into +a single kernel request, even allowing the chip to be deselected after +each response. (Other protocol options include changing the word size +and bitrate for each transfer segment.) + +To make a full duplex request, provide both rx_buf and tx_buf for the +same transfer. It's even OK if those are the same buffer. + + +SAMPLE PROGRAM +============== + +-------------------------------- CUT HERE +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <fcntl.h> +#include <string.h> + +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include <linux/types.h> +#include <linux/spi/spidev.h> + + +static int verbose; + +static void do_read(int fd, int len) +{ + unsigned char buf[32], *bp; + int status; + + /* read at least 2 bytes, no more than 32 */ + if (len < 2) + len = 2; + else if (len > sizeof(buf)) + len = sizeof(buf); + memset(buf, 0, sizeof buf); + + status = read(fd, buf, len); + if (status < 0) { + perror("read"); + return; + } + if (status != len) { + fprintf(stderr, "short read\n"); + return; + } + + printf("read(%2d, %2d): %02x %02x,", len, status, + buf[0], buf[1]); + status -= 2; + bp = buf + 2; + while (status-- > 0) + printf(" %02x", *bp++); + printf("\n"); +} + +static void do_msg(int fd, int len) +{ + struct spi_ioc_transfer xfer[2]; + unsigned char buf[32], *bp; + int status; + + memset(xfer, 0, sizeof xfer); + memset(buf, 0, sizeof buf); + + if (len > sizeof buf) + len = sizeof buf; + + buf[0] = 0xaa; + xfer[0].tx_buf = (__u64) buf; + xfer[0].len = 1; + + xfer[1].rx_buf = (__u64) buf; + xfer[1].len = len; + + status = ioctl(fd, SPI_IOC_MESSAGE(2), xfer); + if (status < 0) { + perror("SPI_IOC_MESSAGE"); + return; + } + + printf("response(%2d, %2d): ", len, status); + for (bp = buf; len; len--) + printf(" %02x", *bp++); + printf("\n"); +} + +static void dumpstat(const char *name, int fd) +{ + __u8 mode, lsb, bits; + __u32 speed; + + if (ioctl(fd, SPI_IOC_RD_MODE, &mode) < 0) { + perror("SPI rd_mode"); + return; + } + if (ioctl(fd, SPI_IOC_RD_LSB_FIRST, &lsb) < 0) { + perror("SPI rd_lsb_fist"); + return; + } + if (ioctl(fd, SPI_IOC_RD_BITS_PER_WORD, &bits) < 0) { + perror("SPI bits_per_word"); + return; + } + if (ioctl(fd, SPI_IOC_RD_MAX_SPEED_HZ, &speed) < 0) { + perror("SPI max_speed_hz"); + return; + } + + printf("%s: spi mode %d, %d bits %sper word, %d Hz max\n", + name, mode, bits, lsb ? "(lsb first) " : "", speed); +} + +int main(int argc, char **argv) +{ + int c; + int readcount = 0; + int msglen = 0; + int fd; + const char *name; + + while ((c = getopt(argc, argv, "hm:r:v")) != EOF) { + switch (c) { + case 'm': + msglen = atoi(optarg); + if (msglen < 0) + goto usage; + continue; + case 'r': + readcount = atoi(optarg); + if (readcount < 0) + goto usage; + continue; + case 'v': + verbose++; + continue; + case 'h': + case '?': +usage: + fprintf(stderr, + "usage: %s [-h] [-m N] [-r N] /dev/spidevB.D\n", + argv[0]); + return 1; + } + } + + if ((optind + 1) != argc) + goto usage; + name = argv[optind]; + + fd = open(name, O_RDWR); + if (fd < 0) { + perror("open"); + return 1; + } + + dumpstat(name, fd); + + if (msglen) + do_msg(fd, msglen); + + if (readcount) + do_read(fd, readcount); + + close(fd); + return 0; +} diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 5922e84..111fd28 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -221,14 +221,14 @@ Controls the kernel's behaviour when an oops or BUG is encountered. 0: try to continue operation -1: panic immediatly. If the `panic' sysctl is also non-zero then the +1: panic immediately. If the `panic' sysctl is also non-zero then the machine will be rebooted. ============================================================== pid_max: -PID allocation wrap value. When the kenrel's next PID value +PID allocation wrap value. When the kernel's next PID value reaches this value, it wraps back to a minimum PID value. PIDs of value pid_max or larger are not allocated. diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index e96a341..1d19256 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -197,11 +197,22 @@ and may not be fast. panic_on_oom -This enables or disables panic on out-of-memory feature. If this is set to 1, -the kernel panics when out-of-memory happens. If this is set to 0, the kernel -will kill some rogue process, called oom_killer. Usually, oom_killer can kill -rogue processes and system will survive. If you want to panic the system -rather than killing rogue processes, set this to 1. +This enables or disables panic on out-of-memory feature. -The default value is 0. +If this is set to 0, the kernel will kill some rogue process, +called oom_killer. Usually, oom_killer can kill rogue processes and +system will survive. + +If this is set to 1, the kernel panics when out-of-memory happens. +However, if a process limits using nodes by mempolicy/cpusets, +and those nodes become memory exhaustion status, one process +may be killed by oom-killer. No panic occurs in this case. +Because other nodes' memory may be free. This means system total status +may be not fatal yet. +If this is set to 2, the kernel panics compulsorily even on the +above-mentioned. + +The default value is 0. +1 and 2 are for failover of clustering. Please select either +according to your policy of failover. diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt index d43aa9d..ba328f2 100644 --- a/Documentation/sysrq.txt +++ b/Documentation/sysrq.txt @@ -1,6 +1,6 @@ Linux Magic System Request Key Hacks Documentation for sysrq.c -Last update: 2007-JAN-06 +Last update: 2007-MAR-14 * What is the magic SysRq key? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -75,7 +75,7 @@ On all - write a character to /proc/sysrq-trigger. e.g.: 'f' - Will call oom_kill to kill a memory hog process. -'g' - Used by kgdb on ppc platforms. +'g' - Used by kgdb on ppc and sh platforms. 'h' - Will display help (actually any other key than those listed above will display help. but 'h' is easy to remember :-) diff --git a/Documentation/tty.txt b/Documentation/tty.txt index 5f799e6..048a876 100644 --- a/Documentation/tty.txt +++ b/Documentation/tty.txt @@ -108,7 +108,9 @@ hardware driver through the function pointers within the tty->driver structure: write() Write a block of characters to the tty device. - Returns the number of characters accepted. + Returns the number of characters accepted. The + character buffer passed to this method is already + in kernel space. put_char() Queues a character for writing to the tty device. If there is no room in the queue, the character is diff --git a/Documentation/usb/CREDITS b/Documentation/usb/CREDITS index 27a7216..67c59cd 100644 --- a/Documentation/usb/CREDITS +++ b/Documentation/usb/CREDITS @@ -65,7 +65,7 @@ THANKS file in Inaky's driver): will sell keyboards to some of the 3 million (at least) Linux users. - - Many thanks to ing büro h doran [http://www.ibhdoran.com]! + - Many thanks to ing büro h doran [http://www.ibhdoran.com]! It was almost impossible to get a PC backplate USB connector for the motherboard here at Europe (mine, home-made, was quite lousy :). Now I know where to acquire nice USB stuff! diff --git a/Documentation/usb/usb-serial.txt b/Documentation/usb/usb-serial.txt index d61f6e7..5b635ae 100644 --- a/Documentation/usb/usb-serial.txt +++ b/Documentation/usb/usb-serial.txt @@ -42,12 +42,12 @@ ConnectTech WhiteHEAT 4 port converter http://www.connecttech.com For any questions or problems with this driver, please contact - Stuart MacDonald at stuartm@connecttech.com + Connect Tech's Support Department at support@connecttech.com -HandSpring Visor, Palm USB, and Clié USB driver +HandSpring Visor, Palm USB, and Clié USB driver - This driver works with all HandSpring USB, Palm USB, and Sony Clié USB + This driver works with all HandSpring USB, Palm USB, and Sony Clié USB devices. Only when the device tries to connect to the host, will the device show @@ -69,7 +69,7 @@ HandSpring Visor, Palm USB, and Clié USB driver the port to use for the HotSync transfer. The "Generic" port can be used for other device communication, such as a PPP link. - For some Sony Clié devices, /dev/ttyUSB0 must be used to talk to the + For some Sony Clié devices, /dev/ttyUSB0 must be used to talk to the device. This is true for all OS version 3.5 devices, and most devices that have had a flash upgrade to a newer version of the OS. See the kernel system log for information on which is the correct port to use. diff --git a/Documentation/video4linux/README.pvrusb2 b/Documentation/video4linux/README.pvrusb2 index a4b7ae8..a747200 100644 --- a/Documentation/video4linux/README.pvrusb2 +++ b/Documentation/video4linux/README.pvrusb2 @@ -8,7 +8,7 @@ Background: This driver is intended for the "Hauppauge WinTV PVR USB 2.0", which is a USB 2.0 hosted TV Tuner. This driver is a work in progress. - Its history started with the reverse-engineering effort by Björn + Its history started with the reverse-engineering effort by Björn Danielsson <pvrusb2@dax.nu> whose web page can be found here: http://pvrusb2.dax.nu/ diff --git a/Documentation/video4linux/Zoran b/Documentation/video4linux/Zoran index 85c575ac..295462b 100644 --- a/Documentation/video4linux/Zoran +++ b/Documentation/video4linux/Zoran @@ -242,7 +242,7 @@ can generate: PAL , NTSC , SECAM Conexant bt866 TV encoder is used in AVS6EYES, and -can generate: NTSC/PAL, PALM, PALN +can generate: NTSC/PAL, PALÂM, PALÂN The adv717x, should be able to produce PAL N. But you find nothing PAL N specific in the registers. Seem that you have to reuse a other standard diff --git a/Documentation/video4linux/meye.txt b/Documentation/video4linux/meye.txt index 5e51c59..bf3af5f 100644 --- a/Documentation/video4linux/meye.txt +++ b/Documentation/video4linux/meye.txt @@ -1,7 +1,7 @@ Vaio Picturebook Motion Eye Camera Driver Readme ------------------------------------------------ Copyright (C) 2001-2004 Stelian Pop <stelian@popies.net> - Copyright (C) 2001-2002 Alcôve <www.alcove.com> + Copyright (C) 2001-2002 Alcôve <www.alcove.com> Copyright (C) 2000 Andrew Tridgell <tridge@samba.org> This driver enable the use of video4linux compatible applications with the diff --git a/Documentation/video4linux/ov511.txt b/Documentation/video4linux/ov511.txt index 79af610..b3326b1 100644 --- a/Documentation/video4linux/ov511.txt +++ b/Documentation/video4linux/ov511.txt @@ -195,11 +195,11 @@ MODULE PARAMETERS: NAME: bandingfilter TYPE: integer (Boolean) DEFAULT: 0 (off) - DESC: Enables the sensor´s banding filter exposure algorithm. This reduces + DESC: Enables the sensor´s banding filter exposure algorithm. This reduces or stabilizes the "banding" caused by some artificial light sources (especially fluorescent). You might have to set lightfreq correctly for this to work right. As an added bonus, this sometimes makes it - possible to capture your monitor´s output. + possible to capture your monitor´s output. NAME: fastset TYPE: integer (Boolean) diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c new file mode 100644 index 0000000..686a8e0 --- /dev/null +++ b/Documentation/vm/slabinfo.c @@ -0,0 +1,1221 @@ +/* + * Slabinfo: Tool to get reports about slabs + * + * (C) 2007 sgi, Christoph Lameter <clameter@sgi.com> + * + * Compile by: + * + * gcc -o slabinfo slabinfo.c + */ +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <dirent.h> +#include <string.h> +#include <unistd.h> +#include <stdarg.h> +#include <getopt.h> +#include <regex.h> +#include <errno.h> + +#define MAX_SLABS 500 +#define MAX_ALIASES 500 +#define MAX_NODES 1024 + +struct slabinfo { + char *name; + int alias; + int refs; + int aliases, align, cache_dma, cpu_slabs, destroy_by_rcu; + int hwcache_align, object_size, objs_per_slab; + int sanity_checks, slab_size, store_user, trace; + int order, poison, reclaim_account, red_zone; + unsigned long partial, objects, slabs; + int numa[MAX_NODES]; + int numa_partial[MAX_NODES]; +} slabinfo[MAX_SLABS]; + +struct aliasinfo { + char *name; + char *ref; + struct slabinfo *slab; +} aliasinfo[MAX_ALIASES]; + +int slabs = 0; +int actual_slabs = 0; +int aliases = 0; +int alias_targets = 0; +int highest_node = 0; + +char buffer[4096]; + +int show_empty = 0; +int show_report = 0; +int show_alias = 0; +int show_slab = 0; +int skip_zero = 1; +int show_numa = 0; +int show_track = 0; +int show_first_alias = 0; +int validate = 0; +int shrink = 0; +int show_inverted = 0; +int show_single_ref = 0; +int show_totals = 0; +int sort_size = 0; +int set_debug = 0; +int show_ops = 0; + +/* Debug options */ +int sanity = 0; +int redzone = 0; +int poison = 0; +int tracking = 0; +int tracing = 0; + +int page_size; + +regex_t pattern; + +void fatal(const char *x, ...) +{ + va_list ap; + + va_start(ap, x); + vfprintf(stderr, x, ap); + va_end(ap); + exit(1); +} + +void usage(void) +{ + printf("slabinfo 5/7/2007. (c) 2007 sgi. clameter@sgi.com\n\n" + "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n" + "-a|--aliases Show aliases\n" + "-d<options>|--debug=<options> Set/Clear Debug options\n" + "-e|--empty Show empty slabs\n" + "-f|--first-alias Show first alias\n" + "-h|--help Show usage information\n" + "-i|--inverted Inverted list\n" + "-l|--slabs Show slabs\n" + "-n|--numa Show NUMA information\n" + "-o|--ops Show kmem_cache_ops\n" + "-s|--shrink Shrink slabs\n" + "-r|--report Detailed report on single slabs\n" + "-S|--Size Sort by size\n" + "-t|--tracking Show alloc/free information\n" + "-T|--Totals Show summary information\n" + "-v|--validate Validate slabs\n" + "-z|--zero Include empty slabs\n" + "-1|--1ref Single reference\n" + "\nValid debug options (FZPUT may be combined)\n" + "a / A Switch on all debug options (=FZUP)\n" + "- Switch off all debug options\n" + "f / F Sanity Checks (SLAB_DEBUG_FREE)\n" + "z / Z Redzoning\n" + "p / P Poisoning\n" + "u / U Tracking\n" + "t / T Tracing\n" + ); +} + +unsigned long read_obj(char *name) +{ + FILE *f = fopen(name, "r"); + + if (!f) + buffer[0] = 0; + else { + if (!fgets(buffer,sizeof(buffer), f)) + buffer[0] = 0; + fclose(f); + if (buffer[strlen(buffer)] == '\n') + buffer[strlen(buffer)] = 0; + } + return strlen(buffer); +} + + +/* + * Get the contents of an attribute + */ +unsigned long get_obj(char *name) +{ + if (!read_obj(name)) + return 0; + + return atol(buffer); +} + +unsigned long get_obj_and_str(char *name, char **x) +{ + unsigned long result = 0; + char *p; + + *x = NULL; + + if (!read_obj(name)) { + x = NULL; + return 0; + } + result = strtoul(buffer, &p, 10); + while (*p == ' ') + p++; + if (*p) + *x = strdup(p); + return result; +} + +void set_obj(struct slabinfo *s, char *name, int n) +{ + char x[100]; + FILE *f; + + sprintf(x, "%s/%s", s->name, name); + f = fopen(x, "w"); + if (!f) + fatal("Cannot write to %s\n", x); + + fprintf(f, "%d\n", n); + fclose(f); +} + +unsigned long read_slab_obj(struct slabinfo *s, char *name) +{ + char x[100]; + FILE *f; + int l; + + sprintf(x, "%s/%s", s->name, name); + f = fopen(x, "r"); + if (!f) { + buffer[0] = 0; + l = 0; + } else { + l = fread(buffer, 1, sizeof(buffer), f); + buffer[l] = 0; + fclose(f); + } + return l; +} + + +/* + * Put a size string together + */ +int store_size(char *buffer, unsigned long value) +{ + unsigned long divisor = 1; + char trailer = 0; + int n; + + if (value > 1000000000UL) { + divisor = 100000000UL; + trailer = 'G'; + } else if (value > 1000000UL) { + divisor = 100000UL; + trailer = 'M'; + } else if (value > 1000UL) { + divisor = 100; + trailer = 'K'; + } + + value /= divisor; + n = sprintf(buffer, "%ld",value); + if (trailer) { + buffer[n] = trailer; + n++; + buffer[n] = 0; + } + if (divisor != 1) { + memmove(buffer + n - 2, buffer + n - 3, 4); + buffer[n-2] = '.'; + n++; + } + return n; +} + +void decode_numa_list(int *numa, char *t) +{ + int node; + int nr; + + memset(numa, 0, MAX_NODES * sizeof(int)); + + while (*t == 'N') { + t++; + node = strtoul(t, &t, 10); + if (*t == '=') { + t++; + nr = strtoul(t, &t, 10); + numa[node] = nr; + if (node > highest_node) + highest_node = node; + } + while (*t == ' ') + t++; + } +} + +void slab_validate(struct slabinfo *s) +{ + set_obj(s, "validate", 1); +} + +void slab_shrink(struct slabinfo *s) +{ + set_obj(s, "shrink", 1); +} + +int line = 0; + +void first_line(void) +{ + printf("Name Objects Objsize Space " + "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n"); +} + +/* + * Find the shortest alias of a slab + */ +struct aliasinfo *find_one_alias(struct slabinfo *find) +{ + struct aliasinfo *a; + struct aliasinfo *best = NULL; + + for(a = aliasinfo;a < aliasinfo + aliases; a++) { + if (a->slab == find && + (!best || strlen(best->name) < strlen(a->name))) { + best = a; + if (strncmp(a->name,"kmall", 5) == 0) + return best; + } + } + return best; +} + +unsigned long slab_size(struct slabinfo *s) +{ + return s->slabs * (page_size << s->order); +} + +void slab_numa(struct slabinfo *s, int mode) +{ + int node; + + if (strcmp(s->name, "*") == 0) + return; + + if (!highest_node) { + printf("\n%s: No NUMA information available.\n", s->name); + return; + } + + if (skip_zero && !s->slabs) + return; + + if (!line) { + printf("\n%-21s:", mode ? "NUMA nodes" : "Slab"); + for(node = 0; node <= highest_node; node++) + printf(" %4d", node); + printf("\n----------------------"); + for(node = 0; node <= highest_node; node++) + printf("-----"); + printf("\n"); + } + printf("%-21s ", mode ? "All slabs" : s->name); + for(node = 0; node <= highest_node; node++) { + char b[20]; + + store_size(b, s->numa[node]); + printf(" %4s", b); + } + printf("\n"); + if (mode) { + printf("%-21s ", "Partial slabs"); + for(node = 0; node <= highest_node; node++) { + char b[20]; + + store_size(b, s->numa_partial[node]); + printf(" %4s", b); + } + printf("\n"); + } + line++; +} + +void show_tracking(struct slabinfo *s) +{ + printf("\n%s: Kernel object allocation\n", s->name); + printf("-----------------------------------------------------------------------\n"); + if (read_slab_obj(s, "alloc_calls")) + printf(buffer); + else + printf("No Data\n"); + + printf("\n%s: Kernel object freeing\n", s->name); + printf("------------------------------------------------------------------------\n"); + if (read_slab_obj(s, "free_calls")) + printf(buffer); + else + printf("No Data\n"); + +} + +void ops(struct slabinfo *s) +{ + if (strcmp(s->name, "*") == 0) + return; + + if (read_slab_obj(s, "ops")) { + printf("\n%s: kmem_cache operations\n", s->name); + printf("--------------------------------------------\n"); + printf(buffer); + } else + printf("\n%s has no kmem_cache operations\n", s->name); +} + +const char *onoff(int x) +{ + if (x) + return "On "; + return "Off"; +} + +void report(struct slabinfo *s) +{ + if (strcmp(s->name, "*") == 0) + return; + printf("\nSlabcache: %-20s Aliases: %2d Order : %2d\n", s->name, s->aliases, s->order); + if (s->hwcache_align) + printf("** Hardware cacheline aligned\n"); + if (s->cache_dma) + printf("** Memory is allocated in a special DMA zone\n"); + if (s->destroy_by_rcu) + printf("** Slabs are destroyed via RCU\n"); + if (s->reclaim_account) + printf("** Reclaim accounting active\n"); + + printf("\nSizes (bytes) Slabs Debug Memory\n"); + printf("------------------------------------------------------------------------\n"); + printf("Object : %7d Total : %7ld Sanity Checks : %s Total: %7ld\n", + s->object_size, s->slabs, onoff(s->sanity_checks), + s->slabs * (page_size << s->order)); + printf("SlabObj: %7d Full : %7ld Redzoning : %s Used : %7ld\n", + s->slab_size, s->slabs - s->partial - s->cpu_slabs, + onoff(s->red_zone), s->objects * s->object_size); + printf("SlabSiz: %7d Partial: %7ld Poisoning : %s Loss : %7ld\n", + page_size << s->order, s->partial, onoff(s->poison), + s->slabs * (page_size << s->order) - s->objects * s->object_size); + printf("Loss : %7d CpuSlab: %7d Tracking : %s Lalig: %7ld\n", + s->slab_size - s->object_size, s->cpu_slabs, onoff(s->store_user), + (s->slab_size - s->object_size) * s->objects); + printf("Align : %7d Objects: %7d Tracing : %s Lpadd: %7ld\n", + s->align, s->objs_per_slab, onoff(s->trace), + ((page_size << s->order) - s->objs_per_slab * s->slab_size) * + s->slabs); + + ops(s); + show_tracking(s); + slab_numa(s, 1); +} + +void slabcache(struct slabinfo *s) +{ + char size_str[20]; + char dist_str[40]; + char flags[20]; + char *p = flags; + + if (strcmp(s->name, "*") == 0) + return; + + if (actual_slabs == 1) { + report(s); + return; + } + + if (skip_zero && !show_empty && !s->slabs) + return; + + if (show_empty && s->slabs) + return; + + store_size(size_str, slab_size(s)); + sprintf(dist_str,"%lu/%lu/%d", s->slabs, s->partial, s->cpu_slabs); + + if (!line++) + first_line(); + + if (s->aliases) + *p++ = '*'; + if (s->cache_dma) + *p++ = 'd'; + if (s->hwcache_align) + *p++ = 'A'; + if (s->poison) + *p++ = 'P'; + if (s->reclaim_account) + *p++ = 'a'; + if (s->red_zone) + *p++ = 'Z'; + if (s->sanity_checks) + *p++ = 'F'; + if (s->store_user) + *p++ = 'U'; + if (s->trace) + *p++ = 'T'; + + *p = 0; + printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n", + s->name, s->objects, s->object_size, size_str, dist_str, + s->objs_per_slab, s->order, + s->slabs ? (s->partial * 100) / s->slabs : 100, + s->slabs ? (s->objects * s->object_size * 100) / + (s->slabs * (page_size << s->order)) : 100, + flags); +} + +/* + * Analyze debug options. Return false if something is amiss. + */ +int debug_opt_scan(char *opt) +{ + if (!opt || !opt[0] || strcmp(opt, "-") == 0) + return 1; + + if (strcasecmp(opt, "a") == 0) { + sanity = 1; + poison = 1; + redzone = 1; + tracking = 1; + return 1; + } + + for ( ; *opt; opt++) + switch (*opt) { + case 'F' : case 'f': + if (sanity) + return 0; + sanity = 1; + break; + case 'P' : case 'p': + if (poison) + return 0; + poison = 1; + break; + + case 'Z' : case 'z': + if (redzone) + return 0; + redzone = 1; + break; + + case 'U' : case 'u': + if (tracking) + return 0; + tracking = 1; + break; + + case 'T' : case 't': + if (tracing) + return 0; + tracing = 1; + break; + default: + return 0; + } + return 1; +} + +int slab_empty(struct slabinfo *s) +{ + if (s->objects > 0) + return 0; + + /* + * We may still have slabs even if there are no objects. Shrinking will + * remove them. + */ + if (s->slabs != 0) + set_obj(s, "shrink", 1); + + return 1; +} + +void slab_debug(struct slabinfo *s) +{ + if (sanity && !s->sanity_checks) { + set_obj(s, "sanity", 1); + } + if (!sanity && s->sanity_checks) { + if (slab_empty(s)) + set_obj(s, "sanity", 0); + else + fprintf(stderr, "%s not empty cannot disable sanity checks\n", s->name); + } + if (redzone && !s->red_zone) { + if (slab_empty(s)) + set_obj(s, "red_zone", 1); + else + fprintf(stderr, "%s not empty cannot enable redzoning\n", s->name); + } + if (!redzone && s->red_zone) { + if (slab_empty(s)) + set_obj(s, "red_zone", 0); + else + fprintf(stderr, "%s not empty cannot disable redzoning\n", s->name); + } + if (poison && !s->poison) { + if (slab_empty(s)) + set_obj(s, "poison", 1); + else + fprintf(stderr, "%s not empty cannot enable poisoning\n", s->name); + } + if (!poison && s->poison) { + if (slab_empty(s)) + set_obj(s, "poison", 0); + else + fprintf(stderr, "%s not empty cannot disable poisoning\n", s->name); + } + if (tracking && !s->store_user) { + if (slab_empty(s)) + set_obj(s, "store_user", 1); + else + fprintf(stderr, "%s not empty cannot enable tracking\n", s->name); + } + if (!tracking && s->store_user) { + if (slab_empty(s)) + set_obj(s, "store_user", 0); + else + fprintf(stderr, "%s not empty cannot disable tracking\n", s->name); + } + if (tracing && !s->trace) { + if (slabs == 1) + set_obj(s, "trace", 1); + else + fprintf(stderr, "%s can only enable trace for one slab at a time\n", s->name); + } + if (!tracing && s->trace) + set_obj(s, "trace", 1); +} + +void totals(void) +{ + struct slabinfo *s; + + int used_slabs = 0; + char b1[20], b2[20], b3[20], b4[20]; + unsigned long long max = 1ULL << 63; + + /* Object size */ + unsigned long long min_objsize = max, max_objsize = 0, avg_objsize; + + /* Number of partial slabs in a slabcache */ + unsigned long long min_partial = max, max_partial = 0, + avg_partial, total_partial = 0; + + /* Number of slabs in a slab cache */ + unsigned long long min_slabs = max, max_slabs = 0, + avg_slabs, total_slabs = 0; + + /* Size of the whole slab */ + unsigned long long min_size = max, max_size = 0, + avg_size, total_size = 0; + + /* Bytes used for object storage in a slab */ + unsigned long long min_used = max, max_used = 0, + avg_used, total_used = 0; + + /* Waste: Bytes used for alignment and padding */ + unsigned long long min_waste = max, max_waste = 0, + avg_waste, total_waste = 0; + /* Number of objects in a slab */ + unsigned long long min_objects = max, max_objects = 0, + avg_objects, total_objects = 0; + /* Waste per object */ + unsigned long long min_objwaste = max, + max_objwaste = 0, avg_objwaste, + total_objwaste = 0; + + /* Memory per object */ + unsigned long long min_memobj = max, + max_memobj = 0, avg_memobj, + total_objsize = 0; + + /* Percentage of partial slabs per slab */ + unsigned long min_ppart = 100, max_ppart = 0, + avg_ppart, total_ppart = 0; + + /* Number of objects in partial slabs */ + unsigned long min_partobj = max, max_partobj = 0, + avg_partobj, total_partobj = 0; + + /* Percentage of partial objects of all objects in a slab */ + unsigned long min_ppartobj = 100, max_ppartobj = 0, + avg_ppartobj, total_ppartobj = 0; + + + for (s = slabinfo; s < slabinfo + slabs; s++) { + unsigned long long size; + unsigned long used; + unsigned long long wasted; + unsigned long long objwaste; + long long objects_in_partial_slabs; + unsigned long percentage_partial_slabs; + unsigned long percentage_partial_objs; + + if (!s->slabs || !s->objects) + continue; + + used_slabs++; + + size = slab_size(s); + used = s->objects * s->object_size; + wasted = size - used; + objwaste = s->slab_size - s->object_size; + + objects_in_partial_slabs = s->objects - + (s->slabs - s->partial - s ->cpu_slabs) * + s->objs_per_slab; + + if (objects_in_partial_slabs < 0) + objects_in_partial_slabs = 0; + + percentage_partial_slabs = s->partial * 100 / s->slabs; + if (percentage_partial_slabs > 100) + percentage_partial_slabs = 100; + + percentage_partial_objs = objects_in_partial_slabs * 100 + / s->objects; + + if (percentage_partial_objs > 100) + percentage_partial_objs = 100; + + if (s->object_size < min_objsize) + min_objsize = s->object_size; + if (s->partial < min_partial) + min_partial = s->partial; + if (s->slabs < min_slabs) + min_slabs = s->slabs; + if (size < min_size) + min_size = size; + if (wasted < min_waste) + min_waste = wasted; + if (objwaste < min_objwaste) + min_objwaste = objwaste; + if (s->objects < min_objects) + min_objects = s->objects; + if (used < min_used) + min_used = used; + if (objects_in_partial_slabs < min_partobj) + min_partobj = objects_in_partial_slabs; + if (percentage_partial_slabs < min_ppart) + min_ppart = percentage_partial_slabs; + if (percentage_partial_objs < min_ppartobj) + min_ppartobj = percentage_partial_objs; + if (s->slab_size < min_memobj) + min_memobj = s->slab_size; + + if (s->object_size > max_objsize) + max_objsize = s->object_size; + if (s->partial > max_partial) + max_partial = s->partial; + if (s->slabs > max_slabs) + max_slabs = s->slabs; + if (size > max_size) + max_size = size; + if (wasted > max_waste) + max_waste = wasted; + if (objwaste > max_objwaste) + max_objwaste = objwaste; + if (s->objects > max_objects) + max_objects = s->objects; + if (used > max_used) + max_used = used; + if (objects_in_partial_slabs > max_partobj) + max_partobj = objects_in_partial_slabs; + if (percentage_partial_slabs > max_ppart) + max_ppart = percentage_partial_slabs; + if (percentage_partial_objs > max_ppartobj) + max_ppartobj = percentage_partial_objs; + if (s->slab_size > max_memobj) + max_memobj = s->slab_size; + + total_partial += s->partial; + total_slabs += s->slabs; + total_size += size; + total_waste += wasted; + + total_objects += s->objects; + total_used += used; + total_partobj += objects_in_partial_slabs; + total_ppart += percentage_partial_slabs; + total_ppartobj += percentage_partial_objs; + + total_objwaste += s->objects * objwaste; + total_objsize += s->objects * s->slab_size; + } + + if (!total_objects) { + printf("No objects\n"); + return; + } + if (!used_slabs) { + printf("No slabs\n"); + return; + } + + /* Per slab averages */ + avg_partial = total_partial / used_slabs; + avg_slabs = total_slabs / used_slabs; + avg_size = total_size / used_slabs; + avg_waste = total_waste / used_slabs; + + avg_objects = total_objects / used_slabs; + avg_used = total_used / used_slabs; + avg_partobj = total_partobj / used_slabs; + avg_ppart = total_ppart / used_slabs; + avg_ppartobj = total_ppartobj / used_slabs; + + /* Per object object sizes */ + avg_objsize = total_used / total_objects; + avg_objwaste = total_objwaste / total_objects; + avg_partobj = total_partobj * 100 / total_objects; + avg_memobj = total_objsize / total_objects; + + printf("Slabcache Totals\n"); + printf("----------------\n"); + printf("Slabcaches : %3d Aliases : %3d->%-3d Active: %3d\n", + slabs, aliases, alias_targets, used_slabs); + + store_size(b1, total_size);store_size(b2, total_waste); + store_size(b3, total_waste * 100 / total_used); + printf("Memory used: %6s # Loss : %6s MRatio: %6s%%\n", b1, b2, b3); + + store_size(b1, total_objects);store_size(b2, total_partobj); + store_size(b3, total_partobj * 100 / total_objects); + printf("# Objects : %6s # PartObj: %6s ORatio: %6s%%\n", b1, b2, b3); + + printf("\n"); + printf("Per Cache Average Min Max Total\n"); + printf("---------------------------------------------------------\n"); + + store_size(b1, avg_objects);store_size(b2, min_objects); + store_size(b3, max_objects);store_size(b4, total_objects); + printf("#Objects %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + store_size(b1, avg_slabs);store_size(b2, min_slabs); + store_size(b3, max_slabs);store_size(b4, total_slabs); + printf("#Slabs %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + store_size(b1, avg_partial);store_size(b2, min_partial); + store_size(b3, max_partial);store_size(b4, total_partial); + printf("#PartSlab %10s %10s %10s %10s\n", + b1, b2, b3, b4); + store_size(b1, avg_ppart);store_size(b2, min_ppart); + store_size(b3, max_ppart); + store_size(b4, total_partial * 100 / total_slabs); + printf("%%PartSlab %10s%% %10s%% %10s%% %10s%%\n", + b1, b2, b3, b4); + + store_size(b1, avg_partobj);store_size(b2, min_partobj); + store_size(b3, max_partobj); + store_size(b4, total_partobj); + printf("PartObjs %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + store_size(b1, avg_ppartobj);store_size(b2, min_ppartobj); + store_size(b3, max_ppartobj); + store_size(b4, total_partobj * 100 / total_objects); + printf("%% PartObj %10s%% %10s%% %10s%% %10s%%\n", + b1, b2, b3, b4); + + store_size(b1, avg_size);store_size(b2, min_size); + store_size(b3, max_size);store_size(b4, total_size); + printf("Memory %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + store_size(b1, avg_used);store_size(b2, min_used); + store_size(b3, max_used);store_size(b4, total_used); + printf("Used %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + store_size(b1, avg_waste);store_size(b2, min_waste); + store_size(b3, max_waste);store_size(b4, total_waste); + printf("Loss %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + printf("\n"); + printf("Per Object Average Min Max\n"); + printf("---------------------------------------------\n"); + + store_size(b1, avg_memobj);store_size(b2, min_memobj); + store_size(b3, max_memobj); + printf("Memory %10s %10s %10s\n", + b1, b2, b3); + store_size(b1, avg_objsize);store_size(b2, min_objsize); + store_size(b3, max_objsize); + printf("User %10s %10s %10s\n", + b1, b2, b3); + + store_size(b1, avg_objwaste);store_size(b2, min_objwaste); + store_size(b3, max_objwaste); + printf("Loss %10s %10s %10s\n", + b1, b2, b3); +} + +void sort_slabs(void) +{ + struct slabinfo *s1,*s2; + + for (s1 = slabinfo; s1 < slabinfo + slabs; s1++) { + for (s2 = s1 + 1; s2 < slabinfo + slabs; s2++) { + int result; + + if (sort_size) + result = slab_size(s1) < slab_size(s2); + else + result = strcasecmp(s1->name, s2->name); + + if (show_inverted) + result = -result; + + if (result > 0) { + struct slabinfo t; + + memcpy(&t, s1, sizeof(struct slabinfo)); + memcpy(s1, s2, sizeof(struct slabinfo)); + memcpy(s2, &t, sizeof(struct slabinfo)); + } + } + } +} + +void sort_aliases(void) +{ + struct aliasinfo *a1,*a2; + + for (a1 = aliasinfo; a1 < aliasinfo + aliases; a1++) { + for (a2 = a1 + 1; a2 < aliasinfo + aliases; a2++) { + char *n1, *n2; + + n1 = a1->name; + n2 = a2->name; + if (show_alias && !show_inverted) { + n1 = a1->ref; + n2 = a2->ref; + } + if (strcasecmp(n1, n2) > 0) { + struct aliasinfo t; + + memcpy(&t, a1, sizeof(struct aliasinfo)); + memcpy(a1, a2, sizeof(struct aliasinfo)); + memcpy(a2, &t, sizeof(struct aliasinfo)); + } + } + } +} + +void link_slabs(void) +{ + struct aliasinfo *a; + struct slabinfo *s; + + for (a = aliasinfo; a < aliasinfo + aliases; a++) { + + for (s = slabinfo; s < slabinfo + slabs; s++) + if (strcmp(a->ref, s->name) == 0) { + a->slab = s; + s->refs++; + break; + } + if (s == slabinfo + slabs) + fatal("Unresolved alias %s\n", a->ref); + } +} + +void alias(void) +{ + struct aliasinfo *a; + char *active = NULL; + + sort_aliases(); + link_slabs(); + + for(a = aliasinfo; a < aliasinfo + aliases; a++) { + + if (!show_single_ref && a->slab->refs == 1) + continue; + + if (!show_inverted) { + if (active) { + if (strcmp(a->slab->name, active) == 0) { + printf(" %s", a->name); + continue; + } + } + printf("\n%-12s <- %s", a->slab->name, a->name); + active = a->slab->name; + } + else + printf("%-20s -> %s\n", a->name, a->slab->name); + } + if (active) + printf("\n"); +} + + +void rename_slabs(void) +{ + struct slabinfo *s; + struct aliasinfo *a; + + for (s = slabinfo; s < slabinfo + slabs; s++) { + if (*s->name != ':') + continue; + + if (s->refs > 1 && !show_first_alias) + continue; + + a = find_one_alias(s); + + if (a) + s->name = a->name; + else { + s->name = "*"; + actual_slabs--; + } + } +} + +int slab_mismatch(char *slab) +{ + return regexec(&pattern, slab, 0, NULL, 0); +} + +void read_slab_dir(void) +{ + DIR *dir; + struct dirent *de; + struct slabinfo *slab = slabinfo; + struct aliasinfo *alias = aliasinfo; + char *p; + char *t; + int count; + + if (chdir("/sys/slab")) + fatal("SYSFS support for SLUB not active\n"); + + dir = opendir("."); + while ((de = readdir(dir))) { + if (de->d_name[0] == '.' || + (de->d_name[0] != ':' && slab_mismatch(de->d_name))) + continue; + switch (de->d_type) { + case DT_LNK: + alias->name = strdup(de->d_name); + count = readlink(de->d_name, buffer, sizeof(buffer)); + + if (count < 0) + fatal("Cannot read symlink %s\n", de->d_name); + + buffer[count] = 0; + p = buffer + count; + while (p > buffer && p[-1] != '/') + p--; + alias->ref = strdup(p); + alias++; + break; + case DT_DIR: + if (chdir(de->d_name)) + fatal("Unable to access slab %s\n", slab->name); + slab->name = strdup(de->d_name); + slab->alias = 0; + slab->refs = 0; + slab->aliases = get_obj("aliases"); + slab->align = get_obj("align"); + slab->cache_dma = get_obj("cache_dma"); + slab->cpu_slabs = get_obj("cpu_slabs"); + slab->destroy_by_rcu = get_obj("destroy_by_rcu"); + slab->hwcache_align = get_obj("hwcache_align"); + slab->object_size = get_obj("object_size"); + slab->objects = get_obj("objects"); + slab->objs_per_slab = get_obj("objs_per_slab"); + slab->order = get_obj("order"); + slab->partial = get_obj("partial"); + slab->partial = get_obj_and_str("partial", &t); + decode_numa_list(slab->numa_partial, t); + slab->poison = get_obj("poison"); + slab->reclaim_account = get_obj("reclaim_account"); + slab->red_zone = get_obj("red_zone"); + slab->sanity_checks = get_obj("sanity_checks"); + slab->slab_size = get_obj("slab_size"); + slab->slabs = get_obj_and_str("slabs", &t); + decode_numa_list(slab->numa, t); + slab->store_user = get_obj("store_user"); + slab->trace = get_obj("trace"); + chdir(".."); + if (slab->name[0] == ':') + alias_targets++; + slab++; + break; + default : + fatal("Unknown file type %lx\n", de->d_type); + } + } + closedir(dir); + slabs = slab - slabinfo; + actual_slabs = slabs; + aliases = alias - aliasinfo; + if (slabs > MAX_SLABS) + fatal("Too many slabs\n"); + if (aliases > MAX_ALIASES) + fatal("Too many aliases\n"); +} + +void output_slabs(void) +{ + struct slabinfo *slab; + + for (slab = slabinfo; slab < slabinfo + slabs; slab++) { + + if (slab->alias) + continue; + + + if (show_numa) + slab_numa(slab, 0); + else if (show_track) + show_tracking(slab); + else if (validate) + slab_validate(slab); + else if (shrink) + slab_shrink(slab); + else if (set_debug) + slab_debug(slab); + else if (show_ops) + ops(slab); + else if (show_slab) + slabcache(slab); + } +} + +struct option opts[] = { + { "aliases", 0, NULL, 'a' }, + { "debug", 2, NULL, 'd' }, + { "empty", 0, NULL, 'e' }, + { "first-alias", 0, NULL, 'f' }, + { "help", 0, NULL, 'h' }, + { "inverted", 0, NULL, 'i'}, + { "numa", 0, NULL, 'n' }, + { "ops", 0, NULL, 'o' }, + { "report", 0, NULL, 'r' }, + { "shrink", 0, NULL, 's' }, + { "slabs", 0, NULL, 'l' }, + { "track", 0, NULL, 't'}, + { "validate", 0, NULL, 'v' }, + { "zero", 0, NULL, 'z' }, + { "1ref", 0, NULL, '1'}, + { NULL, 0, NULL, 0 } +}; + +int main(int argc, char *argv[]) +{ + int c; + int err; + char *pattern_source; + + page_size = getpagesize(); + + while ((c = getopt_long(argc, argv, "ad::efhil1noprstvzTS", + opts, NULL)) != -1) + switch(c) { + case '1': + show_single_ref = 1; + break; + case 'a': + show_alias = 1; + break; + case 'd': + set_debug = 1; + if (!debug_opt_scan(optarg)) + fatal("Invalid debug option '%s'\n", optarg); + break; + case 'e': + show_empty = 1; + break; + case 'f': + show_first_alias = 1; + break; + case 'h': + usage(); + return 0; + case 'i': + show_inverted = 1; + break; + case 'n': + show_numa = 1; + break; + case 'o': + show_ops = 1; + break; + case 'r': + show_report = 1; + break; + case 's': + shrink = 1; + break; + case 'l': + show_slab = 1; + break; + case 't': + show_track = 1; + break; + case 'v': + validate = 1; + break; + case 'z': + skip_zero = 0; + break; + case 'T': + show_totals = 1; + break; + case 'S': + sort_size = 1; + break; + + default: + fatal("%s: Invalid option '%c'\n", argv[0], optopt); + + } + + if (!show_slab && !show_alias && !show_track && !show_report + && !validate && !shrink && !set_debug && !show_ops) + show_slab = 1; + + if (argc > optind) + pattern_source = argv[optind]; + else + pattern_source = ".*"; + + err = regcomp(&pattern, pattern_source, REG_ICASE|REG_NOSUB); + if (err) + fatal("%s: Invalid pattern '%s' code %d\n", + argv[0], pattern_source, err); + read_slab_dir(); + if (show_alias) + alias(); + else + if (show_totals) + totals(); + else { + link_slabs(); + rename_slabs(); + sort_slabs(); + output_slabs(); + } + return 0; +} diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt new file mode 100644 index 0000000..727c8d8 --- /dev/null +++ b/Documentation/vm/slub.txt @@ -0,0 +1,113 @@ +Short users guide for SLUB +-------------------------- + +First of all slub should transparently replace SLAB. If you enable +SLUB then everything should work the same (Note the word "should". +There is likely not much value in that word at this point). + +The basic philosophy of SLUB is very different from SLAB. SLAB +requires rebuilding the kernel to activate debug options for all +SLABS. SLUB always includes full debugging but its off by default. +SLUB can enable debugging only for selected slabs in order to avoid +an impact on overall system performance which may make a bug more +difficult to find. + +In order to switch debugging on one can add a option "slub_debug" +to the kernel command line. That will enable full debugging for +all slabs. + +Typically one would then use the "slabinfo" command to get statistical +data and perform operation on the slabs. By default slabinfo only lists +slabs that have data in them. See "slabinfo -h" for more options when +running the command. slabinfo can be compiled with + +gcc -o slabinfo Documentation/vm/slabinfo.c + +Some of the modes of operation of slabinfo require that slub debugging +be enabled on the command line. F.e. no tracking information will be +available without debugging on and validation can only partially +be performed if debugging was not switched on. + +Some more sophisticated uses of slub_debug: +------------------------------------------- + +Parameters may be given to slub_debug. If none is specified then full +debugging is enabled. Format: + +slub_debug=<Debug-Options> Enable options for all slabs +slub_debug=<Debug-Options>,<slab name> + Enable options only for select slabs + +Possible debug options are + F Sanity checks on (enables SLAB_DEBUG_FREE. Sorry + SLAB legacy issues) + Z Red zoning + P Poisoning (object and padding) + U User tracking (free and alloc) + T Trace (please only use on single slabs) + +F.e. in order to boot just with sanity checks and red zoning one would specify: + + slub_debug=FZ + +Trying to find an issue in the dentry cache? Try + + slub_debug=,dentry_cache + +to only enable debugging on the dentry cache. + +Red zoning and tracking may realign the slab. We can just apply sanity checks +to the dentry cache with + + slub_debug=F,dentry_cache + +In case you forgot to enable debugging on the kernel command line: It is +possible to enable debugging manually when the kernel is up. Look at the +contents of: + +/sys/slab/<slab name>/ + +Look at the writable files. Writing 1 to them will enable the +corresponding debug option. All options can be set on a slab that does +not contain objects. If the slab already contains objects then sanity checks +and tracing may only be enabled. The other options may cause the realignment +of objects. + +Careful with tracing: It may spew out lots of information and never stop if +used on the wrong slab. + +SLAB Merging +------------ + +If no debugging is specified then SLUB may merge similar slabs together +in order to reduce overhead and increase cache hotness of objects. +slabinfo -a displays which slabs were merged together. + +Getting more performance +------------------------ + +To some degree SLUB's performance is limited by the need to take the +list_lock once in a while to deal with partial slabs. That overhead is +governed by the order of the allocation for each slab. The allocations +can be influenced by kernel parameters: + +slub_min_objects=x (default 8) +slub_min_order=x (default 0) +slub_max_order=x (default 4) + +slub_min_objects allows to specify how many objects must at least fit +into one slab in order for the allocation order to be acceptable. +In general slub will be able to perform this number of allocations +on a slab without consulting centralized resources (list_lock) where +contention may occur. + +slub_min_order specifies a minim order of slabs. A similar effect like +slub_min_objects. + +slub_max_order specified the order at which slub_min_objects should no +longer be checked. This is useful to avoid SLUB trying to generate +super large order pages to fit slub_min_objects of a slab cache with +large object sizes into one high order page. + + +Christoph Lameter, <clameter@sgi.com>, April 10, 2007 diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt index 85f51e5..6177d88 100644 --- a/Documentation/x86_64/boot-options.txt +++ b/Documentation/x86_64/boot-options.txt @@ -149,7 +149,19 @@ NUMA numa=noacpi Don't parse the SRAT table for NUMA setup - numa=fake=X Fake X nodes and ignore NUMA setup of the actual machine. + numa=fake=CMDLINE + If a number, fakes CMDLINE nodes and ignores NUMA setup of the + actual machine. Otherwise, system memory is configured + depending on the sizes and coefficients listed. For example: + numa=fake=2*512,1024,4*256,*128 + gives two 512M nodes, a 1024M node, four 256M nodes, and the + rest split into 128M chunks. If the last character of CMDLINE + is a *, the remaining memory is divided up equally among its + coefficient: + numa=fake=2*512,2* + gives two 512M nodes and the rest split into two nodes. + Otherwise, the remaining system RAM is allocated to an + additional node. numa=hotadd=percent Only allow hotadd memory to preallocate page structures upto diff --git a/Documentation/x86_64/fake-numa-for-cpusets b/Documentation/x86_64/fake-numa-for-cpusets new file mode 100644 index 0000000..d1a985c --- /dev/null +++ b/Documentation/x86_64/fake-numa-for-cpusets @@ -0,0 +1,66 @@ +Using numa=fake and CPUSets for Resource Management +Written by David Rientjes <rientjes@cs.washington.edu> + +This document describes how the numa=fake x86_64 command-line option can be used +in conjunction with cpusets for coarse memory management. Using this feature, +you can create fake NUMA nodes that represent contiguous chunks of memory and +assign them to cpusets and their attached tasks. This is a way of limiting the +amount of system memory that are available to a certain class of tasks. + +For more information on the features of cpusets, see Documentation/cpusets.txt. +There are a number of different configurations you can use for your needs. For +more information on the numa=fake command line option and its various ways of +configuring fake nodes, see Documentation/x86_64/boot-options.txt. + +For the purposes of this introduction, we'll assume a very primitive NUMA +emulation setup of "numa=fake=4*512,". This will split our system memory into +four equal chunks of 512M each that we can now use to assign to cpusets. As +you become more familiar with using this combination for resource control, +you'll determine a better setup to minimize the number of nodes you have to deal +with. + +A machine may be split as follows with "numa=fake=4*512," as reported by dmesg: + + Faking node 0 at 0000000000000000-0000000020000000 (512MB) + Faking node 1 at 0000000020000000-0000000040000000 (512MB) + Faking node 2 at 0000000040000000-0000000060000000 (512MB) + Faking node 3 at 0000000060000000-0000000080000000 (512MB) + ... + On node 0 totalpages: 130975 + On node 1 totalpages: 131072 + On node 2 totalpages: 131072 + On node 3 totalpages: 131072 + +Now following the instructions for mounting the cpusets filesystem from +Documentation/cpusets.txt, you can assign fake nodes (i.e. contiguous memory +address spaces) to individual cpusets: + + [root@xroads /]# mkdir exampleset + [root@xroads /]# mount -t cpuset none exampleset + [root@xroads /]# mkdir exampleset/ddset + [root@xroads /]# cd exampleset/ddset + [root@xroads /exampleset/ddset]# echo 0-1 > cpus + [root@xroads /exampleset/ddset]# echo 0-1 > mems + +Now this cpuset, 'ddset', will only allowed access to fake nodes 0 and 1 for +memory allocations (1G). + +You can now assign tasks to these cpusets to limit the memory resources +available to them according to the fake nodes assigned as mems: + + [root@xroads /exampleset/ddset]# echo $$ > tasks + [root@xroads /exampleset/ddset]# dd if=/dev/zero of=tmp bs=1024 count=1G + [1] 13425 + +Notice the difference between the system memory usage as reported by +/proc/meminfo between the restricted cpuset case above and the unrestricted +case (i.e. running the same 'dd' command without assigning it to a fake NUMA +cpuset): + Unrestricted Restricted + MemTotal: 3091900 kB 3091900 kB + MemFree: 42113 kB 1513236 kB + +This allows for coarse memory management for the tasks you assign to particular +cpusets. Since cpusets can form a hierarchy, you can create some pretty +interesting combinations of use-cases for various classes of tasks for your +memory management needs. diff --git a/Documentation/x86_64/machinecheck b/Documentation/x86_64/machinecheck index 068a6d9..feaeaf6 100644 --- a/Documentation/x86_64/machinecheck +++ b/Documentation/x86_64/machinecheck @@ -36,7 +36,12 @@ between all CPUs. check_interval How often to poll for corrected machine check errors, in seconds - (Note output is hexademical). Default 5 minutes. + (Note output is hexademical). Default 5 minutes. When the poller + finds MCEs it triggers an exponential speedup (poll more often) on + the polling interval. When the poller stops finding MCEs, it + triggers an exponential backoff (poll less often) on the polling + interval. The check_interval variable is both the initial and + maximum polling interval. tolerant Tolerance level. When a machine check exception occurs for a non |