diff options
Diffstat (limited to 'Documentation')
53 files changed, 1282 insertions, 462 deletions
diff --git a/Documentation/DMA-mapping.txt b/Documentation/DMA-mapping.txt index 6845574..ee4bb73 100644 --- a/Documentation/DMA-mapping.txt +++ b/Documentation/DMA-mapping.txt @@ -199,6 +199,8 @@ address during PCI bus mastering you might do something like: "mydev: 24-bit DMA addressing not available.\n"); goto ignore_this_device; } +[Better use DMA_24BIT_MASK instead of 0x00ffffff. +See linux/include/dma-mapping.h for reference.] When pci_set_dma_mask() is successful, and returns zero, the PCI layer saves away this mask you have provided. The PCI layer will use this diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 2975291..7d87dd7 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -28,7 +28,7 @@ PS_METHOD = $(prefer-db2x) ### # The targets that may be used. -.PHONY: xmldocs sgmldocs psdocs pdfdocs htmldocs mandocs installmandocs +PHONY += xmldocs sgmldocs psdocs pdfdocs htmldocs mandocs installmandocs BOOKS := $(addprefix $(obj)/,$(DOCBOOKS)) xmldocs: $(BOOKS) @@ -211,3 +211,9 @@ clean-dirs := $(patsubst %.xml,%,$(DOCBOOKS)) #man put files in man subdir - traverse down subdir- := man/ + + +# Declare the contents of the .PHONY variable as phony. We keep that +# information in a variable so we can use it in if_changed and friends. + +.PHONY: $(PHONY) diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 5ed85af..07cb93b 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -360,7 +360,7 @@ uses of RCU may be found in listRCU.txt, arrayRCU.txt, and NMI-RCU.txt.
struct foo *new_fp; struct foo *old_fp; - new_fp = kmalloc(sizeof(*fp), GFP_KERNEL); + new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL); spin_lock(&foo_mutex); old_fp = gbl_foo; *new_fp = *old_fp; @@ -461,7 +461,7 @@ The foo_update_a() function might then be written as follows: struct foo *new_fp; struct foo *old_fp; - new_fp = kmalloc(sizeof(*fp), GFP_KERNEL); + new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL); spin_lock(&foo_mutex); old_fp = gbl_foo; *new_fp = *old_fp; @@ -605,7 +605,7 @@ are the same as those shown in the preceding section, so they are omitted. { int cpu; - for_each_cpu(cpu) + for_each_possible_cpu(cpu) run_on(cpu); } diff --git a/Documentation/aoe/mkdevs.sh b/Documentation/aoe/mkdevs.sh index ec5a6de..97374aa 100644 --- a/Documentation/aoe/mkdevs.sh +++ b/Documentation/aoe/mkdevs.sh @@ -27,6 +27,8 @@ rm -f $dir/discover mknod -m 0200 $dir/discover c $MAJOR 3 rm -f $dir/interfaces mknod -m 0200 $dir/interfaces c $MAJOR 4 +rm -f $dir/revalidate +mknod -m 0200 $dir/revalidate c $MAJOR 5 export n_partitions mkshelf=`echo $0 | sed 's!mkdevs!mkshelf!'` diff --git a/Documentation/aoe/udev.txt b/Documentation/aoe/udev.txt index ab39d8b..a7ed1dc 100644 --- a/Documentation/aoe/udev.txt +++ b/Documentation/aoe/udev.txt @@ -18,6 +18,7 @@ SUBSYSTEM="aoe", KERNEL="discover", NAME="etherd/%k", GROUP="disk", MODE="0220" SUBSYSTEM="aoe", KERNEL="err", NAME="etherd/%k", GROUP="disk", MODE="0440" SUBSYSTEM="aoe", KERNEL="interfaces", NAME="etherd/%k", GROUP="disk", MODE="0220" +SUBSYSTEM="aoe", KERNEL="revalidate", NAME="etherd/%k", GROUP="disk", MODE="0220" # aoe block devices KERNEL="etherd*", NAME="%k", GROUP="disk" diff --git a/Documentation/arm/Booting b/Documentation/arm/Booting index fad566b..7685029 100644 --- a/Documentation/arm/Booting +++ b/Documentation/arm/Booting @@ -118,7 +118,7 @@ to store page tables. The recommended placement is 32KiB into RAM. 
In either case, the following conditions must be met: -- Quiesce all DMA capable devicess so that memory does not get +- Quiesce all DMA capable devices so that memory does not get corrupted by bogus network packets or disk data. This will save you many hours of debug. diff --git a/Documentation/arm/README b/Documentation/arm/README index 5ed6f35..9b9c822 100644 --- a/Documentation/arm/README +++ b/Documentation/arm/README @@ -89,7 +89,7 @@ Modules Although modularisation is supported (and required for the FP emulator), each module on an ARM2/ARM250/ARM3 machine when is loaded will take memory up to the next 32k boundary due to the size of the pages. - Therefore, modularisation on these machines really worth it? + Therefore, is modularisation on these machines really worth it? However, ARM6 and up machines allow modules to take multiples of 4k, and as such Acorn RiscPCs and other architectures using these processors can diff --git a/Documentation/arm/SA1100/Assabet b/Documentation/arm/SA1100/Assabet index cbbe558..78bc1c1 100644 --- a/Documentation/arm/SA1100/Assabet +++ b/Documentation/arm/SA1100/Assabet @@ -26,7 +26,7 @@ Installing a bootloader A couple of bootloaders able to boot Linux on Assabet are available: -BLOB (http://www.lart.tudelft.nl/lartware/blob/) +BLOB (http://www.lartmaker.nl/lartware/blob/) BLOB is a bootloader used within the LART project. Some contributed patches were merged into BLOB to add support for Assabet. diff --git a/Documentation/arm/SA1100/LART b/Documentation/arm/SA1100/LART index 2f73f51..6d412b6 100644 --- a/Documentation/arm/SA1100/LART +++ b/Documentation/arm/SA1100/LART @@ -11,4 +11,4 @@ is under development, with plenty of others in different stages of planning. The hardware designs for this board have been released under an open license; -see the LART page at http://www.lart.tudelft.nl/ for more information. +see the LART page at http://www.lartmaker.nl/ for more information. 
diff --git a/Documentation/arm/Setup b/Documentation/arm/Setup index 0abd072..0cb1e64 100644 --- a/Documentation/arm/Setup +++ b/Documentation/arm/Setup @@ -58,7 +58,7 @@ below: video_y This describes the character position of cursor on VGA console, and - is otherwise unused. (should not used for other console types, and + is otherwise unused. (should not be used for other console types, and should not be used for other purposes). memc_control_reg diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index 8e63831..f989a9e 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -132,8 +132,18 @@ Some new queue property settings: limit. No highmem default. blk_queue_max_sectors(q, max_sectors) - Maximum size request you can handle in units of 512 byte - sectors. 255 default. + Sets two variables that limit the size of the request. + + - The request queue's max_sectors, which is a soft size + in units of 512 byte sectors, and could be dynamically varied + by the core kernel. + + - The request queue's max_hw_sectors, which is a hard limit + and reflects the maximum size request a driver can handle + in units of 512 byte sectors. + + The default for both max_sectors and max_hw_sectors is + 255. The upper limit of max_sectors is 1024. blk_queue_max_phys_segments(q, max_segments) Maximum physical segments you can handle in a request. 128 diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt index 4ae4188..53245c4 100644 --- a/Documentation/cachetlb.txt +++ b/Documentation/cachetlb.txt @@ -362,6 +362,27 @@ maps this page at its virtual address. likely that you will need to flush the instruction cache for copy_to_user_page(). + void flush_anon_page(struct page *page, unsigned long vmaddr) + When the kernel needs to access the contents of an anonymous + page, it calls this function (currently only + get_user_pages()). Note: flush_dcache_page() deliberately + doesn't work for an anonymous page.
The default + implementation is a nop (and should remain so for all coherent + architectures). For incoherent architectures, it should flush + the cache of the page at vmaddr in the current user process. + + void flush_kernel_dcache_page(struct page *page) + When the kernel needs to modify a user page it has obtained + with kmap, it calls this function after all modifications are + complete (but before kunmapping it) to bring the underlying + page up to date. It is assumed here that the user has no + incoherent cached copies (i.e. the original page was obtained + from a mechanism like get_user_pages()). The default + implementation is a nop and should remain so on all coherent + architectures. On incoherent architectures, this should flush + the kernel cache for page (using page_address(page)). + + void flush_icache_range(unsigned long start, unsigned long end) When the kernel stores into addresses that it will execute out of (eg when loading modules), this function is called. diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index 57a09f9..1bcf699 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt @@ -97,13 +97,13 @@ at which time hotplug is disabled. You really dont need to manipulate any of the system cpu maps. They should be read-only for most use. When setting up per-cpu resources almost always use -cpu_possible_map/for_each_cpu() to iterate. +cpu_possible_map/for_each_possible_cpu() to iterate. Never use anything other than cpumask_t to represent bitmap of CPUs. #include <linux/cpumask.h> -for_each_cpu - Iterate over cpu_possible_map +for_each_possible_cpu - Iterate over cpu_possible_map for_each_online_cpu - Iterate over cpu_online_map for_each_present_cpu - Iterate over cpu_present_map for_each_cpu_mask(x,mask) - Iterate over some random collection of cpu mask.
diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt index ff280e2..2b28e9e 100644 --- a/Documentation/cputopology.txt +++ b/Documentation/cputopology.txt @@ -1,5 +1,5 @@ -Export cpu topology info by sysfs. Items (attributes) are similar +Export cpu topology info via sysfs. Items (attributes) are similar to /proc/cpuinfo. 1) /sys/devices/system/cpu/cpuX/topology/physical_package_id: @@ -12,7 +12,7 @@ represent the thread siblings to cpu X in the same core; represent the thread siblings to cpu X in the same physical package; To implement it in an architecture-neutral way, a new source file, -driver/base/topology.c, is to export the 5 attributes. +drivers/base/topology.c, is to export the 4 attributes. If one architecture wants to support this feature, it just needs to implement 4 defines, typically in file include/asm-XXX/topology.h. diff --git a/Documentation/drivers/edac/edac.txt b/Documentation/drivers/edac/edac.txt index d37191f..70d96a6 100644 --- a/Documentation/drivers/edac/edac.txt +++ b/Documentation/drivers/edac/edac.txt @@ -21,7 +21,7 @@ within the computer system. In the initial release, memory Correctable Errors Detecting CE events, then harvesting those events and reporting them, CAN be a predictor of future UE events. With CE events, the system can -continue to operate, but with less safety. Preventive maintainence and +continue to operate, but with less safety. Preventive maintenance and proactive part replacement of memory DIMMs exhibiting CEs can reduce the likelihood of the dreaded UE events and system 'panics'. @@ -29,13 +29,13 @@ the likelihood of the dreaded UE events and system 'panics'. In addition, PCI Bus Parity and SERR Errors are scanned for on PCI devices in order to determine if errors are occurring on data transfers. The presence of PCI Parity errors must be examined with a grain of salt. 
-There are several addin adapters that do NOT follow the PCI specification +There are several add-in adapters that do NOT follow the PCI specification with regards to Parity generation and reporting. The specification says the vendor should tie the parity status bits to 0 if they do not intend to generate parity. Some vendors do not do this, and thus the parity bit can "float" giving false positives. -The PCI Parity EDAC device has the ability to "skip" known flakey +The PCI Parity EDAC device has the ability to "skip" known flaky cards during the parity scan. These are set by the parity "blacklist" interface in the sysfs for PCI Parity. (See the PCI section in the sysfs section below.) There is also a parity "whitelist" which is used as @@ -101,7 +101,7 @@ Memory Controller (mc) Model First a background on the memory controller's model abstracted in EDAC. Each mc device controls a set of DIMM memory modules. These modules are -layed out in a Chip-Select Row (csrowX) and Channel table (chX). There can +laid out in a Chip-Select Row (csrowX) and Channel table (chX). There can be multiple csrows and two channels. Memory controllers allow for several csrows, with 8 csrows being a typical value. @@ -131,7 +131,7 @@ for memory DIMMs: DIMM_B1 Labels for these slots are usually silk screened on the motherboard. Slots -labeled 'A' are channel 0 in this example. Slots labled 'B' +labeled 'A' are channel 0 in this example. Slots labeled 'B' are channel 1. Notice that there are two csrows possible on a physical DIMM. These csrows are allocated their csrow assignment based on the slot into which the memory DIMM is placed. Thus, when 1 DIMM @@ -140,7 +140,7 @@ is placed in each Channel, the csrows cross both DIMMs. Memory DIMMs come single or dual "ranked". A rank is a populated csrow. Thus, 2 single ranked DIMMs, placed in slots DIMM_A0 and DIMM_B0 above will have 1 csrow, csrow0. csrow1 will be empty. 
On the other hand, -when 2 dual ranked DIMMs are similiaryly placed, then both csrow0 and +when 2 dual ranked DIMMs are similarly placed, then both csrow0 and csrow1 will be populated. The pattern repeats itself for csrow2 and csrow3. @@ -246,7 +246,7 @@ Module Version read-only attribute file: 'mc_version' - The EDAC CORE modules's version and compile date are shown here to + The EDAC CORE module's version and compile date are shown here to indicate what EDAC is running. @@ -423,7 +423,7 @@ Total memory managed by this csrow attribute file: 'size_mb' This attribute file displays, in count of megabytes, of memory - that this csrow contatins. + that this csrow contains. Memory Type attribute file: @@ -557,7 +557,7 @@ On Header Type 00 devices the primary status is looked at for any parity error regardless of whether Parity is enabled on the device. (The spec indicates parity is generated in some cases). On Header Type 01 bridges, the secondary status register is also -looked at to see if parity ocurred on the bus on the other side of +looked at to see if parity occurred on the bus on the other side of the bridge. @@ -588,7 +588,7 @@ Panic on PCI PARITY Error: 'panic_on_pci_parity' - This control files enables or disables panic'ing when a parity + This control files enables or disables panicking when a parity error has been detected. @@ -616,12 +616,12 @@ PCI Device Whitelist: This control file allows for an explicit list of PCI devices to be scanned for parity errors. Only devices found on this list will - be examined. The list is a line of hexadecimel VENDOR and DEVICE + be examined. The list is a line of hexadecimal VENDOR and DEVICE ID tuples: 1022:7450,1434:16a6 - One or more can be inserted, seperated by a comma. + One or more can be inserted, separated by a comma. To write the above list doing the following as one command line: @@ -639,11 +639,11 @@ PCI Device Blacklist: This control file allows for a list of PCI devices to be skipped for scanning. 
- The list is a line of hexadecimel VENDOR and DEVICE ID tuples: + The list is a line of hexadecimal VENDOR and DEVICE ID tuples: 1022:7450,1434:16a6 - One or more can be inserted, seperated by a comma. + One or more can be inserted, separated by a comma. To write the above list doing the following as one command line: @@ -651,14 +651,14 @@ PCI Device Blacklist: > /sys/devices/system/edac/pci/pci_parity_blacklist - To display what the whitelist current contatins, + To display what the whitelist currently contains, simply 'cat' the same file. ======================================================================= PCI Vendor and Devices IDs can be obtained with the lspci command. Using the -n option lspci will display the vendor and device IDs. The system -adminstrator will have to determine which devices should be scanned or +administrator will have to determine which devices should be scanned or skipped. @@ -669,5 +669,5 @@ Turn OFF a whitelist by an empty echo command: echo > /sys/devices/system/edac/pci/pci_parity_whitelist -and any previous blacklist will be utililzed. +and any previous blacklist will be utilized. diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 21272e4..495858b 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -176,6 +176,18 @@ Who: Richard Knutsson <ricknu-0@student.ltu.se> and Greg Kroah-Hartman <gregkh@s --------------------------- +What: Usage of invalid timevals in setitimer +When: March 2007 +Why: POSIX requires to validate timevals in the setitimer call. This + was never done by Linux. The invalid (e.g. negative timevals) were + silently converted to more or less random timeouts and intervals. + Until the removal a per boot limited number of warnings is printed + and the timevals are sanitized. 
+ +Who: Thomas Gleixner <tglx@linutronix.de> + +--------------------------- + What: I2C interface of the it87 driver When: January 2007 Why: The ISA interface is faster and should be always available. The I2C diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 74052d2..66fdc07 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -1,27 +1,47 @@ 00-INDEX - this file (info on some of the filesystems supported by linux). +Exporting + - explanation of how to make filesystems exportable. Locking - info on locking rules as they pertain to Linux VFS. adfs.txt - info and mount options for the Acorn Advanced Disc Filing System. +afs.txt + - info and examples for the distributed AFS (Andrew File System) fs. affs.txt - info and mount options for the Amiga Fast File System. +automount-support.txt + - information about filesystem automount support. +befs.txt + - information about the BeOS filesystem for Linux. bfs.txt - info for the SCO UnixWare Boot Filesystem (BFS). cifs.txt - - description of the CIFS filesystem + - description of the CIFS filesystem. coda.txt - description of the CODA filesystem. configfs/ - directory containing configfs documentation and example code. cramfs.txt - - info on the cram filesystem for small storage (ROMs etc) + - info on the cram filesystem for small storage (ROMs etc). +dentry-locking.txt + - info on the RCU-based dcache locking model. devfs/ - directory containing devfs documentation. +directory-locking + - info about the locking scheme used for directory operations. dlmfs.txt - info on the userspace interface to the OCFS2 DLM. ext2.txt - info, mount options and specifications for the Ext2 filesystem. +ext3.txt + - info, mount options and specifications for the Ext3 filesystem. +files.txt + - info on file management in the Linux kernel. +fuse.txt + - info on the Filesystem in User SpacE including mount options. 
+hfs.txt + - info on the Macintosh HFS Filesystem for Linux. hpfs.txt - info and mount options for the OS/2 HPFS. isofs.txt @@ -32,23 +52,43 @@ ncpfs.txt - info on Novell Netware(tm) filesystem using NCP protocol. ntfs.txt - info and mount options for the NTFS filesystem (Windows NT). -proc.txt - - info on Linux's /proc filesystem. ocfs2.txt - info and mount options for the OCFS2 clustered filesystem. +porting + - various information on filesystem porting. +proc.txt + - info on Linux's /proc filesystem. +ramfs-rootfs-initramfs.txt + - info on the 'in memory' filesystems ramfs, rootfs and initramfs. +reiser4.txt + - info on the Reiser4 filesystem based on dancing tree algorithms. +relayfs.txt + - info on relayfs, for efficient streaming from kernel to user space. romfs.txt - - Description of the ROMFS filesystem. + - description of the ROMFS filesystem. smbfs.txt - - info on using filesystems with the SMB protocol (Windows 3.11 and NT) + - info on using filesystems with the SMB protocol (Win 3.11 and NT). +spufs.txt + - info and mount options for the SPU filesystem used on Cell. +sysfs-pci.txt + - info on accessing PCI device resources through sysfs. +sysfs.txt + - info on sysfs, a ram-based filesystem for exporting kernel objects. sysv-fs.txt - info on the SystemV/V7/Xenix/Coherent filesystem. +tmpfs.txt + - info on tmpfs, a filesystem that holds all files in virtual memory. udf.txt - info and mount options for the UDF filesystem. ufs.txt - info on the ufs filesystem. +v9fs.txt + - v9fs is a Unix implementation of the Plan 9 9p remote fs protocol. vfat.txt - info on using the VFAT filesystem used in Windows NT and Windows 95 vfs.txt - - Overview of the Virtual File System + - overview of the Virtual File System xfs.txt - info and mount options for the XFS filesystem. +xip.txt + - info on execute-in-place for file mappings. 
diff --git a/Documentation/filesystems/v9fs.txt b/Documentation/filesystems/9p.txt index 24c7a9c..43b89c2 100644 --- a/Documentation/filesystems/v9fs.txt +++ b/Documentation/filesystems/9p.txt @@ -1,5 +1,5 @@ - V9FS: 9P2000 for Linux - ====================== + v9fs: Plan 9 Resource Sharing for Linux + ======================================= ABOUT ===== @@ -9,18 +9,19 @@ v9fs is a Unix implementation of the Plan 9 9p remote filesystem protocol. This software was originally developed by Ron Minnich <rminnich@lanl.gov> and Maya Gokhale <maya@lanl.gov>. Additional development by Greg Watson <gwatson@lanl.gov> and most recently Eric Van Hensbergen -<ericvh@gmail.com> and Latchesar Ionkov <lucho@ionkov.net>. +<ericvh@gmail.com>, Latchesar Ionkov <lucho@ionkov.net> and Russ Cox +<rsc@swtch.com>. USAGE ===== For remote file server: - mount -t 9P 10.10.1.2 /mnt/9 + mount -t 9p 10.10.1.2 /mnt/9 For Plan 9 From User Space applications (http://swtch.com/plan9) - mount -t 9P `namespace`/acme /mnt/9 -o proto=unix,name=$USER + mount -t 9p `namespace`/acme /mnt/9 -o proto=unix,uname=$USER OPTIONS ======= @@ -32,7 +33,7 @@ OPTIONS fd - used passed file descriptors for connection (see rfdno and wfdno) - name=name user name to attempt mount as on the remote server. The + uname=name user name to attempt mount as on the remote server. The server may override or ignore this value. Certain user names may require authentication. @@ -42,7 +43,7 @@ OPTIONS debug=n specifies debug level. The debug level is a bitmask. 
0x01 = display verbose error messages 0x02 = developer debug (DEBUG_CURRENT) - 0x04 = display 9P trace + 0x04 = display 9p trace 0x08 = display VFS trace 0x10 = display Marshalling debug 0x20 = display RPC debug @@ -53,11 +54,11 @@ OPTIONS wfdno=n the file descriptor for writing with proto=fd - maxdata=n the number of bytes to use for 9P packet payload (msize) + maxdata=n the number of bytes to use for 9p packet payload (msize) port=n port to connect to on the remote server - noextend force legacy mode (no 9P2000.u semantics) + noextend force legacy mode (no 9p2000.u semantics) uid attempt to mount as a particular uid @@ -72,7 +73,7 @@ OPTIONS RESOURCES ========= -The Linux version of the 9P server is now maintained under the npfs project +The Linux version of the 9p server is now maintained under the npfs project on sourceforge (http://sourceforge.net/projects/npfs). There are user and developer mailing lists available through the v9fs project diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 944cf10..99902ae6 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -121,7 +121,7 @@ Table 1-1: Process specific entries in /proc .............................................................................. 
File Content cmdline Command line arguments - cpu Current and last cpu in wich it was executed (2.4)(smp) + cpu Current and last cpu in which it was executed (2.4)(smp) cwd Link to the current working directory environ Values of environment variables exe Link to the executable of this process @@ -309,13 +309,13 @@ is the same by default: > cat /proc/irq/0/smp_affinity ffffffff -It's a bitmask, in wich you can specify wich CPUs can handle the IRQ, you can +It's a bitmask, in which you can specify which CPUs can handle the IRQ, you can set it by doing: > echo 1 > /proc/irq/prof_cpu_mask This means that only the first CPU will handle the IRQ, but you can also echo 5 -wich means that only the first and fourth CPU can handle the IRQ. +which means that only the first and fourth CPU can handle the IRQ. The way IRQs are routed is handled by the IO-APIC, and it's Round Robin between all the CPUs which are allowed to handle it. As usual the kernel has diff --git a/Documentation/filesystems/udf.txt b/Documentation/filesystems/udf.txt index e5213bc..511b423 100644 --- a/Documentation/filesystems/udf.txt +++ b/Documentation/filesystems/udf.txt @@ -26,6 +26,20 @@ The following mount options are supported: nostrict Unset strict conformance iocharset= Set the NLS character set +The uid= and gid= options need a bit more explaining. They will accept a +decimal numeric value which will be used as the default ID for that mount. +They will also accept the string "ignore" and "forget". For files on the disk +that are owned by nobody ( -1 ), they will instead look as if they are owned +by the default ID. The ignore option causes the default ID to override all +IDs on the disk, not just -1. The forget option causes all IDs to be written +to disk as -1, so when the media is later remounted, they will appear to be +owned by whatever default ID it is mounted with at that time. 
+ +For typical desktop use of removable media, you should set the ID to that +of the interactively logged on user, and also specify both the forget and +ignore options. This way the interactive user will always see the files +on the disk as belonging to him. + The remaining are for debugging and disaster recovery: novrs Skip volume sequence recognition diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index e56e842..adaa899 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -230,10 +230,15 @@ only called from a process context (i.e. not from an interrupt handler or bottom half). alloc_inode: this method is called by inode_alloc() to allocate memory - for struct inode and initialize it. + for struct inode and initialize it. If this function is not + defined, a simple 'struct inode' is allocated. Normally + alloc_inode will be used to allocate a larger structure which + contains a 'struct inode' embedded within it. destroy_inode: this method is called by destroy_inode() to release - resources allocated for struct inode. + resources allocated for struct inode. It is only required if + ->alloc_inode was defined and simply undoes anything done by + ->alloc_inode. read_inode: this method is called to read a specific inode from the mounted filesystem. The i_ino member in the struct inode is @@ -443,14 +448,81 @@ otherwise noted. The Address Space Object ======================== -The address space object is used to identify pages in the page cache. - +The address space object is used to group and manage pages in the page +cache. It can be used to keep track of the pages in a file (or +anything else) and also track the mapping of sections of the file into +process address spaces. + +There are a number of distinct yet related services that an +address-space can provide. These include communicating memory +pressure, page lookup by address, and keeping track of pages tagged as +Dirty or Writeback. 
+ +The first can be used independently of the others. The VM can try to +either write dirty pages in order to clean them, or release clean +pages in order to reuse them. To do this it can call the ->writepage +method on dirty pages, and ->releasepage on clean pages with +PagePrivate set. Clean pages without PagePrivate and with no external +references will be released without notice being given to the +address_space. + +To achieve this functionality, pages need to be placed on an LRU with +lru_cache_add and mark_page_active needs to be called whenever the +page is used. + +Pages are normally kept in a radix tree indexed by ->index. This tree +maintains information about the PG_Dirty and PG_Writeback status of +each page, so that pages with either of these flags can be found +quickly. + +The Dirty tag is primarily used by mpage_writepages - the default +->writepages method. It uses the tag to find dirty pages to call +->writepage on. If mpage_writepages is not used (i.e. the address +provides its own ->writepages), the PAGECACHE_TAG_DIRTY tag is +almost unused. write_inode_now and sync_inode do use it (through +__sync_single_inode) to check if ->writepages has been successful in +writing out the whole address_space. + +The Writeback tag is used by filemap*wait* and sync_page* functions, +via wait_on_page_writeback_range, to wait for all writeback to +complete. While waiting ->sync_page (if defined) will be called on +each page that is found to require writeback. + +An address_space handler may attach extra information to a page, +typically using the 'private' field in the 'struct page'. If such +information is attached, the PG_Private flag should be set. This will +cause various VM routines to make extra calls into the address_space +handler to deal with that data. + +An address space acts as an intermediate between storage and +application.
Data is read into the address space a whole page at a +time, and provided to the application either by copying of the page, +or by memory-mapping the page. +Data is written into the address space by the application, and then +written-back to storage typically in whole pages, however the +address_space has finer control of write sizes. + +The read process essentially only requires 'readpage'. The write +process is more complicated and uses prepare_write/commit_write or +set_page_dirty to write data into the address_space, and writepage, +sync_page, and writepages to writeback data to storage. + +Adding and removing pages to/from an address_space is protected by the +inode's i_mutex. + +When data is written to a page, the PG_Dirty flag should be set. It +typically remains set until writepage asks for it to be written. This +should clear PG_Dirty and set PG_Writeback. It can be actually +written at any point after PG_Dirty is clear. Once it is known to be +safe, PG_Writeback is cleared. + +Writeback makes use of a writeback_control structure... struct address_space_operations ------------------------------- This describes how the VFS can manipulate mapping of a file to page cache in -your filesystem. As of kernel 2.6.13, the following members are defined: +your filesystem. As of kernel 2.6.16, the following members are defined: struct address_space_operations { int (*writepage)(struct page *page, struct writeback_control *wbc); @@ -469,47 +541,148 @@ struct address_space_operations { loff_t offset, unsigned long nr_segs); struct page* (*get_xip_page)(struct address_space *, sector_t, int); + /* migrate the contents of a page to the specified target */ + int (*migratepage) (struct page *, struct page *); }; - writepage: called by the VM write a dirty page to backing store. + writepage: called by the VM to write a dirty page to backing store. + This may happen for data integrity reasons (i.e. 'sync'), or + to free up memory (flush). 
The difference can be seen in + wbc->sync_mode. + The PG_Dirty flag has been cleared and PageLocked is true. + writepage should start writeout, should set PG_Writeback, + and should make sure the page is unlocked, either synchronously + or asynchronously when the write operation completes. + + If wbc->sync_mode is WB_SYNC_NONE, ->writepage doesn't have to + try too hard if there are problems, and may choose to write out + other pages from the mapping if that is easier (e.g. due to + internal dependencies). If it chooses not to start writeout, it + should return AOP_WRITEPAGE_ACTIVATE so that the VM will not keep + calling ->writepage on that page. + + See the file "Locking" for more details. readpage: called by the VM to read a page from backing store. + The page will be Locked when readpage is called, and should be + unlocked and marked uptodate once the read completes. + If ->readpage discovers that it needs to unlock the page for + some reason, it can do so, and then return AOP_TRUNCATED_PAGE. + In this case, the page will be relocated, relocked and if + that all succeeds, ->readpage will be called again. sync_page: called by the VM to notify the backing store to perform all queued I/O operations for a page. I/O operations for other pages associated with this address_space object may also be performed. + This function is optional and is called only for pages with + PG_Writeback set while waiting for the writeback to complete. + writepages: called by the VM to write out pages associated with the - address_space object. + address_space object. If wbc->sync_mode is WB_SYNC_ALL, then + the writeback_control will specify a range of pages that must be + written out. If it is WB_SYNC_NONE, then a nr_to_write is given + and that many pages should be written if possible. + If no ->writepages is given, then mpage_writepages is used + instead. This will choose pages from the address space that are + tagged as DIRTY and will pass them to ->writepage.
set_page_dirty: called by the VM to set a page dirty. + This is particularly needed if an address space attaches + private data to a page, and that data needs to be updated when + a page is dirtied. This is called, for example, when a memory + mapped page gets modified. + If defined, it should set the PageDirty flag, and the + PAGECACHE_TAG_DIRTY tag in the radix tree. readpages: called by the VM to read pages associated with the address_space - object. + object. This is essentially just a vector version of + readpage. Instead of just one page, several pages are + requested. + readpages is only used for read-ahead, so read errors are + ignored. If anything goes wrong, feel free to give up. prepare_write: called by the generic write path in VM to set up a write - request for a page. - - commit_write: called by the generic write path in VM to write page to - its backing store. + request for a page. This indicates to the address space that + the given range of bytes is about to be written. The + address_space should check that the write will be able to + complete, by allocating space if necessary and doing any other + internal housekeeping. If the write will update parts of + any basic-blocks on storage, then those blocks should be + pre-read (if they haven't been read already) so that the + updated blocks can be written out properly. + The page will be locked. If prepare_write wants to unlock the + page it, like readpage, may do so and return + AOP_TRUNCATED_PAGE. + In this case the prepare_write will be retried once the lock is + regained. + + commit_write: If prepare_write succeeds, new data will be copied + into the page and then commit_write will be called. It will + typically update the size of the file (if appropriate) and + mark the inode as dirty, and do any other related housekeeping + operations. It should avoid returning an error if possible - + errors should have been handled by prepare_write.
bmap: called by the VFS to map a logical block offset within object to - physical block number. This method is use by for the legacy FIBMAP - ioctl. Other uses are discouraged. - - invalidatepage: called by the VM on truncate to disassociate a page from its - address_space mapping. - - releasepage: called by the VFS to release filesystem specific metadata from - a page. - - direct_IO: called by the VM for direct I/O writes and reads. + physical block number. This method is used by the FIBMAP + ioctl and for working with swap-files. To be able to swap to + a file, the file must have a stable mapping to a block + device. The swap system does not go through the filesystem + but instead uses bmap to find out where the blocks in the file + are and uses those addresses directly. + + + invalidatepage: If a page has PagePrivate set, then invalidatepage + will be called when part or all of the page is to be removed + from the address space. This generally corresponds to either a + truncation or a complete invalidation of the address space + (in the latter case 'offset' will always be 0). + Any private data associated with the page should be updated + to reflect this truncation. If offset is 0, then + the private data should be released, because the page + must be able to be completely discarded. This may be done by + calling the ->releasepage function, but in this case the + release MUST succeed. + + releasepage: releasepage is called on PagePrivate pages to indicate + that the page should be freed if possible. ->releasepage + should remove any private data from the page and clear the + PagePrivate flag. It may also remove the page from the + address_space. If this fails for some reason, it may indicate + failure with a 0 return value. + This is used in two distinct though related cases. The first + is when the VM finds a clean page with no active users and + wants to make it a free page. 
If ->releasepage succeeds, the + page will be removed from the address_space and become free. + + The second case is when a request has been made to invalidate + some or all pages in an address_space. This can happen + through the fadvise(POSIX_FADV_DONTNEED) system call or by the + filesystem explicitly requesting it as nfs and 9fs do (when + they believe the cache may be out of date with storage) by + calling invalidate_inode_pages2(). + If the filesystem makes such a call, and needs to be certain + that all pages are invalidated, then its releasepage will + need to ensure this. Possibly it can clear the PageUptodate + bit if it cannot free private data yet. + + direct_IO: called by the generic read/write routines to perform + direct_IO - that is IO requests which bypass the page cache + and transfer data directly between the storage and the + application's address space. get_xip_page: called by the VM to translate a block number to a page. The page is valid until the corresponding filesystem is unmounted. Filesystems that want to use execute-in-place (XIP) need to implement it. An example implementation can be found in fs/ext2/xip.c. + migratepage: This is used to compact the physical memory usage. + If the VM wants to relocate a page (maybe off a memory card + that is signalling imminent failure) it will pass a new page + and an old page to this function. migratepage should + transfer any private data across and update any references + that it has to the page.
The File Object =============== diff --git a/Documentation/firmware_class/firmware_sample_driver.c b/Documentation/firmware_class/firmware_sample_driver.c index d3ad2c2..ad3edab 100644 --- a/Documentation/firmware_class/firmware_sample_driver.c +++ b/Documentation/firmware_class/firmware_sample_driver.c @@ -23,7 +23,6 @@ char __init inkernel_firmware[] = "let's say that this is firmware\n"; #endif static struct device ghost_device = { - .name = "Ghost Device", .bus_id = "ghost0", }; @@ -92,7 +91,7 @@ static void sample_probe_async(void) { /* Let's say that I can't sleep */ int error; - error = request_firmware_nowait (THIS_MODULE, + error = request_firmware_nowait (THIS_MODULE, FW_ACTION_NOHOTPLUG, "sample_driver_fw", &ghost_device, "my device pointer", sample_probe_async_cont); diff --git a/Documentation/firmware_class/firmware_sample_firmware_class.c b/Documentation/firmware_class/firmware_sample_firmware_class.c index 57b956a..9e1b0e4 100644 --- a/Documentation/firmware_class/firmware_sample_firmware_class.c +++ b/Documentation/firmware_class/firmware_sample_firmware_class.c @@ -172,7 +172,6 @@ static void fw_remove_class_device(struct class_device *class_dev) static struct class_device *class_dev; static struct device my_device = { - .name = "Sample Device", .bus_id = "my_dev0", }; diff --git a/Documentation/ioctl-number.txt b/Documentation/ioctl-number.txt index aa7ba00..171a44e 100644 --- a/Documentation/ioctl-number.txt +++ b/Documentation/ioctl-number.txt @@ -78,8 +78,6 @@ Code Seq# Include File Comments '#' 00-3F IEEE 1394 Subsystem Block for the entire subsystem '1' 00-1F <linux/timepps.h> PPS kit from Ulrich Windl <ftp://ftp.de.kernel.org/pub/linux/daemons/ntp/PPS/> -'6' 00-10 <asm-i386/processor.h> Intel IA32 microcode update driver - <mailto:tigran@veritas.com> '8' all SNP8023 advanced NIC card <mailto:mcr@solidum.com> 'A' 00-1F linux/apm_bios.h diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index 443230b..a9c00fa 
100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -17,6 +17,7 @@ This document describes the Linux kernel Makefiles. --- 3.8 Command line dependency --- 3.9 Dependency tracking --- 3.10 Special Rules + --- 3.11 $(CC) support functions === 4 Host Program support --- 4.1 Simple Host Program @@ -38,7 +39,6 @@ This document describes the Linux kernel Makefiles. --- 6.6 Commands useful for building a boot image --- 6.7 Custom kbuild commands --- 6.8 Preprocessing linker scripts - --- 6.9 $(CC) support functions === 7 Kbuild Variables === 8 Makefile language @@ -106,9 +106,9 @@ This document is aimed towards normal developers and arch developers. Most Makefiles within the kernel are kbuild Makefiles that use the kbuild infrastructure. This chapter introduce the syntax used in the kbuild makefiles. -The preferred name for the kbuild files is 'Kbuild' but 'Makefile' will -continue to be supported. All new developmen is expected to use the -Kbuild filename. +The preferred name for the kbuild files is 'Makefile' but 'Kbuild' can +be used and if both a 'Makefile' and a 'Kbuild' file exist then the 'Kbuild' +file will be used. Section 3.1 "Goal definitions" is a quick intro, further chapters provide more details, with real examples. @@ -385,6 +385,102 @@ more details, with real examples. to prerequisites are referenced with $(src) (because they are not generated files). +--- 3.11 $(CC) support functions + + The kernel may be built with several different versions of + $(CC), each supporting a unique set of features and options. + kbuild provides basic support to check for valid options for $(CC). + $(CC) is usually the gcc compiler, but other alternatives are + available. + + as-option + as-option is used to check if $(CC) when used to compile + assembler (*.S) files supports the given option. An optional + second option may be specified if the first option is not supported.
+ + Example: + #arch/sh/Makefile + cflags-y += $(call as-option,-Wa$(comma)-isa=$(isa-y),) + + In the above example cflags-y will be assigned the option + -Wa$(comma)-isa=$(isa-y) if it is supported by $(CC). + The second argument is optional, and if supplied will be used + if first argument is not supported. + + cc-option + cc-option is used to check if $(CC) supports a given option, and if not + supported to use an optional second option. + + Example: + #arch/i386/Makefile + cflags-y += $(call cc-option,-march=pentium-mmx,-march=i586) + + In the above example cflags-y will be assigned the option + -march=pentium-mmx if supported by $(CC), otherwise -march=i586. + The second argument to cc-option is optional, and if omitted + cflags-y will be assigned no value if first option is not supported. + + cc-option-yn + cc-option-yn is used to check if gcc supports a given option + and return 'y' if supported, otherwise 'n'. + + Example: + #arch/ppc/Makefile + biarch := $(call cc-option-yn, -m32) + aflags-$(biarch) += -a32 + cflags-$(biarch) += -m32 + + In the above example $(biarch) is set to y if $(CC) supports the -m32 + option. When $(biarch) equals to y the expanded variables $(aflags-y) + and $(cflags-y) will be assigned the values -a32 and -m32. + + cc-option-align + gcc version >= 3.0 shifted type of options used to specify + alignment of functions, loops etc. $(cc-option-align) when used + as prefix to the align options will select the right prefix: + gcc < 3.00 + cc-option-align = -malign + gcc >= 3.00 + cc-option-align = -falign + + Example: + CFLAGS += $(cc-option-align)-functions=4 + + In the above example the option -falign-functions=4 is used for + gcc >= 3.00. For gcc < 3.00 -malign-functions=4 is used. + + cc-version + cc-version returns a numerical version of the $(CC) compiler version. + The format is <major><minor> where both are two digits. So for example + gcc 3.41 would return 0341.
+ cc-version is useful when a specific $(CC) version is faulty in one + area, for example the -mregparm=3 were broken in some gcc version + even though the option was accepted by gcc. + + Example: + #arch/i386/Makefile + cflags-y += $(shell \ + if [ $(call cc-version) -ge 0300 ] ; then \ + echo "-mregparm=3"; fi ;) + + In the above example -mregparm=3 is only used for gcc version greater + than or equal to gcc 3.0. + + cc-ifversion + cc-ifversion test the version of $(CC) and equals last argument if + version expression is true. + + Example: + #fs/reiserfs/Makefile + EXTRA_CFLAGS := $(call cc-ifversion, -lt, 0402, -O1) + + In this example EXTRA_CFLAGS will be assigned the value -O1 if the + $(CC) version is less than 4.2. + cc-ifversion takes all the shell operators: + -eq, -ne, -lt, -le, -gt, and -ge + The third parameter may be a text as in this example, but it may also + be an expanded variable or a macro. + === 4 Host Program support @@ -973,74 +1069,6 @@ When kbuild executes the following steps are followed (roughly): architecture specific files. ---- 6.9 $(CC) support functions - - The kernel may be build with several different versions of - $(CC), each supporting a unique set of features and options. - kbuild provide basic support to check for valid options for $(CC). - $(CC) is useally the gcc compiler, but other alternatives are - available. - - cc-option - cc-option is used to check if $(CC) support a given option, and not - supported to use an optional second option. - - Example: - #arch/i386/Makefile - cflags-y += $(call cc-option,-march=pentium-mmx,-march=i586) - - In the above example cflags-y will be assigned the option - -march=pentium-mmx if supported by $(CC), otherwise -march-i586. - The second argument to cc-option is optional, and if omitted - cflags-y will be assigned no value if first option is not supported. - - cc-option-yn - cc-option-yn is used to check if gcc supports a given option - and return 'y' if supported, otherwise 'n'. 
- - Example: - #arch/ppc/Makefile - biarch := $(call cc-option-yn, -m32) - aflags-$(biarch) += -a32 - cflags-$(biarch) += -m32 - - In the above example $(biarch) is set to y if $(CC) supports the -m32 - option. When $(biarch) equals to y the expanded variables $(aflags-y) - and $(cflags-y) will be assigned the values -a32 and -m32. - - cc-option-align - gcc version >= 3.0 shifted type of options used to speify - alignment of functions, loops etc. $(cc-option-align) whrn used - as prefix to the align options will select the right prefix: - gcc < 3.00 - cc-option-align = -malign - gcc >= 3.00 - cc-option-align = -falign - - Example: - CFLAGS += $(cc-option-align)-functions=4 - - In the above example the option -falign-functions=4 is used for - gcc >= 3.00. For gcc < 3.00 -malign-functions=4 is used. - - cc-version - cc-version return a numerical version of the $(CC) compiler version. - The format is <major><minor> where both are two digits. So for example - gcc 3.41 would return 0341. - cc-version is useful when a specific $(CC) version is faulty in one - area, for example the -mregparm=3 were broken in some gcc version - even though the option was accepted by gcc. - - Example: - #arch/i386/Makefile - cflags-y += $(shell \ - if [ $(call cc-version) -ge 0300 ] ; then \ - echo "-mregparm=3"; fi ;) - - In the above example -mregparm=3 is only used for gcc version greater - than or equal to gcc 3.0. - - === 7 Kbuild Variables The top Makefile exports the following variables: diff --git a/Documentation/kbuild/modules.txt b/Documentation/kbuild/modules.txt index 7e77f93..fcccf24 100644 --- a/Documentation/kbuild/modules.txt +++ b/Documentation/kbuild/modules.txt @@ -13,6 +13,7 @@ In this document you will find information about: --- 2.2 Available targets --- 2.3 Available options --- 2.4 Preparing the kernel tree for module build + --- 2.5 Building separate files for a module === 3. Example commands === 4. Creating a kbuild file for an external module === 5. 
Include files @@ -22,7 +23,10 @@ In this document you will find information about: === 6. Module installation --- 6.1 INSTALL_MOD_PATH --- 6.2 INSTALL_MOD_DIR - === 7. Module versioning + === 7. Module versioning & Module.symvers + --- 7.1 Symbols from the kernel (vmlinux + modules) + --- 7.2 Symbols and external modules + --- 7.3 Symbols from another external module === 8. Tips & Tricks --- 8.1 Testing for CONFIG_FOO_BAR @@ -88,7 +92,8 @@ when building an external module. make -C $KDIR M=$PWD modules_install Install the external module(s). Installation default is in /lib/modules/<kernel-version>/extra, - but may be prefixed with INSTALL_MOD_PATH - see separate chapter. + but may be prefixed with INSTALL_MOD_PATH - see separate + chapter. make -C $KDIR M=$PWD clean Remove all generated files for the module - the kernel @@ -131,6 +136,16 @@ when building an external module. Therefore a full kernel build needs to be executed to make module versioning work. +--- 2.5 Building separate files for a module + It is possible to build single files which are part of a module. + This works equally for the kernel, a module and even for external + modules. + Examples (module foo.ko, consists of bar.o, baz.o): + make -C $KDIR M=`pwd` bar.lst + make -C $KDIR M=`pwd` bar.o + make -C $KDIR M=`pwd` foo.ko + make -C $KDIR M=`pwd` / + === 3. Example commands @@ -422,7 +437,7 @@ External modules are installed in the directory: => Install dir: /lib/modules/$(KERNELRELEASE)/gandalf -=== 7. Module versioning +=== 7. Module versioning & Module.symvers Module versioning is enabled by the CONFIG_MODVERSIONS tag. @@ -432,11 +447,80 @@ when a module is loaded/used then the CRC values contained in the kernel are compared with similar values in the module. If they are not equal then the kernel refuses to load the module. -During a kernel build a file named Module.symvers will be generated. This -file includes the symbol version of all symbols within the kernel.
If the -Module.symvers file is saved from the last full kernel compile one does not -have to do a full kernel compile to build a module version's compatible module. +Module.symvers contains a list of all exported symbols from a kernel build. + +--- 7.1 Symbols from the kernel (vmlinux + modules) + + During a kernel build a file named Module.symvers will be generated. + Module.symvers contains all exported symbols from the kernel and + compiled modules. For each symbol the corresponding CRC value + is stored too. + + The syntax of the Module.symvers file is: + <CRC> <Symbol> <module> + Sample: + 0x2d036834 scsi_remove_host drivers/scsi/scsi_mod + For a kernel build without CONFIG_MODVERSIONS enabled the crc + would read: 0x00000000 + + Module.symvers serves two purposes. + 1) It lists all exported symbols both from vmlinux and all modules + 2) It lists CRC if CONFIG_MODVERSIONS is enabled + +--- 7.2 Symbols and external modules + + When building an external module the build system needs access to + the symbols from the kernel to check if all external symbols are + defined. This is done in the MODPOST step and to obtain all + symbols modpost reads Module.symvers from the kernel. + If a Module.symvers file is present in the directory where + the external module is being built this file will be read too. + During the MODPOST step a new Module.symvers file will be written + containing all exported symbols that were not defined in the kernel. + +--- 7.3 Symbols from another external module + + Sometimes one external module uses exported symbols from another + external module. Kbuild needs to have full knowledge on all symbols + to avoid spitting out warnings about undefined symbols. + Two solutions exist to let kbuild know all symbols of more than + one external module. + The method with a top-level kbuild file is recommended but may be + impractical in certain situations.
+ + Use a top-level Kbuild file + If you have two modules: 'foo', 'bar' and 'foo' needs symbols + from 'bar' then one can use a common top-level kbuild file so + both modules are compiled in same build. + + Consider following directory layout: + ./foo/ <= contains the foo module + ./bar/ <= contains the bar module + The top-level Kbuild file would then look like: + + #./Kbuild: (this file may also be named Makefile) + obj-y := foo/ bar/ + + Executing: + make -C $KDIR M=`pwd` + + will then do the expected and compile both modules with full + knowledge on symbols from both modules. + + Use an extra Module.symvers file + When an external module is build a Module.symvers file is + generated containing all exported symbols which are not + defined in the kernel. + To get access to symbols from module 'bar' one can copy the + Module.symvers file from the compilation of the 'bar' module + to the directory where the 'foo' module is build. + During the module build kbuild will read the Module.symvers + file in the directory of the external module and when the + build is finished a new Module.symvers file is created + containing the sum of all symbols defined and not part of the + kernel. + === 8. Tips & Tricks --- 8.1 Testing for CONFIG_FOO_BAR diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 44a25f3..f8cb55c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -367,12 +367,17 @@ running once the system is up. tty<n> Use the virtual console device <n>. ttyS<n>[,options] + ttyUSB0[,options] Use the specified serial port. The options are of - the form "bbbbpn", where "bbbb" is the baud rate, - "p" is parity ("n", "o", or "e"), and "n" is bits. - Default is "9600n8". - - See also Documentation/serial-console.txt. + the form "bbbbpnf", where "bbbb" is the baud rate, + "p" is parity ("n", "o", or "e"), "n" is number of + bits, and "f" is flow control ("r" for RTS or + omit it). 
Default is "9600n8". + + See Documentation/serial-console.txt for more + information. See + Documentation/networking/netconsole.txt for an + alternative. uart,io,<addr>[,options] uart,mmio,<addr>[,options] diff --git a/Documentation/m68k/README.buddha b/Documentation/m68k/README.buddha index bf802ff..ef484a7 100644 --- a/Documentation/m68k/README.buddha +++ b/Documentation/m68k/README.buddha @@ -29,7 +29,7 @@ address is written to $4a, then the whole Byte is written to $48, while it doesn't matter how often you're writing to $4a as long as $48 is not touched. After $48 has been written, the whole card disappears from $e8 and is mapped to the new -address just written. Make shure $4a is written before $48, +address just written. Make sure $4a is written before $48, otherwise your chance is only 1:16 to find the board :-). The local memory-map is even active when mapped to $e8: diff --git a/Documentation/networking/TODO b/Documentation/networking/TODO deleted file mode 100644 index 66d36ff..0000000 --- a/Documentation/networking/TODO +++ /dev/null @@ -1,18 +0,0 @@ -To-do items for network drivers -------------------------------- - -* Move ethernet crc routine to generic code - -* (for 2.5) Integrate Jamal Hadi Salim's netdev Rx polling API change - -* Audit all net drivers to make sure magic packet / wake-on-lan / - similar features are disabled in the driver by default. - -* Audit all net drivers to make sure the module always prints out a - version string when loaded as a module, but only prints a version - string when built into the kernel if a device is detected. - -* Add ETHTOOL_GDRVINFO ioctl support to all ethernet drivers. 
- -* dmfe PCI DMA is totally wrong and only works on x86 - diff --git a/Documentation/networking/ifenslave.c b/Documentation/networking/ifenslave.c index 545447a..a120598 100644 --- a/Documentation/networking/ifenslave.c +++ b/Documentation/networking/ifenslave.c @@ -87,7 +87,7 @@ * would fail and generate an error message in the system log. * - For opt_c: slave should not be set to the master's setting * while it is running. It was already set during enslave. To - * simplify things, it is now handeled separately. + * simplify things, it is now handled separately. * * - 2003/12/01 - Shmulik Hen <shmulik.hen at intel dot com> * - Code cleanup and style changes diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 8d4cf78..4fc8e98 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -40,7 +40,7 @@ network interface card supports some sort of interrupt load mitigation or + How to use CONFIG_PACKET_MMAP -------------------------------------------------------------------------------- -From the user standpoint, you should use the higher level libpcap library, wich +From the user standpoint, you should use the higher level libpcap library, which is a de facto standard, portable across nearly all operating systems including Win32. @@ -217,8 +217,8 @@ called pg_vec, its size limits the number of blocks that can be allocated. kmalloc allocates any number of bytes of phisically contiguous memory from a pool of pre-determined sizes. This pool of memory is mantained by the slab -allocator wich is at the end the responsible for doing the allocation and -hence wich imposes the maximum memory that kmalloc can allocate. +allocator which is at the end the responsible for doing the allocation and +hence which imposes the maximum memory that kmalloc can allocate. In a 2.4/2.6 kernel and the i386 architecture, the limit is 131072 bytes. 
The predetermined sizes that kmalloc uses can be checked in the "size-<bytes>" @@ -254,7 +254,7 @@ and, the number of frames be <block number> * <block size> / <frame size> -Suposse the following parameters, wich apply for 2.6 kernel and an +Suposse the following parameters, which apply for 2.6 kernel and an i386 architecture: <size-max> = 131072 bytes @@ -360,7 +360,7 @@ TP_STATUS_LOSING : indicates there were packet drops from last time statistics where checked with getsockopt() and the PACKET_STATISTICS option. -TP_STATUS_CSUMNOTREADY: currently it's used for outgoing IP packets wich +TP_STATUS_CSUMNOTREADY: currently it's used for outgoing IP packets which it's checksum will be done in hardware. So while reading the packet we should not try to check the checksum. diff --git a/Documentation/networking/ray_cs.txt b/Documentation/networking/ray_cs.txt index 5427f8c..145d27a 100644 --- a/Documentation/networking/ray_cs.txt +++ b/Documentation/networking/ray_cs.txt @@ -25,7 +25,7 @@ the essid= string parameter is available via the kernel command line. This will change after the method of sorting out parameters for all the PCMCIA drivers is agreed upon. If you must have a built in driver with nondefault parameters, they can be edited in -/usr/src/linux/drivers/net/pcmcia/ray_cs.c. Searching for MODULE_PARM +/usr/src/linux/drivers/net/pcmcia/ray_cs.c. Searching for module_param will find them all. 
Information on card services is available at: diff --git a/Documentation/networking/vortex.txt b/Documentation/networking/vortex.txt index 3759acf..6091e5f 100644 --- a/Documentation/networking/vortex.txt +++ b/Documentation/networking/vortex.txt @@ -24,36 +24,44 @@ Since kernel 2.3.99-pre6, this driver incorporates the support for the This driver supports the following hardware: - 3c590 Vortex 10Mbps - 3c592 EISA 10mbps Demon/Vortex - 3c597 EISA Fast Demon/Vortex - 3c595 Vortex 100baseTx - 3c595 Vortex 100baseT4 - 3c595 Vortex 100base-MII - 3Com Vortex - 3c900 Boomerang 10baseT - 3c900 Boomerang 10Mbps Combo - 3c900 Cyclone 10Mbps TPO - 3c900B Cyclone 10Mbps T - 3c900 Cyclone 10Mbps Combo - 3c900 Cyclone 10Mbps TPC - 3c900B-FL Cyclone 10base-FL - 3c905 Boomerang 100baseTx - 3c905 Boomerang 100baseT4 - 3c905B Cyclone 100baseTx - 3c905B Cyclone 10/100/BNC - 3c905B-FX Cyclone 100baseFx - 3c905C Tornado - 3c980 Cyclone - 3cSOHO100-TX Hurricane - 3c555 Laptop Hurricane - 3c575 Boomerang CardBus - 3CCFE575 Cyclone CardBus - 3CCFE575CT Cyclone CardBus - 3CCFE656 Cyclone CardBus - 3CCFEM656 Cyclone CardBus - 3c450 Cyclone/unknown - + 3c590 Vortex 10Mbps + 3c592 EISA 10Mbps Demon/Vortex + 3c597 EISA Fast Demon/Vortex + 3c595 Vortex 100baseTx + 3c595 Vortex 100baseT4 + 3c595 Vortex 100base-MII + 3c900 Boomerang 10baseT + 3c900 Boomerang 10Mbps Combo + 3c900 Cyclone 10Mbps TPO + 3c900 Cyclone 10Mbps Combo + 3c900 Cyclone 10Mbps TPC + 3c900B-FL Cyclone 10base-FL + 3c905 Boomerang 100baseTx + 3c905 Boomerang 100baseT4 + 3c905B Cyclone 100baseTx + 3c905B Cyclone 10/100/BNC + 3c905B-FX Cyclone 100baseFx + 3c905C Tornado + 3c920B-EMB-WNM (ATI Radeon 9100 IGP) + 3c980 Cyclone + 3c980C Python-T + 3cSOHO100-TX Hurricane + 3c555 Laptop Hurricane + 3c556 Laptop Tornado + 3c556B Laptop Hurricane + 3c575 [Megahertz] 10/100 LAN CardBus + 3c575 Boomerang CardBus + 3CCFE575BT Cyclone CardBus + 3CCFE575CT Tornado CardBus + 3CCFE656 Cyclone CardBus + 3CCFEM656B Cyclone+Winmodem CardBus + 
3CXFEM656C Tornado+Winmodem CardBus + 3c450 HomePNA Tornado + 3c920 Tornado + 3c982 Hydra Dual Port A + 3c982 Hydra Dual Port B + 3c905B-T4 + 3c920B-EMB-WNM Tornado Module parameters ================= @@ -293,11 +301,6 @@ Donald's wake-on-LAN page: http://www.scyld.com/wakeonlan.html -3Com's documentation for many NICs, including the ones supported by -this driver is available at - - http://support.3com.com/partners/developer/developer_form.html - 3Com's DOS-based application for setting up the NICs EEPROMs: ftp://ftp.3com.com/pub/nic/3c90x/3c90xx2.exe @@ -312,10 +315,10 @@ Autonegotiation notes --------------------- The driver uses a one-minute heartbeat for adapting to changes in - the external LAN environment. This means that when, for example, a - machine is unplugged from a hubbed 10baseT LAN plugged into a - switched 100baseT LAN, the throughput will be quite dreadful for up - to sixty seconds. Be patient. + the external LAN environment if link is up and 5 seconds if link is down. + This means that when, for example, a machine is unplugged from a hubbed + 10baseT LAN plugged into a switched 100baseT LAN, the throughput + will be quite dreadful for up to sixty seconds. Be patient. 
Cisco interoperability note from Walter Wong <wcw+@CMU.EDU>: diff --git a/Documentation/pnp.txt b/Documentation/pnp.txt index af0f6ea..9529c9c 100644 --- a/Documentation/pnp.txt +++ b/Documentation/pnp.txt @@ -115,6 +115,9 @@ pnp_unregister_protocol pnp_register_driver - adds a PnP driver to the Plug and Play Layer - this includes driver model integration +- returns zero for success or a negative error number for failure; count + calls to the .add() method if you need to know how many devices bind to + the driver pnp_unregister_driver - removes a PnP driver from the Plug and Play Layer diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt index ee551c6..217e517 100644 --- a/Documentation/powerpc/booting-without-of.txt +++ b/Documentation/powerpc/booting-without-of.txt @@ -719,6 +719,11 @@ address which can extend beyond that limit. - model : this is your board name/model - #address-cells : address representation for "root" devices - #size-cells: the size representation for "root" devices + - device_type : This property shouldn't be necessary. However, if + you decide to create a device_type for your root node, make sure it + is _not_ "chrp" unless your platform is a pSeries or PAPR compliant + one for 64-bit, or a CHRP-type machine for 32-bit as this will + be matched by the kernel this way. Additionally, some recommended properties are: diff --git a/Documentation/robust-futex-ABI.txt b/Documentation/robust-futex-ABI.txt new file mode 100644 index 0000000..8529a17 --- /dev/null +++ b/Documentation/robust-futex-ABI.txt @@ -0,0 +1,182 @@ +Started by Paul Jackson <pj@sgi.com> + +The robust futex ABI +-------------------- + +Robust_futexes provide a mechanism that is used in addition to normal +futexes, for kernel assist of cleanup of held locks on task exit.
+ +The interesting data as to what futexes a thread is holding is kept on a +linked list in user space, where it can be updated efficiently as locks +are taken and dropped, without kernel intervention. The only additional +kernel intervention required for robust_futexes above and beyond what is +required for futexes is: + + 1) a one time call, per thread, to tell the kernel where its list of + held robust_futexes begins, and + 2) internal kernel code at exit, to handle any listed locks held + by the exiting thread. + +The existing normal futexes already provide a "Fast Userspace Locking" +mechanism, which handles uncontested locking without needing a system +call, and handles contested locking by maintaining a list of waiting +threads in the kernel. Options on the sys_futex(2) system call support +waiting on a particular futex, and waking up the next waiter on a +particular futex. + +For robust_futexes to work, the user code (typically in a library such +as glibc linked with the application) has to manage and place the +necessary list elements exactly as the kernel expects them. If it fails +to do so, then improperly listed locks will not be cleaned up on exit, +probably causing deadlock or other such failure of the other threads +waiting on the same locks. + +A thread that anticipates possibly using robust_futexes should first +issue the system call: + + asmlinkage long + sys_set_robust_list(struct robust_list_head __user *head, size_t len); + +The pointer 'head' points to a structure in the thread's address space +consisting of three words. Each word is 32 bits on 32 bit arch's, or 64 +bits on 64 bit arch's, and local byte order. Each thread should have +its own thread private 'head'. + +If a thread is running in 32 bit compatibility mode on a 64 bit native arch +kernel, then it can actually have two such structures - one using 32 bit +words for 32 bit compatibility mode, and one using 64 bit words for 64 +bit native mode.
The kernel, if it is a 64 bit kernel supporting 32 bit +compatibility mode, will attempt to process both lists on each task +exit, if the corresponding sys_set_robust_list() call has been made to +setup that list. + + The first word in the memory structure at 'head' contains a + pointer to a single linked list of 'lock entries', one per lock, + as described below. If the list is empty, the pointer will point + to itself, 'head'. The last 'lock entry' points back to the 'head'. + + The second word, called 'offset', specifies the offset from the + address of the associated 'lock entry', plus or minus, of what will + be called the 'lock word', from that 'lock entry'. The 'lock word' + is always a 32 bit word, unlike the other words above. The 'lock + word' holds 3 flag bits in the upper 3 bits, and the thread id (TID) + of the thread holding the lock in the bottom 29 bits. See further + below for a description of the flag bits. + + The third word, called 'list_op_pending', contains a transient copy of + the address of the 'lock entry', during list insertion and removal, + and is needed to correctly resolve races should a thread exit while + in the middle of a locking or unlocking operation. + +Each 'lock entry' on the single linked list starting at 'head' consists +of just a single word, pointing to the next 'lock entry', or back to +'head' if there are no more entries. In addition, nearby to each 'lock +entry', at an offset from the 'lock entry' specified by the 'offset' +word, is one 'lock word'. + +The 'lock word' is always 32 bits, and is intended to be the same 32 bit +lock variable used by the futex mechanism, in conjunction with +robust_futexes. The kernel will only be able to wakeup the next thread +waiting for a lock on a thread's exit if that next thread used the futex +mechanism to register the address of that 'lock word' with the kernel.
+ +For each futex lock currently held by a thread, if it wants this +robust_futex support for exit cleanup of that lock, it should have one +'lock entry' on this list, with its associated 'lock word' at the +specified 'offset'. Should a thread die while holding any such locks, +the kernel will walk this list, mark any such locks with a bit +indicating their holder died, and wakeup the next thread waiting for +that lock using the futex mechanism. + +When a thread has invoked the above system call to indicate it +anticipates using robust_futexes, the kernel stores the passed in 'head' +pointer for that task. The task may retrieve that value later on by +using the system call: + + asmlinkage long + sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr, + size_t __user *len_ptr); + +It is anticipated that threads will use robust_futexes embedded in +larger, user level locking structures, one per lock. The kernel +robust_futex mechanism doesn't care what else is in that structure, so +long as the 'offset' to the 'lock word' is the same for all +robust_futexes used by that thread. The thread should link those locks +it currently holds using the 'lock entry' pointers. It may also have +other links between the locks, such as the reverse side of a double +linked list, but that doesn't matter to the kernel. + +By keeping its locks linked this way, on a list starting with a 'head' +pointer known to the kernel, the kernel can provide to a thread the +essential service available for robust_futexes, which is to help clean +up locks held at the time of (a perhaps unexpectedly) exit. + +Actual locking and unlocking, during normal operations, is handled +entirely by user level code in the contending threads, and by the +existing futex mechanism to wait for, and wakeup, locks. 
The kernel's +only essential involvement in robust_futexes is to remember where the +list 'head' is, and to walk the list on thread exit, handling locks +still held by the departing thread, as described below. + +There may exist thousands of futex lock structures in a thread's shared +memory, on various data structures, at a given point in time. Only those +lock structures for locks currently held by that thread should be on +that thread's robust_futex linked lock list at a given time. + +A given futex lock structure in a user shared memory region may be held +at different times by any of the threads with access to that region. The +thread currently holding such a lock, if any, is marked with the thread's +TID in the lower 29 bits of the 'lock word'. + +When adding or removing a lock from its list of held locks, in order for +the kernel to correctly handle lock cleanup regardless of when the task +exits (perhaps it gets an unexpected signal 9 in the middle of +manipulating this list), the user code must observe the following +protocol on 'lock entry' insertion and removal: + +On insertion: + 1) set the 'list_op_pending' word to the address of the 'lock entry' + to be inserted, + 2) acquire the futex lock, + 3) add the lock entry, with its thread id (TID) in the bottom 29 bits + of the 'lock word', to the linked list starting at 'head', and + 4) clear the 'list_op_pending' word. + +On removal: + 1) set the 'list_op_pending' word to the address of the 'lock entry' + to be removed, + 2) remove the lock entry for this lock from the 'head' list, + 3) release the futex lock, and + 4) clear the 'list_op_pending' word. + +On exit, the kernel will consider the address stored in +'list_op_pending' and the address of each 'lock entry' found by walking +the list starting at 'head'.
For each such address, if the bottom 29 +bits of the 'lock word' at offset 'offset' from that address equals the +exiting thread's TID, then the kernel will do two things: + + 1) if bit 31 (0x80000000) is set in that word, then attempt a futex + wakeup on that address, which will waken the next thread that has + used the futex mechanism to wait on that address, and + 2) atomically set bit 30 (0x40000000) in the 'lock word'. + +In the above, bit 31 was set by futex waiters on that lock to indicate +they were waiting, and bit 30 is set by the kernel to indicate that the +lock owner died holding the lock. + +The kernel exit code will silently stop scanning the list further if at +any point: + + 1) the 'head' pointer or a subsequent linked list pointer + is not a valid address of a user space word + 2) the calculated location of the 'lock word' (address plus + 'offset') is not the valid address of a 32 bit user space + word + 3) the list contains more than 1 million (subject to + future kernel configuration changes) elements. + +When the kernel sees a list entry whose 'lock word' doesn't have the +current thread's TID in the lower 29 bits, it does nothing with that +entry, and goes on to the next entry. + +Bit 29 (0x20000000) of the 'lock word' is reserved for future use. diff --git a/Documentation/robust-futexes.txt b/Documentation/robust-futexes.txt new file mode 100644 index 0000000..df82d75 --- /dev/null +++ b/Documentation/robust-futexes.txt @@ -0,0 +1,218 @@ +Started by: Ingo Molnar <mingo@redhat.com> + +Background +---------- + +What are robust futexes? To answer that, we first need to understand +what futexes are: normal futexes are special types of locks that in the +noncontended case can be acquired/released from userspace without having +to enter the kernel. + +A futex is in essence a user-space address, e.g. a 32-bit lock variable +field.
If userspace notices contention (the lock is already owned and +someone else wants to grab it too) then the lock is marked with a value +that says "there's a waiter pending", and the sys_futex(FUTEX_WAIT) +syscall is used to wait for the other guy to release it. The kernel +creates a 'futex queue' internally, so that it can later on match up the +waiter with the waker - without them having to know about each other. +When the owner thread releases the futex, it notices (via the variable +value) that there were waiter(s) pending, and does the +sys_futex(FUTEX_WAKE) syscall to wake them up. Once all waiters have +taken and released the lock, the futex is again back to 'uncontended' +state, and there's no in-kernel state associated with it. The kernel +completely forgets that there ever was a futex at that address. This +method makes futexes very lightweight and scalable. + +"Robustness" is about dealing with crashes while holding a lock: if a +process exits prematurely while holding a pthread_mutex_t lock that is +also shared with some other process (e.g. yum segfaults while holding a +pthread_mutex_t, or yum is kill -9-ed), then waiters for that lock need +to be notified that the last owner of the lock exited in some irregular +way. + +To solve such types of problems, "robust mutex" userspace APIs were +created: pthread_mutex_lock() returns an error value if the owner exits +prematurely - and the new owner can decide whether the data protected by +the lock can be recovered safely. + +There is a big conceptual problem with futex based mutexes though: it is +the kernel that destroys the owner task (e.g. due to a SEGFAULT), but +the kernel cannot help with the cleanup: if there is no 'futex queue' +(and in most cases there is none, futexes being fast lightweight locks) +then the kernel has no information to clean up after the held lock! +Userspace has no chance to clean up after the lock either - userspace is +the one that crashes, so it has no opportunity to clean up. 
Catch-22. + +In practice, when e.g. yum is kill -9-ed (or segfaults), a system reboot +is needed to release that futex based lock. This is one of the leading +bugreports against yum. + +To solve this problem, the traditional approach was to extend the vma +(virtual memory area descriptor) concept to have a notion of 'pending +robust futexes attached to this area'. This approach requires 3 new +syscall variants to sys_futex(): FUTEX_REGISTER, FUTEX_DEREGISTER and +FUTEX_RECOVER. At do_exit() time, all vmas are searched to see whether +they have a robust_head set. This approach has two fundamental problems +left: + + - it has quite complex locking and race scenarios. The vma-based + approach had been pending for years, but they are still not completely + reliable. + + - they have to scan _every_ vma at sys_exit() time, per thread! + +The second disadvantage is a real killer: pthread_exit() takes around 1 +microsecond on Linux, but with thousands (or tens of thousands) of vmas +every pthread_exit() takes a millisecond or more, also totally +destroying the CPU's L1 and L2 caches! + +This is very much noticeable even for normal process sys_exit_group() +calls: the kernel has to do the vma scanning unconditionally! (this is +because the kernel has no knowledge about how many robust futexes there +are to be cleaned up, because a robust futex might have been registered +in another task, and the futex variable might have been simply mmap()-ed +into this process's address space). + +This huge overhead forced the creation of CONFIG_FUTEX_ROBUST so that +normal kernels can turn it off, but worse than that: the overhead makes +robust futexes impractical for any type of generic Linux distribution. + +So something had to be done. 
+ +New approach to robust futexes +------------------------------ + +At the heart of this new approach there is a per-thread private list of +robust locks that userspace is holding (maintained by glibc) - which +userspace list is registered with the kernel via a new syscall [this +registration happens at most once per thread lifetime]. At do_exit() +time, the kernel checks this user-space list: are there any robust futex +locks to be cleaned up? + +In the common case, at do_exit() time, there is no list registered, so +the cost of robust futexes is just a simple current->robust_list != NULL +comparison. If the thread has registered a list, then normally the list +is empty. If the thread/process crashed or terminated in some incorrect +way then the list might be non-empty: in this case the kernel carefully +walks the list [not trusting it], and marks all locks that are owned by +this thread with the FUTEX_OWNER_DIED bit, and wakes up one waiter (if +any). + +The list is guaranteed to be private and per-thread at do_exit() time, +so it can be accessed by the kernel in a lockless way. + +There is one race possible though: since adding to and removing from the +list is done after the futex is acquired by glibc, there is a few +instructions window for the thread (or process) to die there, leaving +the futex hung. To protect against this possibility, userspace (glibc) +also maintains a simple per-thread 'list_op_pending' field, to allow the +kernel to clean up if the thread dies after acquiring the lock, but just +before it could have added itself to the list. Glibc sets this +list_op_pending field before it tries to acquire the futex, and clears +it after the list-add (or list-remove) has finished. + +That's all that is needed - all the rest of robust-futex cleanup is done +in userspace [just like with the previous patches]. + +Ulrich Drepper has implemented the necessary glibc support for this new +mechanism, which fully enables robust mutexes.
+ +Key differences of this userspace-list based approach, compared to the +vma based method: + + - it's much, much faster: at thread exit time, there's no need to loop + over every vma (!), which the VM-based method has to do. Only a very + simple 'is the list empty' op is done. + + - no VM changes are needed - 'struct address_space' is left alone. + + - no registration of individual locks is needed: robust mutexes dont + need any extra per-lock syscalls. Robust mutexes thus become a very + lightweight primitive - so they dont force the application designer + to do a hard choice between performance and robustness - robust + mutexes are just as fast. + + - no per-lock kernel allocation happens. + + - no resource limits are needed. + + - no kernel-space recovery call (FUTEX_RECOVER) is needed. + + - the implementation and the locking is "obvious", and there are no + interactions with the VM. + +Performance +----------- + +I have benchmarked the time needed for the kernel to process a list of 1 +million (!) held locks, using the new method [on a 2GHz CPU]: + + - with FUTEX_WAIT set [contended mutex]: 130 msecs + - without FUTEX_WAIT set [uncontended mutex]: 30 msecs + +I have also measured an approach where glibc does the lock notification +[which it currently does for !pshared robust mutexes], and that took 256 +msecs - clearly slower, due to the 1 million FUTEX_WAKE syscalls +userspace had to do. + +(1 million held locks are unheard of - we expect at most a handful of +locks to be held at a time. Nevertheless it's nice to know that this +approach scales nicely.) 
+ +Implementation details +---------------------- + +The patch adds two new syscalls: one to register the userspace list, and +one to query the registered list pointer: + + asmlinkage long + sys_set_robust_list(struct robust_list_head __user *head, + size_t len); + + asmlinkage long + sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr, + size_t __user *len_ptr); + +List registration is very fast: the pointer is simply stored in +current->robust_list. [Note that in the future, if robust futexes become +widespread, we could extend sys_clone() to register a robust-list head +for new threads, without the need of another syscall.] + +So there is virtually zero overhead for tasks not using robust futexes, +and even for robust futex users, there is only one extra syscall per +thread lifetime, and the cleanup operation, if it happens, is fast and +straightforward. The kernel doesnt have any internal distinction between +robust and normal futexes. + +If a futex is found to be held at exit time, the kernel sets the +following bit of the futex word: + + #define FUTEX_OWNER_DIED 0x40000000 + +and wakes up the next futex waiter (if any). User-space does the rest of +the cleanup. + +Otherwise, robust futexes are acquired by glibc by putting the TID into +the futex field atomically. Waiters set the FUTEX_WAITERS bit: + + #define FUTEX_WAITERS 0x80000000 + +and the remaining bits are for the TID. + +Testing, architecture support +----------------------------- + +i've tested the new syscalls on x86 and x86_64, and have made sure the +parsing of the userspace list is robust [ ;-) ] even if the list is +deliberately corrupted. + +i386 and x86_64 syscalls are wired up at the moment, and Ulrich has +tested the new glibc code (on x86_64 and i386), and it works for his +robust-mutex testcases. + +All other architectures should build just fine too - but they wont have +the new syscalls yet. 
+ +Architectures need to implement the new futex_atomic_cmpxchg_inatomic() +inline function before wiring up the syscalls (that function returns +-ENOSYS right now). diff --git a/Documentation/rpc-cache.txt b/Documentation/rpc-cache.txt index 2b5d443..5f757c8 100644 --- a/Documentation/rpc-cache.txt +++ b/Documentation/rpc-cache.txt @@ -1,4 +1,4 @@ -This document gives a brief introduction to the caching + This document gives a brief introduction to the caching mechanisms in the sunrpc layer that is used, in particular, for NFS authentication. @@ -25,25 +25,17 @@ The common code handles such things as: - supporting 'NEGATIVE' as well as positive entries - allowing an EXPIRED time on cache items, and removing items after they expire, and are no longe in-use. - - Future code extensions are expect to handle - making requests to user-space to fill in cache entries - allowing user-space to directly set entries in the cache - delaying RPC requests that depend on as-yet incomplete cache entries, and replaying those requests when the cache entry is complete. - - maintaining last-access times on cache entries - - clean out old entries when the caches become full - -The code for performing a cache lookup is also common, but in the form -of a template. i.e. a #define. -Each cache defines a lookup function by using the DefineCacheLookup -macro, or the simpler DefineSimpleCacheLookup macro + - clean out old entries as they expire. Creating a Cache ---------------- -1/ A cache needs a datum to cache. This is in the form of a +1/ A cache needs a datum to store. This is in the form of a structure definition that must contain a struct cache_head as an element, usually the first. @@ -51,35 +43,69 @@ Creating a Cache Each cache element is reference counted and contains expiry and update times for use in cache management. 2/ A cache needs a "cache_detail" structure that - describes the cache. This stores the hash table, and some - parameters for cache management.
-3/ A cache needs a lookup function. This is created using - the DefineCacheLookup macro. This lookup function is used both - to find entries and to update entries. The normal mode for - updating an entry is to replace the old entry with a new - entry. However it is possible to allow update-in-place - for those caches where it makes sense (no atomicity issues - or indirect reference counting issue) -4/ A cache needs to be registered using cache_register(). This - includes in on a list of caches that will be regularly - cleaned to discard old data. For this to work, some - thread must periodically call cache_clean - + describes the cache. This stores the hash table, some + parameters for cache management, and some operations detailing how + to work with particular cache items. + The operations required are: + struct cache_head *alloc(void) + This simply allocates appropriate memory and returns + a pointer to the cache_head embedded within the + structure + void cache_put(struct kref *) + This is called when the last reference to an item + is dropped. The pointer passed is to the 'ref' field + in the cache_head. cache_put should release any + references created by 'cache_init' and, if CACHE_VALID + is set, any references created by cache_update. + It should then release the memory allocated by + 'alloc'. + int match(struct cache_head *orig, struct cache_head *new) + test if the keys in the two structures match. Return + 1 if they do, 0 if they don't. + void init(struct cache_head *orig, struct cache_head *new) + Set the 'key' fields in 'new' from 'orig'. This may + include taking references to shared objects. + void update(struct cache_head *orig, struct cache_head *new) + Set the 'content' fields in 'new' from 'orig'. + int cache_show(struct seq_file *m, struct cache_detail *cd, + struct cache_head *h) + Optional. Used to provide a /proc file that lists the + contents of a cache. This should show one item, + usually on just one line.
+ int cache_request(struct cache_detail *cd, struct cache_head *h, + char **bpp, int *blen) + Format a request to be sent to user-space for an item + to be instantiated. *bpp is a buffer of size *blen. + *bpp should be moved forward over the encoded message, + and *blen should be reduced to show how much free + space remains. Return 0 on success or <0 if not + enough room or other problem. + int cache_parse(struct cache_detail *cd, char *buf, int len) + A message from user space has arrived to fill out a + cache entry. It is in 'buf' of length 'len'. + cache_parse should parse this, find the item in the + cache with sunrpc_cache_lookup, and update the item + with sunrpc_cache_update. + + +3/ A cache needs to be registered using cache_register(). This + includes it on a list of caches that will be regularly + cleaned to discard old data. + Using a cache ------------- -To find a value in a cache, call the lookup function passing it a the -datum which contains key, and possibly content, and a flag saying -whether to update the cache with new data from the datum. Depending -on how the cache lookup function was defined, it may take an extra -argument to identify the particular cache in question. +To find a value in a cache, call sunrpc_cache_lookup passing a pointer +to the cache_head in a sample item with the 'key' fields filled in. +This will be passed to ->match to identify the target entry. If no +entry is found, a new entry will be created, added to the cache, and +marked as not containing valid data. -Except in cases of kmalloc failure, the lookup function -will return a new datum which will store the key and -may contain valid content, or may not. -This datum is typically passed to cache_check which determines the -validity of the datum and may later initiate an upcall to fill -in the data. +The item returned is typically passed to cache_check which will check +if the data is valid, and may initiate an up-call to get fresh data.
+cache_check will return -ENOENT if the entry is negative or if an +upcall is needed but not possible, -EAGAIN if an upcall is pending, +or 0 if the data is valid. cache_check can be passed a "struct cache_req *". This structure is typically embedded in the actual request and can be used to create a @@ -90,6 +116,13 @@ item does become valid, the deferred copy of the request will be revisited (->revisit). It is expected that this method will reschedule the request for processing. +The value returned by sunrpc_cache_lookup can also be passed to +sunrpc_cache_update to set the content for the item. A second item is +passed which should hold the content. If the item found by _lookup +has valid data, then it is discarded and a new item is created. This +saves any user of an item from worrying about content changing while +it is being inspected. If the item found by _lookup does not contain +valid data, then the content is copied across and CACHE_VALID is set. Populating a cache ------------------ @@ -114,8 +147,8 @@ should be create or updated to have the given content, and the expiry time should be set on that item. Reading from a channel is a bit more interesting. When a cache -lookup fail, or when it suceeds but finds an entry that may soon -expiry, a request is lodged for that cache item to be updated by +lookup fails, or when it succeeds but finds an entry that may soon +expire, a request is lodged for that cache item to be updated by user-space. These requests appear in the channel file. Successive reads will return successive requests. @@ -130,7 +163,7 @@ Thus a user-space helper is likely to: write a response loop. -If it dies and needs to be restarted, any requests that have not be +If it dies and needs to be restarted, any requests that have not been answered will still appear in the file and will be read by the new instance of the helper.
@@ -142,10 +175,9 @@ Each cache should also define a "cache_request" method which takes a cache item and encodes a request into the buffer provided. - Note: If a cache has no active readers on the channel, and has had not active readers for more than 60 seconds, further requests will not be -added to the channel but instead all looks that do not find a valid +added to the channel but instead all lookups that do not find a valid entry will fail. This is partly for backward compatibility: The previous nfs exports table was deemed to be authoritative and a failed lookup meant a definite 'no'. @@ -154,18 +186,17 @@ request/response format ----------------------- While each cache is free to use it's own format for requests -and responses over channel, the following is recommended are +and responses over channel, the following is recommended as appropriate and support routines are available to help: Each request or response record should be printable ASCII with precisely one newline character which should be at the end. Fields within the record should be separated by spaces, normally one. If spaces, newlines, or nul characters are needed in a field they -much be quotes. two mechanisms are available: +must be quoted. Two mechanisms are available: 1/ If a field begins '\x' then it must contain an even number of hex digits, and pairs of these digits provide the bytes in the field. 2/ otherwise a \ in the field must be followed by 3 octal digits which give the code for a byte. Other characters are treated - as them selves. At the very least, space, newlines nul, and + as themselves. At the very least, space, newline, nul, and '\' must be quoted in this way.
- diff --git a/Documentation/serial-console.txt b/Documentation/serial-console.txt index 6c689b0..9a7bc8b 100644 --- a/Documentation/serial-console.txt +++ b/Documentation/serial-console.txt @@ -17,11 +17,13 @@ The format of this option is: ttyX for any other virtual console ttySx for a serial port lp0 for the first parallel port + ttyUSB0 for the first USB serial device options: depend on the driver. For the serial port this - defines the baudrate/parity/bits of the port, - in the format BBBBPN, where BBBB is the speed, - P is parity (n/o/e), and N is bits. Default is + defines the baudrate/parity/bits/flow control of + the port, in the format BBBBPNF, where BBBB is the + speed, P is parity (n/o/e), N is number of bits, + and F is flow control ('r' for RTS). Default is 9600n8. The maximum baudrate is 115200. You can specify multiple console= options on the kernel command line. @@ -45,6 +47,9 @@ become the console. You will need to create a new device to use /dev/console. The official /dev/console is now character device 5,1. +(You can also use a network device as a console. See +Documentation/networking/netconsole.txt for information on that.) + Here's an example that will use /dev/ttyS1 (COM2) as the console. Replace the sample values as needed. diff --git a/Documentation/smart-config.txt b/Documentation/smart-config.txt index c9bed4c..8467447 100644 --- a/Documentation/smart-config.txt +++ b/Documentation/smart-config.txt @@ -56,10 +56,6 @@ Here is the solution: writing one file per option. It updates only the files for options that have changed. - mkdep.c no longer generates warning messages for missing or unneeded - <linux/config.h> lines. The new top-level target 'make checkconfig' - checks for these problems. 
- Flag Dependencies Martin Von Loewis contributed another feature to this patch: diff --git a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl index 6dc9d9f..6feef9e 100644 --- a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl +++ b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl @@ -2836,7 +2836,7 @@ struct _snd_pcm_runtime { <para> Note that this callback became non-atomic since the recent version. - You can use schedule-related fucntions safely in this callback now. + You can use schedule-related functions safely in this callback now. </para> <para> diff --git a/Documentation/sound/oss/Introduction b/Documentation/sound/oss/Introduction index 15d4fb9..f04ba6b 100644 --- a/Documentation/sound/oss/Introduction +++ b/Documentation/sound/oss/Introduction @@ -69,7 +69,7 @@ are available, for example IRQ, address, DMA. Warning, the options for different cards sometime use different names for the same or a similar feature (dma1= versus dma16=). As a last -resort, inspect the code (search for MODULE_PARM). +resort, inspect the code (search for module_param). Notes: diff --git a/Documentation/sound/oss/cs46xx b/Documentation/sound/oss/cs46xx index 88d6cf8..b544327 100644 --- a/Documentation/sound/oss/cs46xx +++ b/Documentation/sound/oss/cs46xx @@ -88,7 +88,7 @@ parameters. for a copy email: twoller@crystal.cirrus.com MODULE_PARMS definitions ------------------------ -MODULE_PARM(defaultorder, "i"); +module_param(defaultorder, ulong, 0); defaultorder=N where N is a value from 1 to 12 The buffer order determines the size of the dma buffer for the driver. @@ -98,18 +98,18 @@ to not underrun the dma buffer as easily. As default, use 32k (order=3) rather than 64k as some of the games work more responsively. 
(2^N) * PAGE_SIZE = allocated buffer size -MODULE_PARM(cs_debuglevel, "i"); -MODULE_PARM(cs_debugmask, "i"); +module_param(cs_debuglevel, ulong, 0644); +module_param(cs_debugmask, ulong, 0644); cs_debuglevel=N cs_debugmask=0xMMMMMMMM where N is a value from 0 (no debug printfs), to 9 (maximum) 0xMMMMMMMM is a debug mask corresponding to the CS_xxx bits (see driver source). -MODULE_PARM(hercules_egpio_disable, "i"); +module_param(hercules_egpio_disable, ulong, 0); hercules_egpio_disable=N where N is a 0 (enable egpio), or a 1 (disable egpio support) -MODULE_PARM(initdelay, "i"); +module_param(initdelay, ulong, 0); initdelay=N This value is used to determine the millescond delay during the initialization code prior to powering up the PLL. On laptops this value can be used to @@ -118,19 +118,19 @@ system is booted under battery power then the mdelay()/udelay() functions fail t properly delay the required time. Also, if the system is booted under AC power and then the power removed, the mdelay()/udelay() functions will not delay properly. -MODULE_PARM(powerdown, "i"); +module_param(powerdown, ulong, 0); powerdown=N where N is 0 (disable any powerdown of the internal blocks) or 1 (enable powerdown) -MODULE_PARM(external_amp, "i"); +module_param(external_amp, bool, 0); external_amp=1 if N is set to 1, then force enabling the EAPD support in the primary AC97 codec. override the detection logic and force the external amp bit in the AC97 0x26 register to be reset (0). EAPD should be 0 for powerup, and 1 for powerdown. The VTB Santa Cruz card has inverted logic, so there is a special function for these cards. -MODULE_PARM(thinkpad, "i"); +module_param(thinkpad, bool, 0); thinkpad=1 if N is set to 1, then force enabling the clkrun functionality. 
Currently, when the part is being used, then clkrun is disabled for the entire system, diff --git a/Documentation/video4linux/CQcam.txt b/Documentation/video4linux/CQcam.txt index e415e36..464e4ce 100644 --- a/Documentation/video4linux/CQcam.txt +++ b/Documentation/video4linux/CQcam.txt @@ -1,7 +1,7 @@ c-qcam - Connectix Color QuickCam video4linux kernel driver Copyright (C) 1999 Dave Forrest <drf5n@virginia.edu> - released under GNU GPL. + released under GNU GPL. 1999-12-08 Dave Forrest, written with kernel version 2.2.12 in mind @@ -45,21 +45,21 @@ configuration. The appropriate flags are: CONFIG_PNP_PARPORT M for autoprobe.o IEEE1284 readback module CONFIG_PRINTER_READBACK M for parport_probe.o IEEE1284 readback module CONFIG_VIDEO_DEV M for videodev.o video4linux module - CONFIG_VIDEO_CQCAM M for c-qcam.o Color Quickcam module + CONFIG_VIDEO_CQCAM M for c-qcam.o Color Quickcam module With these flags, the kernel should compile and install the modules. To record and monitor the compilation, I use: (make zlilo ; \ make modules; \ - make modules_install ; + make modules_install ; depmod -a ) &>log & less log # then a capital 'F' to watch the progress - + But that is my personal preference. 2.2 Configuration - + The configuration requires module configuration and device configuration. I like kmod or kerneld process with the /etc/modprobe.conf file so the modules can automatically load/unload as @@ -68,7 +68,7 @@ using MAKEDEV, or need to be created. The following sections detail these procedures. -2.1 Module Configuration +2.1 Module Configuration Using modules requires a bit of work to install and pass the parameters. 
Understand that entries in /etc/modprobe.conf of: @@ -128,9 +128,9 @@ system (CONFIG_PROC_FS), the parallel printer support (CONFIG_PRINTER), the IEEE 1284 system,(CONFIG_PRINTER_READBACK), you should be able to read some identification from your quickcam with - modprobe -v parport - modprobe -v parport_probe - cat /proc/parport/PORTNUMBER/autoprobe + modprobe -v parport + modprobe -v parport_probe + cat /proc/parport/PORTNUMBER/autoprobe Returns: CLASS:MEDIA; MODEL:Color QuickCam 2.0; @@ -140,7 +140,7 @@ Returns: and well. A common problem is that the current driver does not reliably detect a c-qcam, even though one is attached. In this case, - modprobe -v c-qcam + modprobe -v c-qcam or insmod -v c-qcam @@ -152,16 +152,16 @@ video4linux mailing list and archive for more current information. 3.1 Checklist: Can you get an image? - v4lgrab >qcam.ppm ; wc qcam.ppm ; xv qcam.ppm + v4lgrab >qcam.ppm ; wc qcam.ppm ; xv qcam.ppm - Is a working c-qcam connected to the port? - grep ^ /proc/parport/?/autoprobe + Is a working c-qcam connected to the port? + grep ^ /proc/parport/?/autoprobe - Do the /dev/video* files exist? - ls -lad /dev/video + Do the /dev/video* files exist? + ls -lad /dev/video - Is the c-qcam module loaded? - modprobe -v c-qcam ; lsmod + Is the c-qcam module loaded? + modprobe -v c-qcam ; lsmod Does the camera work with alternate programs? cqcam, etc? @@ -174,7 +174,7 @@ video4linux mailing list and archive for more current information. isn't, you might try patching the c-qcam module to add a parport=xxx option as in the bw-qcam module so you can specify the parallel port: - insmod -v c-qcam parport=0 + insmod -v c-qcam parport=0 And bypass the detection code, see ../../drivers/char/c-qcam.c and look for the 'qc_detect' code and call. @@ -183,12 +183,12 @@ look for the 'qc_detect' code and call. this work is documented at the video4linux2 site listed below. 
-9.0 --- A sample program using v4lgrabber, +9.0 --- A sample program using v4lgrabber, This program is a simple image grabber that will copy a frame from the first video device, /dev/video0 to standard output in portable pixmap format (.ppm) Using this like: 'v4lgrab | convert - c-qcam.jpg' -produced this picture of me at +produced this picture of me at http://mug.sys.virginia.edu/~drf5n/extras/c-qcam.jpg -------------------- 8< ---------------- 8< ----------------------------- @@ -202,8 +202,8 @@ produced this picture of me at * Use as: * v4lgrab >image.ppm * - * Copyright (C) 1998-05-03, Phil Blundell <philb@gnu.org> - * Copied from http://www.tazenda.demon.co.uk/phil/vgrabber.c + * Copyright (C) 1998-05-03, Phil Blundell <philb@gnu.org> + * Copied from http://www.tazenda.demon.co.uk/phil/vgrabber.c * with minor modifications (Dave Forrest, drf5n@virginia.edu). * */ @@ -225,55 +225,55 @@ produced this picture of me at #define READ_VIDEO_PIXEL(buf, format, depth, r, g, b) \ { \ - switch (format) \ - { \ - case VIDEO_PALETTE_GREY: \ - switch (depth) \ - { \ - case 4: \ - case 6: \ - case 8: \ - (r) = (g) = (b) = (*buf++ << 8);\ - break; \ - \ - case 16: \ - (r) = (g) = (b) = \ - *((unsigned short *) buf); \ - buf += 2; \ - break; \ - } \ - break; \ - \ - \ - case VIDEO_PALETTE_RGB565: \ - { \ - unsigned short tmp = *(unsigned short *)buf; \ - (r) = tmp&0xF800; \ - (g) = (tmp<<5)&0xFC00; \ - (b) = (tmp<<11)&0xF800; \ - buf += 2; \ - } \ - break; \ - \ - case VIDEO_PALETTE_RGB555: \ - (r) = (buf[0]&0xF8)<<8; \ - (g) = ((buf[0] << 5 | buf[1] >> 3)&0xF8)<<8; \ - (b) = ((buf[1] << 2 ) & 0xF8)<<8; \ - buf += 2; \ - break; \ - \ - case VIDEO_PALETTE_RGB24: \ - (r) = buf[0] << 8; (g) = buf[1] << 8; \ - (b) = buf[2] << 8; \ - buf += 3; \ - break; \ - \ - default: \ - fprintf(stderr, \ - "Format %d not yet supported\n", \ - format); \ - } \ -} + switch (format) \ + { \ + case VIDEO_PALETTE_GREY: \ + switch (depth) \ + { \ + case 4: \ + case 6: \ + case 8: \ + (r) = (g) = 
(b) = (*buf++ << 8);\ + break; \ + \ + case 16: \ + (r) = (g) = (b) = \ + *((unsigned short *) buf); \ + buf += 2; \ + break; \ + } \ + break; \ + \ + \ + case VIDEO_PALETTE_RGB565: \ + { \ + unsigned short tmp = *(unsigned short *)buf; \ + (r) = tmp&0xF800; \ + (g) = (tmp<<5)&0xFC00; \ + (b) = (tmp<<11)&0xF800; \ + buf += 2; \ + } \ + break; \ + \ + case VIDEO_PALETTE_RGB555: \ + (r) = (buf[0]&0xF8)<<8; \ + (g) = ((buf[0] << 5 | buf[1] >> 3)&0xF8)<<8; \ + (b) = ((buf[1] << 2 ) & 0xF8)<<8; \ + buf += 2; \ + break; \ + \ + case VIDEO_PALETTE_RGB24: \ + (r) = buf[0] << 8; (g) = buf[1] << 8; \ + (b) = buf[2] << 8; \ + buf += 3; \ + break; \ + \ + default: \ + fprintf(stderr, \ + "Format %d not yet supported\n", \ + format); \ + } \ +} int get_brightness_adj(unsigned char *image, long size, int *brightness) { long i, tot = 0; @@ -324,40 +324,40 @@ int main(int argc, char ** argv) if(ioctl(fd, VIDIOCSPICT, &vpic) < 0) { vpic.depth=6; if(ioctl(fd, VIDIOCSPICT, &vpic) < 0) { - vpic.depth=4; - if(ioctl(fd, VIDIOCSPICT, &vpic) < 0) { - fprintf(stderr, "Unable to find a supported capture format.\n"); - close(fd); - exit(1); - } + vpic.depth=4; + if(ioctl(fd, VIDIOCSPICT, &vpic) < 0) { + fprintf(stderr, "Unable to find a supported capture format.\n"); + close(fd); + exit(1); + } } } } else { vpic.depth=24; vpic.palette=VIDEO_PALETTE_RGB24; - + if(ioctl(fd, VIDIOCSPICT, &vpic) < 0) { vpic.palette=VIDEO_PALETTE_RGB565; vpic.depth=16; - + if(ioctl(fd, VIDIOCSPICT, &vpic)==-1) { - vpic.palette=VIDEO_PALETTE_RGB555; - vpic.depth=15; - - if(ioctl(fd, VIDIOCSPICT, &vpic)==-1) { - fprintf(stderr, "Unable to find a supported capture format.\n"); - return -1; - } + vpic.palette=VIDEO_PALETTE_RGB555; + vpic.depth=15; + + if(ioctl(fd, VIDIOCSPICT, &vpic)==-1) { + fprintf(stderr, "Unable to find a supported capture format.\n"); + return -1; + } } } } - + buffer = malloc(win.width * win.height * bpp); if (!buffer) { fprintf(stderr, "Out of memory.\n"); exit(1); } - + do { int newbright; 
read(fd, buffer, win.width * win.height * bpp); @@ -365,8 +365,8 @@ int main(int argc, char ** argv) if (f) { vpic.brightness += (newbright << 8); if(ioctl(fd, VIDIOCSPICT, &vpic)==-1) { - perror("VIDIOSPICT"); - break; + perror("VIDIOSPICT"); + break; } } } while (f); @@ -381,7 +381,7 @@ int main(int argc, char ** argv) fputc(g>>8, stdout); fputc(b>>8, stdout); } - + close(fd); return 0; } diff --git a/Documentation/video4linux/README.cpia b/Documentation/video4linux/README.cpia index c95e7bb..19cd3bf 100644 --- a/Documentation/video4linux/README.cpia +++ b/Documentation/video4linux/README.cpia @@ -87,7 +87,7 @@ hardware configuration of the parport. You can give the boot-parameter at the LILO-prompt or specify it in lilo.conf. I use the following append-line in lilo.conf: - append="parport=0x378,7,3" + append="parport=0x378,7,3" See Documentation/parport.txt for more information about the configuration of the parport and the values given above. Do not simply @@ -175,7 +175,7 @@ THANKS (in no particular order): - Manuel J. 
Petit de Gabriel <mpetit@dit.upm.es> for providing help with Isabel (http://isabel.dit.upm.es/) - Bas Huisman <bhuism@cs.utwente.nl> for writing the initial parport code -- Jarl Totland <Jarl.Totland@bdc.no> for setting up the mailing list +- Jarl Totland <Jarl.Totland@bdc.no> for setting up the mailing list and maintaining the web-server[3] - Chris Whiteford <Chris@informinteractive.com> for fixes related to the 1.02 firmware diff --git a/Documentation/video4linux/Zoran b/Documentation/video4linux/Zoran index 52c94bd..be9f21b 100644 --- a/Documentation/video4linux/Zoran +++ b/Documentation/video4linux/Zoran @@ -28,7 +28,7 @@ Iomega Buz: * Philips saa7111 TV decoder * Philips saa7185 TV encoder Drivers to use: videodev, i2c-core, i2c-algo-bit, - videocodec, saa7111, saa7185, zr36060, zr36067 + videocodec, saa7111, saa7185, zr36060, zr36067 Inputs/outputs: Composite and S-video Norms: PAL, SECAM (720x576 @ 25 fps), NTSC (720x480 @ 29.97 fps) Card number: 7 @@ -39,7 +39,7 @@ Linux Media Labs LML33: * Brooktree bt819 TV decoder * Brooktree bt856 TV encoder Drivers to use: videodev, i2c-core, i2c-algo-bit, - videocodec, bt819, bt856, zr36060, zr36067 + videocodec, bt819, bt856, zr36060, zr36067 Inputs/outputs: Composite and S-video Norms: PAL (720x576 @ 25 fps), NTSC (720x480 @ 29.97 fps) Card number: 5 @@ -50,7 +50,7 @@ Linux Media Labs LML33R10: * Philips saa7114 TV decoder * Analog Devices adv7170 TV encoder Drivers to use: videodev, i2c-core, i2c-algo-bit, - videocodec, saa7114, adv7170, zr36060, zr36067 + videocodec, saa7114, adv7170, zr36060, zr36067 Inputs/outputs: Composite and S-video Norms: PAL (720x576 @ 25 fps), NTSC (720x480 @ 29.97 fps) Card number: 6 @@ -61,7 +61,7 @@ Pinnacle/Miro DC10(new): * Philips saa7110a TV decoder * Analog Devices adv7176 TV encoder Drivers to use: videodev, i2c-core, i2c-algo-bit, - videocodec, saa7110, adv7175, zr36060, zr36067 + videocodec, saa7110, adv7175, zr36060, zr36067 Inputs/outputs: Composite, S-video and Internal 
Norms: PAL, SECAM (768x576 @ 25 fps), NTSC (640x480 @ 29.97 fps) Card number: 1 @@ -84,7 +84,7 @@ Pinnacle/Miro DC10(old): * * Micronas vpx3220a TV decoder * mse3000 TV encoder or Analog Devices adv7176 TV encoder * Drivers to use: videodev, i2c-core, i2c-algo-bit, - videocodec, vpx3220, mse3000/adv7175, zr36050, zr36016, zr36067 + videocodec, vpx3220, mse3000/adv7175, zr36050, zr36016, zr36067 Inputs/outputs: Composite, S-video and Internal Norms: PAL, SECAM (768x576 @ 25 fps), NTSC (640x480 @ 29.97 fps) Card number: 0 @@ -96,7 +96,7 @@ Pinnacle/Miro DC30: * * Micronas vpx3225d/vpx3220a/vpx3216b TV decoder * Analog Devices adv7176 TV encoder Drivers to use: videodev, i2c-core, i2c-algo-bit, - videocodec, vpx3220/vpx3224, adv7175, zr36050, zr36016, zr36067 + videocodec, vpx3220/vpx3224, adv7175, zr36050, zr36016, zr36067 Inputs/outputs: Composite, S-video and Internal Norms: PAL, SECAM (768x576 @ 25 fps), NTSC (640x480 @ 29.97 fps) Card number: 3 @@ -123,11 +123,11 @@ Note: use encoder=X or decoder=X for non-default i2c chips (see i2c-id.h) The best know TV standards are NTSC/PAL/SECAM. but for decoding a frame that information is not enough. There are several formats of the TV standards. -And not every TV decoder is able to handle every format. Also the every -combination is supported by the driver. There are currently 11 different -tv broadcast formats all aver the world. +And not every TV decoder is able to handle every format. Also the every +combination is supported by the driver. There are currently 11 different +tv broadcast formats all aver the world. -The CCIR defines parameters needed for broadcasting the signal. +The CCIR defines parameters needed for broadcasting the signal. The CCIR has defined different standards: A,B,D,E,F,G,D,H,I,K,K1,L,M,N,... The CCIR says not much about about the colorsystem used !!! And talking about a colorsystem says not to much about how it is broadcast. @@ -136,18 +136,18 @@ The CCIR standards A,E,F are not used any more. 
When you speak about NTSC, you usually mean the standard: CCIR - M using the NTSC colorsystem which is used in the USA, Japan, Mexico, Canada -and a few others. +and a few others. When you talk about PAL, you usually mean: CCIR - B/G using the PAL -colorsystem which is used in many Countries. +colorsystem which is used in many Countries. -When you talk about SECAM, you mean: CCIR - L using the SECAM Colorsystem +When you talk about SECAM, you mean: CCIR - L using the SECAM Colorsystem which is used in France, and a few others. There the other version of SECAM, CCIR - D/K is used in Bulgaria, China, -Slovakai, Hungary, Korea (Rep.), Poland, Rumania and a others. +Slovakai, Hungary, Korea (Rep.), Poland, Rumania and a others. -The CCIR - H uses the PAL colorsystem (sometimes SECAM) and is used in +The CCIR - H uses the PAL colorsystem (sometimes SECAM) and is used in Egypt, Libya, Sri Lanka, Syrain Arab. Rep. The CCIR - I uses the PAL colorsystem, and is used in Great Britain, Hong Kong, @@ -158,30 +158,30 @@ and is used in Argentinia, Uruguay, an a few others We do not talk about how the audio is broadcast ! -A rather good sites about the TV standards are: +A rather good sites about the TV standards are: http://www.sony.jp/ServiceArea/Voltage_map/ http://info.electronicwerkstatt.de/bereiche/fernsehtechnik/frequenzen_und_normen/Fernsehnormen/ and http://www.cabl.com/restaurant/channel.html Other weird things around: NTSC 4.43 is a modificated NTSC, which is mainly used in PAL VCR's that are able to play back NTSC. PAL 60 seems to be the same -as NTSC 4.43 . The Datasheets also talk about NTSC 44, It seems as if it would -be the same as NTSC 4.43. +as NTSC 4.43 . The Datasheets also talk about NTSC 44, It seems as if it would +be the same as NTSC 4.43. NTSC Combs seems to be a decoder mode where the decoder uses a comb filter to split coma and luma instead of a Delay line. But I did not defiantly find out what NTSC Comb is. 
Philips saa7111 TV decoder -was introduced in 1997, is used in the BUZ and -can handle: PAL B/G/H/I, PAL N, PAL M, NTSC M, NTSC N, NTSC 4.43 and SECAM +was introduced in 1997, is used in the BUZ and +can handle: PAL B/G/H/I, PAL N, PAL M, NTSC M, NTSC N, NTSC 4.43 and SECAM Philips saa7110a TV decoder was introduced in 1995, is used in the Pinnacle/Miro DC10(new), DC10+ and -can handle: PAL B/G, NTSC M and SECAM +can handle: PAL B/G, NTSC M and SECAM Philips saa7114 TV decoder -was introduced in 2000, is used in the LML33R10 and +was introduced in 2000, is used in the LML33R10 and can handle: PAL B/G/D/H/I/N, PAL N, PAL M, NTSC M, NTSC 4.43 and SECAM Brooktree bt819 TV decoder @@ -206,7 +206,7 @@ was introduced in 1996, is used in the BUZ can generate: PAL B/G, NTSC M Brooktree bt856 TV Encoder -was introduced in 1994, is used in the LML33 +was introduced in 1994, is used in the LML33 can generate: PAL B/D/G/H/I/N, PAL M, NTSC M, PAL-N (Argentina) Analog Devices adv7170 TV Encoder @@ -221,9 +221,9 @@ ITT mse3000 TV encoder was introduced in 1991, is used in the DC10 old can generate: PAL , NTSC , SECAM -The adv717x, should be able to produce PAL N. But you find nothing PAL N +The adv717x, should be able to produce PAL N. But you find nothing PAL N specific in the registers. Seem that you have to reuse a other standard -to generate PAL N, maybe it would work if you use the PAL M settings. +to generate PAL N, maybe it would work if you use the PAL M settings. ========================== @@ -261,7 +261,7 @@ Here's my experience of using LML33 and Buz on various motherboards: VIA MVP3 Forget it. Pointless. Doesn't work. -Intel 430FX (Pentium 200) +Intel 430FX (Pentium 200) LML33 perfect, Buz tolerable (3 or 4 frames dropped per movie) Intel 440BX (early stepping) LML33 tolerable. 
Buz starting to get annoying (6-10 frames/hour) @@ -438,52 +438,52 @@ importance of buffer sizes: > -q 25 -b 128 : 24.655.992 > -q 25 -b 256 : 25.859.820 -I woke up, and can't go to sleep again. I'll kill some time explaining why +I woke up, and can't go to sleep again. I'll kill some time explaining why this doesn't look strange to me. -Let's do some math using a width of 704 pixels. I'm not sure whether the Buz +Let's do some math using a width of 704 pixels. I'm not sure whether the Buz actually use that number or not, but that's not too important right now. -704x288 pixels, one field, is 202752 pixels. Divided by 64 pixels per block; -3168 blocks per field. Each pixel consist of two bytes; 128 bytes per block; -1024 bits per block. 100% in the new driver mean 1:2 compression; the maximum -output becomes 512 bits per block. Actually 510, but 512 is simpler to use +704x288 pixels, one field, is 202752 pixels. Divided by 64 pixels per block; +3168 blocks per field. Each pixel consist of two bytes; 128 bytes per block; +1024 bits per block. 100% in the new driver mean 1:2 compression; the maximum +output becomes 512 bits per block. Actually 510, but 512 is simpler to use for calculations. -Let's say that we specify d1q50. We thus want 256 bits per block; times 3168 -becomes 811008 bits; 101376 bytes per field. We're talking raw bits and bytes -here, so we don't need to do any fancy corrections for bits-per-pixel or such +Let's say that we specify d1q50. We thus want 256 bits per block; times 3168 +becomes 811008 bits; 101376 bytes per field. We're talking raw bits and bytes +here, so we don't need to do any fancy corrections for bits-per-pixel or such things. 101376 bytes per field. -d1 video contains two fields per frame. Those sum up to 202752 bytes per +d1 video contains two fields per frame. Those sum up to 202752 bytes per frame, and one of those frames goes into each buffer. -But wait a second! -b128 gives 128kB buffers! 
It's not possible to cram +But wait a second! -b128 gives 128kB buffers! It's not possible to cram 202752 bytes of JPEG data into 128kB! -This is what the driver notice and automatically compensate for in your +This is what the driver notice and automatically compensate for in your examples. Let's do some math using this information: -128kB is 131072 bytes. In this buffer, we want to store two fields, which -leaves 65536 bytes for each field. Using 3168 blocks per field, we get -20.68686868... available bytes per block; 165 bits. We can't allow the -request for 256 bits per block when there's only 165 bits available! The -q50 -option is silently overridden, and the -b128 option takes precedence, leaving +128kB is 131072 bytes. In this buffer, we want to store two fields, which +leaves 65536 bytes for each field. Using 3168 blocks per field, we get +20.68686868... available bytes per block; 165 bits. We can't allow the +request for 256 bits per block when there's only 165 bits available! The -q50 +option is silently overridden, and the -b128 option takes precedence, leaving us with the equivalence of -q32. -This gives us a data rate of 165 bits per block, which, times 3168, sums up -to 65340 bytes per field, out of the allowed 65536. The current driver has -another level of rate limiting; it won't accept -q values that fill more than -6/8 of the specified buffers. (I'm not sure why. "Playing it safe" seem to be -a safe bet. Personally, I think I would have lowered requested-bits-per-block -by one, or something like that.) We can't use 165 bits per block, but have to -lower it again, to 6/8 of the available buffer space: We end up with 124 bits -per block, the equivalence of -q24. With 128kB buffers, you can't use greater +This gives us a data rate of 165 bits per block, which, times 3168, sums up +to 65340 bytes per field, out of the allowed 65536. 
The current driver has +another level of rate limiting; it won't accept -q values that fill more than +6/8 of the specified buffers. (I'm not sure why. "Playing it safe" seem to be +a safe bet. Personally, I think I would have lowered requested-bits-per-block +by one, or something like that.) We can't use 165 bits per block, but have to +lower it again, to 6/8 of the available buffer space: We end up with 124 bits +per block, the equivalence of -q24. With 128kB buffers, you can't use greater than -q24 at -d1. (And PAL, and 704 pixels width...) -The third example is limited to -q24 through the same process. The second -example, using very similar calculations, is limited to -q48. The only -example that actually grab at the specified -q value is the last one, which +The third example is limited to -q24 through the same process. The second +example, using very similar calculations, is limited to -q48. The only +example that actually grab at the specified -q value is the last one, which is clearly visible, looking at the file size. 
-- diff --git a/Documentation/video4linux/bttv/ICs b/Documentation/video4linux/bttv/ICs index 6b74913..611315f 100644 --- a/Documentation/video4linux/bttv/ICs +++ b/Documentation/video4linux/bttv/ICs @@ -14,13 +14,13 @@ Hauppauge Win/TV pci (version 405): Microchip 24LC02B or Philips 8582E2Y: 256 Byte EEPROM with configuration information - I2C 0xa0-0xa1, (24LC02B also responds to 0xa2-0xaf) + I2C 0xa0-0xa1, (24LC02B also responds to 0xa2-0xaf) Philips SAA5246AGP/E: Videotext decoder chip, I2C 0x22-0x23 TDA9800: sound decoder Winbond W24257AS-35: 32Kx8 CMOS static RAM (Videotext buffer mem) 14052B: analog switch for selection of sound source -PAL: +PAL: TDA5737: VHF, hyperband and UHF mixer/oscillator for TV and VCR 3-band tuners TSA5522: 1.4 GHz I2C-bus controlled synthesizer, I2C 0xc2-0xc3 diff --git a/Documentation/video4linux/bttv/PROBLEMS b/Documentation/video4linux/bttv/PROBLEMS index 8e31e9e..2b8b007 100644 --- a/Documentation/video4linux/bttv/PROBLEMS +++ b/Documentation/video4linux/bttv/PROBLEMS @@ -3,7 +3,7 @@ - Start capturing by pressing "c" or by selecting it via a menu!!! - The memory of some S3 cards is not recognized right: - + First of all, if you are not using XFree-3.2 or newer, upgrade AT LEAST to XFree-3.2A! This solved the problem for most people. @@ -31,23 +31,23 @@ (mostly with Trio 64 but also with some others) Get the free demo version of Accelerated X from www.xinside.com and try bttv with it. bttv seems to work with most S3 cards with Accelerated X. - + Since I do not know much (better make that almost nothing) about VGA card programming I do not know the reason for this. Looks like XFree does something different when setting up the video memory? - Maybe somebody can enlighten me? - Would be nice if somebody could get this to work with XFree since - Accelerated X costs more than some of the grabber cards ... - + Maybe somebody can enlighten me? 
+ Would be nice if somebody could get this to work with XFree since + Accelerated X costs more than some of the grabber cards ... + Better linear frame buffer support for S3 cards will probably be in XFree 4.0. - + - Grabbing is not switched off when changing consoles with XFree. That's because XFree and some AcceleratedX versions do not send unmap events. - Some popup windows (e.g. of the window manager) are not refreshed. - + Disable backing store by starting X with the option "-bs" - When using 32 bpp in XFree or 24+8bpp mode in AccelX 3.1 the system diff --git a/Documentation/video4linux/bttv/README.quirks b/Documentation/video4linux/bttv/README.quirks index e8edb87..92e0392 100644 --- a/Documentation/video4linux/bttv/README.quirks +++ b/Documentation/video4linux/bttv/README.quirks @@ -38,9 +38,9 @@ tolerate. ------------------------ When using the 430FX PCI, the following rules will ensure -compatibility: +compatibility: - (1) Deassert REQ at the same time as asserting FRAME. + (1) Deassert REQ at the same time as asserting FRAME. (2) Do not reassert REQ to request another bus transaction until after finish-ing the previous transaction. diff --git a/Documentation/video4linux/bttv/THANKS b/Documentation/video4linux/bttv/THANKS index 2085399..950aa78 100644 --- a/Documentation/video4linux/bttv/THANKS +++ b/Documentation/video4linux/bttv/THANKS @@ -1,6 +1,6 @@ Many thanks to: -- Markus Schroeder <schroedm@uni-duesseldorf.de> for information on the Bt848 +- Markus Schroeder <schroedm@uni-duesseldorf.de> for information on the Bt848 and tuner programming and his control program xtvc. - Martin Buck <martin-2.buck@student.uni-ulm.de> for his great Videotext @@ -16,7 +16,7 @@ Many thanks to: - MIRO for providing a free PCTV card and detailed information about the components on their cards. (E.g. how the tuner type is detected) Without their card I could not have debugged the NTSC mode. 
- + - Hauppauge for telling how the sound input is selected and what components they do and will use on their radio cards. Also many thanks for faxing me the FM1216 data sheet. diff --git a/Documentation/video4linux/radiotrack.txt b/Documentation/video4linux/radiotrack.txt index 2b75345..d1f3ed1 100644 --- a/Documentation/video4linux/radiotrack.txt +++ b/Documentation/video4linux/radiotrack.txt @@ -131,17 +131,17 @@ Check Stereo: BASE <-- 0xd8 (current volume, stereo detect, x=0xff ==> "not stereo", x=0xfd ==> "stereo detected" Set Frequency: code = (freq*40) + 10486188 - foreach of the 24 bits in code, - (from Least to Most Significant): - to write a "zero" bit, - BASE <-- 0x01 (audio mute, no stereo detect, radio + foreach of the 24 bits in code, + (from Least to Most Significant): + to write a "zero" bit, + BASE <-- 0x01 (audio mute, no stereo detect, radio disable, "zero" bit phase 1, tuner adjust) - BASE <-- 0x03 (audio mute, no stereo detect, radio + BASE <-- 0x03 (audio mute, no stereo detect, radio disable, "zero" bit phase 2, tuner adjust) - to write a "one" bit, - BASE <-- 0x05 (audio mute, no stereo detect, radio + to write a "one" bit, + BASE <-- 0x05 (audio mute, no stereo detect, radio disable, "one" bit phase 1, tuner adjust) - BASE <-- 0x07 (audio mute, no stereo detect, radio + BASE <-- 0x07 (audio mute, no stereo detect, radio disable, "one" bit phase 2, tuner adjust) ---------------------------------------------------------------------------- diff --git a/Documentation/video4linux/w9966.txt b/Documentation/video4linux/w9966.txt index e7ac33a..78a6512 100644 --- a/Documentation/video4linux/w9966.txt +++ b/Documentation/video4linux/w9966.txt @@ -26,7 +26,7 @@ is called VIDEO_PALETTE_YUV422 (16 bpp). A minimal test application (with source) is available from: http://hem.fyristorg.com/mogul/w9966.html -The slow framerate is due to missing DMA ECP read support in the +The slow framerate is due to missing DMA ECP read support in the parport drivers. 
I might add working EPP support later. Good luck! diff --git a/Documentation/video4linux/zr36120.txt b/Documentation/video4linux/zr36120.txt index 5d6357e..ac6d92d 100644 --- a/Documentation/video4linux/zr36120.txt +++ b/Documentation/video4linux/zr36120.txt @@ -2,7 +2,7 @@ Driver for Trust Computer Products Framegrabber, version 0.6.1 ------ --- ----- -------- -------- ------------ ------- - - - - ZORAN ------------------------------------------------------ - Author: Pauline Middelink <middelin@polyware.nl> + Author: Pauline Middelink <middelin@polyware.nl> Date: 18 September 1999 Version: 0.6.1 @@ -115,7 +115,7 @@ After making/checking the devices do: <n> is the cardtype of the card you have. The cardnumber can be found in the source of zr36120. Look for tvcards. If your card is not there, please try if any other card gives some -response, and mail me if you got a working tvcard addition. +response, and mail me if you got a working tvcard addition. PS. <TVCard editors behold!) Dont forget to set video_input to the number of inputs |