30 files changed, 15492 insertions, 0 deletions
diff --git a/Documentation/DocBook/.gitignore b/Documentation/DocBook/.gitignore
new file mode 100644
index 0000000..c102c02
--- /dev/null
+++ b/Documentation/DocBook/.gitignore
@@ -0,0 +1,6 @@
+*.xml
+*.ps
+*.pdf
+*.html
+*.9.gz
+*.9
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
new file mode 100644
index 0000000..9b1f6ca
--- /dev/null
+++ b/Documentation/DocBook/Makefile
@@ -0,0 +1,240 @@
+###
+# This makefile is used to generate the kernel documentation,
+# primarily based on in-line comments in various source files.
+# See Documentation/kernel-doc-nano-HOWTO.txt for instruction in how
+# to document the SRC - and how to read it.
+# To add a new book the only step required is to add the book to the
+# list of DOCBOOKS.
+
+DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml \
+	    kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
+	    procfs-guide.xml writing_usb_driver.xml networking.xml \
+	    kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \
+	    gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
+	    genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
+	    mac80211.xml debugobjects.xml sh.xml
+
+###
+# The build process is as follows (targets):
+#              (xmldocs) [by docproc]
+# file.tmpl --> file.xml +--> file.ps   (psdocs)   [by db2ps or xmlto]
+#                        +--> file.pdf  (pdfdocs)  [by db2pdf or xmlto]
+#                        +--> DIR=file  (htmldocs) [by xmlto]
+#                        +--> man/      (mandocs)  [by xmlto]
+
+
+# for PDF and PS output you can choose between xmlto and docbook-utils tools
+PDF_METHOD	= $(prefer-db2x)
+PS_METHOD	= $(prefer-db2x)
+
+
+###
+# The targets that may be used.
+PHONY += xmldocs sgmldocs psdocs pdfdocs htmldocs mandocs installmandocs
+
+BOOKS := $(addprefix $(obj)/,$(DOCBOOKS))
+xmldocs: $(BOOKS)
+sgmldocs: xmldocs
+
+PS := $(patsubst %.xml, %.ps, $(BOOKS))
+psdocs: $(PS)
+
+PDF := $(patsubst %.xml, %.pdf, $(BOOKS))
+pdfdocs: $(PDF)
+
+HTML := $(sort $(patsubst %.xml, %.html, $(BOOKS)))
+htmldocs: $(HTML)
+	$(call build_main_index)
+
+MAN := $(patsubst %.xml, %.9, $(BOOKS))
+mandocs: $(MAN)
+
+installmandocs: mandocs
+	mkdir -p /usr/local/man/man9/
+	install Documentation/DocBook/man/*.9.gz /usr/local/man/man9/
+
+###
+#External programs used
+KERNELDOC = $(srctree)/scripts/kernel-doc
+DOCPROC   = $(objtree)/scripts/basic/docproc
+
+XMLTOFLAGS = -m $(srctree)/Documentation/DocBook/stylesheet.xsl
+#XMLTOFLAGS += --skip-validation
+
+###
+# DOCPROC is used for two purposes:
+# 1) To generate a dependency list for a .tmpl file
+# 2) To preprocess a .tmpl file and call kernel-doc with
+#     appropriate parameters.
+# The following rules are used to generate the .xml documentation
+# required to generate the final targets. (ps, pdf, html).
+quiet_cmd_docproc = DOCPROC $@
+      cmd_docproc = SRCTREE=$(srctree)/ $(DOCPROC) doc $< >$@
+define rule_docproc
+	set -e;								\
+        $(if $($(quiet)cmd_$(1)),echo '  $($(quiet)cmd_$(1))';) 	\
+        $(cmd_$(1)); 							\
+        ( 								\
+          echo 'cmd_$@ := $(cmd_$(1))'; 				\
+          echo $@: `SRCTREE=$(srctree) $(DOCPROC) depend $<`; 		\
+        ) > $(dir $@).$(notdir $@).cmd
+endef
+
+%.xml: %.tmpl FORCE
+	$(call if_changed_rule,docproc)
+
+###
+#Read in all saved dependency files 
+cmd_files := $(wildcard $(foreach f,$(BOOKS),$(dir $(f)).$(notdir $(f)).cmd))
+
+ifneq ($(cmd_files),)
+  include $(cmd_files)
+endif
+
+###
+# Changes in kernel-doc force a rebuild of all documentation
+$(BOOKS): $(KERNELDOC)
+
+###
+# procfs guide uses a .c file as example code.
+# This requires an explicit dependency
+C-procfs-example = procfs_example.xml
+C-procfs-example2 = $(addprefix $(obj)/,$(C-procfs-example))
+$(obj)/procfs-guide.xml: $(C-procfs-example2)
+
+# List of programs to build
+##oops, this is a kernel module::hostprogs-y := procfs_example
+obj-m += procfs_example.o
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
+
+notfoundtemplate = echo "*** You have to install docbook-utils or xmlto ***"; \
+		   exit 1
+db2xtemplate = db2TYPE -o $(dir $@) $<
+xmltotemplate = xmlto TYPE $(XMLTOFLAGS) -o $(dir $@) $<
+
+# determine which methods are available
+ifeq ($(shell which db2ps >/dev/null 2>&1 && echo found),found)
+	use-db2x = db2x
+	prefer-db2x = db2x
+else
+	use-db2x = notfound
+	prefer-db2x = $(use-xmlto)
+endif
+ifeq ($(shell which xmlto >/dev/null 2>&1 && echo found),found)
+	use-xmlto = xmlto
+	prefer-xmlto = xmlto
+else
+	use-xmlto = notfound
+	prefer-xmlto = $(use-db2x)
+endif
+
+# the commands, generated from the chosen template
+quiet_cmd_db2ps = PS      $@
+      cmd_db2ps = $(subst TYPE,ps, $($(PS_METHOD)template))
+%.ps : %.xml
+	$(call cmd,db2ps)
+
+quiet_cmd_db2pdf = PDF     $@
+      cmd_db2pdf = $(subst TYPE,pdf, $($(PDF_METHOD)template))
+%.pdf : %.xml
+	$(call cmd,db2pdf)
+
+
+main_idx = Documentation/DocBook/index.html
+build_main_index = rm -rf $(main_idx) && \
+		   echo '<h1>Linux Kernel HTML Documentation</h1>' >> $(main_idx) && \
+		   echo '<h2>Kernel Version: $(KERNELVERSION)</h2>' >> $(main_idx) && \
+		   cat $(HTML) >> $(main_idx)
+
+quiet_cmd_db2html = HTML    $@
+      cmd_db2html = xmlto xhtml $(XMLTOFLAGS) -o $(patsubst %.html,%,$@) $< && \
+		echo '<a HREF="$(patsubst %.html,%,$(notdir $@))/index.html"> \
+        $(patsubst %.html,%,$(notdir $@))</a><p>' > $@
+
+%.html:	%.xml
+	@(which xmlto > /dev/null 2>&1) || \
+	 (echo "*** You need to install xmlto ***"; \
+	  exit 1)
+	@rm -rf $@ $(patsubst %.html,%,$@)
+	$(call cmd,db2html)
+	@if [ ! -z "$(PNG-$(basename $(notdir $@)))" ]; then \
+            cp $(PNG-$(basename $(notdir $@))) $(patsubst %.html,%,$@); fi
+
+quiet_cmd_db2man = MAN     $@
+      cmd_db2man = if grep -q refentry $<; then xmlto man $(XMLTOFLAGS) -o $(obj)/man $< ; gzip -f $(obj)/man/*.9; fi
+%.9 : %.xml
+	@(which xmlto > /dev/null 2>&1) || \
+	 (echo "*** You need to install xmlto ***"; \
+	  exit 1)
+	$(Q)mkdir -p $(obj)/man
+	$(call cmd,db2man)
+	@touch $@
+
+###
+# Rules to generate postscripts and PNG images from .fig format files
+quiet_cmd_fig2eps = FIG2EPS $@
+      cmd_fig2eps = fig2dev -Leps $< $@
+
+%.eps: %.fig
+	@(which fig2dev > /dev/null 2>&1) || \
+	 (echo "*** You need to install transfig ***"; \
+	  exit 1)
+	$(call cmd,fig2eps)
+
+quiet_cmd_fig2png = FIG2PNG $@
+      cmd_fig2png = fig2dev -Lpng $< $@
+
+%.png: %.fig
+	@(which fig2dev > /dev/null 2>&1) || \
+	 (echo "*** You need to install transfig ***"; \
+	  exit 1)
+	$(call cmd,fig2png)
+
+###
+# Rule to convert a .c file to inline XML documentation
+       gen_xml = :
+ quiet_gen_xml = echo '  GEN     $@'
+silent_gen_xml = :
+%.xml: %.c
+	@$($(quiet)gen_xml)
+	@(                            \
+	   echo "<programlisting>";   \
+	   expand --tabs=8 < $< |     \
+	   sed -e "s/&/\\&amp;/g"     \
+	       -e "s/</\\&lt;/g"      \
+	       -e "s/>/\\&gt;/g";     \
+	   echo "</programlisting>")  > $@
+
+###
+# Help targets as used by the top-level makefile
+dochelp:
+	@echo  ' Linux kernel internal documentation in different formats:'
+	@echo  '  htmldocs        - HTML'
+	@echo  '  installmandocs  - install man pages generated by mandocs'
+	@echo  '  mandocs         - man pages'
+	@echo  '  pdfdocs         - PDF'
+	@echo  '  psdocs          - Postscript'
+	@echo  '  xmldocs         - XML DocBook'
+
+###
+# Temporary files left by various tools
+clean-files := $(DOCBOOKS) \
+	$(patsubst %.xml, %.dvi,  $(DOCBOOKS)) \
+	$(patsubst %.xml, %.aux,  $(DOCBOOKS)) \
+	$(patsubst %.xml, %.tex,  $(DOCBOOKS)) \
+	$(patsubst %.xml, %.log,  $(DOCBOOKS)) \
+	$(patsubst %.xml, %.out,  $(DOCBOOKS)) \
+	$(patsubst %.xml, %.ps,   $(DOCBOOKS)) \
+	$(patsubst %.xml, %.pdf,  $(DOCBOOKS)) \
+	$(patsubst %.xml, %.html, $(DOCBOOKS)) \
+	$(patsubst %.xml, %.9,    $(DOCBOOKS)) \
+	$(C-procfs-example)
+
+clean-dirs := $(patsubst %.xml,%,$(DOCBOOKS)) man
+
+# Declare the contents of the .PHONY variable as phony.  We keep that
+# information in a variable se we can use it in if_changed and friends.
+
+.PHONY: $(PHONY)
diff --git a/Documentation/DocBook/debugobjects.tmpl b/Documentation/DocBook/debugobjects.tmpl
new file mode 100644
index 0000000..7f5f218
--- /dev/null
+++ b/Documentation/DocBook/debugobjects.tmpl
@@ -0,0 +1,391 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="debug-objects-guide">
+ <bookinfo>
+  <title>Debug objects life time</title>
+
+  <authorgroup>
+   <author>
+    <firstname>Thomas</firstname>
+    <surname>Gleixner</surname>
+    <affiliation>
+     <address>
+      <email>tglx@linutronix.de</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <copyright>
+   <year>2008</year>
+   <holder>Thomas Gleixner</holder>
+  </copyright>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License version 2 as published by the Free Software Foundation.
+   </para>
+
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+  <chapter id="intro">
+    <title>Introduction</title>
+    <para>
+      debugobjects is a generic infrastructure to track the life time
+      of kernel objects and validate the operations on those.
+    </para>
+    <para>
+      debugobjects is useful to check for the following error patterns:
+	<itemizedlist>
+	  <listitem><para>Activation of uninitialized objects</para></listitem>
+	  <listitem><para>Initialization of active objects</para></listitem>
+	  <listitem><para>Usage of freed/destroyed objects</para></listitem>
+	</itemizedlist>
+    </para>
+    <para>
+      debugobjects is not changing the data structure of the real
+      object so it can be compiled in with a minimal runtime impact
+      and enabled on demand with a kernel command line option.
+    </para>
+  </chapter>
+
+  <chapter id="howto">
+    <title>Howto use debugobjects</title>
+    <para>
+      A kernel subsystem needs to provide a data structure which
+      describes the object type and add calls into the debug code at
+      appropriate places. The data structure to describe the object
+      type needs at minimum the name of the object type. Optional
+      functions can and should be provided to fixup detected problems
+      so the kernel can continue to work and the debug information can
+      be retrieved from a live system instead of hard core debugging
+      with serial consoles and stack trace transcripts from the
+      monitor.
+    </para>
+    <para>
+      The debug calls provided by debugobjects are:
+      <itemizedlist>
+	<listitem><para>debug_object_init</para></listitem>
+	<listitem><para>debug_object_init_on_stack</para></listitem>
+	<listitem><para>debug_object_activate</para></listitem>
+	<listitem><para>debug_object_deactivate</para></listitem>
+	<listitem><para>debug_object_destroy</para></listitem>
+	<listitem><para>debug_object_free</para></listitem>
+      </itemizedlist>
+      Each of these functions takes the address of the real object and
+      a pointer to the object type specific debug description
+      structure.
+    </para>
+    <para>
+      Each detected error is reported in the statistics and a limited
+      number of errors are printk'ed including a full stack trace.
+    </para>
+    <para>
+      The statistics are available via debugfs/debug_objects/stats.
+      They provide information about the number of warnings and the
+      number of successful fixups along with information about the
+      usage of the internal tracking objects and the state of the
+      internal tracking objects pool.
+    </para>
+  </chapter>
+  <chapter id="debugfunctions">
+    <title>Debug functions</title>
+    <sect1 id="prototypes">
+      <title>Debug object function reference</title>
+!Elib/debugobjects.c
+    </sect1>
+    <sect1 id="debug_object_init">
+      <title>debug_object_init</title>
+      <para>
+	This function is called whenever the initialization function
+	of a real object is called.
+      </para>
+      <para>
+	When the real object is already tracked by debugobjects it is
+	checked, whether the object can be initialized.  Initializing
+	is not allowed for active and destroyed objects. When
+	debugobjects detects an error, then it calls the fixup_init
+	function of the object type description structure if provided
+	by the caller. The fixup function can correct the problem
+	before the real initialization of the object happens. E.g. it
+	can deactivate an active object in order to prevent damage to
+	the subsystem.
+      </para>
+      <para>
+	When the real object is not yet tracked by debugobjects,
+	debugobjects allocates a tracker object for the real object
+	and sets the tracker object state to ODEBUG_STATE_INIT. It
+	verifies that the object is not on the callers stack. If it is
+	on the callers stack then a limited number of warnings
+	including a full stack trace is printk'ed. The calling code
+	must use debug_object_init_on_stack() and remove the object
+	before leaving the function which allocated it. See next
+	section.
+      </para>
+    </sect1>
+
+    <sect1 id="debug_object_init_on_stack">
+      <title>debug_object_init_on_stack</title>
+      <para>
+	This function is called whenever the initialization function
+	of a real object which resides on the stack is called.
+      </para>
+      <para>
+	When the real object is already tracked by debugobjects it is
+	checked, whether the object can be initialized. Initializing
+	is not allowed for active and destroyed objects. When
+	debugobjects detects an error, then it calls the fixup_init
+	function of the object type description structure if provided
+	by the caller. The fixup function can correct the problem
+	before the real initialization of the object happens. E.g. it
+	can deactivate an active object in order to prevent damage to
+	the subsystem.
+      </para>
+      <para>
+	When the real object is not yet tracked by debugobjects
+	debugobjects allocates a tracker object for the real object
+	and sets the tracker object state to ODEBUG_STATE_INIT. It
+	verifies that the object is on the callers stack.
+      </para>
+      <para>
+	An object which is on the stack must be removed from the
+	tracker by calling debug_object_free() before the function
+	which allocates the object returns. Otherwise we keep track of
+	stale objects.
+      </para>
+    </sect1>
+
+    <sect1 id="debug_object_activate">
+      <title>debug_object_activate</title>
+      <para>
+	This function is called whenever the activation function of a
+	real object is called.
+      </para>
+      <para>
+	When the real object is already tracked by debugobjects it is
+	checked, whether the object can be activated.  Activating is
+	not allowed for active and destroyed objects. When
+	debugobjects detects an error, then it calls the
+	fixup_activate function of the object type description
+	structure if provided by the caller. The fixup function can
+	correct the problem before the real activation of the object
+	happens. E.g. it can deactivate an active object in order to
+	prevent damage to the subsystem.
+      </para>
+      <para>
+	When the real object is not yet tracked by debugobjects then
+	the fixup_activate function is called if available. This is
+	necessary to allow the legitimate activation of statically
+	allocated and initialized objects. The fixup function checks
+	whether the object is valid and calls the debug_objects_init()
+	function to initialize the tracking of this object.
+      </para>
+      <para>
+	When the activation is legitimate, then the state of the
+	associated tracker object is set to ODEBUG_STATE_ACTIVE.
+      </para>
+    </sect1>
+
+    <sect1 id="debug_object_deactivate">
+      <title>debug_object_deactivate</title>
+      <para>
+	This function is called whenever the deactivation function of
+	a real object is called.
+      </para>
+      <para>
+	When the real object is tracked by debugobjects it is checked,
+	whether the object can be deactivated. Deactivating is not
+	allowed for untracked or destroyed objects.
+      </para>
+      <para>
+	When the deactivation is legitimate, then the state of the
+	associated tracker object is set to ODEBUG_STATE_INACTIVE.
+      </para>
+    </sect1>
+
+    <sect1 id="debug_object_destroy">
+      <title>debug_object_destroy</title>
+      <para>
+	This function is called to mark an object destroyed. This is
+	useful to prevent the usage of invalid objects, which are
+	still available in memory: either statically allocated objects
+	or objects which are freed later.
+      </para>
+      <para>
+	When the real object is tracked by debugobjects it is checked,
+	whether the object can be destroyed. Destruction is not
+	allowed for active and destroyed objects. When debugobjects
+	detects an error, then it calls the fixup_destroy function of
+	the object type description structure if provided by the
+	caller. The fixup function can correct the problem before the
+	real destruction of the object happens. E.g. it can deactivate
+	an active object in order to prevent damage to the subsystem.
+      </para>
+      <para>
+	When the destruction is legitimate, then the state of the
+	associated tracker object is set to ODEBUG_STATE_DESTROYED.
+      </para>
+    </sect1>
+
+    <sect1 id="debug_object_free">
+      <title>debug_object_free</title>
+      <para>
+	This function is called before an object is freed.
+      </para>
+      <para>
+	When the real object is tracked by debugobjects it is checked,
+	whether the object can be freed. Free is not allowed for
+	active objects. When debugobjects detects an error, then it
+	calls the fixup_free function of the object type description
+	structure if provided by the caller. The fixup function can
+	correct the problem before the real free of the object
+	happens. E.g. it can deactivate an active object in order to
+	prevent damage to the subsystem.
+      </para>
+      <para>
+	Note that debug_object_free removes the object from the
+	tracker. Later usage of the object is detected by the other
+	debug checks.
+      </para>
+    </sect1>
+  </chapter>
+  <chapter id="fixupfunctions">
+    <title>Fixup functions</title>
+    <sect1 id="debug_obj_descr">
+      <title>Debug object type description structure</title>
+!Iinclude/linux/debugobjects.h
+    </sect1>
+    <sect1 id="fixup_init">
+      <title>fixup_init</title>
+      <para>
+	This function is called from the debug code whenever a problem
+	in debug_object_init is detected. The function takes the
+	address of the object and the state which is currently
+	recorded in the tracker.
+      </para>
+      <para>
+	Called from debug_object_init when the object state is:
+	<itemizedlist>
+	  <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem>
+	</itemizedlist>
+      </para>
+      <para>
+	The function returns 1 when the fixup was successful,
+	otherwise 0. The return value is used to update the
+	statistics.
+      </para>
+      <para>
+	Note, that the function needs to call the debug_object_init()
+	function again, after the damage has been repaired in order to
+	keep the state consistent.
+      </para>
+    </sect1>
+
+    <sect1 id="fixup_activate">
+      <title>fixup_activate</title>
+      <para>
+	This function is called from the debug code whenever a problem
+	in debug_object_activate is detected.
+      </para>
+      <para>
+	Called from debug_object_activate when the object state is:
+	<itemizedlist>
+	  <listitem><para>ODEBUG_STATE_NOTAVAILABLE</para></listitem>
+	  <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem>
+	</itemizedlist>
+      </para>
+      <para>
+	The function returns 1 when the fixup was successful,
+	otherwise 0. The return value is used to update the
+	statistics.
+      </para>
+      <para>
+	Note that the function needs to call the debug_object_activate()
+	function again after the damage has been repaired in order to
+	keep the state consistent.
+      </para>
+      <para>
+	The activation of statically initialized objects is a special
+	case. When debug_object_activate() has no tracked object for
+	this object address then fixup_activate() is called with
+	object state ODEBUG_STATE_NOTAVAILABLE. The fixup function
+	needs to check whether this is a legitimate case of a
+	statically initialized object or not. In case it is it calls
+	debug_object_init() and debug_object_activate() to make the
+	object known to the tracker and marked active. In this case
+	the function should return 0 because this is not a real fixup.
+      </para>
+    </sect1>
+
+    <sect1 id="fixup_destroy">
+      <title>fixup_destroy</title>
+      <para>
+	This function is called from the debug code whenever a problem
+	in debug_object_destroy is detected.
+      </para>
+      <para>
+	Called from debug_object_destroy when the object state is:
+	<itemizedlist>
+	  <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem>
+	</itemizedlist>
+      </para>
+      <para>
+	The function returns 1 when the fixup was successful,
+	otherwise 0. The return value is used to update the
+	statistics.
+      </para>
+    </sect1>
+    <sect1 id="fixup_free">
+      <title>fixup_free</title>
+      <para>
+	This function is called from the debug code whenever a problem
+	in debug_object_free is detected. Further it can be called
+	from the debug checks in kfree/vfree, when an active object is
+	detected from the debug_check_no_obj_freed() sanity checks.
+      </para>
+      <para>
+	Called from debug_object_free() or debug_check_no_obj_freed()
+	when the object state is:
+	<itemizedlist>
+	  <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem>
+	</itemizedlist>
+      </para>
+      <para>
+	The function returns 1 when the fixup was successful,
+	otherwise 0. The return value is used to update the
+	statistics.
+      </para>
+    </sect1>
+  </chapter>
+  <chapter id="bugs">
+    <title>Known Bugs And Assumptions</title>
+    <para>
+	None (knock on wood).
+    </para>
+  </chapter>
+</book>
diff --git a/Documentation/DocBook/deviceiobook.tmpl b/Documentation/DocBook/deviceiobook.tmpl
new file mode 100644
index 0000000..3ed8812
--- /dev/null
+++ b/Documentation/DocBook/deviceiobook.tmpl
@@ -0,0 +1,323 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="DoingIO">
+ <bookinfo>
+  <title>Bus-Independent Device Accesses</title>
+  
+  <authorgroup>
+   <author>
+    <firstname>Matthew</firstname>
+    <surname>Wilcox</surname>
+    <affiliation>
+     <address>
+      <email>matthew@wil.cx</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <authorgroup>
+   <author>
+    <firstname>Alan</firstname>
+    <surname>Cox</surname>
+    <affiliation>
+     <address>
+      <email>alan@lxorguk.ukuu.org.uk</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <copyright>
+   <year>2001</year>
+   <holder>Matthew Wilcox</holder>
+  </copyright>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License as published by the Free Software Foundation; either
+     version 2 of the License, or (at your option) any later
+     version.
+   </para>
+      
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+      
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+      
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+  <chapter id="intro">
+      <title>Introduction</title>
+  <para>
+	Linux provides an API which abstracts performing IO across all busses
+	and devices, allowing device drivers to be written independently of
+	bus type.
+  </para>
+  </chapter>
+
+  <chapter id="bugs">
+     <title>Known Bugs And Assumptions</title>
+  <para>
+	None.	
+  </para>
+  </chapter>
+
+  <chapter id="mmio">
+    <title>Memory Mapped IO</title>
+    <sect1 id="getting_access_to_the_device">
+      <title>Getting Access to the Device</title>
+      <para>
+	The most widely supported form of IO is memory mapped IO.
+	That is, a part of the CPU's address space is interpreted
+	not as accesses to memory, but as accesses to a device.  Some
+	architectures define devices to be at a fixed address, but most
+	have some method of discovering devices.  The PCI bus walk is a
+	good example of such a scheme.	This document does not cover how
+	to receive such an address, but assumes you are starting with one.
+	Physical addresses are of type unsigned long. 
+      </para>
+
+      <para>
+	This address should not be used directly.  Instead, to get an
+	address suitable for passing to the accessor functions described
+	below, you should call <function>ioremap</function>.
+	An address suitable for accessing the device will be returned to you.
+      </para>
+
+      <para>
+	After you've finished using the device (say, in your module's
+	exit routine), call <function>iounmap</function> in order to return
+	the address space to the kernel.  Most architectures allocate new
+	address space each time you call <function>ioremap</function>, and
+	they can run out unless you call <function>iounmap</function>.
+      </para>
+    </sect1>
+
+    <sect1 id="accessing_the_device">
+      <title>Accessing the device</title>
+      <para>
+	The part of the interface most used by drivers is reading and
+	writing memory-mapped registers on the device.	Linux provides
+	interfaces to read and write 8-bit, 16-bit, 32-bit and 64-bit
+	quantities.  Due to a historical accident, these are named byte,
+	word, long and quad accesses.  Both read and write accesses are
+	supported; there is no prefetch support at this time.
+      </para>
+
+      <para>
+	The functions are named <function>readb</function>,
+	<function>readw</function>, <function>readl</function>,
+	<function>readq</function>, <function>readb_relaxed</function>,
+	<function>readw_relaxed</function>, <function>readl_relaxed</function>,
+	<function>readq_relaxed</function>, <function>writeb</function>,
+	<function>writew</function>, <function>writel</function> and
+	<function>writeq</function>.
+      </para>
+
+      <para>
+	Some devices (such as framebuffers) would like to use larger
+	transfers than 8 bytes at a time.  For these devices, the
+	<function>memcpy_toio</function>, <function>memcpy_fromio</function>
+	and <function>memset_io</function> functions are provided.
+	Do not use memset or memcpy on IO addresses; they
+	are not guaranteed to copy data in order.
+      </para>
+
+      <para>
+	The read and write functions are defined to be ordered. That is the
+	compiler is not permitted to reorder the I/O sequence. When the 
+	ordering can be compiler optimised, you can use <function>
+	__readb</function> and friends to indicate the relaxed ordering. Use 
+	this with care.
+      </para>
+
+      <para>
+	While the basic functions are defined to be synchronous with respect
+	to each other and ordered with respect to each other the busses the
+	devices sit on may themselves have asynchronicity. In particular many
+	authors are burned by the fact that PCI bus writes are posted
+	asynchronously. A driver author must issue a read from the same
+	device to ensure that writes have occurred in the specific cases the
+	author cares. This kind of property cannot be hidden from driver
+	writers in the API.  In some cases, the read used to flush the device
+	may be expected to fail (if the card is resetting, for example).  In
+	that case, the read should be done from config space, which is
+	guaranteed to soft-fail if the card doesn't respond.
+      </para>
+
+      <para>
+	The following is an example of flushing a write to a device when
+	the driver would like to ensure the write's effects are visible prior
+	to continuing execution.
+      </para>
+
+<programlisting>
+static inline void
+qla1280_disable_intrs(struct scsi_qla_host *ha)
+{
+	struct device_reg *reg;
+
+	reg = ha->iobase;
+	/* disable risc and host interrupts */
+	WRT_REG_WORD(&amp;reg->ictrl, 0);
+	/*
+	 * The following read will ensure that the above write
+	 * has been received by the device before we return from this
+	 * function.
+	 */
+	RD_REG_WORD(&amp;reg->ictrl);
+	ha->flags.ints_enabled = 0;
+}
+</programlisting>
+
+      <para>
+	In addition to write posting, on some large multiprocessing systems
+	(e.g. SGI Challenge, Origin and Altix machines) posted writes won't
+	be strongly ordered coming from different CPUs.  Thus it's important
+	to properly protect parts of your driver that do memory-mapped writes
+	with locks and use the <function>mmiowb</function> to make sure they
+	arrive in the order intended.  Issuing a regular <function>readX
+	</function> will also ensure write ordering, but should only be used
+	when the driver has to be sure that the write has actually arrived
+	at the device (not that it's simply ordered with respect to other
+	writes), since a full <function>readX</function> is a relatively
+	expensive operation.
+      </para>
+
+      <para>
+	Generally, one should use <function>mmiowb</function> prior to
+	releasing a spinlock that protects regions using <function>writeb
+	</function> or similar functions that aren't surrounded by <function>
+	readb</function> calls, which will ensure ordering and flushing.  The
+	following pseudocode illustrates what might occur if write ordering
+	isn't guaranteed via <function>mmiowb</function> or one of the
+	<function>readX</function> functions.
+      </para>
+
+<programlisting>
+CPU A:  spin_lock_irqsave(&amp;dev_lock, flags)
+CPU A:  ...
+CPU A:  writel(newval, ring_ptr);
+CPU A:  spin_unlock_irqrestore(&amp;dev_lock, flags)
+        ...
+CPU B:  spin_lock_irqsave(&amp;dev_lock, flags)
+CPU B:  writel(newval2, ring_ptr);
+CPU B:  ...
+CPU B:  spin_unlock_irqrestore(&amp;dev_lock, flags)
+</programlisting>
+
+      <para>
+	In the case above, newval2 could be written to ring_ptr before
+	newval.  Fixing it is easy though:
+      </para>
+
+<programlisting>
+CPU A:  spin_lock_irqsave(&amp;dev_lock, flags)
+CPU A:  ...
+CPU A:  writel(newval, ring_ptr);
+CPU A:  mmiowb(); /* ensure no other writes beat us to the device */
+CPU A:  spin_unlock_irqrestore(&amp;dev_lock, flags)
+        ...
+CPU B:  spin_lock_irqsave(&amp;dev_lock, flags)
+CPU B:  writel(newval2, ring_ptr);
+CPU B:  ...
+CPU B:  mmiowb();
+CPU B:  spin_unlock_irqrestore(&amp;dev_lock, flags)
+</programlisting>
+
+      <para>
+	See tg3.c for a real world example of how to use <function>mmiowb
+	</function>
+      </para>
+
+      <para>
+	PCI ordering rules also guarantee that PIO read responses arrive
+	after any outstanding DMA writes from that bus, since for some devices
+	the result of a <function>readb</function> call may signal to the
+	driver that a DMA transaction is complete.  In many cases, however,
+	the driver may want to indicate that the next
+	<function>readb</function> call has no relation to any previous DMA
+	writes performed by the device.  The driver can use
+	<function>readb_relaxed</function> for these cases, although only
+	some platforms will honor the relaxed semantics.  Using the relaxed
+	read functions will provide significant performance benefits on
+	platforms that support it.  The qla2xxx driver provides examples
+	of how to use <function>readX_relaxed</function>.  In many cases,
+	a majority of the driver's <function>readX</function> calls can
+	safely be converted to <function>readX_relaxed</function> calls, since
+	only a few will indicate or depend on DMA completion.
+      </para>
+    </sect1>
+
+  </chapter>
+
+  <chapter id="port_space_accesses">
+    <title>Port Space Accesses</title>
+    <sect1 id="port_space_explained">
+      <title>Port Space Explained</title>
+
+      <para>
+	Another form of IO commonly supported is Port Space.  This is a
+	range of addresses separate to the normal memory address space.
+	Access to these addresses is generally not as fast as accesses
+	to the memory mapped addresses, and it also has a potentially
+	smaller address space.
+      </para>
+
+      <para>
+	Unlike memory mapped IO, no preparation is required
+	to access port space.
+      </para>
+
+    </sect1>
+    <sect1 id="accessing_port_space">
+      <title>Accessing Port Space</title>
+      <para>
+	Accesses to this space are provided through a set of functions
+	which allow 8-bit, 16-bit and 32-bit accesses; also
+	known as byte, word and long.  These functions are
+	<function>inb</function>, <function>inw</function>,
+	<function>inl</function>, <function>outb</function>,
+	<function>outw</function> and <function>outl</function>.
+      </para>
+
+      <para>
+	Some variants are provided for these functions.  Some devices
+	require that accesses to their ports are slowed down.  This
+	functionality is provided by appending a <function>_p</function>
+	to the end of the function.  There are also equivalents to memcpy.
+	The <function>ins</function> and <function>outs</function>
+	functions copy bytes, words or longs to the given port.
+      </para>
+    </sect1>
+
+  </chapter>
+
+  <chapter id="pubfunctions">
+     <title>Public Functions Provided</title>
+!Iarch/x86/include/asm/io_32.h
+!Elib/iomap.c
+  </chapter>
+
+</book>
diff --git a/Documentation/DocBook/filesystems.tmpl b/Documentation/DocBook/filesystems.tmpl
new file mode 100644
index 0000000..5e87ad5
--- /dev/null
+++ b/Documentation/DocBook/filesystems.tmpl
@@ -0,0 +1,421 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="Linux-filesystems-API">
+ <bookinfo>
+  <title>Linux Filesystems API</title>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License as published by the Free Software Foundation; either
+     version 2 of the License, or (at your option) any later
+     version.
+   </para>
+
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+  <chapter id="vfs">
+     <title>The Linux VFS</title>
+     <sect1 id="the_filesystem_types"><title>The Filesystem types</title>
+!Iinclude/linux/fs.h
+     </sect1>
+     <sect1 id="the_directory_cache"><title>The Directory Cache</title>
+!Efs/dcache.c
+!Iinclude/linux/dcache.h
+     </sect1>
+     <sect1 id="inode_handling"><title>Inode Handling</title>
+!Efs/inode.c
+!Efs/bad_inode.c
+     </sect1>
+     <sect1 id="registration_and_superblocks"><title>Registration and Superblocks</title>
+!Efs/super.c
+     </sect1>
+     <sect1 id="file_locks"><title>File Locks</title>
+!Efs/locks.c
+!Ifs/locks.c
+     </sect1>
+     <sect1 id="other_functions"><title>Other Functions</title>
+!Efs/mpage.c
+!Efs/namei.c
+!Efs/buffer.c
+!Efs/bio.c
+!Efs/seq_file.c
+!Efs/filesystems.c
+!Efs/fs-writeback.c
+!Efs/block_dev.c
+     </sect1>
+  </chapter>
+
+  <chapter id="proc">
+     <title>The proc filesystem</title>
+
+     <sect1 id="sysctl_interface"><title>sysctl interface</title>
+!Ekernel/sysctl.c
+     </sect1>
+
+     <sect1 id="proc_filesystem_interface"><title>proc filesystem interface</title>
+!Ifs/proc/base.c
+     </sect1>
+  </chapter>
+
+  <chapter id="sysfs">
+     <title>The Filesystem for Exporting Kernel Objects</title>
+!Efs/sysfs/file.c
+!Efs/sysfs/symlink.c
+!Efs/sysfs/bin.c
+  </chapter>
+
+  <chapter id="debugfs">
+     <title>The debugfs filesystem</title>
+
+     <sect1 id="debugfs_interface"><title>debugfs interface</title>
+!Efs/debugfs/inode.c
+!Efs/debugfs/file.c
+     </sect1>
+  </chapter>
+
+  <chapter id="LinuxJDBAPI">
+  <chapterinfo>
+  <title>The Linux Journalling API</title>
+
+  <authorgroup>
+  <author>
+     <firstname>Roger</firstname>
+     <surname>Gammans</surname>
+     <affiliation>
+     <address>
+      <email>rgammans@computer-surgery.co.uk</email>
+     </address>
+    </affiliation>
+     </author>
+  </authorgroup>
+
+  <authorgroup>
+   <author>
+    <firstname>Stephen</firstname>
+    <surname>Tweedie</surname>
+    <affiliation>
+     <address>
+      <email>sct@redhat.com</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <copyright>
+   <year>2002</year>
+   <holder>Roger Gammans</holder>
+  </copyright>
+  </chapterinfo>
+
+  <title>The Linux Journalling API</title>
+
+    <sect1 id="journaling_overview">
+     <title>Overview</title>
+    <sect2 id="journaling_details">
+     <title>Details</title>
+<para>
+The journalling layer is  easy to use. You need to
+first of all create a journal_t data structure. There are
+two calls to do this dependent on how you decide to allocate the physical
+media on which the journal resides. The journal_init_inode() call
+is for journals stored in filesystem inodes, or the journal_init_dev()
+call can be use for journal stored on a raw device (in a continuous range
+of blocks). A journal_t is a typedef for a struct pointer, so when
+you are finally finished make sure you call journal_destroy() on it
+to free up any used kernel memory.
+</para>
+
+<para>
+Once you have got your journal_t object you need to 'mount' or load the journal
+file, unless of course you haven't initialised it yet - in which case you
+need to call journal_create().
+</para>
+
+<para>
+Most of the time however your journal file will already have been created, but
+before you load it you must call journal_wipe() to empty the journal file.
+Hang on, you say , what if the filesystem wasn't cleanly umount()'d . Well, it is the
+job of the client file system to detect this and skip the call to journal_wipe().
+</para>
+
+<para>
+In either case the next call should be to journal_load() which prepares the
+journal file for use. Note that journal_wipe(..,0) calls journal_skip_recovery()
+for you if it detects any outstanding transactions in the journal and similarly
+journal_load() will call journal_recover() if necessary.
+I would advise reading fs/ext3/super.c for examples on this stage.
+[RGG: Why is the journal_wipe() call necessary - doesn't this needlessly
+complicate the API. Or isn't a good idea for the journal layer to hide
+dirty mounts from the client fs]
+</para>
+
+<para>
+Now you can go ahead and start modifying the underlying
+filesystem. Almost.
+</para>
+
+<para>
+
+You still need to actually journal your filesystem changes, this
+is done by wrapping them into transactions. Additionally you
+also need to wrap the modification of each of the buffers
+with calls to the journal layer, so it knows what the modifications
+you are actually making are. To do this use  journal_start() which
+returns a transaction handle.
+</para>
+
+<para>
+journal_start()
+and its counterpart journal_stop(), which indicates the end of a transaction
+are nestable calls, so you can reenter a transaction if necessary,
+but remember you must call journal_stop() the same number of times as
+journal_start() before the transaction is completed (or more accurately
+leaves the update phase). Ext3/VFS makes use of this feature to simplify
+quota support.
+</para>
+
+<para>
+Inside each transaction you need to wrap the modifications to the
+individual buffers (blocks). Before you start to modify a buffer you
+need to call journal_get_{create,write,undo}_access() as appropriate,
+this allows the journalling layer to copy the unmodified data if it
+needs to. After all the buffer may be part of a previously uncommitted
+transaction.
+At this point you are at last ready to modify a buffer, and once
+you are have done so you need to call journal_dirty_{meta,}data().
+Or if you've asked for access to a buffer you now know is now longer
+required to be pushed back on the device you can call journal_forget()
+in much the same way as you might have used bforget() in the past.
+</para>
+
+<para>
+A journal_flush() may be called at any time to commit and checkpoint
+all your transactions.
+</para>
+
+<para>
+Then at umount time , in your put_super() (2.4) or write_super() (2.5)
+you can then call journal_destroy() to clean up your in-core journal object.
+</para>
+
+<para>
+Unfortunately there a couple of ways the journal layer can cause a deadlock.
+The first thing to note is that each task can only have
+a single outstanding transaction at any one time, remember nothing
+commits until the outermost journal_stop(). This means
+you must complete the transaction at the end of each file/inode/address
+etc. operation you perform, so that the journalling system isn't re-entered
+on another journal. Since transactions can't be nested/batched
+across differing journals, and another filesystem other than
+yours (say ext3) may be modified in a later syscall.
+</para>
+
+<para>
+The second case to bear in mind is that journal_start() can
+block if there isn't enough space in the journal for your transaction
+(based on the passed nblocks param) - when it blocks it merely(!) needs to
+wait for transactions to complete and be committed from other tasks,
+so essentially we are waiting for journal_stop(). So to avoid
+deadlocks you must treat journal_start/stop() as if they
+were semaphores and include them in your semaphore ordering rules to prevent
+deadlocks. Note that journal_extend() has similar blocking behaviour to
+journal_start() so you can deadlock here just as easily as on journal_start().
+</para>
+
+<para>
+Try to reserve the right number of blocks the first time. ;-). This will
+be the maximum number of blocks you are going to touch in this transaction.
+I advise having a look at at least ext3_jbd.h to see the basis on which
+ext3 uses to make these decisions.
+</para>
+
+<para>
+Another wriggle to watch out for is your on-disk block allocation strategy.
+why? Because, if you undo a delete, you need to ensure you haven't reused any
+of the freed blocks in a later transaction. One simple way of doing this
+is make sure any blocks you allocate only have checkpointed transactions
+listed against them. Ext3 does this in ext3_test_allocatable().
+</para>
+
+<para>
+Lock is also providing through journal_{un,}lock_updates(),
+ext3 uses this when it wants a window with a clean and stable fs for a moment.
+eg.
+</para>
+
+<programlisting>
+
+	journal_lock_updates() //stop new stuff happening..
+	journal_flush()        // checkpoint everything.
+	..do stuff on stable fs
+	journal_unlock_updates() // carry on with filesystem use.
+</programlisting>
+
+<para>
+The opportunities for abuse and DOS attacks with this should be obvious,
+if you allow unprivileged userspace to trigger codepaths containing these
+calls.
+</para>
+
+<para>
+A new feature of jbd since 2.5.25 is commit callbacks with the new
+journal_callback_set() function you can now ask the journalling layer
+to call you back when the transaction is finally committed to disk, so that
+you can do some of your own management. The key to this is the journal_callback
+struct, this maintains the internal callback information but you can
+extend it like this:-
+</para>
+<programlisting>
+	struct  myfs_callback_s {
+		//Data structure element required by jbd..
+		struct journal_callback for_jbd;
+		// Stuff for myfs allocated together.
+		myfs_inode*    i_commited;
+
+	}
+</programlisting>
+
+<para>
+this would be useful if you needed to know when data was committed to a
+particular inode.
+</para>
+
+    </sect2>
+
+    <sect2 id="jbd_summary">
+     <title>Summary</title>
+<para>
+Using the journal is a matter of wrapping the different context changes,
+being each mount, each modification (transaction) and each changed buffer
+to tell the journalling layer about them.
+</para>
+
+<para>
+Here is a some pseudo code to give you an idea of how it works, as
+an example.
+</para>
+
+<programlisting>
+  journal_t* my_jnrl = journal_create();
+  journal_init_{dev,inode}(jnrl,...)
+  if (clean) journal_wipe();
+  journal_load();
+
+   foreach(transaction) { /*transactions must be
+                            completed before
+                            a syscall returns to
+                            userspace*/
+
+          handle_t * xct=journal_start(my_jnrl);
+          foreach(bh) {
+                journal_get_{create,write,undo}_access(xact,bh);
+                if ( myfs_modify(bh) ) { /* returns true
+                                        if makes changes */
+                           journal_dirty_{meta,}data(xact,bh);
+                } else {
+                           journal_forget(bh);
+                }
+          }
+          journal_stop(xct);
+   }
+   journal_destroy(my_jrnl);
+</programlisting>
+    </sect2>
+
+    </sect1>
+
+    <sect1 id="data_types">
+     <title>Data Types</title>
+     <para>
+	The journalling layer uses typedefs to 'hide' the concrete definitions
+	of the structures used. As a client of the JBD layer you can
+	just rely on the using the pointer as a magic cookie  of some sort.
+
+	Obviously the hiding is not enforced as this is 'C'.
+     </para>
+	<sect2 id="structures"><title>Structures</title>
+!Iinclude/linux/jbd.h
+	</sect2>
+    </sect1>
+
+    <sect1 id="functions">
+     <title>Functions</title>
+     <para>
+	The functions here are split into two groups those that
+	affect a journal as a whole, and those which are used to
+	manage transactions
+     </para>
+	<sect2 id="journal_level"><title>Journal Level</title>
+!Efs/jbd/journal.c
+!Ifs/jbd/recovery.c
+	</sect2>
+	<sect2 id="transaction_level"><title>Transasction Level</title>
+!Efs/jbd/transaction.c
+	</sect2>
+    </sect1>
+    <sect1 id="see_also">
+     <title>See also</title>
+	<para>
+	  <citation>
+	   <ulink url="ftp://ftp.uk.linux.org/pub/linux/sct/fs/jfs/journal-design.ps.gz">
+	   	Journaling the Linux ext2fs Filesystem, LinuxExpo 98, Stephen Tweedie
+	   </ulink>
+	  </citation>
+	</para>
+	<para>
+	   <citation>
+	   <ulink url="http://olstrans.sourceforge.net/release/OLS2000-ext3/OLS2000-ext3.html">
+	   	Ext3 Journalling FileSystem, OLS 2000, Dr. Stephen Tweedie
+	   </ulink>
+	   </citation>
+	</para>
+    </sect1>
+
+  </chapter>
+
+  <chapter id="splice">
+      <title>splice API</title>
+  <para>
+	splice is a method for moving blocks of data around inside the
+	kernel, without continually transferring them between the kernel
+	and user space.
+  </para>
+!Ffs/splice.c
+  </chapter>
+
+  <chapter id="pipes">
+      <title>pipes API</title>
+  <para>
+	Pipe interfaces are all for in-kernel (builtin image) use.
+	They are not exported for use by modules.
+  </para>
+!Iinclude/linux/pipe_fs_i.h
+!Ffs/pipe.c
+  </chapter>
+
+</book>
diff --git a/Documentation/DocBook/gadget.tmpl b/Documentation/DocBook/gadget.tmpl
new file mode 100644
index 0000000..6ef2f00
--- /dev/null
+++ b/Documentation/DocBook/gadget.tmpl
@@ -0,0 +1,793 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="USB-Gadget-API">
+  <bookinfo>
+    <title>USB Gadget API for Linux</title>
+    <date>20 August 2004</date>
+    <edition>20 August 2004</edition>
+  
+    <legalnotice>
+       <para>
+	 This documentation is free software; you can redistribute
+	 it and/or modify it under the terms of the GNU General Public
+	 License as published by the Free Software Foundation; either
+	 version 2 of the License, or (at your option) any later
+	 version.
+       </para>
+	  
+       <para>
+	 This program is distributed in the hope that it will be
+	 useful, but WITHOUT ANY WARRANTY; without even the implied
+	 warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+	 See the GNU General Public License for more details.
+       </para>
+	  
+       <para>
+	 You should have received a copy of the GNU General Public
+	 License along with this program; if not, write to the Free
+	 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+	 MA 02111-1307 USA
+       </para>
+	  
+       <para>
+	 For more details see the file COPYING in the source
+	 distribution of Linux.
+       </para>
+    </legalnotice>
+    <copyright>
+      <year>2003-2004</year>
+      <holder>David Brownell</holder>
+    </copyright>
+
+    <author>
+      <firstname>David</firstname> 
+      <surname>Brownell</surname>
+      <affiliation>
+        <address><email>dbrownell@users.sourceforge.net</email></address>
+      </affiliation>
+    </author>
+  </bookinfo>
+
+<toc></toc>
+
+<chapter id="intro"><title>Introduction</title>
+
+<para>This document presents a Linux-USB "Gadget"
+kernel mode
+API, for use within peripherals and other USB devices
+that embed Linux.
+It provides an overview of the API structure,
+and shows how that fits into a system development project.
+This is the first such API released on Linux to address
+a number of important problems, including: </para>
+
+<itemizedlist>
+    <listitem><para>Supports USB 2.0, for high speed devices which
+	can stream data at several dozen megabytes per second.
+	</para></listitem>
+    <listitem><para>Handles devices with dozens of endpoints just as
+	well as ones with just two fixed-function ones.  Gadget drivers
+	can be written so they're easy to port to new hardware.
+	</para></listitem>
+    <listitem><para>Flexible enough to expose more complex USB device
+	capabilities such as multiple configurations, multiple interfaces,
+	composite devices,
+	and alternate interface settings.
+	</para></listitem>
+    <listitem><para>USB "On-The-Go" (OTG) support, in conjunction
+	with updates to the Linux-USB host side.
+	</para></listitem>
+    <listitem><para>Sharing data structures and API models with the
+	Linux-USB host side API.  This helps the OTG support, and
+	looks forward to more-symmetric frameworks (where the same
+	I/O model is used by both host and device side drivers).
+	</para></listitem>
+    <listitem><para>Minimalist, so it's easier to support new device
+	controller hardware.  I/O processing doesn't imply large
+	demands for memory or CPU resources.
+	</para></listitem>
+</itemizedlist>
+
+
+<para>Most Linux developers will not be able to use this API, since they
+have USB "host" hardware in a PC, workstation, or server.
+Linux users with embedded systems are more likely to
+have USB peripheral hardware.
+To distinguish drivers running inside such hardware from the
+more familiar Linux "USB device drivers",
+which are host side proxies for the real USB devices,
+a different term is used:
+the drivers inside the peripherals are "USB gadget drivers".
+In USB protocol interactions, the device driver is the master
+(or "client driver")
+and the gadget driver is the slave (or "function driver").
+</para>
+
+<para>The gadget API resembles the host side Linux-USB API in that both
+use queues of request objects to package I/O buffers, and those requests
+may be submitted or canceled.
+They share common definitions for the standard USB
+<emphasis>Chapter 9</emphasis> messages, structures, and constants.
+Also, both APIs bind and unbind drivers to devices.
+The APIs differ in detail, since the host side's current
+URB framework exposes a number of implementation details
+and assumptions that are inappropriate for a gadget API.
+While the model for control transfers and configuration
+management is necessarily different (one side is a hardware-neutral master,
+the other is a hardware-aware slave), the endpoint I/0 API used here
+should also be usable for an overhead-reduced host side API.
+</para>
+
+</chapter>
+
+<chapter id="structure"><title>Structure of Gadget Drivers</title>
+
+<para>A system running inside a USB peripheral
+normally has at least three layers inside the kernel to handle
+USB protocol processing, and may have additional layers in
+user space code.
+The "gadget" API is used by the middle layer to interact
+with the lowest level (which directly handles hardware).
+</para>
+
+<para>In Linux, from the bottom up, these layers are:
+</para>
+
+<variablelist>
+
+    <varlistentry>
+        <term><emphasis>USB Controller Driver</emphasis></term>
+
+	<listitem>
+	<para>This is the lowest software level.
+	It is the only layer that talks to hardware,
+	through registers, fifos, dma, irqs, and the like.
+	The <filename>&lt;linux/usb/gadget.h&gt;</filename> API abstracts
+	the peripheral controller endpoint hardware.
+	That hardware is exposed through endpoint objects, which accept
+	streams of IN/OUT buffers, and through callbacks that interact
+	with gadget drivers.
+	Since normal USB devices only have one upstream
+	port, they only have one of these drivers.
+	The controller driver can support any number of different
+	gadget drivers, but only one of them can be used at a time.
+	</para>
+
+	<para>Examples of such controller hardware include
+	the PCI-based NetChip 2280 USB 2.0 high speed controller,
+	the SA-11x0 or PXA-25x UDC (found within many PDAs),
+	and a variety of other products.
+	</para>
+
+	</listitem></varlistentry>
+
+    <varlistentry>
+	<term><emphasis>Gadget Driver</emphasis></term>
+
+	<listitem>
+	<para>The lower boundary of this driver implements hardware-neutral
+	USB functions, using calls to the controller driver.
+	Because such hardware varies widely in capabilities and restrictions,
+	and is used in embedded environments where space is at a premium,
+	the gadget driver is often configured at compile time
+	to work with endpoints supported by one particular controller.
+	Gadget drivers may be portable to several different controllers,
+	using conditional compilation.
+	(Recent kernels substantially simplify the work involved in
+	supporting new hardware, by <emphasis>autoconfiguring</emphasis>
+	endpoints automatically for many bulk-oriented drivers.)
+	Gadget driver responsibilities include:
+	</para>
+	<itemizedlist>
+	    <listitem><para>handling setup requests (ep0 protocol responses)
+		possibly including class-specific functionality
+		</para></listitem>
+	    <listitem><para>returning configuration and string descriptors
+		</para></listitem>
+	    <listitem><para>(re)setting configurations and interface
+		altsettings, including enabling and configuring endpoints
+		</para></listitem>
+	    <listitem><para>handling life cycle events, such as managing
+		bindings to hardware,
+		USB suspend/resume, remote wakeup,
+		and disconnection from the USB host.
+		</para></listitem>
+	    <listitem><para>managing IN and OUT transfers on all currently
+		enabled endpoints
+		</para></listitem>
+	</itemizedlist>
+
+	<para>
+	Such drivers may be modules of proprietary code, although
+	that approach is discouraged in the Linux community.
+	</para>
+	</listitem></varlistentry>
+
+    <varlistentry>
+	<term><emphasis>Upper Level</emphasis></term>
+
+	<listitem>
+	<para>Most gadget drivers have an upper boundary that connects
+	to some Linux driver or framework in Linux.
+	Through that boundary flows the data which the gadget driver
+	produces and/or consumes through protocol transfers over USB.
+	Examples include:
+	</para>
+	<itemizedlist>
+	    <listitem><para>user mode code, using generic (gadgetfs)
+	        or application specific files in
+		<filename>/dev</filename>
+		</para></listitem>
+	    <listitem><para>networking subsystem (for network gadgets,
+		like the CDC Ethernet Model gadget driver)
+		</para></listitem>
+	    <listitem><para>data capture drivers, perhaps video4Linux or
+		 a scanner driver; or test and measurement hardware.
+		 </para></listitem>
+	    <listitem><para>input subsystem (for HID gadgets)
+		</para></listitem>
+	    <listitem><para>sound subsystem (for audio gadgets)
+		</para></listitem>
+	    <listitem><para>file system (for PTP gadgets)
+		</para></listitem>
+	    <listitem><para>block i/o subsystem (for usb-storage gadgets)
+		</para></listitem>
+	    <listitem><para>... and more </para></listitem>
+	</itemizedlist>
+	</listitem></varlistentry>
+
+    <varlistentry>
+	<term><emphasis>Additional Layers</emphasis></term>
+
+	<listitem>
+	<para>Other layers may exist.
+	These could include kernel layers, such as network protocol stacks,
+	as well as user mode applications building on standard POSIX
+	system call APIs such as
+	<emphasis>open()</emphasis>, <emphasis>close()</emphasis>,
+	<emphasis>read()</emphasis> and <emphasis>write()</emphasis>.
+	On newer systems, POSIX Async I/O calls may be an option.
+	Such user mode code will not necessarily be subject to
+	the GNU General Public License (GPL).
+	</para>
+	</listitem></varlistentry>
+
+
+</variablelist>
+
+<para>OTG-capable systems will also need to include a standard Linux-USB
+host side stack,
+with <emphasis>usbcore</emphasis>,
+one or more <emphasis>Host Controller Drivers</emphasis> (HCDs),
+<emphasis>USB Device Drivers</emphasis> to support
+the OTG "Targeted Peripheral List",
+and so forth.
+There will also be an <emphasis>OTG Controller Driver</emphasis>,
+which is visible to gadget and device driver developers only indirectly.
+That helps the host and device side USB controllers implement the
+two new OTG protocols (HNP and SRP).
+Roles switch (host to peripheral, or vice versa) using HNP
+during USB suspend processing, and SRP can be viewed as a
+more battery-friendly kind of device wakeup protocol.
+</para>
+
+<para>Over time, reusable utilities are evolving to help make some
+gadget driver tasks simpler.
+For example, building configuration descriptors from vectors of
+descriptors for the configurations interfaces and endpoints is
+now automated, and many drivers now use autoconfiguration to
+choose hardware endpoints and initialize their descriptors.
+
+A potential example of particular interest
+is code implementing standard USB-IF protocols for
+HID, networking, storage, or audio classes.
+Some developers are interested in KDB or KGDB hooks, to let
+target hardware be remotely debugged.
+Most such USB protocol code doesn't need to be hardware-specific,
+any more than network protocols like X11, HTTP, or NFS are.
+Such gadget-side interface drivers should eventually be combined,
+to implement composite devices.
+</para>
+
+</chapter>
+
+
+<chapter id="api"><title>Kernel Mode Gadget API</title>
+
+<para>Gadget drivers declare themselves through a
+<emphasis>struct usb_gadget_driver</emphasis>, which is responsible for
+most parts of enumeration for a <emphasis>struct usb_gadget</emphasis>.
+The response to a set_configuration usually involves
+enabling one or more of the <emphasis>struct usb_ep</emphasis> objects
+exposed by the gadget, and submitting one or more
+<emphasis>struct usb_request</emphasis> buffers to transfer data.
+Understand those four data types, and their operations, and
+you will understand how this API works.
+</para> 
+
+<note><title>Incomplete Data Type Descriptions</title>
+
+<para>This documentation was prepared using the standard Linux
+kernel <filename>docproc</filename> tool, which turns text
+and in-code comments into SGML DocBook and then into usable
+formats such as HTML or PDF.
+Other than the "Chapter 9" data types, most of the significant
+data types and functions are described here.
+</para>
+
+<para>However, docproc does not understand all the C constructs
+that are used, so some relevant information is likely omitted from
+what you are reading.  
+One example of such information is endpoint autoconfiguration.
+You'll have to read the header file, and use example source
+code (such as that for "Gadget Zero"), to fully understand the API.
+</para>
+
+<para>The part of the API implementing some basic
+driver capabilities is specific to the version of the
+Linux kernel that's in use.
+The 2.6 kernel includes a <emphasis>driver model</emphasis>
+framework that has no analogue on earlier kernels;
+so those parts of the gadget API are not fully portable.
+(They are implemented on 2.4 kernels, but in a different way.)
+The driver model state is another part of this API that is
+ignored by the kerneldoc tools.
+</para>
+</note>
+
+<para>The core API does not expose
+every possible hardware feature, only the most widely available ones.
+There are significant hardware features, such as device-to-device DMA
+(without temporary storage in a memory buffer)
+that would be added using hardware-specific APIs.
+</para>
+
+<para>This API allows drivers to use conditional compilation to handle
+endpoint capabilities of different hardware, but doesn't require that.
+Hardware tends to have arbitrary restrictions, relating to
+transfer types, addressing, packet sizes, buffering, and availability.
+As a rule, such differences only matter for "endpoint zero" logic
+that handles device configuration and management.
+The API supports limited run-time
+detection of capabilities, through naming conventions for endpoints.
+Many drivers will be able to at least partially autoconfigure
+themselves.
+In particular, driver init sections will often have endpoint
+autoconfiguration logic that scans the hardware's list of endpoints
+to find ones matching the driver requirements
+(relying on those conventions), to eliminate some of the most
+common reasons for conditional compilation.
+</para>
+
+<para>Like the Linux-USB host side API, this API exposes
+the "chunky" nature of USB messages:  I/O requests are in terms
+of one or more "packets", and packet boundaries are visible to drivers.
+Compared to RS-232 serial protocols, USB resembles
+synchronous protocols like HDLC
+(N bytes per frame, multipoint addressing, host as the primary
+station and devices as secondary stations)
+more than asynchronous ones
+(tty style:  8 data bits per frame, no parity, one stop bit).
+So for example the controller drivers won't buffer
+two single byte writes into a single two-byte USB IN packet,
+although gadget drivers may do so when they implement
+protocols where packet boundaries (and "short packets")
+are not significant.
+</para>
+
+<sect1 id="lifecycle"><title>Driver Life Cycle</title>
+
+<para>Gadget drivers make endpoint I/O requests to hardware without
+needing to know many details of the hardware, but driver
+setup/configuration code needs to handle some differences.
+Use the API like this:
+</para>
+
+<orderedlist numeration='arabic'>
+
+<listitem><para>Register a driver for the particular device side
+usb controller hardware,
+such as the net2280 on PCI (USB 2.0),
+sa11x0 or pxa25x as found in Linux PDAs,
+and so on.
+At this point the device is logically in the USB ch9 initial state
+("attached"), drawing no power and not usable
+(since it does not yet support enumeration).
+Any host should not see the device, since it's not
+activated the data line pullup used by the host to
+detect a device, even if VBUS power is available.
+</para></listitem>
+
+<listitem><para>Register a gadget driver that implements some higher level
+device function.  That will then bind() to a usb_gadget, which
+activates the data line pullup sometime after detecting VBUS.
+</para></listitem>
+
+<listitem><para>The hardware driver can now start enumerating.
+The steps it handles are to accept USB power and set_address requests.
+Other steps are handled by the gadget driver.
+If the gadget driver module is unloaded before the host starts to
+enumerate, steps before step 7 are skipped.
+</para></listitem>
+
+<listitem><para>The gadget driver's setup() call returns usb descriptors,
+based both on what the bus interface hardware provides and on the
+functionality being implemented.
+That can involve alternate settings or configurations,
+unless the hardware prevents such operation.
+For OTG devices, each configuration descriptor includes
+an OTG descriptor.
+</para></listitem>
+
+<listitem><para>The gadget driver handles the last step of enumeration,
+when the USB host issues a set_configuration call.
+It enables all endpoints used in that configuration,
+with all interfaces in their default settings.
+That involves using a list of the hardware's endpoints, enabling each
+endpoint according to its descriptor.
+It may also involve using <function>usb_gadget_vbus_draw</function>
+to let more power be drawn from VBUS, as allowed by that configuration.
+For OTG devices, setting a configuration may also involve reporting
+HNP capabilities through a user interface.
+</para></listitem>
+
+<listitem><para>Do real work and perform data transfers, possibly involving
+changes to interface settings or switching to new configurations, until the
+device is disconnect()ed from the host.
+Queue any number of transfer requests to each endpoint.
+It may be suspended and resumed several times before being disconnected.
+On disconnect, the drivers go back to step 3 (above).
+</para></listitem>
+
+<listitem><para>When the gadget driver module is being unloaded,
+the driver unbind() callback is issued.  That lets the controller
+driver be unloaded.
+</para></listitem>
+
+</orderedlist>
+
+<para>Drivers will normally be arranged so that just loading the
+gadget driver module (or statically linking it into a Linux kernel)
+allows the peripheral device to be enumerated, but some drivers
+will defer enumeration until some higher level component (like
+a user mode daemon) enables it.
+Note that at this lowest level there are no policies about how
+ep0 configuration logic is implemented,
+except that it should obey USB specifications.
+Such issues are in the domain of gadget drivers,
+including knowing about implementation constraints
+imposed by some USB controllers
+or understanding that composite devices might happen to
+be built by integrating reusable components.
+</para>
+
+<para>Note that the lifecycle above can be slightly different
+for OTG devices.
+Other than providing an additional OTG descriptor in each
+configuration, only the HNP-related differences are particularly
+visible to driver code.
+They involve reporting requirements during the SET_CONFIGURATION
+request, and the option to invoke HNP during some suspend callbacks.
+Also, SRP changes the semantics of
+<function>usb_gadget_wakeup</function>
+slightly.
+</para>
+
+</sect1>
+
+<sect1 id="ch9"><title>USB 2.0 Chapter 9 Types and Constants</title>
+
+<para>Gadget drivers
+rely on common USB structures and constants
+defined in the
+<filename>&lt;linux/usb/ch9.h&gt;</filename>
+header file, which is standard in Linux 2.6 kernels.
+These are the same types and constants used by host
+side drivers (and usbcore).
+</para>
+
+!Iinclude/linux/usb/ch9.h
+</sect1>
+
+<sect1 id="core"><title>Core Objects and Methods</title>
+
+<para>These are declared in
+<filename>&lt;linux/usb/gadget.h&gt;</filename>,
+and are used by gadget drivers to interact with
+USB peripheral controller drivers.
+</para>
+
+	<!-- yeech, this is ugly in nsgmls PDF output.
+
+	     the PDF bookmark and refentry output nesting is wrong,
+	     and the member/argument documentation indents ugly.
+
+	     plus something (docproc?) adds whitespace before the
+	     descriptive paragraph text, so it can't line up right
+	     unless the explanations are trivial.
+	  -->
+
+!Iinclude/linux/usb/gadget.h
+</sect1>
+
+<sect1 id="utils"><title>Optional Utilities</title>
+
+<para>The core API is sufficient for writing a USB Gadget Driver,
+but some optional utilities are provided to simplify common tasks.
+These utilities include endpoint autoconfiguration.
+</para>
+
+!Edrivers/usb/gadget/usbstring.c
+!Edrivers/usb/gadget/config.c
+<!-- !Edrivers/usb/gadget/epautoconf.c -->
+</sect1>
+
+<sect1 id="composite"><title>Composite Device Framework</title>
+
+<para>The core API is sufficient for writing drivers for composite
+USB devices (with more than one function in a given configuration),
+and also multi-configuration devices (also more than one function,
+but not necessarily sharing a given configuration).
+There is however an optional framework which makes it easier to
+reuse and combine functions.
+</para>
+
+<para>Devices using this framework provide a <emphasis>struct
+usb_composite_driver</emphasis>, which in turn provides one or
+more <emphasis>struct usb_configuration</emphasis> instances.
+Each such configuration includes at least one
+<emphasis>struct usb_function</emphasis>, which packages a user
+visible role such as "network link" or "mass storage device".
+Management functions may also exist, such as "Device Firmware
+Upgrade".
+</para>
+
+!Iinclude/linux/usb/composite.h
+!Edrivers/usb/gadget/composite.c
+
+</sect1>
+
+<sect1 id="functions"><title>Composite Device Functions</title>
+
+<para>At this writing, a few of the current gadget drivers have
+been converted to this framework.
+Near-term plans include converting all of them, except for "gadgetfs".
+</para>
+
+!Edrivers/usb/gadget/f_acm.c
+!Edrivers/usb/gadget/f_ecm.c
+!Edrivers/usb/gadget/f_subset.c
+!Edrivers/usb/gadget/f_obex.c
+!Edrivers/usb/gadget/f_serial.c
+
+</sect1>
+
+
+</chapter>
+
+<chapter id="controllers"><title>Peripheral Controller Drivers</title>
+
+<para>The first hardware supporting this API was the NetChip 2280
+controller, which supports USB 2.0 high speed and is based on PCI.
+This is the <filename>net2280</filename> driver module.
+The driver supports Linux kernel versions 2.4 and 2.6;
+contact NetChip Technologies for development boards and product
+information.
+</para> 
+
+<para>Other hardware working in the "gadget" framework includes:
+Intel's PXA 25x and IXP42x series processors
+(<filename>pxa2xx_udc</filename>),
+Toshiba TC86c001 "Goku-S" (<filename>goku_udc</filename>),
+Renesas SH7705/7727 (<filename>sh_udc</filename>),
+MediaQ 11xx (<filename>mq11xx_udc</filename>),
+Hynix HMS30C7202 (<filename>h7202_udc</filename>),
+National 9303/4 (<filename>n9604_udc</filename>),
+Texas Instruments OMAP (<filename>omap_udc</filename>),
+Sharp LH7A40x (<filename>lh7a40x_udc</filename>),
+and more.
+Most of those are full speed controllers.
+</para>
+
+<para>At this writing, there are people at work on drivers in
+this framework for several other USB device controllers,
+with plans to make many of them be widely available.
+</para>
+
+<!-- !Edrivers/usb/gadget/net2280.c -->
+
+<para>A partial USB simulator,
+the <filename>dummy_hcd</filename> driver, is available.
+It can act like a net2280, a pxa25x, or an sa11x0 in terms
+of available endpoints and device speeds; and it simulates
+control, bulk, and to some extent interrupt transfers.
+That lets you develop some parts of a gadget driver on a normal PC,
+without any special hardware, and perhaps with the assistance
+of tools such as GDB running with User Mode Linux.
+At least one person has expressed interest in adapting that
+approach, hooking it up to a simulator for a microcontroller.
+Such simulators can help debug subsystems where the runtime hardware
+is unfriendly to software development, or is not yet available.
+</para>
+
+<para>Support for other controllers is expected to be developed
+and contributed
+over time, as this driver framework evolves.
+</para>
+
+</chapter>
+
+<chapter id="gadget"><title>Gadget Drivers</title>
+
+<para>In addition to <emphasis>Gadget Zero</emphasis>
+(used primarily for testing and development with drivers
+for usb controller hardware), other gadget drivers exist.
+</para>
+
+<para>There's an <emphasis>ethernet</emphasis> gadget
+driver, which implements one of the most useful
+<emphasis>Communications Device Class</emphasis> (CDC) models.  
+One of the standards for cable modem interoperability even
+specifies the use of this ethernet model as one of two
+mandatory options.
+Gadgets using this code look to a USB host as if they're
+an Ethernet adapter.
+It provides access to a network where the gadget's CPU is one host,
+which could easily be bridging, routing, or firewalling
+access to other networks.
+Since some hardware can't fully implement the CDC Ethernet
+requirements, this driver also implements a "good parts only"
+subset of CDC Ethernet.
+(That subset doesn't advertise itself as CDC Ethernet,
+to avoid creating problems.)
+</para>
+
+<para>Support for Microsoft's <emphasis>RNDIS</emphasis>
+protocol has been contributed by Pengutronix and Auerswald GmbH.
+This is like CDC Ethernet, but it runs on more slightly USB hardware
+(but less than the CDC subset).
+However, its main claim to fame is being able to connect directly to
+recent versions of Windows, using drivers that Microsoft bundles
+and supports, making it much simpler to network with Windows.
+</para>
+
+<para>There is also support for user mode gadget drivers,
+using <emphasis>gadgetfs</emphasis>.
+This provides a <emphasis>User Mode API</emphasis> that presents
+each endpoint as a single file descriptor.  I/O is done using
+normal <emphasis>read()</emphasis> and <emphasis>read()</emphasis> calls.
+Familiar tools like GDB and pthreads can be used to
+develop and debug user mode drivers, so that once a robust
+controller driver is available many applications for it
+won't require new kernel mode software.
+Linux 2.6 <emphasis>Async I/O (AIO)</emphasis>
+support is available, so that user mode software
+can stream data with only slightly more overhead
+than a kernel driver.
+</para>
+
+<para>There's a USB Mass Storage class driver, which provides
+a different solution for interoperability with systems such
+as MS-Windows and MacOS.
+That <emphasis>File-backed Storage</emphasis> driver uses a
+file or block device as backing store for a drive,
+like the <filename>loop</filename> driver.
+The USB host uses the BBB, CB, or CBI versions of the mass
+storage class specification, using transparent SCSI commands
+to access the data from the backing store.
+</para>
+
+<para>There's a "serial line" driver, useful for TTY style
+operation over USB.
+The latest version of that driver supports CDC ACM style
+operation, like a USB modem, and so on most hardware it can
+interoperate easily with MS-Windows.
+One interesting use of that driver is in boot firmware (like a BIOS),
+which can sometimes use that model with very small systems without
+real serial lines.
+</para>
+
+<para>Support for other kinds of gadget is expected to
+be developed and contributed
+over time, as this driver framework evolves.
+</para>
+
+</chapter>
+
+<chapter id="otg"><title>USB On-The-GO (OTG)</title>
+
+<para>USB OTG support on Linux 2.6 was initially developed
+by Texas Instruments for
+<ulink url="http://www.omap.com">OMAP</ulink> 16xx and 17xx
+series processors.
+Other OTG systems should work in similar ways, but the
+hardware level details could be very different.
+</para> 
+
+<para>Systems need specialized hardware support to implement OTG,
+notably including a special <emphasis>Mini-AB</emphasis> jack
+and associated transciever to support <emphasis>Dual-Role</emphasis>
+operation:
+they can act either as a host, using the standard
+Linux-USB host side driver stack,
+or as a peripheral, using this "gadget" framework.
+To do that, the system software relies on small additions
+to those programming interfaces,
+and on a new internal component (here called an "OTG Controller")
+affecting which driver stack connects to the OTG port.
+In each role, the system can re-use the existing pool of
+hardware-neutral drivers, layered on top of the controller
+driver interfaces (<emphasis>usb_bus</emphasis> or
+<emphasis>usb_gadget</emphasis>).
+Such drivers need at most minor changes, and most of the calls
+added to support OTG can also benefit non-OTG products.
+</para>
+
+<itemizedlist>
+    <listitem><para>Gadget drivers test the <emphasis>is_otg</emphasis>
+	flag, and use it to determine whether or not to include
+	an OTG descriptor in each of their configurations.
+	</para></listitem>
+    <listitem><para>Gadget drivers may need changes to support the
+	two new OTG protocols, exposed in new gadget attributes
+	such as <emphasis>b_hnp_enable</emphasis> flag.
+	HNP support should be reported through a user interface
+	(two LEDs could suffice), and is triggered in some cases
+	when the host suspends the peripheral.
+	SRP support can be user-initiated just like remote wakeup,
+	probably by pressing the same button.
+	</para></listitem>
+    <listitem><para>On the host side, USB device drivers need
+	to be taught to trigger HNP at appropriate moments, using
+	<function>usb_suspend_device()</function>.
+	That also conserves battery power, which is useful even
+	for non-OTG configurations.
+	</para></listitem>
+    <listitem><para>Also on the host side, a driver must support the
+	OTG "Targeted Peripheral List".  That's just a whitelist,
+	used to reject peripherals not supported with a given
+	Linux OTG host.
+	<emphasis>This whitelist is product-specific;
+	each product must modify <filename>otg_whitelist.h</filename>
+	to match its interoperability specification.
+	</emphasis>
+	</para>
+	<para>Non-OTG Linux hosts, like PCs and workstations,
+	normally have some solution for adding drivers, so that
+	peripherals that aren't recognized can eventually be supported.
+	That approach is unreasonable for consumer products that may
+	never have their firmware upgraded, and where it's usually
+	unrealistic to expect traditional PC/workstation/server kinds
+	of support model to work.
+	For example, it's often impractical to change device firmware
+	once the product has been distributed, so driver bugs can't
+	normally be fixed if they're found after shipment.
+	</para></listitem>
+</itemizedlist>
+
+<para>
+Additional changes are needed below those hardware-neutral
+<emphasis>usb_bus</emphasis> and <emphasis>usb_gadget</emphasis>
+driver interfaces; those aren't discussed here in any detail.
+Those affect the hardware-specific code for each USB Host or Peripheral
+controller, and how the HCD initializes (since OTG can be active only
+on a single port).
+They also involve what may be called an <emphasis>OTG Controller
+Driver</emphasis>, managing the OTG transceiver and the OTG state
+machine logic as well as much of the root hub behavior for the
+OTG port.
+The OTG controller driver needs to activate and deactivate USB
+controllers depending on the relevant device role.
+Some related changes were needed inside usbcore, so that it
+can identify OTG-capable devices and respond appropriately
+to HNP or SRP protocols.
+</para> 
+
+</chapter>
+
+</book>
+<!--
+	vim:syntax=sgml:sw=4
+-->
diff --git a/Documentation/DocBook/genericirq.tmpl b/Documentation/DocBook/genericirq.tmpl
new file mode 100644
index 0000000..3a882d9
--- /dev/null
+++ b/Documentation/DocBook/genericirq.tmpl
@@ -0,0 +1,474 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="Generic-IRQ-Guide">
+ <bookinfo>
+  <title>Linux generic IRQ handling</title>
+
+  <authorgroup>
+   <author>
+    <firstname>Thomas</firstname>
+    <surname>Gleixner</surname>
+    <affiliation>
+     <address>
+      <email>tglx@linutronix.de</email>
+     </address>
+    </affiliation>
+   </author>
+   <author>
+    <firstname>Ingo</firstname>
+    <surname>Molnar</surname>
+    <affiliation>
+     <address>
+      <email>mingo@elte.hu</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <copyright>
+   <year>2005-2006</year>
+   <holder>Thomas Gleixner</holder>
+  </copyright>
+  <copyright>
+   <year>2005-2006</year>
+   <holder>Ingo Molnar</holder>
+  </copyright>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License version 2 as published by the Free Software Foundation.
+   </para>
+
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+  <chapter id="intro">
+    <title>Introduction</title>
+    <para>
+	The generic interrupt handling layer is designed to provide a
+	complete abstraction of interrupt handling for device drivers.
+	It is able to handle all the different types of interrupt controller
+	hardware. Device drivers use generic API functions to request, enable,
+	disable and free interrupts. The drivers do not have to know anything
+	about interrupt hardware details, so they can be used on different
+	platforms without code changes.
+    </para>
+    <para>
+  	This documentation is provided to developers who want to implement
+	an interrupt subsystem based for their architecture, with the help
+	of the generic IRQ handling layer.
+    </para>
+  </chapter>
+
+  <chapter id="rationale">
+    <title>Rationale</title>
+	<para>
+	The original implementation of interrupt handling in Linux is using
+	the __do_IRQ() super-handler, which is able to deal with every
+	type of interrupt logic.
+	</para>
+	<para>
+	Originally, Russell King identified different types of handlers to
+	build a quite universal set for the ARM interrupt handler
+	implementation in Linux 2.5/2.6. He distinguished between:
+	<itemizedlist>
+	  <listitem><para>Level type</para></listitem>
+	  <listitem><para>Edge type</para></listitem>
+	  <listitem><para>Simple type</para></listitem>
+	</itemizedlist>
+	In the SMP world of the __do_IRQ() super-handler another type
+	was identified:
+	<itemizedlist>
+	  <listitem><para>Per CPU type</para></listitem>
+	</itemizedlist>
+	</para>
+	<para>
+	This split implementation of highlevel IRQ handlers allows us to
+	optimize the flow of the interrupt handling for each specific
+	interrupt type. This reduces complexity in that particular codepath
+	and allows the optimized handling of a given type.
+	</para>
+	<para>
+	The original general IRQ implementation used hw_interrupt_type
+	structures and their ->ack(), ->end() [etc.] callbacks to
+	differentiate the flow control in the super-handler. This leads to
+	a mix of flow logic and lowlevel hardware logic, and it also leads
+	to unnecessary code duplication: for example in i386, there is a
+	ioapic_level_irq and a ioapic_edge_irq irq-type which share many
+	of the lowlevel details but have different flow handling.
+	</para>
+	<para>
+	A more natural abstraction is the clean separation of the
+	'irq flow' and the 'chip details'.
+	</para>
+	<para>
+	Analysing a couple of architecture's IRQ subsystem implementations
+	reveals that most of them can use a generic set of 'irq flow'
+	methods and only need to add the chip level specific code.
+	The separation is also valuable for (sub)architectures
+	which need specific quirks in the irq flow itself but not in the
+	chip-details - and thus provides a more transparent IRQ subsystem
+	design.
+	</para>
+	<para>
+	Each interrupt descriptor is assigned its own highlevel flow
+	handler, which is normally one of the generic
+	implementations. (This highlevel flow handler implementation also
+	makes it simple to provide demultiplexing handlers which can be
+	found in embedded platforms on various architectures.)
+	</para>
+	<para>
+	The separation makes the generic interrupt handling layer more
+	flexible and extensible. For example, an (sub)architecture can
+	use a generic irq-flow implementation for 'level type' interrupts
+	and add a (sub)architecture specific 'edge type' implementation.
+	</para>
+	<para>
+	To make the transition to the new model easier and prevent the
+	breakage of existing implementations, the __do_IRQ() super-handler
+	is still available. This leads to a kind of duality for the time
+	being. Over time the new model should be used in more and more
+	architectures, as it enables smaller and cleaner IRQ subsystems.
+	</para>
+  </chapter>
+  <chapter id="bugs">
+    <title>Known Bugs And Assumptions</title>
+    <para>
+	None (knock on wood).
+    </para>
+  </chapter>
+
+  <chapter id="Abstraction">
+    <title>Abstraction layers</title>
+    <para>
+	There are three main levels of abstraction in the interrupt code:
+	<orderedlist>
+	  <listitem><para>Highlevel driver API</para></listitem>
+	  <listitem><para>Highlevel IRQ flow handlers</para></listitem>
+	  <listitem><para>Chiplevel hardware encapsulation</para></listitem>
+	</orderedlist>
+    </para>
+    <sect1 id="Interrupt_control_flow">
+	<title>Interrupt control flow</title>
+	<para>
+	Each interrupt is described by an interrupt descriptor structure
+	irq_desc. The interrupt is referenced by an 'unsigned int' numeric
+	value which selects the corresponding interrupt decription structure
+	in the descriptor structures array.
+	The descriptor structure contains status information and pointers
+	to the interrupt flow method and the interrupt chip structure
+	which are assigned to this interrupt.
+	</para>
+	<para>
+	Whenever an interrupt triggers, the lowlevel arch code calls into
+	the generic interrupt code by calling desc->handle_irq().
+	This highlevel IRQ handling function only uses desc->chip primitives
+	referenced by the assigned chip descriptor structure.
+	</para>
+    </sect1>
+    <sect1 id="Highlevel_Driver_API">
+	<title>Highlevel Driver API</title>
+	<para>
+	  The highlevel Driver API consists of following functions:
+	  <itemizedlist>
+	  <listitem><para>request_irq()</para></listitem>
+	  <listitem><para>free_irq()</para></listitem>
+	  <listitem><para>disable_irq()</para></listitem>
+	  <listitem><para>enable_irq()</para></listitem>
+	  <listitem><para>disable_irq_nosync() (SMP only)</para></listitem>
+	  <listitem><para>synchronize_irq() (SMP only)</para></listitem>
+	  <listitem><para>set_irq_type()</para></listitem>
+	  <listitem><para>set_irq_wake()</para></listitem>
+	  <listitem><para>set_irq_data()</para></listitem>
+	  <listitem><para>set_irq_chip()</para></listitem>
+	  <listitem><para>set_irq_chip_data()</para></listitem>
+          </itemizedlist>
+	  See the autogenerated function documentation for details.
+	</para>
+    </sect1>
+    <sect1 id="Highlevel_IRQ_flow_handlers">
+	<title>Highlevel IRQ flow handlers</title>
+	<para>
+	  The generic layer provides a set of pre-defined irq-flow methods:
+	  <itemizedlist>
+	  <listitem><para>handle_level_irq</para></listitem>
+	  <listitem><para>handle_edge_irq</para></listitem>
+	  <listitem><para>handle_simple_irq</para></listitem>
+	  <listitem><para>handle_percpu_irq</para></listitem>
+	  </itemizedlist>
+	  The interrupt flow handlers (either predefined or architecture
+	  specific) are assigned to specific interrupts by the architecture
+	  either during bootup or during device initialization.
+	</para>
+	<sect2 id="Default_flow_implementations">
+	<title>Default flow implementations</title>
+	    <sect3 id="Helper_functions">
+	 	<title>Helper functions</title>
+		<para>
+		The helper functions call the chip primitives and
+		are used by the default flow implementations.
+		The following helper functions are implemented (simplified excerpt):
+		<programlisting>
+default_enable(irq)
+{
+	desc->chip->unmask(irq);
+}
+
+default_disable(irq)
+{
+	if (!delay_disable(irq))
+		desc->chip->mask(irq);
+}
+
+default_ack(irq)
+{
+	chip->ack(irq);
+}
+
+default_mask_ack(irq)
+{
+	if (chip->mask_ack) {
+		chip->mask_ack(irq);
+	} else {
+		chip->mask(irq);
+		chip->ack(irq);
+	}
+}
+
+noop(irq)
+{
+}
+
+		</programlisting>
+	        </para>
+	    </sect3>
+	</sect2>
+	<sect2 id="Default_flow_handler_implementations">
+	<title>Default flow handler implementations</title>
+	    <sect3 id="Default_Level_IRQ_flow_handler">
+	 	<title>Default Level IRQ flow handler</title>
+		<para>
+		handle_level_irq provides a generic implementation
+		for level-triggered interrupts.
+		</para>
+		<para>
+		The following control flow is implemented (simplified excerpt):
+		<programlisting>
+desc->chip->start();
+handle_IRQ_event(desc->action);
+desc->chip->end();
+		</programlisting>
+		</para>
+   	    </sect3>
+	    <sect3 id="Default_Edge_IRQ_flow_handler">
+	 	<title>Default Edge IRQ flow handler</title>
+		<para>
+		handle_edge_irq provides a generic implementation
+		for edge-triggered interrupts.
+		</para>
+		<para>
+		The following control flow is implemented (simplified excerpt):
+		<programlisting>
+if (desc->status &amp; running) {
+	desc->chip->hold();
+	desc->status |= pending | masked;
+	return;
+}
+desc->chip->start();
+desc->status |= running;
+do {
+	if (desc->status &amp; masked)
+		desc->chip->enable();
+	desc->status &amp;= ~pending;
+	handle_IRQ_event(desc->action);
+} while (status &amp; pending);
+desc->status &amp;= ~running;
+desc->chip->end();
+		</programlisting>
+		</para>
+   	    </sect3>
+	    <sect3 id="Default_simple_IRQ_flow_handler">
+	 	<title>Default simple IRQ flow handler</title>
+		<para>
+		handle_simple_irq provides a generic implementation
+		for simple interrupts.
+		</para>
+		<para>
+		Note: The simple flow handler does not call any
+		handler/chip primitives.
+		</para>
+		<para>
+		The following control flow is implemented (simplified excerpt):
+		<programlisting>
+handle_IRQ_event(desc->action);
+		</programlisting>
+		</para>
+   	    </sect3>
+	    <sect3 id="Default_per_CPU_flow_handler">
+	 	<title>Default per CPU flow handler</title>
+		<para>
+		handle_percpu_irq provides a generic implementation
+		for per CPU interrupts.
+		</para>
+		<para>
+		Per CPU interrupts are only available on SMP and
+		the handler provides a simplified version without
+		locking.
+		</para>
+		<para>
+		The following control flow is implemented (simplified excerpt):
+		<programlisting>
+desc->chip->start();
+handle_IRQ_event(desc->action);
+desc->chip->end();
+		</programlisting>
+		</para>
+   	    </sect3>
+	</sect2>
+	<sect2 id="Quirks_and_optimizations">
+	<title>Quirks and optimizations</title>
+	<para>
+	The generic functions are intended for 'clean' architectures and chips,
+	which have no platform-specific IRQ handling quirks. If an architecture
+	needs to implement quirks on the 'flow' level then it can do so by
+	overriding the highlevel irq-flow handler.
+	</para>
+	</sect2>
+	<sect2 id="Delayed_interrupt_disable">
+	<title>Delayed interrupt disable</title>
+	<para>
+	This per interrupt selectable feature, which was introduced by Russell
+	King in the ARM interrupt implementation, does not mask an interrupt
+	at the hardware level when disable_irq() is called. The interrupt is
+	kept enabled and is masked in the flow handler when an interrupt event
+	happens. This prevents losing edge interrupts on hardware which does
+	not store an edge interrupt event while the interrupt is disabled at
+	the hardware level. When an interrupt arrives while the IRQ_DISABLED
+	flag is set, then the interrupt is masked at the hardware level and
+	the IRQ_PENDING bit is set. When the interrupt is re-enabled by
+	enable_irq() the pending bit is checked and if it is set, the
+	interrupt is resent either via hardware or by a software resend
+	mechanism. (It's necessary to enable CONFIG_HARDIRQS_SW_RESEND when
+	you want to use the delayed interrupt disable feature and your
+	hardware is not capable of retriggering	an interrupt.)
+	The delayed interrupt disable can be runtime enabled, per interrupt,
+	by setting the IRQ_DELAYED_DISABLE flag in the irq_desc status field.
+	</para>
+	</sect2>
+    </sect1>
+    <sect1 id="Chiplevel_hardware_encapsulation">
+	<title>Chiplevel hardware encapsulation</title>
+	<para>
+	The chip level hardware descriptor structure irq_chip
+	contains all the direct chip relevant functions, which
+	can be utilized by the irq flow implementations.
+	  <itemizedlist>
+	  <listitem><para>ack()</para></listitem>
+	  <listitem><para>mask_ack() - Optional, recommended for performance</para></listitem>
+	  <listitem><para>mask()</para></listitem>
+	  <listitem><para>unmask()</para></listitem>
+	  <listitem><para>retrigger() - Optional</para></listitem>
+	  <listitem><para>set_type() - Optional</para></listitem>
+	  <listitem><para>set_wake() - Optional</para></listitem>
+	  </itemizedlist>
+	These primitives are strictly intended to mean what they say: ack means
+	ACK, masking means masking of an IRQ line, etc. It is up to the flow
+	handler(s) to use these basic units of lowlevel functionality.
+	</para>
+    </sect1>
+  </chapter>
+
+  <chapter id="doirq">
+     <title>__do_IRQ entry point</title>
+     <para>
+ 	The original implementation __do_IRQ() is an alternative entry
+	point for all types of interrupts.
+     </para>
+     <para>
+	This handler turned out to be not suitable for all
+	interrupt hardware and was therefore reimplemented with split
+	functionality for egde/level/simple/percpu interrupts. This is not
+	only a functional optimization. It also shortens code paths for
+	interrupts.
+      </para>
+      <para>
+	To make use of the split implementation, replace the call to
+	__do_IRQ by a call to desc->chip->handle_irq() and associate
+        the appropriate handler function to desc->chip->handle_irq().
+	In most cases the generic handler implementations should
+	be sufficient.
+     </para>
+  </chapter>
+
+  <chapter id="locking">
+     <title>Locking on SMP</title>
+     <para>
+	The locking of chip registers is up to the architecture that
+	defines the chip primitives. There is a chip->lock field that can be used
+	for serialization, but the generic layer does not touch it. The per-irq
+	structure is protected via desc->lock, by the generic layer.
+     </para>
+  </chapter>
+  <chapter id="structs">
+     <title>Structures</title>
+     <para>
+     This chapter contains the autogenerated documentation of the structures which are
+     used in the generic IRQ layer.
+     </para>
+!Iinclude/linux/irq.h
+  </chapter>
+
+  <chapter id="pubfunctions">
+     <title>Public Functions Provided</title>
+     <para>
+     This chapter contains the autogenerated documentation of the kernel API functions
+      which are exported.
+     </para>
+!Ekernel/irq/manage.c
+!Ekernel/irq/chip.c
+  </chapter>
+
+  <chapter id="intfunctions">
+     <title>Internal Functions Provided</title>
+     <para>
+     This chapter contains the autogenerated documentation of the internal functions.
+     </para>
+!Ikernel/irq/handle.c
+!Ikernel/irq/chip.c
+  </chapter>
+
+  <chapter id="credits">
+     <title>Credits</title>
+	<para>
+		The following people have contributed to this document:
+		<orderedlist>
+			<listitem><para>Thomas Gleixner<email>tglx@linutronix.de</email></para></listitem>
+			<listitem><para>Ingo Molnar<email>mingo@elte.hu</email></para></listitem>
+		</orderedlist>
+	</para>
+  </chapter>
+</book>
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
new file mode 100644
index 0000000..5818ff7
--- /dev/null
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -0,0 +1,707 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="LinuxKernelAPI">
+ <bookinfo>
+  <title>The Linux Kernel API</title>
+  
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License as published by the Free Software Foundation; either
+     version 2 of the License, or (at your option) any later
+     version.
+   </para>
+      
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+      
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+      
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+  <chapter id="Basics">
+     <title>Driver Basics</title>
+     <sect1><title>Driver Entry and Exit points</title>
+!Iinclude/linux/init.h
+     </sect1>
+
+     <sect1><title>Atomic and pointer manipulation</title>
+!Iarch/x86/include/asm/atomic_32.h
+!Iarch/x86/include/asm/unaligned.h
+     </sect1>
+
+     <sect1><title>Delaying, scheduling, and timer routines</title>
+!Iinclude/linux/sched.h
+!Ekernel/sched.c
+!Ekernel/timer.c
+     </sect1>
+     <sect1><title>High-resolution timers</title>
+!Iinclude/linux/ktime.h
+!Iinclude/linux/hrtimer.h
+!Ekernel/hrtimer.c
+     </sect1>
+     <sect1><title>Workqueues and Kevents</title>
+!Ekernel/workqueue.c
+     </sect1>
+     <sect1><title>Internal Functions</title>
+!Ikernel/exit.c
+!Ikernel/signal.c
+!Iinclude/linux/kthread.h
+!Ekernel/kthread.c
+     </sect1>
+
+     <sect1><title>Kernel objects manipulation</title>
+<!--
+X!Iinclude/linux/kobject.h
+-->
+!Elib/kobject.c
+     </sect1>
+
+     <sect1><title>Kernel utility functions</title>
+!Iinclude/linux/kernel.h
+!Ekernel/printk.c
+!Ekernel/panic.c
+!Ekernel/sys.c
+!Ekernel/rcupdate.c
+     </sect1>
+
+     <sect1><title>Device Resource Management</title>
+!Edrivers/base/devres.c
+     </sect1>
+
+  </chapter>
+
+  <chapter id="adt">
+     <title>Data Types</title>
+     <sect1><title>Doubly Linked Lists</title>
+!Iinclude/linux/list.h
+     </sect1>
+  </chapter>
+
+  <chapter id="libc">
+     <title>Basic C Library Functions</title>
+
+     <para>
+       When writing drivers, you cannot in general use routines which are
+       from the C Library.  Some of the functions have been found generally
+       useful and they are listed below.  The behaviour of these functions
+       may vary slightly from those defined by ANSI, and these deviations
+       are noted in the text.
+     </para>
+
+     <sect1><title>String Conversions</title>
+!Ilib/vsprintf.c
+!Elib/vsprintf.c
+     </sect1>
+     <sect1><title>String Manipulation</title>
+<!-- All functions are exported at now
+X!Ilib/string.c
+ -->
+!Elib/string.c
+     </sect1>
+     <sect1><title>Bit Operations</title>
+!Iarch/x86/include/asm/bitops.h
+     </sect1>
+  </chapter>
+
+  <chapter id="kernel-lib">
+     <title>Basic Kernel Library Functions</title>
+
+     <para>
+       The Linux kernel provides more basic utility functions.
+     </para>
+
+     <sect1><title>Bitmap Operations</title>
+!Elib/bitmap.c
+!Ilib/bitmap.c
+     </sect1>
+
+     <sect1><title>Command-line Parsing</title>
+!Elib/cmdline.c
+     </sect1>
+
+     <sect1 id="crc"><title>CRC Functions</title>
+!Elib/crc7.c
+!Elib/crc16.c
+!Elib/crc-itu-t.c
+!Elib/crc32.c
+!Elib/crc-ccitt.c
+     </sect1>
+  </chapter>
+
+  <chapter id="mm">
+     <title>Memory Management in Linux</title>
+     <sect1><title>The Slab Cache</title>
+!Iinclude/linux/slab.h
+!Emm/slab.c
+     </sect1>
+     <sect1><title>User Space Memory Access</title>
+!Iarch/x86/include/asm/uaccess_32.h
+!Earch/x86/lib/usercopy_32.c
+     </sect1>
+     <sect1><title>More Memory Management Functions</title>
+!Emm/readahead.c
+!Emm/filemap.c
+!Emm/memory.c
+!Emm/vmalloc.c
+!Imm/page_alloc.c
+!Emm/mempool.c
+!Emm/dmapool.c
+!Emm/page-writeback.c
+!Emm/truncate.c
+     </sect1>
+  </chapter>
+
+
+  <chapter id="ipc">
+     <title>Kernel IPC facilities</title>
+
+     <sect1><title>IPC utilities</title>
+!Iipc/util.c
+     </sect1>
+  </chapter>
+
+  <chapter id="kfifo">
+     <title>FIFO Buffer</title>
+     <sect1><title>kfifo interface</title>
+!Iinclude/linux/kfifo.h
+!Ekernel/kfifo.c
+     </sect1>
+  </chapter>
+
+  <chapter id="relayfs">
+     <title>relay interface support</title>
+
+     <para>
+	Relay interface support
+	is designed to provide an efficient mechanism for tools and
+	facilities to relay large amounts of data from kernel space to
+	user space.
+     </para>
+
+     <sect1><title>relay interface</title>
+!Ekernel/relay.c
+!Ikernel/relay.c
+     </sect1>
+  </chapter>
+
+  <chapter id="modload">
+     <title>Module Support</title>
+     <sect1><title>Module Loading</title>
+!Ekernel/kmod.c
+     </sect1>
+     <sect1><title>Inter Module support</title>
+        <para>
+           Refer to the file kernel/module.c for more information.
+        </para>
+<!-- FIXME: Removed for now since no structured comments in source
+X!Ekernel/module.c
+-->
+     </sect1>
+  </chapter>
+
+  <chapter id="hardware">
+     <title>Hardware Interfaces</title>
+     <sect1><title>Interrupt Handling</title>
+!Ekernel/irq/manage.c
+     </sect1>
+
+     <sect1><title>DMA Channels</title>
+!Ekernel/dma.c
+     </sect1>
+
+     <sect1><title>Resources Management</title>
+!Ikernel/resource.c
+!Ekernel/resource.c
+     </sect1>
+
+     <sect1><title>MTRR Handling</title>
+!Earch/x86/kernel/cpu/mtrr/main.c
+     </sect1>
+
+     <sect1><title>PCI Support Library</title>
+!Edrivers/pci/pci.c
+!Edrivers/pci/pci-driver.c
+!Edrivers/pci/remove.c
+!Edrivers/pci/pci-acpi.c
+!Edrivers/pci/search.c
+!Edrivers/pci/msi.c
+!Edrivers/pci/bus.c
+<!-- FIXME: Removed for now since no structured comments in source
+X!Edrivers/pci/hotplug.c
+-->
+!Edrivers/pci/probe.c
+!Edrivers/pci/rom.c
+     </sect1>
+     <sect1><title>PCI Hotplug Support Library</title>
+!Edrivers/pci/hotplug/pci_hotplug_core.c
+     </sect1>
+     <sect1><title>MCA Architecture</title>
+	<sect2><title>MCA Device Functions</title>
+           <para>
+              Refer to the file arch/x86/kernel/mca_32.c for more information.
+           </para>
+<!-- FIXME: Removed for now since no structured comments in source
+X!Earch/x86/kernel/mca_32.c
+-->
+	</sect2>
+	<sect2><title>MCA Bus DMA</title>
+!Iarch/x86/include/asm/mca_dma.h
+	</sect2>
+     </sect1>
+  </chapter>
+
+  <chapter id="firmware">
+     <title>Firmware Interfaces</title>
+     <sect1><title>DMI Interfaces</title>
+!Edrivers/firmware/dmi_scan.c
+     </sect1>
+     <sect1><title>EDD Interfaces</title>
+!Idrivers/firmware/edd.c
+     </sect1>
+  </chapter>
+
+  <chapter id="security">
+     <title>Security Framework</title>
+!Isecurity/security.c
+!Esecurity/inode.c
+  </chapter>
+
+  <chapter id="audit">
+     <title>Audit Interfaces</title>
+!Ekernel/audit.c
+!Ikernel/auditsc.c
+!Ikernel/auditfilter.c
+  </chapter>
+
+  <chapter id="accounting">
+     <title>Accounting Framework</title>
+!Ikernel/acct.c
+  </chapter>
+
+  <chapter id="devdrivers">
+     <title>Device drivers infrastructure</title>
+     <sect1><title>Device Drivers Base</title>
+<!--
+X!Iinclude/linux/device.h
+-->
+!Edrivers/base/driver.c
+!Edrivers/base/core.c
+!Edrivers/base/class.c
+!Edrivers/base/firmware_class.c
+!Edrivers/base/transport_class.c
+<!-- Cannot be included, because
+     attribute_container_add_class_device_adapter
+ and attribute_container_classdev_to_container
+     exceed allowed 44 characters maximum
+X!Edrivers/base/attribute_container.c
+-->
+!Edrivers/base/sys.c
+<!--
+X!Edrivers/base/interface.c
+-->
+!Edrivers/base/platform.c
+!Edrivers/base/bus.c
+     </sect1>
+     <sect1><title>Device Drivers Power Management</title>
+!Edrivers/base/power/main.c
+     </sect1>
+     <sect1><title>Device Drivers ACPI Support</title>
+<!-- Internal functions only
+X!Edrivers/acpi/sleep/main.c
+X!Edrivers/acpi/sleep/wakeup.c
+X!Edrivers/acpi/motherboard.c
+X!Edrivers/acpi/bus.c
+-->
+!Edrivers/acpi/scan.c
+!Idrivers/acpi/scan.c
+<!-- No correct structured comments
+X!Edrivers/acpi/pci_bind.c
+-->
+     </sect1>
+     <sect1><title>Device drivers PnP support</title>
+!Idrivers/pnp/core.c
+<!-- No correct structured comments
+X!Edrivers/pnp/system.c
+ -->
+!Edrivers/pnp/card.c
+!Idrivers/pnp/driver.c
+!Edrivers/pnp/manager.c
+!Edrivers/pnp/support.c
+     </sect1>
+     <sect1><title>Userspace IO devices</title>
+!Edrivers/uio/uio.c
+!Iinclude/linux/uio_driver.h
+     </sect1>
+  </chapter>
+
+  <chapter id="blkdev">
+     <title>Block Devices</title>
+!Eblock/blk-core.c
+!Iblock/blk-core.c
+!Eblock/blk-map.c
+!Iblock/blk-sysfs.c
+!Eblock/blk-settings.c
+!Eblock/blk-exec.c
+!Eblock/blk-barrier.c
+!Eblock/blk-tag.c
+!Iblock/blk-tag.c
+!Eblock/blk-integrity.c
+!Iblock/blktrace.c
+!Iblock/genhd.c
+!Eblock/genhd.c
+  </chapter>
+
+  <chapter id="chrdev">
+	<title>Char devices</title>
+!Efs/char_dev.c
+  </chapter>
+
+  <chapter id="miscdev">
+     <title>Miscellaneous Devices</title>
+!Edrivers/char/misc.c
+  </chapter>
+
+  <chapter id="parportdev">
+     <title>Parallel Port Devices</title>
+!Iinclude/linux/parport.h
+!Edrivers/parport/ieee1284.c
+!Edrivers/parport/share.c
+!Idrivers/parport/daisy.c
+  </chapter>
+
+  <chapter id="message_devices">
+	<title>Message-based devices</title>
+     <sect1><title>Fusion message devices</title>
+!Edrivers/message/fusion/mptbase.c
+!Idrivers/message/fusion/mptbase.c
+!Edrivers/message/fusion/mptscsih.c
+!Idrivers/message/fusion/mptscsih.c
+!Idrivers/message/fusion/mptctl.c
+!Idrivers/message/fusion/mptspi.c
+!Idrivers/message/fusion/mptfc.c
+!Idrivers/message/fusion/mptlan.c
+     </sect1>
+     <sect1><title>I2O message devices</title>
+!Iinclude/linux/i2o.h
+!Idrivers/message/i2o/core.h
+!Edrivers/message/i2o/iop.c
+!Idrivers/message/i2o/iop.c
+!Idrivers/message/i2o/config-osm.c
+!Edrivers/message/i2o/exec-osm.c
+!Idrivers/message/i2o/exec-osm.c
+!Idrivers/message/i2o/bus-osm.c
+!Edrivers/message/i2o/device.c
+!Idrivers/message/i2o/device.c
+!Idrivers/message/i2o/driver.c
+!Idrivers/message/i2o/pci.c
+!Idrivers/message/i2o/i2o_block.c
+!Idrivers/message/i2o/i2o_scsi.c
+!Idrivers/message/i2o/i2o_proc.c
+     </sect1>
+  </chapter>
+
+  <chapter id="snddev">
+     <title>Sound Devices</title>
+!Iinclude/sound/core.h
+!Esound/sound_core.c
+!Iinclude/sound/pcm.h
+!Esound/core/pcm.c
+!Esound/core/device.c
+!Esound/core/info.c
+!Esound/core/rawmidi.c
+!Esound/core/sound.c
+!Esound/core/memory.c
+!Esound/core/pcm_memory.c
+!Esound/core/init.c
+!Esound/core/isadma.c
+!Esound/core/control.c
+!Esound/core/pcm_lib.c
+!Esound/core/hwdep.c
+!Esound/core/pcm_native.c
+!Esound/core/memalloc.c
+<!-- FIXME: Removed for now since no structured comments in source
+X!Isound/sound_firmware.c
+-->
+  </chapter>
+
+  <chapter id="uart16x50">
+     <title>16x50 UART Driver</title>
+!Iinclude/linux/serial_core.h
+!Edrivers/serial/serial_core.c
+!Edrivers/serial/8250.c
+  </chapter>
+
+  <chapter id="fbdev">
+     <title>Frame Buffer Library</title>
+
+     <para>
+       The frame buffer drivers depend heavily on four data structures.  
+       These structures are declared in include/linux/fb.h.  They are 
+       fb_info, fb_var_screeninfo, fb_fix_screeninfo and fb_monospecs. 
+       The last three can be made available to and from userland. 
+     </para>
+
+     <para>
+       fb_info defines the current state of a particular video card. 
+       Inside fb_info, there exists a fb_ops structure which is a 
+       collection of needed functions to make fbdev and fbcon work.
+       fb_info is only visible to the kernel.
+     </para>
+
+     <para>
+       fb_var_screeninfo is used to describe the features of a video card 
+       that are user defined.  With fb_var_screeninfo, things such as
+       depth and the resolution may be defined.
+     </para>
+
+     <para>
+       The next structure is fb_fix_screeninfo. This defines the 
+       properties of a card that are created when a mode is set and can't 
+       be changed otherwise.  A good example of this is the start of the 
+       frame buffer memory.  This "locks" the address of the frame buffer
+       memory, so that it cannot be changed or moved.
+     </para>
+
+     <para>
+       The last structure is fb_monospecs. In the old API, there was 
+       little importance for fb_monospecs. This allowed for forbidden things 
+       such as setting a mode of 800x600 on a fix frequency monitor. With 
+       the new API, fb_monospecs prevents such things, and if used 
+       correctly, can prevent a monitor from being cooked.  fb_monospecs
+       will not be useful until kernels 2.5.x.
+     </para>
+
+     <sect1><title>Frame Buffer Memory</title>
+!Edrivers/video/fbmem.c
+     </sect1>
+<!--
+     <sect1><title>Frame Buffer Console</title>
+X!Edrivers/video/console/fbcon.c
+     </sect1>
+-->
+     <sect1><title>Frame Buffer Colormap</title>
+!Edrivers/video/fbcmap.c
+     </sect1>
+<!-- FIXME:
+  drivers/video/fbgen.c has no docs, which stuffs up the sgml.  Comment
+  out until somebody adds docs.  KAO
+     <sect1><title>Frame Buffer Generic Functions</title>
+X!Idrivers/video/fbgen.c
+     </sect1>
+KAO -->
+     <sect1><title>Frame Buffer Video Mode Database</title>
+!Idrivers/video/modedb.c
+!Edrivers/video/modedb.c
+     </sect1>
+     <sect1><title>Frame Buffer Macintosh Video Mode Database</title>
+!Edrivers/video/macmodes.c
+     </sect1>
+     <sect1><title>Frame Buffer Fonts</title>
+        <para>
+           Refer to the file drivers/video/console/fonts.c for more information.
+        </para>
+<!-- FIXME: Removed for now since no structured comments in source
+X!Idrivers/video/console/fonts.c
+-->
+     </sect1>
+  </chapter>
+
+  <chapter id="input_subsystem">
+     <title>Input Subsystem</title>
+!Iinclude/linux/input.h
+!Edrivers/input/input.c
+!Edrivers/input/ff-core.c
+!Edrivers/input/ff-memless.c
+  </chapter>
+
+  <chapter id="spi">
+      <title>Serial Peripheral Interface (SPI)</title>
+  <para>
+	SPI is the "Serial Peripheral Interface", widely used with
+	embedded systems because it is a simple and efficient
+	interface:  basically a multiplexed shift register.
+	Its three signal wires hold a clock (SCK, often in the range
+	of 1-20 MHz), a "Master Out, Slave In" (MOSI) data line, and
+	a "Master In, Slave Out" (MISO) data line.
+	SPI is a full duplex protocol; for each bit shifted out the
+	MOSI line (one per clock) another is shifted in on the MISO line.
+	Those bits are assembled into words of various sizes on the
+	way to and from system memory.
+	An additional chipselect line is usually active-low (nCS);
+	four signals are normally used for each peripheral, plus
+	sometimes an interrupt.
+  </para>
+  <para>
+	The SPI bus facilities listed here provide a generalized
+	interface to declare SPI busses and devices, manage them
+	according to the standard Linux driver model, and perform
+	input/output operations.
+	At this time, only "master" side interfaces are supported,
+	where Linux talks to SPI peripherals and does not implement
+	such a peripheral itself.
+	(Interfaces to support implementing SPI slaves would
+	necessarily look different.)
+  </para>
+  <para>
+	The programming interface is structured around two kinds of driver,
+	and two kinds of device.
+	A "Controller Driver" abstracts the controller hardware, which may
+	be as simple as a set of GPIO pins or as complex as a pair of FIFOs
+	connected to dual DMA engines on the other side of the SPI shift
+	register (maximizing throughput).  Such drivers bridge between
+	whatever bus they sit on (often the platform bus) and SPI, and
+	expose the SPI side of their device as a
+	<structname>struct spi_master</structname>.
+	SPI devices are children of that master, represented as a
+	<structname>struct spi_device</structname> and manufactured from
+	<structname>struct spi_board_info</structname> descriptors which
+	are usually provided by board-specific initialization code.
+	A <structname>struct spi_driver</structname> is called a
+	"Protocol Driver", and is bound to a spi_device using normal
+	driver model calls.
+  </para>
+  <para>
+	The I/O model is a set of queued messages.  Protocol drivers
+	submit one or more <structname>struct spi_message</structname>
+	objects, which are processed and completed asynchronously.
+	(There are synchronous wrappers, however.)  Messages are
+	built from one or more <structname>struct spi_transfer</structname>
+	objects, each of which wraps a full duplex SPI transfer.
+	A variety of protocol tweaking options are needed, because
+	different chips adopt very different policies for how they
+	use the bits transferred with SPI.
+  </para>
+!Iinclude/linux/spi/spi.h
+!Fdrivers/spi/spi.c spi_register_board_info
+!Edrivers/spi/spi.c
+  </chapter>
+
+  <chapter id="i2c">
+     <title>I<superscript>2</superscript>C and SMBus Subsystem</title>
+
+     <para>
+	I<superscript>2</superscript>C (or without fancy typography, "I2C")
+	is an acronym for the "Inter-IC" bus, a simple bus protocol which is
+	widely used where low data rate communications suffice.
+	Since it's also a licensed trademark, some vendors use another
+	name (such as "Two-Wire Interface", TWI) for the same bus.
+	I2C only needs two signals (SCL for clock, SDA for data), conserving
+	board real estate and minimizing signal quality issues.
+	Most I2C devices use seven bit addresses, and bus speeds of up
+	to 400 kHz; there's a high speed extension (3.4 MHz) that's not yet
+	found wide use.
+	I2C is a multi-master bus; open drain signaling is used to
+	arbitrate between masters, as well as to handshake and to
+	synchronize clocks from slower clients.
+     </para>
+
+     <para>
+	The Linux I2C programming interfaces support only the master
+	side of bus interactions, not the slave side.
+	The programming interface is structured around two kinds of driver,
+	and two kinds of device.
+	An I2C "Adapter Driver" abstracts the controller hardware; it binds
+	to a physical device (perhaps a PCI device or platform_device) and
+	exposes a <structname>struct i2c_adapter</structname> representing
+	each I2C bus segment it manages.
+	On each I2C bus segment will be I2C devices represented by a
+	<structname>struct i2c_client</structname>.  Those devices will
+	be bound to a <structname>struct i2c_driver</structname>,
+	which should follow the standard Linux driver model.
+	(At this writing, a legacy model is more widely used.)
+	There are functions to perform various I2C protocol operations; at
+	this writing all such functions are usable only from task context.
+     </para>
+
+     <para>
+	The System Management Bus (SMBus) is a sibling protocol.  Most SMBus
+	systems are also I2C conformant.  The electrical constraints are
+	tighter for SMBus, and it standardizes particular protocol messages
+	and idioms.  Controllers that support I2C can also support most
+	SMBus operations, but SMBus controllers don't support all the protocol
+	options that an I2C controller will.
+	There are functions to perform various SMBus protocol operations,
+	either using I2C primitives or by issuing SMBus commands to
+	i2c_adapter devices which don't support those I2C operations.
+     </para>
+
+!Iinclude/linux/i2c.h
+!Fdrivers/i2c/i2c-boardinfo.c i2c_register_board_info
+!Edrivers/i2c/i2c-core.c
+  </chapter>
+
+  <chapter id="clk">
+     <title>Clock Framework</title>
+
+     <para>
+	The clock framework defines programming interfaces to support
+	software management of the system clock tree.
+	This framework is widely used with System-On-Chip (SOC) platforms
+	to support power management and various devices which may need
+	custom clock rates.
+	Note that these "clocks" don't relate to timekeeping or real
+	time clocks (RTCs), each of which have separate frameworks.
+	These <structname>struct clk</structname> instances may be used
+	to manage for example a 96 MHz signal that is used to shift bits
+	into and out of peripherals or busses, or otherwise trigger
+	synchronous state machine transitions in system hardware.
+     </para>
+
+     <para>
+	Power management is supported by explicit software clock gating:
+	unused clocks are disabled, so the system doesn't waste power
+	changing the state of transistors that aren't in active use.
+	On some systems this may be backed by hardware clock gating,
+	where clocks are gated without being disabled in software.
+	Sections of chips that are powered but not clocked may be able
+	to retain their last state.
+	This low power state is often called a <emphasis>retention
+	mode</emphasis>.
+	This mode still incurs leakage currents, especially with finer
+	circuit geometries, but for CMOS circuits power is mostly used
+	by clocked state changes.
+     </para>
+
+     <para>
+	Power-aware drivers only enable their clocks when the device
+	they manage is in active use.  Also, system sleep states often
+	differ according to which clock domains are active:  while a
+	"standby" state may allow wakeup from several active domains, a
+	"mem" (suspend-to-RAM) state may require a more wholesale shutdown
+	of clocks derived from higher speed PLLs and oscillators, limiting
+	the number of possible wakeup event sources.  A driver's suspend
+	method may need to be aware of system-specific clock constraints
+	on the target sleep state.
+     </para>
+
+     <para>
+        Some platforms support programmable clock generators.  These
+	can be used by external chips of various kinds, such as other
+	CPUs, multimedia codecs, and devices with strict requirements
+	for interface clocking.
+     </para>
+
+!Iinclude/linux/clk.h
+  </chapter>
+
+</book>
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl
new file mode 100644
index 0000000..a50d6cd
--- /dev/null
+++ b/Documentation/DocBook/kernel-hacking.tmpl
@@ -0,0 +1,1327 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="lk-hacking-guide">
+ <bookinfo>
+  <title>Unreliable Guide To Hacking The Linux Kernel</title>
+  
+  <authorgroup>
+   <author>
+    <firstname>Rusty</firstname>
+    <surname>Russell</surname>
+    <affiliation>
+     <address>
+      <email>rusty@rustcorp.com.au</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <copyright>
+   <year>2005</year>
+   <holder>Rusty Russell</holder>
+  </copyright>
+
+  <legalnotice>
+   <para>
+    This documentation is free software; you can redistribute
+    it and/or modify it under the terms of the GNU General Public
+    License as published by the Free Software Foundation; either
+    version 2 of the License, or (at your option) any later
+    version.
+   </para>
+   
+   <para>
+    This program is distributed in the hope that it will be
+    useful, but WITHOUT ANY WARRANTY; without even the implied
+    warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+    See the GNU General Public License for more details.
+   </para>
+   
+   <para>
+    You should have received a copy of the GNU General Public
+    License along with this program; if not, write to the Free
+    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+    MA 02111-1307 USA
+   </para>
+   
+   <para>
+    For more details see the file COPYING in the source
+    distribution of Linux.
+   </para>
+  </legalnotice>
+
+  <releaseinfo>
+   This is the first release of this document as part of the kernel tarball.
+  </releaseinfo>
+
+ </bookinfo>
+
+ <toc></toc>
+
+ <chapter id="introduction">
+  <title>Introduction</title>
+  <para>
+   Welcome, gentle reader, to Rusty's Remarkably Unreliable Guide to Linux
+   Kernel Hacking.  This document describes the common routines and
+   general requirements for kernel code: its goal is to serve as a
+   primer for Linux kernel development for experienced C
+   programmers.  I avoid implementation details: that's what the
+   code is for, and I ignore whole tracts of useful routines.
+  </para>
+  <para>
+   Before you read this, please understand that I never wanted to
+   write this document, being grossly under-qualified, but I always
+   wanted to read it, and this was the only way.  I hope it will
+   grow into a compendium of best practice, common starting points
+   and random information.
+  </para>
+ </chapter>
+
+ <chapter id="basic-players">
+  <title>The Players</title>
+
+  <para>
+   At any time each of the CPUs in a system can be:
+  </para>
+
+  <itemizedlist>
+   <listitem>
+    <para>
+     not associated with any process, serving a hardware interrupt;
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     not associated with any process, serving a softirq or tasklet;
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     running in kernel space, associated with a process (user context);
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     running a process in user space.
+    </para>
+   </listitem>
+  </itemizedlist>
+
+  <para>
+   There is an ordering between these.  The bottom two can preempt
+   each other, but above that is a strict hierarchy: each can only be
+   preempted by the ones above it.  For example, while a softirq is
+   running on a CPU, no other softirq will preempt it, but a hardware
+   interrupt can.  However, any other CPUs in the system execute
+   independently.
+  </para>
+
+  <para>
+   We'll see a number of ways that the user context can block
+   interrupts, to become truly non-preemptable.
+  </para>
+  
+  <sect1 id="basics-usercontext">
+   <title>User Context</title>
+
+   <para>
+    User context is when you are coming in from a system call or other
+    trap: like userspace, you can be preempted by more important tasks
+    and by interrupts.  You can sleep, by calling
+    <function>schedule()</function>.
+   </para>
+
+   <note>
+    <para>
+     You are always in user context on module load and unload,
+     and on operations on the block device layer.
+    </para>
+   </note>
+
+   <para>
+    In user context, the <varname>current</varname> pointer (indicating 
+    the task we are currently executing) is valid, and
+    <function>in_interrupt()</function>
+    (<filename>include/linux/interrupt.h</filename>) is <returnvalue>false
+    </returnvalue>.  
+   </para>
+
+   <caution>
+    <para>
+     Beware that if you have preemption or softirqs disabled
+     (see below), <function>in_interrupt()</function> will return a 
+     false positive.
+    </para>
+   </caution>
+  </sect1>
+
+  <sect1 id="basics-hardirqs">
+   <title>Hardware Interrupts (Hard IRQs)</title>
+
+   <para>
+    Timer ticks, <hardware>network cards</hardware> and 
+    <hardware>keyboard</hardware> are examples of real
+    hardware which produce interrupts at any time.  The kernel runs
+    interrupt handlers, which services the hardware.  The kernel
+    guarantees that this handler is never re-entered: if the same
+    interrupt arrives, it is queued (or dropped).  Because it
+    disables interrupts, this handler has to be fast: frequently it
+    simply acknowledges the interrupt, marks a 'software interrupt'
+    for execution and exits.
+   </para>
+
+   <para>
+    You can tell you are in a hardware interrupt, because 
+    <function>in_irq()</function> returns <returnvalue>true</returnvalue>.  
+   </para>
+   <caution>
+    <para>
+     Beware that this will return a false positive if interrupts are disabled 
+     (see below).
+    </para>
+   </caution>
+  </sect1>
+
+  <sect1 id="basics-softirqs">
+   <title>Software Interrupt Context: Softirqs and Tasklets</title>
+
+   <para>
+    Whenever a system call is about to return to userspace, or a
+    hardware interrupt handler exits, any 'software interrupts'
+    which are marked pending (usually by hardware interrupts) are
+    run (<filename>kernel/softirq.c</filename>).
+   </para>
+
+   <para>
+    Much of the real interrupt handling work is done here.  Early in
+    the transition to <acronym>SMP</acronym>, there were only 'bottom
+    halves' (BHs), which didn't take advantage of multiple CPUs.  Shortly 
+    after we switched from wind-up computers made of match-sticks and snot,
+    we abandoned this limitation and switched to 'softirqs'.
+   </para>
+
+   <para>
+    <filename class="headerfile">include/linux/interrupt.h</filename> lists the
+    different softirqs.  A very important softirq is the
+    timer softirq (<filename
+    class="headerfile">include/linux/timer.h</filename>): you can
+    register to have it call functions for you in a given length of
+    time.
+   </para>
+
+   <para>
+    Softirqs are often a pain to deal with, since the same softirq
+    will run simultaneously on more than one CPU.  For this reason,
+    tasklets (<filename
+    class="headerfile">include/linux/interrupt.h</filename>) are more
+    often used: they are dynamically-registrable (meaning you can have
+    as many as you want), and they also guarantee that any tasklet
+    will only run on one CPU at any time, although different tasklets
+    can run simultaneously.
+   </para>
+   <caution>
+    <para>
+     The name 'tasklet' is misleading: they have nothing to do with 'tasks',
+     and probably more to do with some bad vodka Alexey Kuznetsov had at the 
+     time.
+    </para>
+   </caution>
+
+   <para>
+    You can tell you are in a softirq (or tasklet)
+    using the <function>in_softirq()</function> macro 
+    (<filename class="headerfile">include/linux/interrupt.h</filename>).
+   </para>
+   <caution>
+    <para>
+     Beware that this will return a false positive if a bh lock (see below)
+     is held.
+    </para>
+   </caution>
+  </sect1>
+ </chapter>
+
+ <chapter id="basic-rules">
+  <title>Some Basic Rules</title>
+
+  <variablelist>
+   <varlistentry>
+    <term>No memory protection</term>
+    <listitem>
+     <para>
+      If you corrupt memory, whether in user context or
+      interrupt context, the whole machine will crash.  Are you
+      sure you can't do what you want in userspace?
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term>No floating point or <acronym>MMX</acronym></term>
+    <listitem>
+     <para>
+      The <acronym>FPU</acronym> context is not saved; even in user
+      context the <acronym>FPU</acronym> state probably won't
+      correspond with the current process: you would mess with some
+      user process' <acronym>FPU</acronym> state.  If you really want
+      to do this, you would have to explicitly save/restore the full
+      <acronym>FPU</acronym> state (and avoid context switches).  It
+      is generally a bad idea; use fixed point arithmetic first.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term>A rigid stack limit</term>
+    <listitem>
+     <para>
+      Depending on configuration options the kernel stack is about 3K to 6K for most 32-bit architectures: it's
+      about 14K on most 64-bit archs, and often shared with interrupts
+      so you can't use it all.  Avoid deep recursion and huge local
+      arrays on the stack (allocate them dynamically instead).
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term>The Linux kernel is portable</term>
+    <listitem>
+     <para>
+      Let's keep it that way.  Your code should be 64-bit clean,
+      and endian-independent.  You should also minimize CPU
+      specific stuff, e.g. inline assembly should be cleanly
+      encapsulated and minimized to ease porting.  Generally it
+      should be restricted to the architecture-dependent part of
+      the kernel tree.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
+ </chapter>
+
+ <chapter id="ioctls">
+  <title>ioctls: Not writing a new system call</title>
+
+  <para>
+   A system call generally looks like this
+  </para>
+
+  <programlisting>
+asmlinkage long sys_mycall(int arg)
+{
+        return 0; 
+}
+  </programlisting>
+
+  <para>
+   First, in most cases you don't want to create a new system call.
+   You create a character device and implement an appropriate ioctl
+   for it.  This is much more flexible than system calls, doesn't have
+   to be entered in every architecture's
+   <filename class="headerfile">include/asm/unistd.h</filename> and
+   <filename>arch/kernel/entry.S</filename> file, and is much more
+   likely to be accepted by Linus.
+  </para>
+
+  <para>
+   If all your routine does is read or write some parameter, consider
+   implementing a <function>sysfs</function> interface instead.
+  </para>
+
+  <para>
+   Inside the ioctl you're in user context to a process.  When a
+   error occurs you return a negated errno (see
+   <filename class="headerfile">include/linux/errno.h</filename>),
+   otherwise you return <returnvalue>0</returnvalue>.
+  </para>
+
+  <para>
+   After you slept you should check if a signal occurred: the
+   Unix/Linux way of handling signals is to temporarily exit the
+   system call with the <constant>-ERESTARTSYS</constant> error.  The
+   system call entry code will switch back to user context, process
+   the signal handler and then your system call will be restarted
+   (unless the user disabled that).  So you should be prepared to
+   process the restart, e.g. if you're in the middle of manipulating
+   some data structure.
+  </para>
+
+  <programlisting>
+if (signal_pending()) 
+        return -ERESTARTSYS;
+  </programlisting>
+
+  <para>
+   If you're doing longer computations: first think userspace. If you
+   <emphasis>really</emphasis> want to do it in kernel you should
+   regularly check if you need to give up the CPU (remember there is
+   cooperative multitasking per CPU).  Idiom:
+  </para>
+
+  <programlisting>
+cond_resched(); /* Will sleep */ 
+  </programlisting>
+
+  <para>
+   A short note on interface design: the UNIX system call motto is
+   "Provide mechanism not policy".
+  </para>
+ </chapter>
+
+ <chapter id="deadlock-recipes">
+  <title>Recipes for Deadlock</title>
+
+  <para>
+   You cannot call any routines which may sleep, unless:
+  </para>
+  <itemizedlist>
+   <listitem>
+    <para>
+     You are in user context.
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     You do not own any spinlocks.
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     You have interrupts enabled (actually, Andi Kleen says
+     that the scheduling code will enable them for you, but
+     that's probably not what you wanted).
+    </para>
+   </listitem>
+  </itemizedlist>
+
+  <para>
+   Note that some functions may sleep implicitly: common ones are
+   the user space access functions (*_user) and memory allocation
+   functions without <symbol>GFP_ATOMIC</symbol>.
+  </para>
+
+  <para>
+   You should always compile your kernel
+   <symbol>CONFIG_DEBUG_SPINLOCK_SLEEP</symbol> on, and it will warn
+   you if you break these rules.  If you <emphasis>do</emphasis> break
+   the rules, you will eventually lock up your box.
+  </para>
+
+  <para>
+   Really.
+  </para>
+ </chapter>
+
+ <chapter id="common-routines">
+  <title>Common Routines</title>
+
+  <sect1 id="routines-printk">
+   <title>
+    <function>printk()</function>
+    <filename class="headerfile">include/linux/kernel.h</filename>
+   </title>
+
+   <para>
+    <function>printk()</function> feeds kernel messages to the
+    console, dmesg, and the syslog daemon.  It is useful for debugging
+    and reporting errors, and can be used inside interrupt context,
+    but use with caution: a machine which has its console flooded with
+    printk messages is unusable.  It uses a format string mostly
+    compatible with ANSI C printf, and C string concatenation to give
+    it a first "priority" argument:
+   </para>
+
+   <programlisting>
+printk(KERN_INFO "i = %u\n", i);
+   </programlisting>
+
+   <para>
+    See <filename class="headerfile">include/linux/kernel.h</filename>;
+    for other KERN_ values; these are interpreted by syslog as the
+    level.  Special case: for printing an IP address use
+   </para>
+
+   <programlisting>
+__u32 ipaddress;
+printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress));
+   </programlisting>
+
+   <para>
+    <function>printk()</function> internally uses a 1K buffer and does
+    not catch overruns.  Make sure that will be enough.
+   </para>
+
+   <note>
+    <para>
+     You will know when you are a real kernel hacker
+     when you start typoing printf as printk in your user programs :)
+    </para>
+   </note>
+
+   <!--- From the Lions book reader department --> 
+
+   <note>
+    <para>
+     Another sidenote: the original Unix Version 6 sources had a
+     comment on top of its printf function: "Printf should not be
+     used for chit-chat".  You should follow that advice.
+    </para>
+   </note>
+  </sect1>
+
+  <sect1 id="routines-copy">
+   <title>
+    <function>copy_[to/from]_user()</function>
+    /
+    <function>get_user()</function>
+    /
+    <function>put_user()</function>
+    <filename class="headerfile">include/asm/uaccess.h</filename>
+   </title>  
+
+   <para>
+    <emphasis>[SLEEPS]</emphasis>
+   </para>
+
+   <para>
+    <function>put_user()</function> and <function>get_user()</function>
+    are used to get and put single values (such as an int, char, or
+    long) from and to userspace.  A pointer into userspace should
+    never be simply dereferenced: data should be copied using these
+    routines.  Both return <constant>-EFAULT</constant> or 0.
+   </para>
+   <para>
+    <function>copy_to_user()</function> and
+    <function>copy_from_user()</function> are more general: they copy
+    an arbitrary amount of data to and from userspace.
+    <caution>
+     <para>
+      Unlike <function>put_user()</function> and
+      <function>get_user()</function>, they return the amount of
+      uncopied data (ie. <returnvalue>0</returnvalue> still means
+      success).
+     </para>
+    </caution>
+    [Yes, this moronic interface makes me cringe.  The flamewar comes up every year or so. --RR.]
+   </para>
+   <para>
+    The functions may sleep implicitly. This should never be called
+    outside user context (it makes no sense), with interrupts
+    disabled, or a spinlock held.
+   </para>
+  </sect1>
+
+  <sect1 id="routines-kmalloc">
+   <title><function>kmalloc()</function>/<function>kfree()</function>
+    <filename class="headerfile">include/linux/slab.h</filename></title>
+
+   <para>
+    <emphasis>[MAY SLEEP: SEE BELOW]</emphasis>
+   </para>
+
+   <para>
+    These routines are used to dynamically request pointer-aligned
+    chunks of memory, like malloc and free do in userspace, but
+    <function>kmalloc()</function> takes an extra flag word.
+    Important values:
+   </para>
+
+   <variablelist>
+    <varlistentry>
+     <term>
+      <constant>
+       GFP_KERNEL
+      </constant>
+     </term>
+     <listitem>
+      <para>
+       May sleep and swap to free memory. Only allowed in user
+       context, but is the most reliable way to allocate memory.
+      </para>
+     </listitem>
+    </varlistentry>
+    
+    <varlistentry>
+     <term>
+      <constant>
+       GFP_ATOMIC
+      </constant>
+     </term>
+     <listitem>
+      <para>
+       Don't sleep. Less reliable than <constant>GFP_KERNEL</constant>,
+       but may be called from interrupt context. You should
+       <emphasis>really</emphasis> have a good out-of-memory
+       error-handling strategy.
+      </para>
+     </listitem>
+    </varlistentry>
+    
+    <varlistentry>
+     <term>
+      <constant>
+       GFP_DMA
+      </constant>
+     </term>
+     <listitem>
+      <para>
+       Allocate ISA DMA lower than 16MB. If you don't know what that
+       is you don't need it.  Very unreliable.
+      </para>
+     </listitem>
+    </varlistentry>
+   </variablelist>
+
+   <para>
+    If you see a <errorname>sleeping function called from invalid
+    context</errorname> warning message, then maybe you called a
+    sleeping allocation function from interrupt context without
+    <constant>GFP_ATOMIC</constant>.  You should really fix that.
+    Run, don't walk.
+   </para>
+
+   <para>
+    If you are allocating at least <constant>PAGE_SIZE</constant>
+    (<filename class="headerfile">include/asm/page.h</filename>) bytes,
+    consider using <function>__get_free_pages()</function>
+
+    (<filename class="headerfile">include/linux/mm.h</filename>).  It
+    takes an order argument (0 for page sized, 1 for double page, 2
+    for four pages etc.) and the same memory priority flag word as
+    above.
+   </para>
+
+   <para>
+    If you are allocating more than a page worth of bytes you can use
+    <function>vmalloc()</function>.  It'll allocate virtual memory in
+    the kernel map.  This block is not contiguous in physical memory,
+    but the <acronym>MMU</acronym> makes it look like it is for you
+    (so it'll only look contiguous to the CPUs, not to external device
+    drivers).  If you really need large physically contiguous memory
+    for some weird device, you have a problem: it is poorly supported
+    in Linux because after some time memory fragmentation in a running
+    kernel makes it hard.  The best way is to allocate the block early
+    in the boot process via the <function>alloc_bootmem()</function>
+    routine.
+   </para>
+
+   <para>
+    Before inventing your own cache of often-used objects consider
+    using a slab cache in
+    <filename class="headerfile">include/linux/slab.h</filename>
+   </para>
+  </sect1>
+
+  <sect1 id="routines-current">
+   <title><function>current</function>
+    <filename class="headerfile">include/asm/current.h</filename></title>
+
+   <para>
+    This global variable (really a macro) contains a pointer to
+    the current task structure, so is only valid in user context.
+    For example, when a process makes a system call, this will
+    point to the task structure of the calling process.  It is
+    <emphasis>not NULL</emphasis> in interrupt context.
+   </para>
+  </sect1>
+
+  <sect1 id="routines-udelay">
+   <title><function>mdelay()</function>/<function>udelay()</function>
+     <filename class="headerfile">include/asm/delay.h</filename>
+     <filename class="headerfile">include/linux/delay.h</filename>
+   </title>
+
+   <para>
+    The <function>udelay()</function> and <function>ndelay()</function> functions can be used for small pauses.
+    Do not use large values with them as you risk
+    overflow - the helper function <function>mdelay()</function> is useful
+    here, or consider <function>msleep()</function>.
+   </para> 
+  </sect1>
+ 
+  <sect1 id="routines-endian">
+   <title><function>cpu_to_be32()</function>/<function>be32_to_cpu()</function>/<function>cpu_to_le32()</function>/<function>le32_to_cpu()</function>
+     <filename class="headerfile">include/asm/byteorder.h</filename>
+   </title>
+
+   <para>
+    The <function>cpu_to_be32()</function> family (where the "32" can
+    be replaced by 64 or 16, and the "be" can be replaced by "le") are
+    the general way to do endian conversions in the kernel: they
+    return the converted value.  All variations supply the reverse as
+    well: <function>be32_to_cpu()</function>, etc.
+   </para>
+
+   <para>
+    There are two major variations of these functions: the pointer
+    variation, such as <function>cpu_to_be32p()</function>, which take
+    a pointer to the given type, and return the converted value.  The
+    other variation is the "in-situ" family, such as
+    <function>cpu_to_be32s()</function>, which convert value referred
+    to by the pointer, and return void.
+   </para> 
+  </sect1>
+
+  <sect1 id="routines-local-irqs">
+   <title><function>local_irq_save()</function>/<function>local_irq_restore()</function>
+    <filename class="headerfile">include/asm/system.h</filename>
+   </title>
+
+   <para>
+    These routines disable hard interrupts on the local CPU, and
+    restore them.  They are reentrant; saving the previous state in
+    their one <varname>unsigned long flags</varname> argument.  If you
+    know that interrupts are enabled, you can simply use
+    <function>local_irq_disable()</function> and
+    <function>local_irq_enable()</function>.
+   </para>
+  </sect1>
+
+  <sect1 id="routines-softirqs">
+   <title><function>local_bh_disable()</function>/<function>local_bh_enable()</function>
+    <filename class="headerfile">include/linux/interrupt.h</filename></title>
+
+   <para>
+    These routines disable soft interrupts on the local CPU, and
+    restore them.  They are reentrant; if soft interrupts were
+    disabled before, they will still be disabled after this pair
+    of functions has been called.  They prevent softirqs and tasklets
+    from running on the current CPU.
+   </para>
+  </sect1>
+
+  <sect1 id="routines-processorids">
+   <title><function>smp_processor_id</function>()
+    <filename class="headerfile">include/asm/smp.h</filename></title>
+   
+   <para>
+    <function>get_cpu()</function> disables preemption (so you won't
+    suddenly get moved to another CPU) and returns the current
+    processor number, between 0 and <symbol>NR_CPUS</symbol>.  Note
+    that the CPU numbers are not necessarily continuous.  You return
+    it again with <function>put_cpu()</function> when you are done.
+   </para>
+   <para>
+    If you know you cannot be preempted by another task (ie. you are
+    in interrupt context, or have preemption disabled) you can use
+    smp_processor_id().
+   </para>
+  </sect1>
+
+  <sect1 id="routines-init">
+   <title><type>__init</type>/<type>__exit</type>/<type>__initdata</type>
+    <filename class="headerfile">include/linux/init.h</filename></title>
+
+   <para>
+    After boot, the kernel frees up a special section; functions
+    marked with <type>__init</type> and data structures marked with
+    <type>__initdata</type> are dropped after boot is complete: similarly
+    modules discard this memory after initialization.  <type>__exit</type>
+    is used to declare a function which is only required on exit: the
+    function will be dropped if this file is not compiled as a module.
+    See the header file for use. Note that it makes no sense for a function
+    marked with <type>__init</type> to be exported to modules with 
+    <function>EXPORT_SYMBOL()</function> - this will break.
+   </para>
+
+  </sect1>
+
+  <sect1 id="routines-init-again">
+   <title><function>__initcall()</function>/<function>module_init()</function>
+    <filename class="headerfile">include/linux/init.h</filename></title>
+   <para>
+    Many parts of the kernel are well served as a module
+    (dynamically-loadable parts of the kernel).  Using the
+    <function>module_init()</function> and
+    <function>module_exit()</function> macros it is easy to write code
+    without #ifdefs which can operate both as a module or built into
+    the kernel.
+   </para>
+
+   <para>
+    The <function>module_init()</function> macro defines which
+    function is to be called at module insertion time (if the file is
+    compiled as a module), or at boot time: if the file is not
+    compiled as a module the <function>module_init()</function> macro
+    becomes equivalent to <function>__initcall()</function>, which
+    through linker magic ensures that the function is called on boot.
+   </para>
+
+   <para>
+    The function can return a negative error number to cause
+    module loading to fail (unfortunately, this has no effect if
+    the module is compiled into the kernel).  This function is
+    called in user context with interrupts enabled, so it can sleep.
+   </para>
+  </sect1>
+  
+  <sect1 id="routines-moduleexit">
+   <title> <function>module_exit()</function>
+    <filename class="headerfile">include/linux/init.h</filename> </title>
+
+   <para>
+    This macro defines the function to be called at module removal
+    time (or never, in the case of the file compiled into the
+    kernel).  It will only be called if the module usage count has
+    reached zero.  This function can also sleep, but cannot fail:
+    everything must be cleaned up by the time it returns.
+   </para>
+
+   <para>
+    Note that this macro is optional: if it is not present, your
+    module will not be removable (except for 'rmmod -f').
+   </para>
+  </sect1>
+
+  <sect1 id="routines-module-use-counters">
+   <title> <function>try_module_get()</function>/<function>module_put()</function>
+    <filename class="headerfile">include/linux/module.h</filename></title>
+
+   <para>
+    These manipulate the module usage count, to protect against
+    removal (a module also can't be removed if another module uses one
+    of its exported symbols: see below).  Before calling into module
+    code, you should call <function>try_module_get()</function> on
+    that module: if it fails, then the module is being removed and you
+    should act as if it wasn't there.  Otherwise, you can safely enter
+    the module, and call <function>module_put()</function> when you're
+    finished.
+   </para>
+
+   <para>
+   Most registerable structures have an
+   <structfield>owner</structfield> field, such as in the
+   <structname>file_operations</structname> structure. Set this field
+   to the macro <symbol>THIS_MODULE</symbol>.
+   </para>
+  </sect1>
+
+ <!-- add info on new-style module refcounting here -->
+ </chapter>
+
+ <chapter id="queues">
+  <title>Wait Queues
+   <filename class="headerfile">include/linux/wait.h</filename>
+  </title>
+  <para>
+   <emphasis>[SLEEPS]</emphasis>
+  </para>
+
+  <para>
+   A wait queue is used to wait for someone to wake you up when a
+   certain condition is true.  They must be used carefully to ensure
+   there is no race condition.  You declare a
+   <type>wait_queue_head_t</type>, and then processes which want to
+   wait for that condition declare a <type>wait_queue_t</type>
+   referring to themselves, and place that in the queue.
+  </para>
+
+  <sect1 id="queue-declaring">
+   <title>Declaring</title>
+   
+   <para>
+    You declare a <type>wait_queue_head_t</type> using the
+    <function>DECLARE_WAIT_QUEUE_HEAD()</function> macro, or using the
+    <function>init_waitqueue_head()</function> routine in your
+    initialization code.
+   </para>
+  </sect1>
+  
+  <sect1 id="queue-waitqueue">
+   <title>Queuing</title>
+   
+   <para>
+    Placing yourself in the waitqueue is fairly complex, because you
+    must put yourself in the queue before checking the condition.
+    There is a macro to do this:
+    <function>wait_event_interruptible()</function>
+
+    <filename class="headerfile">include/linux/wait.h</filename> The
+    first argument is the wait queue head, and the second is an
+    expression which is evaluated; the macro returns
+    <returnvalue>0</returnvalue> when this expression is true, or
+    <returnvalue>-ERESTARTSYS</returnvalue> if a signal is received.
+    The <function>wait_event()</function> version ignores signals.
+   </para>
+   <para>
+   Do not use the <function>sleep_on()</function> function family -
+   it is very easy to accidentally introduce races; almost certainly
+   one of the <function>wait_event()</function> family will do, or a
+   loop around <function>schedule_timeout()</function>. If you choose
+   to loop around <function>schedule_timeout()</function> remember
+   you must set the task state (with 
+   <function>set_current_state()</function>) on each iteration to avoid
+   busy-looping.
+   </para>
+ 
+  </sect1>
+
+  <sect1 id="queue-waking">
+   <title>Waking Up Queued Tasks</title>
+   
+   <para>
+    Call <function>wake_up()</function>
+
+    <filename class="headerfile">include/linux/wait.h</filename>;,
+    which will wake up every process in the queue.  The exception is
+    if one has <constant>TASK_EXCLUSIVE</constant> set, in which case
+    the remainder of the queue will not be woken.  There are other variants
+    of this basic function available in the same header.
+   </para>
+  </sect1>
+ </chapter>
+
+ <chapter id="atomic-ops">
+  <title>Atomic Operations</title>
+
+  <para>
+   Certain operations are guaranteed atomic on all platforms.  The
+   first class of operations work on <type>atomic_t</type>
+
+   <filename class="headerfile">include/asm/atomic.h</filename>; this
+   contains a signed integer (at least 32 bits long), and you must use
+   these functions to manipulate or read atomic_t variables.
+   <function>atomic_read()</function> and
+   <function>atomic_set()</function> get and set the counter,
+   <function>atomic_add()</function>,
+   <function>atomic_sub()</function>,
+   <function>atomic_inc()</function>,
+   <function>atomic_dec()</function>, and
+   <function>atomic_dec_and_test()</function> (returns
+   <returnvalue>true</returnvalue> if it was decremented to zero).
+  </para>
+
+  <para>
+   Yes.  It returns <returnvalue>true</returnvalue> (i.e. != 0) if the
+   atomic variable is zero.
+  </para>
+
+  <para>
+   Note that these functions are slower than normal arithmetic, and
+   so should not be used unnecessarily.
+  </para>
+
+  <para>
+   The second class of atomic operations is atomic bit operations on an
+   <type>unsigned long</type>, defined in
+
+   <filename class="headerfile">include/linux/bitops.h</filename>.  These
+   operations generally take a pointer to the bit pattern, and a bit
+   number: 0 is the least significant bit.
+   <function>set_bit()</function>, <function>clear_bit()</function>
+   and <function>change_bit()</function> set, clear, and flip the
+   given bit.  <function>test_and_set_bit()</function>,
+   <function>test_and_clear_bit()</function> and
+   <function>test_and_change_bit()</function> do the same thing,
+   except return true if the bit was previously set; these are
+   particularly useful for atomically setting flags.
+  </para>
+  
+  <para>
+   It is possible to call these operations with bit indices greater
+   than BITS_PER_LONG.  The resulting behavior is strange on big-endian
+   platforms though so it is a good idea not to do this.
+  </para>
+ </chapter>
+
+ <chapter id="symbols">
+  <title>Symbols</title>
+
+  <para>
+   Within the kernel proper, the normal linking rules apply
+   (ie. unless a symbol is declared to be file scope with the
+   <type>static</type> keyword, it can be used anywhere in the
+   kernel).  However, for modules, a special exported symbol table is
+   kept which limits the entry points to the kernel proper.  Modules
+   can also export symbols.
+  </para>
+
+  <sect1 id="sym-exportsymbols">
+   <title><function>EXPORT_SYMBOL()</function>
+    <filename class="headerfile">include/linux/module.h</filename></title>
+
+   <para>
+    This is the classic method of exporting a symbol: dynamically
+    loaded modules will be able to use the symbol as normal.
+   </para>
+  </sect1>
+
+  <sect1 id="sym-exportsymbols-gpl">
+   <title><function>EXPORT_SYMBOL_GPL()</function>
+    <filename class="headerfile">include/linux/module.h</filename></title>
+
+   <para>
+    Similar to <function>EXPORT_SYMBOL()</function> except that the
+    symbols exported by <function>EXPORT_SYMBOL_GPL()</function> can
+    only be seen by modules with a
+    <function>MODULE_LICENSE()</function> that specifies a GPL
+    compatible license.  It implies that the function is considered
+    an internal implementation issue, and not really an interface.
+   </para>
+  </sect1>
+ </chapter>
+
+ <chapter id="conventions">
+  <title>Routines and Conventions</title>
+
+  <sect1 id="conventions-doublelinkedlist">
+   <title>Double-linked lists
+    <filename class="headerfile">include/linux/list.h</filename></title>
+
+   <para>
+    There used to be three sets of linked-list routines in the kernel
+    headers, but this one is the winner.  If you don't have some
+    particular pressing need for a single list, it's a good choice.
+   </para>
+
+   <para>
+    In particular, <function>list_for_each_entry</function> is useful.
+   </para>
+  </sect1>
+
+  <sect1 id="convention-returns">
+   <title>Return Conventions</title>
+
+   <para>
+    For code called in user context, it's very common to defy C
+    convention, and return <returnvalue>0</returnvalue> for success,
+    and a negative error number
+    (eg. <returnvalue>-EFAULT</returnvalue>) for failure.  This can be
+    unintuitive at first, but it's fairly widespread in the kernel.
+   </para>
+
+   <para>
+    Using <function>ERR_PTR()</function>
+
+    <filename class="headerfile">include/linux/err.h</filename>; to
+    encode a negative error number into a pointer, and
+    <function>IS_ERR()</function> and <function>PTR_ERR()</function>
+    to get it back out again: avoids a separate pointer parameter for
+    the error number.  Icky, but in a good way.
+   </para>
+  </sect1>
+
+  <sect1 id="conventions-borkedcompile">
+   <title>Breaking Compilation</title>
+
+   <para>
+    Linus and the other developers sometimes change function or
+    structure names in development kernels; this is not done just to
+    keep everyone on their toes: it reflects a fundamental change
+    (eg. can no longer be called with interrupts on, or does extra
+    checks, or doesn't do checks which were caught before).  Usually
+    this is accompanied by a fairly complete note to the linux-kernel
+    mailing list; search the archive.  Simply doing a global replace
+    on the file usually makes things <emphasis>worse</emphasis>.
+   </para>
+  </sect1>
+
+  <sect1 id="conventions-initialising">
+   <title>Initializing structure members</title>
+
+   <para>
+    The preferred method of initializing structures is to use
+    designated initialisers, as defined by ISO C99, eg:
+   </para>
+   <programlisting>
+static struct block_device_operations opt_fops = {
+        .open               = opt_open,
+        .release            = opt_release,
+        .ioctl              = opt_ioctl,
+        .check_media_change = opt_media_change,
+};
+   </programlisting>
+   <para>
+    This makes it easy to grep for, and makes it clear which
+    structure fields are set.  You should do this because it looks
+    cool.
+   </para>
+  </sect1>
+
+  <sect1 id="conventions-gnu-extns">
+   <title>GNU Extensions</title>
+
+   <para>
+    GNU Extensions are explicitly allowed in the Linux kernel.
+    Note that some of the more complex ones are not very well
+    supported, due to lack of general use, but the following are
+    considered standard (see the GCC info page section "C
+    Extensions" for more details - Yes, really the info page, the
+    man page is only a short summary of the stuff in info).
+   </para>
+   <itemizedlist>
+    <listitem>
+     <para>
+      Inline functions
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      Statement expressions (ie. the ({ and }) constructs).
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      Declaring attributes of a function / variable / type
+      (__attribute__)
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      typeof
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      Zero length arrays
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      Macro varargs
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      Arithmetic on void pointers
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      Non-Constant initializers
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      Assembler Instructions (not outside arch/ and include/asm/)
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      Function names as strings (__func__).
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      __builtin_constant_p()
+     </para>
+    </listitem>
+   </itemizedlist>
+
+   <para>
+    Be wary when using long long in the kernel, the code gcc generates for
+    it is horrible and worse: division and multiplication does not work
+    on i386 because the GCC runtime functions for it are missing from
+    the kernel environment.
+   </para>
+
+    <!-- FIXME: add a note about ANSI aliasing cleanness -->
+  </sect1>
+
+  <sect1 id="conventions-cplusplus">
+   <title>C++</title>
+   
+   <para>
+    Using C++ in the kernel is usually a bad idea, because the
+    kernel does not provide the necessary runtime environment
+    and the include files are not tested for it.  It is still
+    possible, but not recommended.  If you really want to do
+    this, forget about exceptions at least.
+   </para>
+  </sect1>
+
+  <sect1 id="conventions-ifdef">
+   <title>&num;if</title>
+   
+   <para>
+    It is generally considered cleaner to use macros in header files
+    (or at the top of .c files) to abstract away functions rather than
+    using `#if' pre-processor statements throughout the source code.
+   </para>
+  </sect1>
+ </chapter>
+
+ <chapter id="submitting">
+  <title>Putting Your Stuff in the Kernel</title>
+
+  <para>
+   In order to get your stuff into shape for official inclusion, or
+   even to make a neat patch, there's administrative work to be
+   done:
+  </para>
+  <itemizedlist>
+   <listitem>
+    <para>
+     Figure out whose pond you've been pissing in.  Look at the top of
+     the source files, inside the <filename>MAINTAINERS</filename>
+     file, and last of all in the <filename>CREDITS</filename> file.
+     You should coordinate with this person to make sure you're not
+     duplicating effort, or trying something that's already been
+     rejected.
+    </para>
+
+    <para>
+     Make sure you put your name and EMail address at the top of
+     any files you create or mangle significantly.  This is the
+     first place people will look when they find a bug, or when
+     <emphasis>they</emphasis> want to make a change.
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     Usually you want a configuration option for your kernel hack.
+     Edit <filename>Kconfig</filename> in the appropriate directory.
+     The Config language is simple to use by cut and paste, and there's
+     complete documentation in
+     <filename>Documentation/kbuild/kconfig-language.txt</filename>.
+    </para>
+
+    <para>
+     You may well want to make your CONFIG option only visible if
+     <symbol>CONFIG_EXPERIMENTAL</symbol> is enabled: this serves as a
+     warning to users.  There many other fancy things you can do: see
+     the various <filename>Kconfig</filename> files for ideas.
+    </para>
+
+    <para>
+     In your description of the option, make sure you address both the
+     expert user and the user who knows nothing about your feature.  Mention
+     incompatibilities and issues here.  <emphasis> Definitely
+     </emphasis> end your description with <quote> if in doubt, say N
+     </quote> (or, occasionally, `Y'); this is for people who have no
+     idea what you are talking about.
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     Edit the <filename>Makefile</filename>: the CONFIG variables are
+     exported here so you can usually just add a "obj-$(CONFIG_xxx) +=
+     xxx.o" line.  The syntax is documented in
+     <filename>Documentation/kbuild/makefiles.txt</filename>.
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     Put yourself in <filename>CREDITS</filename> if you've done
+     something noteworthy, usually beyond a single file (your name
+     should be at the top of the source files anyway).
+     <filename>MAINTAINERS</filename> means you want to be consulted
+     when changes are made to a subsystem, and hear about bugs; it
+     implies a more-than-passing commitment to some part of the code.
+    </para>
+   </listitem>
+   
+   <listitem>
+    <para>
+     Finally, don't forget to read <filename>Documentation/SubmittingPatches</filename>
+     and possibly <filename>Documentation/SubmittingDrivers</filename>.
+    </para>
+   </listitem>
+  </itemizedlist>
+ </chapter>
+
+ <chapter id="cantrips">
+  <title>Kernel Cantrips</title>
+
+  <para>
+   Some favorites from browsing the source.  Feel free to add to this
+   list.
+  </para>
+
+  <para>
+   <filename>arch/x86/include/asm/delay.h:</filename>
+  </para>
+  <programlisting>
+#define ndelay(n) (__builtin_constant_p(n) ? \
+        ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \
+        __ndelay(n))
+  </programlisting>
+
+  <para>
+   <filename>include/linux/fs.h</filename>:
+  </para>
+  <programlisting>
+/*
+ * Kernel pointers have redundant information, so we can use a
+ * scheme where we can return either an error code or a dentry
+ * pointer with the same return value.
+ *
+ * This should be a per-architecture thing, to allow different
+ * error and pointer decisions.
+ */
+ #define ERR_PTR(err)    ((void *)((long)(err)))
+ #define PTR_ERR(ptr)    ((long)(ptr))
+ #define IS_ERR(ptr)     ((unsigned long)(ptr) > (unsigned long)(-1000))
+</programlisting>
+
+  <para>
+   <filename>arch/x86/include/asm/uaccess_32.h:</filename>
+  </para>
+
+  <programlisting>
+#define copy_to_user(to,from,n)                         \
+        (__builtin_constant_p(n) ?                      \
+         __constant_copy_to_user((to),(from),(n)) :     \
+         __generic_copy_to_user((to),(from),(n)))
+  </programlisting>
+
+  <para>
+   <filename>arch/sparc/kernel/head.S:</filename>
+  </para>
+
+  <programlisting>
+/*
+ * Sun people can't spell worth damn. "compatability" indeed.
+ * At least we *know* we can't spell, and use a spell-checker.
+ */
+
+/* Uh, actually Linus it is I who cannot spell. Too much murky
+ * Sparc assembly will do this to ya.
+ */
+C_LABEL(cputypvar):
+        .asciz "compatability"
+
+/* Tested on SS-5, SS-10. Probably someone at Sun applied a spell-checker. */
+        .align 4
+C_LABEL(cputypvar_sun4m):
+        .asciz "compatible"
+  </programlisting>
+
+  <para>
+   <filename>arch/sparc/lib/checksum.S:</filename>
+  </para>
+
+  <programlisting>
+        /* Sun, you just can't beat me, you just can't.  Stop trying,
+         * give up.  I'm serious, I am going to kick the living shit
+         * out of you, game over, lights out.
+         */
+  </programlisting>
+ </chapter>
+
+ <chapter id="credits">
+  <title>Thanks</title>
+
+  <para>
+   Thanks to Andi Kleen for the idea, answering my questions, fixing
+   my mistakes, filling content, etc.  Philipp Rumpf for more spelling
+   and clarity fixes, and some excellent non-obvious points.  Werner
+   Almesberger for giving me a great summary of
+   <function>disable_irq()</function>, and Jes Sorensen and Andrea
+   Arcangeli added caveats. Michael Elizabeth Chastain for checking
+   and adding to the Configure section. <!-- Rusty insisted on this
+   bit; I didn't do it! --> Telsa Gwynne for teaching me DocBook. 
+  </para>
+ </chapter>
+</book>
+
diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl
new file mode 100644
index 0000000..084f6ad
--- /dev/null
+++ b/Documentation/DocBook/kernel-locking.tmpl
@@ -0,0 +1,2143 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="LKLockingGuide">
+ <bookinfo>
+  <title>Unreliable Guide To Locking</title>
+  
+  <authorgroup>
+   <author>
+    <firstname>Rusty</firstname>
+    <surname>Russell</surname>
+    <affiliation>
+     <address>
+      <email>rusty@rustcorp.com.au</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <copyright>
+   <year>2003</year>
+   <holder>Rusty Russell</holder>
+  </copyright>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License as published by the Free Software Foundation; either
+     version 2 of the License, or (at your option) any later
+     version.
+   </para>
+      
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+      
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+      
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+ <toc></toc>
+  <chapter id="intro">
+   <title>Introduction</title>
+   <para>
+     Welcome, to Rusty's Remarkably Unreliable Guide to Kernel
+     Locking issues.  This document describes the locking systems in
+     the Linux Kernel in 2.6.
+   </para>
+   <para>
+     With the wide availability of HyperThreading, and <firstterm
+     linkend="gloss-preemption">preemption </firstterm> in the Linux
+     Kernel, everyone hacking on the kernel needs to know the
+     fundamentals of concurrency and locking for
+     <firstterm linkend="gloss-smp"><acronym>SMP</acronym></firstterm>.
+   </para>
+  </chapter>
+
+   <chapter id="races">
+    <title>The Problem With Concurrency</title>
+    <para>
+      (Skip this if you know what a Race Condition is).
+    </para>
+    <para>
+      In a normal program, you can increment a counter like so:
+    </para>
+    <programlisting>
+      very_important_count++;
+    </programlisting>
+
+    <para>
+      This is what they would expect to happen:
+    </para>
+
+    <table>
+     <title>Expected Results</title>
+
+     <tgroup cols="2" align="left">
+
+      <thead>
+       <row>
+        <entry>Instance 1</entry>
+        <entry>Instance 2</entry>
+       </row>
+      </thead>
+
+      <tbody>
+       <row>
+        <entry>read very_important_count (5)</entry>
+        <entry></entry>
+       </row>
+       <row>
+        <entry>add 1 (6)</entry>
+        <entry></entry>
+       </row>
+       <row>
+        <entry>write very_important_count (6)</entry>
+        <entry></entry>
+       </row>
+       <row>
+        <entry></entry>
+        <entry>read very_important_count (6)</entry>
+       </row>
+       <row>
+        <entry></entry>
+        <entry>add 1 (7)</entry>
+       </row>
+       <row>
+        <entry></entry>
+        <entry>write very_important_count (7)</entry>
+       </row>
+      </tbody>
+
+     </tgroup>
+    </table>
+
+    <para>
+     This is what might happen:
+    </para>
+
+    <table>
+     <title>Possible Results</title>
+
+     <tgroup cols="2" align="left">
+      <thead>
+       <row>
+        <entry>Instance 1</entry>
+        <entry>Instance 2</entry>
+       </row>
+      </thead>
+
+      <tbody>
+       <row>
+        <entry>read very_important_count (5)</entry>
+        <entry></entry>
+       </row>
+       <row>
+        <entry></entry>
+        <entry>read very_important_count (5)</entry>
+       </row>
+       <row>
+        <entry>add 1 (6)</entry>
+        <entry></entry>
+       </row>
+       <row>
+        <entry></entry>
+        <entry>add 1 (6)</entry>
+       </row>
+       <row>
+        <entry>write very_important_count (6)</entry>
+        <entry></entry>
+       </row>
+       <row>
+        <entry></entry>
+        <entry>write very_important_count (6)</entry>
+       </row>
+      </tbody>
+     </tgroup>
+    </table>
+
+    <sect1 id="race-condition">
+    <title>Race Conditions and Critical Regions</title>
+    <para>
+      This overlap, where the result depends on the
+      relative timing of multiple tasks, is called a <firstterm>race condition</firstterm>.
+      The piece of code containing the concurrency issue is called a
+      <firstterm>critical region</firstterm>.  And especially since Linux starting running
+      on SMP machines, they became one of the major issues in kernel
+      design and implementation.
+    </para>
+    <para>
+      Preemption can have the same effect, even if there is only one
+      CPU: by preempting one task during the critical region, we have
+      exactly the same race condition.  In this case the thread which
+      preempts might run the critical region itself.
+    </para>
+    <para>
+      The solution is to recognize when these simultaneous accesses
+      occur, and use locks to make sure that only one instance can
+      enter the critical region at any time.  There are many
+      friendly primitives in the Linux kernel to help you do this.
+      And then there are the unfriendly primitives, but I'll pretend
+      they don't exist.
+    </para>
+    </sect1>
+  </chapter>
+
+  <chapter id="locks">
+   <title>Locking in the Linux Kernel</title>
+
+   <para>
+     If I could give you one piece of advice: never sleep with anyone
+     crazier than yourself.  But if I had to give you advice on
+     locking: <emphasis>keep it simple</emphasis>.
+   </para>
+
+   <para>
+     Be reluctant to introduce new locks.
+   </para>
+
+   <para>
+     Strangely enough, this last one is the exact reverse of my advice when
+     you <emphasis>have</emphasis> slept with someone crazier than yourself.
+     And you should think about getting a big dog.
+   </para>
+
+   <sect1 id="lock-intro">
+   <title>Two Main Types of Kernel Locks: Spinlocks and Mutexes</title>
+
+   <para>
+     There are two main types of kernel locks.  The fundamental type
+     is the spinlock 
+     (<filename class="headerfile">include/asm/spinlock.h</filename>),
+     which is a very simple single-holder lock: if you can't get the 
+     spinlock, you keep trying (spinning) until you can.  Spinlocks are 
+     very small and fast, and can be used anywhere.
+   </para>
+   <para>
+     The second type is a mutex
+     (<filename class="headerfile">include/linux/mutex.h</filename>): it
+     is like a spinlock, but you may block holding a mutex.
+     If you can't lock a mutex, your task will suspend itself, and be woken
+     up when the mutex is released.  This means the CPU can do something
+     else while you are waiting.  There are many cases when you simply
+     can't sleep (see <xref linkend="sleeping-things"/>), and so have to
+     use a spinlock instead.
+   </para>
+   <para>
+     Neither type of lock is recursive: see
+     <xref linkend="deadlock"/>.
+   </para>
+   </sect1>
+ 
+   <sect1 id="uniprocessor">
+    <title>Locks and Uniprocessor Kernels</title>
+
+    <para>
+      For kernels compiled without <symbol>CONFIG_SMP</symbol>, and
+      without <symbol>CONFIG_PREEMPT</symbol> spinlocks do not exist at
+      all.  This is an excellent design decision: when no-one else can
+      run at the same time, there is no reason to have a lock.
+    </para>
+
+    <para>
+      If the kernel is compiled without <symbol>CONFIG_SMP</symbol>,
+      but <symbol>CONFIG_PREEMPT</symbol> is set, then spinlocks
+      simply disable preemption, which is sufficient to prevent any
+      races.  For most purposes, we can think of preemption as
+      equivalent to SMP, and not worry about it separately.
+    </para>
+
+    <para>
+      You should always test your locking code with <symbol>CONFIG_SMP</symbol>
+      and <symbol>CONFIG_PREEMPT</symbol> enabled, even if you don't have an SMP test box, because it
+      will still catch some kinds of locking bugs.
+    </para>
+
+    <para>
+      Mutexes still exist, because they are required for
+      synchronization between <firstterm linkend="gloss-usercontext">user 
+      contexts</firstterm>, as we will see below.
+    </para>
+   </sect1>
+
+    <sect1 id="usercontextlocking">
+     <title>Locking Only In User Context</title>
+
+     <para>
+       If you have a data structure which is only ever accessed from
+       user context, then you can use a simple mutex
+       (<filename>include/linux/mutex.h</filename>) to protect it.  This
+       is the most trivial case: you initialize the mutex.  Then you can
+       call <function>mutex_lock_interruptible()</function> to grab the mutex,
+       and <function>mutex_unlock()</function> to release it.  There is also a 
+       <function>mutex_lock()</function>, which should be avoided, because it 
+       will not return if a signal is received.
+     </para>
+
+     <para>
+       Example: <filename>net/netfilter/nf_sockopt.c</filename> allows 
+       registration of new <function>setsockopt()</function> and 
+       <function>getsockopt()</function> calls, with
+       <function>nf_register_sockopt()</function>.  Registration and 
+       de-registration are only done on module load and unload (and boot 
+       time, where there is no concurrency), and the list of registrations 
+       is only consulted for an unknown <function>setsockopt()</function>
+       or <function>getsockopt()</function> system call.  The 
+       <varname>nf_sockopt_mutex</varname> is perfect to protect this,
+       especially since the setsockopt and getsockopt calls may well
+       sleep.
+     </para>
+   </sect1>
+
+   <sect1 id="lock-user-bh">
+    <title>Locking Between User Context and Softirqs</title>
+
+    <para>
+      If a <firstterm linkend="gloss-softirq">softirq</firstterm> shares
+      data with user context, you have two problems.  Firstly, the current 
+      user context can be interrupted by a softirq, and secondly, the
+      critical region could be entered from another CPU.  This is where
+      <function>spin_lock_bh()</function> 
+      (<filename class="headerfile">include/linux/spinlock.h</filename>) is
+      used.  It disables softirqs on that CPU, then grabs the lock.
+      <function>spin_unlock_bh()</function> does the reverse.  (The
+      '_bh' suffix is a historical reference to "Bottom Halves", the
+      old name for software interrupts.  It should really be
+      called spin_lock_softirq()' in a perfect world).
+    </para>
+
+    <para>
+      Note that you can also use <function>spin_lock_irq()</function>
+      or <function>spin_lock_irqsave()</function> here, which stop
+      hardware interrupts as well: see <xref linkend="hardirq-context"/>.
+    </para>
+
+    <para>
+      This works perfectly for <firstterm linkend="gloss-up"><acronym>UP
+      </acronym></firstterm> as well: the spin lock vanishes, and this macro 
+      simply becomes <function>local_bh_disable()</function>
+      (<filename class="headerfile">include/linux/interrupt.h</filename>), which
+      protects you from the softirq being run.
+    </para>
+   </sect1>
+
+   <sect1 id="lock-user-tasklet">
+    <title>Locking Between User Context and Tasklets</title>
+
+    <para>
+      This is exactly the same as above, because <firstterm
+      linkend="gloss-tasklet">tasklets</firstterm> are actually run
+      from a softirq.
+    </para>
+   </sect1>
+
+   <sect1 id="lock-user-timers">
+    <title>Locking Between User Context and Timers</title>
+
+    <para>
+      This, too, is exactly the same as above, because <firstterm
+      linkend="gloss-timers">timers</firstterm> are actually run from
+      a softirq.  From a locking point of view, tasklets and timers
+      are identical.
+    </para>
+   </sect1>
+
+   <sect1 id="lock-tasklets">
+    <title>Locking Between Tasklets/Timers</title>
+
+    <para>
+      Sometimes a tasklet or timer might want to share data with
+      another tasklet or timer.
+    </para>
+
+    <sect2 id="lock-tasklets-same">
+     <title>The Same Tasklet/Timer</title>
+     <para>
+       Since a tasklet is never run on two CPUs at once, you don't
+       need to worry about your tasklet being reentrant (running
+       twice at once), even on SMP.
+     </para>
+    </sect2>
+
+    <sect2 id="lock-tasklets-different">
+     <title>Different Tasklets/Timers</title>
+     <para>
+       If another tasklet/timer wants
+       to share data with your tasklet or timer , you will both need to use
+       <function>spin_lock()</function> and
+       <function>spin_unlock()</function> calls.  
+       <function>spin_lock_bh()</function> is
+       unnecessary here, as you are already in a tasklet, and
+       none will be run on the same CPU.
+     </para>
+    </sect2>
+   </sect1>
+
+   <sect1 id="lock-softirqs">
+    <title>Locking Between Softirqs</title>
+
+    <para>
+      Often a softirq might
+      want to share data with itself or a tasklet/timer.
+    </para>
+
+    <sect2 id="lock-softirqs-same">
+     <title>The Same Softirq</title>
+
+     <para>
+       The same softirq can run on the other CPUs: you can use a
+       per-CPU array (see <xref linkend="per-cpu"/>) for better
+       performance.  If you're going so far as to use a softirq,
+       you probably care about scalable performance enough
+       to justify the extra complexity.
+     </para>
+
+     <para>
+       You'll need to use <function>spin_lock()</function> and 
+       <function>spin_unlock()</function> for shared data.
+     </para>
+    </sect2>
+
+    <sect2 id="lock-softirqs-different">
+     <title>Different Softirqs</title>
+
+     <para>
+       You'll need to use <function>spin_lock()</function> and
+       <function>spin_unlock()</function> for shared data, whether it
+       be a timer, tasklet, different softirq or the same or another
+       softirq: any of them could be running on a different CPU.
+     </para>
+    </sect2>
+   </sect1>
+  </chapter>
+
+  <chapter id="hardirq-context">
+   <title>Hard IRQ Context</title>
+
+   <para>
+     Hardware interrupts usually communicate with a
+     tasklet or softirq.  Frequently this involves putting work in a
+     queue, which the softirq will take out.
+   </para>
+
+   <sect1 id="hardirq-softirq">
+    <title>Locking Between Hard IRQ and Softirqs/Tasklets</title>
+
+    <para>
+      If a hardware irq handler shares data with a softirq, you have
+      two concerns.  Firstly, the softirq processing can be
+      interrupted by a hardware interrupt, and secondly, the
+      critical region could be entered by a hardware interrupt on
+      another CPU.  This is where <function>spin_lock_irq()</function> is 
+      used.  It is defined to disable interrupts on that cpu, then grab 
+      the lock. <function>spin_unlock_irq()</function> does the reverse.
+    </para>
+
+    <para>
+      The irq handler does not to use
+      <function>spin_lock_irq()</function>, because the softirq cannot
+      run while the irq handler is running: it can use
+      <function>spin_lock()</function>, which is slightly faster.  The
+      only exception would be if a different hardware irq handler uses
+      the same lock: <function>spin_lock_irq()</function> will stop
+      that from interrupting us.
+    </para>
+
+    <para>
+      This works perfectly for UP as well: the spin lock vanishes,
+      and this macro simply becomes <function>local_irq_disable()</function>
+      (<filename class="headerfile">include/asm/smp.h</filename>), which
+      protects you from the softirq/tasklet/BH being run.
+    </para>
+
+    <para>
+      <function>spin_lock_irqsave()</function> 
+      (<filename>include/linux/spinlock.h</filename>) is a variant
+      which saves whether interrupts were on or off in a flags word,
+      which is passed to <function>spin_unlock_irqrestore()</function>.  This
+      means that the same code can be used inside an hard irq handler (where
+      interrupts are already off) and in softirqs (where the irq
+      disabling is required).
+    </para>
+
+    <para>
+      Note that softirqs (and hence tasklets and timers) are run on
+      return from hardware interrupts, so
+      <function>spin_lock_irq()</function> also stops these.  In that
+      sense, <function>spin_lock_irqsave()</function> is the most
+      general and powerful locking function.
+    </para>
+
+   </sect1>
+   <sect1 id="hardirq-hardirq">
+    <title>Locking Between Two Hard IRQ Handlers</title>
+    <para>
+      It is rare to have to share data between two IRQ handlers, but
+      if you do, <function>spin_lock_irqsave()</function> should be
+      used: it is architecture-specific whether all interrupts are
+      disabled inside irq handlers themselves.
+    </para>
+   </sect1>
+
+  </chapter>
+
+  <chapter id="cheatsheet">
+   <title>Cheat Sheet For Locking</title>
+   <para>
+     Pete Zaitcev gives the following summary:
+   </para>
+   <itemizedlist>
+      <listitem>
+	<para>
+          If you are in a process context (any syscall) and want to
+	lock other process out, use a mutex.  You can take a mutex
+	and sleep (<function>copy_from_user*(</function> or
+	<function>kmalloc(x,GFP_KERNEL)</function>).
+      </para>
+      </listitem>
+      <listitem>
+	<para>
+	Otherwise (== data can be touched in an interrupt), use
+	<function>spin_lock_irqsave()</function> and
+	<function>spin_unlock_irqrestore()</function>.
+	</para>
+      </listitem>
+      <listitem>
+	<para>
+	Avoid holding spinlock for more than 5 lines of code and
+	across any function call (except accessors like
+	<function>readb</function>).
+	</para>
+      </listitem>
+    </itemizedlist>
+
+   <sect1 id="minimum-lock-reqirements">
+   <title>Table of Minimum Requirements</title>
+
+   <para> The following table lists the <emphasis>minimum</emphasis>
+	locking requirements between various contexts.  In some cases,
+	the same context can only be running on one CPU at a time, so
+	no locking is required for that context (eg. a particular
+	thread can only run on one CPU at a time, but if it needs
+	shares data with another thread, locking is required).
+   </para>
+   <para>
+	Remember the advice above: you can always use
+	<function>spin_lock_irqsave()</function>, which is a superset
+	of all other spinlock primitives.
+   </para>
+
+   <table>
+<title>Table of Locking Requirements</title>
+<tgroup cols="11">
+<tbody>
+
+<row>
+<entry></entry>
+<entry>IRQ Handler A</entry>
+<entry>IRQ Handler B</entry>
+<entry>Softirq A</entry>
+<entry>Softirq B</entry>
+<entry>Tasklet A</entry>
+<entry>Tasklet B</entry>
+<entry>Timer A</entry>
+<entry>Timer B</entry>
+<entry>User Context A</entry>
+<entry>User Context B</entry>
+</row>
+
+<row>
+<entry>IRQ Handler A</entry>
+<entry>None</entry>
+</row>
+
+<row>
+<entry>IRQ Handler B</entry>
+<entry>SLIS</entry>
+<entry>None</entry>
+</row>
+
+<row>
+<entry>Softirq A</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SL</entry>
+</row>
+
+<row>
+<entry>Softirq B</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+</row>
+
+<row>
+<entry>Tasklet A</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>None</entry>
+</row>
+
+<row>
+<entry>Tasklet B</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>None</entry>
+</row>
+
+<row>
+<entry>Timer A</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>None</entry>
+</row>
+
+<row>
+<entry>Timer B</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>None</entry>
+</row>
+
+<row>
+<entry>User Context A</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>None</entry>
+</row>
+
+<row>
+<entry>User Context B</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>MLI</entry>
+<entry>None</entry>
+</row>
+
+</tbody>
+</tgroup>
+</table>
+
+   <table>
+<title>Legend for Locking Requirements Table</title>
+<tgroup cols="2">
+<tbody>
+
+<row>
+<entry>SLIS</entry>
+<entry>spin_lock_irqsave</entry>
+</row>
+<row>
+<entry>SLI</entry>
+<entry>spin_lock_irq</entry>
+</row>
+<row>
+<entry>SL</entry>
+<entry>spin_lock</entry>
+</row>
+<row>
+<entry>SLBH</entry>
+<entry>spin_lock_bh</entry>
+</row>
+<row>
+<entry>MLI</entry>
+<entry>mutex_lock_interruptible</entry>
+</row>
+
+</tbody>
+</tgroup>
+</table>
+
+</sect1>
+</chapter>
+
+<chapter id="trylock-functions">
+ <title>The trylock Functions</title>
+  <para>
+   There are functions that try to acquire a lock only once and immediately
+   return a value telling about success or failure to acquire the lock.
+   They can be used if you need no access to the data protected with the lock
+   when some other thread is holding the lock. You should acquire the lock
+   later if you then need access to the data protected with the lock.
+  </para>
+
+  <para>
+    <function>spin_trylock()</function> does not spin but returns non-zero if
+    it acquires the spinlock on the first try or 0 if not. This function can
+    be used in all contexts like <function>spin_lock</function>: you must have
+    disabled the contexts that might interrupt you and acquire the spin lock.
+  </para>
+
+  <para>
+    <function>mutex_trylock()</function> does not suspend your task
+    but returns non-zero if it could lock the mutex on the first try
+    or 0 if not. This function cannot be safely used in hardware or software
+    interrupt contexts despite not sleeping.
+  </para>
+</chapter>
+
+  <chapter id="Examples">
+   <title>Common Examples</title>
+    <para>
+Let's step through a simple example: a cache of number to name
+mappings.  The cache keeps a count of how often each of the objects is
+used, and when it gets full, throws out the least used one.
+
+    </para>
+
+   <sect1 id="examples-usercontext">
+    <title>All In User Context</title>
+    <para>
+For our first example, we assume that all operations are in user
+context (ie. from system calls), so we can sleep.  This means we can
+use a mutex to protect the cache and all the objects within
+it.  Here's the code:
+    </para>
+
+    <programlisting>
+#include &lt;linux/list.h&gt;
+#include &lt;linux/slab.h&gt;
+#include &lt;linux/string.h&gt;
+#include &lt;linux/mutex.h&gt;
+#include &lt;asm/errno.h&gt;
+
+struct object
+{
+        struct list_head list;
+        int id;
+        char name[32];
+        int popularity;
+};
+
+/* Protects the cache, cache_num, and the objects within it */
+static DEFINE_MUTEX(cache_lock);
+static LIST_HEAD(cache);
+static unsigned int cache_num = 0;
+#define MAX_CACHE_SIZE 10
+
+/* Must be holding cache_lock */
+static struct object *__cache_find(int id)
+{
+        struct object *i;
+
+        list_for_each_entry(i, &amp;cache, list)
+                if (i-&gt;id == id) {
+                        i-&gt;popularity++;
+                        return i;
+                }
+        return NULL;
+}
+
+/* Must be holding cache_lock */
+static void __cache_delete(struct object *obj)
+{
+        BUG_ON(!obj);
+        list_del(&amp;obj-&gt;list);
+        kfree(obj);
+        cache_num--;
+}
+
+/* Must be holding cache_lock */
+static void __cache_add(struct object *obj)
+{
+        list_add(&amp;obj-&gt;list, &amp;cache);
+        if (++cache_num > MAX_CACHE_SIZE) {
+                struct object *i, *outcast = NULL;
+                list_for_each_entry(i, &amp;cache, list) {
+                        if (!outcast || i-&gt;popularity &lt; outcast-&gt;popularity)
+                                outcast = i;
+                }
+                __cache_delete(outcast);
+        }
+}
+
+int cache_add(int id, const char *name)
+{
+        struct object *obj;
+
+        if ((obj = kmalloc(sizeof(*obj), GFP_KERNEL)) == NULL)
+                return -ENOMEM;
+
+        strlcpy(obj-&gt;name, name, sizeof(obj-&gt;name));
+        obj-&gt;id = id;
+        obj-&gt;popularity = 0;
+
+        mutex_lock(&amp;cache_lock);
+        __cache_add(obj);
+        mutex_unlock(&amp;cache_lock);
+        return 0;
+}
+
+void cache_delete(int id)
+{
+        mutex_lock(&amp;cache_lock);
+        __cache_delete(__cache_find(id));
+        mutex_unlock(&amp;cache_lock);
+}
+
+int cache_find(int id, char *name)
+{
+        struct object *obj;
+        int ret = -ENOENT;
+
+        mutex_lock(&amp;cache_lock);
+        obj = __cache_find(id);
+        if (obj) {
+                ret = 0;
+                strcpy(name, obj-&gt;name);
+        }
+        mutex_unlock(&amp;cache_lock);
+        return ret;
+}
+</programlisting>
+
+    <para>
+Note that we always make sure we have the cache_lock when we add,
+delete, or look up the cache: both the cache infrastructure itself and
+the contents of the objects are protected by the lock.  In this case
+it's easy, since we copy the data for the user, and never let them
+access the objects directly.
+    </para>
+    <para>
+There is a slight (and common) optimization here: in
+<function>cache_add</function> we set up the fields of the object
+before grabbing the lock.  This is safe, as no-one else can access it
+until we put it in cache.
+    </para>
+    </sect1>
+
+   <sect1 id="examples-interrupt">
+    <title>Accessing From Interrupt Context</title>
+    <para>
+Now consider the case where <function>cache_find</function> can be
+called from interrupt context: either a hardware interrupt or a
+softirq.  An example would be a timer which deletes object from the
+cache.
+    </para>
+    <para>
+The change is shown below, in standard patch format: the
+<symbol>-</symbol> are lines which are taken away, and the
+<symbol>+</symbol> are lines which are added.
+    </para>
+<programlisting>
+--- cache.c.usercontext	2003-12-09 13:58:54.000000000 +1100
++++ cache.c.interrupt	2003-12-09 14:07:49.000000000 +1100
+@@ -12,7 +12,7 @@
+         int popularity;
+ };
+
+-static DEFINE_MUTEX(cache_lock);
++static DEFINE_SPINLOCK(cache_lock);
+ static LIST_HEAD(cache);
+ static unsigned int cache_num = 0;
+ #define MAX_CACHE_SIZE 10
+@@ -55,6 +55,7 @@
+ int cache_add(int id, const char *name)
+ {
+         struct object *obj;
++        unsigned long flags;
+
+         if ((obj = kmalloc(sizeof(*obj), GFP_KERNEL)) == NULL)
+                 return -ENOMEM;
+@@ -63,30 +64,33 @@
+         obj-&gt;id = id;
+         obj-&gt;popularity = 0;
+
+-        mutex_lock(&amp;cache_lock);
++        spin_lock_irqsave(&amp;cache_lock, flags);
+         __cache_add(obj);
+-        mutex_unlock(&amp;cache_lock);
++        spin_unlock_irqrestore(&amp;cache_lock, flags);
+         return 0;
+ }
+
+ void cache_delete(int id)
+ {
+-        mutex_lock(&amp;cache_lock);
++        unsigned long flags;
++
++        spin_lock_irqsave(&amp;cache_lock, flags);
+         __cache_delete(__cache_find(id));
+-        mutex_unlock(&amp;cache_lock);
++        spin_unlock_irqrestore(&amp;cache_lock, flags);
+ }
+
+ int cache_find(int id, char *name)
+ {
+         struct object *obj;
+         int ret = -ENOENT;
++        unsigned long flags;
+
+-        mutex_lock(&amp;cache_lock);
++        spin_lock_irqsave(&amp;cache_lock, flags);
+         obj = __cache_find(id);
+         if (obj) {
+                 ret = 0;
+                 strcpy(name, obj-&gt;name);
+         }
+-        mutex_unlock(&amp;cache_lock);
++        spin_unlock_irqrestore(&amp;cache_lock, flags);
+         return ret;
+ }
+</programlisting>
+
+    <para>
+Note that the <function>spin_lock_irqsave</function> will turn off
+interrupts if they are on, otherwise does nothing (if we are already
+in an interrupt handler), hence these functions are safe to call from
+any context.
+    </para>
+    <para>
+Unfortunately, <function>cache_add</function> calls
+<function>kmalloc</function> with the <symbol>GFP_KERNEL</symbol>
+flag, which is only legal in user context.  I have assumed that
+<function>cache_add</function> is still only called in user context,
+otherwise this should become a parameter to
+<function>cache_add</function>.
+    </para>
+  </sect1>
+   <sect1 id="examples-refcnt">
+    <title>Exposing Objects Outside This File</title>
+    <para>
+If our objects contained more information, it might not be sufficient
+to copy the information in and out: other parts of the code might want
+to keep pointers to these objects, for example, rather than looking up
+the id every time.  This produces two problems.
+    </para>
+    <para>
+The first problem is that we use the <symbol>cache_lock</symbol> to
+protect objects: we'd need to make this non-static so the rest of the
+code can use it.  This makes locking trickier, as it is no longer all
+in one place.
+    </para>
+    <para>
+The second problem is the lifetime problem: if another structure keeps
+a pointer to an object, it presumably expects that pointer to remain
+valid.  Unfortunately, this is only guaranteed while you hold the
+lock, otherwise someone might call <function>cache_delete</function>
+and even worse, add another object, re-using the same address.
+    </para>
+    <para>
+As there is only one lock, you can't hold it forever: no-one else would
+get any work done.
+    </para>
+    <para>
+The solution to this problem is to use a reference count: everyone who
+has a pointer to the object increases it when they first get the
+object, and drops the reference count when they're finished with it.
+Whoever drops it to zero knows it is unused, and can actually delete it.
+    </para>
+    <para>
+Here is the code:
+    </para>
+
+<programlisting>
+--- cache.c.interrupt	2003-12-09 14:25:43.000000000 +1100
++++ cache.c.refcnt	2003-12-09 14:33:05.000000000 +1100
+@@ -7,6 +7,7 @@
+ struct object
+ {
+         struct list_head list;
++        unsigned int refcnt;
+         int id;
+         char name[32];
+         int popularity;
+@@ -17,6 +18,35 @@
+ static unsigned int cache_num = 0;
+ #define MAX_CACHE_SIZE 10
+
++static void __object_put(struct object *obj)
++{
++        if (--obj-&gt;refcnt == 0)
++                kfree(obj);
++}
++
++static void __object_get(struct object *obj)
++{
++        obj-&gt;refcnt++;
++}
++
++void object_put(struct object *obj)
++{
++        unsigned long flags;
++
++        spin_lock_irqsave(&amp;cache_lock, flags);
++        __object_put(obj);
++        spin_unlock_irqrestore(&amp;cache_lock, flags);
++}
++
++void object_get(struct object *obj)
++{
++        unsigned long flags;
++
++        spin_lock_irqsave(&amp;cache_lock, flags);
++        __object_get(obj);
++        spin_unlock_irqrestore(&amp;cache_lock, flags);
++}
++
+ /* Must be holding cache_lock */
+ static struct object *__cache_find(int id)
+ {
+@@ -35,6 +65,7 @@
+ {
+         BUG_ON(!obj);
+         list_del(&amp;obj-&gt;list);
++        __object_put(obj);
+         cache_num--;
+ }
+
+@@ -63,6 +94,7 @@
+         strlcpy(obj-&gt;name, name, sizeof(obj-&gt;name));
+         obj-&gt;id = id;
+         obj-&gt;popularity = 0;
++        obj-&gt;refcnt = 1; /* The cache holds a reference */
+
+         spin_lock_irqsave(&amp;cache_lock, flags);
+         __cache_add(obj);
+@@ -79,18 +111,15 @@
+         spin_unlock_irqrestore(&amp;cache_lock, flags);
+ }
+
+-int cache_find(int id, char *name)
++struct object *cache_find(int id)
+ {
+         struct object *obj;
+-        int ret = -ENOENT;
+         unsigned long flags;
+
+         spin_lock_irqsave(&amp;cache_lock, flags);
+         obj = __cache_find(id);
+-        if (obj) {
+-                ret = 0;
+-                strcpy(name, obj-&gt;name);
+-        }
++        if (obj)
++                __object_get(obj);
+         spin_unlock_irqrestore(&amp;cache_lock, flags);
+-        return ret;
++        return obj;
+ }
+</programlisting>
+
+<para>
+We encapsulate the reference counting in the standard 'get' and 'put'
+functions.  Now we can return the object itself from
+<function>cache_find</function> which has the advantage that the user
+can now sleep holding the object (eg. to
+<function>copy_to_user</function> to name to userspace).
+</para>
+<para>
+The other point to note is that I said a reference should be held for
+every pointer to the object: thus the reference count is 1 when first
+inserted into the cache.  In some versions the framework does not hold
+a reference count, but they are more complicated.
+</para>
+
+   <sect2 id="examples-refcnt-atomic">
+    <title>Using Atomic Operations For The Reference Count</title>
+<para>
+In practice, <type>atomic_t</type> would usually be used for
+<structfield>refcnt</structfield>.  There are a number of atomic
+operations defined in
+
+<filename class="headerfile">include/asm/atomic.h</filename>: these are
+guaranteed to be seen atomically from all CPUs in the system, so no
+lock is required.  In this case, it is simpler than using spinlocks,
+although for anything non-trivial using spinlocks is clearer.  The
+<function>atomic_inc</function> and
+<function>atomic_dec_and_test</function> are used instead of the
+standard increment and decrement operators, and the lock is no longer
+used to protect the reference count itself.
+</para>
+
+<programlisting>
+--- cache.c.refcnt	2003-12-09 15:00:35.000000000 +1100
++++ cache.c.refcnt-atomic	2003-12-11 15:49:42.000000000 +1100
+@@ -7,7 +7,7 @@
+ struct object
+ {
+         struct list_head list;
+-        unsigned int refcnt;
++        atomic_t refcnt;
+         int id;
+         char name[32];
+         int popularity;
+@@ -18,33 +18,15 @@
+ static unsigned int cache_num = 0;
+ #define MAX_CACHE_SIZE 10
+
+-static void __object_put(struct object *obj)
+-{
+-        if (--obj-&gt;refcnt == 0)
+-                kfree(obj);
+-}
+-
+-static void __object_get(struct object *obj)
+-{
+-        obj-&gt;refcnt++;
+-}
+-
+ void object_put(struct object *obj)
+ {
+-        unsigned long flags;
+-
+-        spin_lock_irqsave(&amp;cache_lock, flags);
+-        __object_put(obj);
+-        spin_unlock_irqrestore(&amp;cache_lock, flags);
++        if (atomic_dec_and_test(&amp;obj-&gt;refcnt))
++                kfree(obj);
+ }
+
+ void object_get(struct object *obj)
+ {
+-        unsigned long flags;
+-
+-        spin_lock_irqsave(&amp;cache_lock, flags);
+-        __object_get(obj);
+-        spin_unlock_irqrestore(&amp;cache_lock, flags);
++        atomic_inc(&amp;obj-&gt;refcnt);
+ }
+
+ /* Must be holding cache_lock */
+@@ -65,7 +47,7 @@
+ {
+         BUG_ON(!obj);
+         list_del(&amp;obj-&gt;list);
+-        __object_put(obj);
++        object_put(obj);
+         cache_num--;
+ }
+
+@@ -94,7 +76,7 @@
+         strlcpy(obj-&gt;name, name, sizeof(obj-&gt;name));
+         obj-&gt;id = id;
+         obj-&gt;popularity = 0;
+-        obj-&gt;refcnt = 1; /* The cache holds a reference */
++        atomic_set(&amp;obj-&gt;refcnt, 1); /* The cache holds a reference */
+
+         spin_lock_irqsave(&amp;cache_lock, flags);
+         __cache_add(obj);
+@@ -119,7 +101,7 @@
+         spin_lock_irqsave(&amp;cache_lock, flags);
+         obj = __cache_find(id);
+         if (obj)
+-                __object_get(obj);
++                object_get(obj);
+         spin_unlock_irqrestore(&amp;cache_lock, flags);
+         return obj;
+ }
+</programlisting>
+</sect2>
+</sect1>
+
+   <sect1 id="examples-lock-per-obj">
+    <title>Protecting The Objects Themselves</title>
+    <para>
+In these examples, we assumed that the objects (except the reference
+counts) never changed once they are created.  If we wanted to allow
+the name to change, there are three possibilities:
+    </para>
+    <itemizedlist>
+      <listitem>
+	<para>
+You can make <symbol>cache_lock</symbol> non-static, and tell people
+to grab that lock before changing the name in any object.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+You can provide a <function>cache_obj_rename</function> which grabs
+this lock and changes the name for the caller, and tell everyone to
+use that function.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+You can make the <symbol>cache_lock</symbol> protect only the cache
+itself, and use another lock to protect the name.
+        </para>
+      </listitem>
+    </itemizedlist>
+
+      <para>
+Theoretically, you can make the locks as fine-grained as one lock for
+every field, for every object.  In practice, the most common variants
+are:
+</para>
+    <itemizedlist>
+      <listitem>
+	<para>
+One lock which protects the infrastructure (the <symbol>cache</symbol>
+list in this example) and all the objects.  This is what we have done
+so far.
+	</para>
+      </listitem>
+      <listitem>
+        <para>
+One lock which protects the infrastructure (including the list
+pointers inside the objects), and one lock inside the object which
+protects the rest of that object.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+Multiple locks to protect the infrastructure (eg. one lock per hash
+chain), possibly with a separate per-object lock.
+        </para>
+      </listitem>
+    </itemizedlist>
+
+<para>
+Here is the "lock-per-object" implementation:
+</para>
+<programlisting>
+--- cache.c.refcnt-atomic	2003-12-11 15:50:54.000000000 +1100
++++ cache.c.perobjectlock	2003-12-11 17:15:03.000000000 +1100
+@@ -6,11 +6,17 @@
+
+ struct object
+ {
++        /* These two protected by cache_lock. */
+         struct list_head list;
++        int popularity;
++
+         atomic_t refcnt;
++
++        /* Doesn't change once created. */
+         int id;
++
++        spinlock_t lock; /* Protects the name */
+         char name[32];
+-        int popularity;
+ };
+
+ static DEFINE_SPINLOCK(cache_lock);
+@@ -77,6 +84,7 @@
+         obj-&gt;id = id;
+         obj-&gt;popularity = 0;
+         atomic_set(&amp;obj-&gt;refcnt, 1); /* The cache holds a reference */
++        spin_lock_init(&amp;obj-&gt;lock);
+
+         spin_lock_irqsave(&amp;cache_lock, flags);
+         __cache_add(obj);
+</programlisting>
+
+<para>
+Note that I decide that the <structfield>popularity</structfield>
+count should be protected by the <symbol>cache_lock</symbol> rather
+than the per-object lock: this is because it (like the
+<structname>struct list_head</structname> inside the object) is
+logically part of the infrastructure.  This way, I don't need to grab
+the lock of every object in <function>__cache_add</function> when
+seeking the least popular.
+</para>
+
+<para>
+I also decided that the <structfield>id</structfield> member is
+unchangeable, so I don't need to grab each object lock in
+<function>__cache_find()</function> to examine the
+<structfield>id</structfield>: the object lock is only used by a
+caller who wants to read or write the <structfield>name</structfield>
+field.
+</para>
+
+<para>
+Note also that I added a comment describing what data was protected by
+which locks.  This is extremely important, as it describes the runtime
+behavior of the code, and can be hard to gain from just reading.  And
+as Alan Cox says, <quote>Lock data, not code</quote>.
+</para>
+</sect1>
+</chapter>
+
+   <chapter id="common-problems">
+    <title>Common Problems</title>
+    <sect1 id="deadlock">
+    <title>Deadlock: Simple and Advanced</title>
+
+    <para>
+      There is a coding bug where a piece of code tries to grab a
+      spinlock twice: it will spin forever, waiting for the lock to
+      be released (spinlocks, rwlocks and mutexes are not
+      recursive in Linux).  This is trivial to diagnose: not a
+      stay-up-five-nights-talk-to-fluffy-code-bunnies kind of
+      problem.
+    </para>
+
+    <para>
+      For a slightly more complex case, imagine you have a region
+      shared by a softirq and user context.  If you use a
+      <function>spin_lock()</function> call to protect it, it is 
+      possible that the user context will be interrupted by the softirq
+      while it holds the lock, and the softirq will then spin
+      forever trying to get the same lock.
+    </para>
+
+    <para>
+      Both of these are called deadlock, and as shown above, it can
+      occur even with a single CPU (although not on UP compiles,
+      since spinlocks vanish on kernel compiles with 
+      <symbol>CONFIG_SMP</symbol>=n. You'll still get data corruption 
+      in the second example).
+    </para>
+
+    <para>
+      This complete lockup is easy to diagnose: on SMP boxes the
+      watchdog timer or compiling with <symbol>DEBUG_SPINLOCK</symbol> set
+      (<filename>include/linux/spinlock.h</filename>) will show this up 
+      immediately when it happens.
+    </para>
+
+    <para>
+      A more complex problem is the so-called 'deadly embrace',
+      involving two or more locks.  Say you have a hash table: each
+      entry in the table is a spinlock, and a chain of hashed
+      objects.  Inside a softirq handler, you sometimes want to
+      alter an object from one place in the hash to another: you
+      grab the spinlock of the old hash chain and the spinlock of
+      the new hash chain, and delete the object from the old one,
+      and insert it in the new one.
+    </para>
+
+    <para>
+      There are two problems here.  First, if your code ever
+      tries to move the object to the same chain, it will deadlock
+      with itself as it tries to lock it twice.  Secondly, if the
+      same softirq on another CPU is trying to move another object
+      in the reverse direction, the following could happen:
+    </para>
+
+    <table>
+     <title>Consequences</title>
+
+     <tgroup cols="2" align="left">
+
+      <thead>
+       <row>
+        <entry>CPU 1</entry>
+        <entry>CPU 2</entry>
+       </row>
+      </thead>
+
+      <tbody>
+       <row>
+        <entry>Grab lock A -&gt; OK</entry>
+        <entry>Grab lock B -&gt; OK</entry>
+       </row>
+       <row>
+        <entry>Grab lock B -&gt; spin</entry>
+        <entry>Grab lock A -&gt; spin</entry>
+       </row>
+      </tbody>
+     </tgroup>
+    </table>
+
+    <para>
+      The two CPUs will spin forever, waiting for the other to give up
+      their lock.  It will look, smell, and feel like a crash.
+    </para>
+    </sect1>
+
+    <sect1 id="techs-deadlock-prevent">
+     <title>Preventing Deadlock</title>
+
+     <para>
+       Textbooks will tell you that if you always lock in the same
+       order, you will never get this kind of deadlock.  Practice
+       will tell you that this approach doesn't scale: when I
+       create a new lock, I don't understand enough of the kernel
+       to figure out where in the 5000 lock hierarchy it will fit.
+     </para>
+
+     <para>
+       The best locks are encapsulated: they never get exposed in
+       headers, and are never held around calls to non-trivial
+       functions outside the same file.  You can read through this
+       code and see that it will never deadlock, because it never
+       tries to grab another lock while it has that one.  People
+       using your code don't even need to know you are using a
+       lock.
+     </para>
+
+     <para>
+       A classic problem here is when you provide callbacks or
+       hooks: if you call these with the lock held, you risk simple
+       deadlock, or a deadly embrace (who knows what the callback
+       will do?).  Remember, the other programmers are out to get
+       you, so don't do this.
+     </para>
+
+    <sect2 id="techs-deadlock-overprevent">
+     <title>Overzealous Prevention Of Deadlocks</title>
+
+     <para>
+       Deadlocks are problematic, but not as bad as data
+       corruption.  Code which grabs a read lock, searches a list,
+       fails to find what it wants, drops the read lock, grabs a
+       write lock and inserts the object has a race condition.
+     </para>
+
+     <para>
+       If you don't see why, please stay the fuck away from my code.
+     </para>
+    </sect2>
+    </sect1>
+
+   <sect1 id="racing-timers">
+    <title>Racing Timers: A Kernel Pastime</title>
+
+    <para>
+      Timers can produce their own special problems with races.
+      Consider a collection of objects (list, hash, etc) where each
+      object has a timer which is due to destroy it.
+    </para>
+
+    <para>
+      If you want to destroy the entire collection (say on module
+      removal), you might do the following:
+    </para>
+
+    <programlisting>
+        /* THIS CODE BAD BAD BAD BAD: IF IT WAS ANY WORSE IT WOULD USE
+           HUNGARIAN NOTATION */
+        spin_lock_bh(&amp;list_lock);
+
+        while (list) {
+                struct foo *next = list-&gt;next;
+                del_timer(&amp;list-&gt;timer);
+                kfree(list);
+                list = next;
+        }
+
+        spin_unlock_bh(&amp;list_lock);
+    </programlisting>
+
+    <para>
+      Sooner or later, this will crash on SMP, because a timer can
+      have just gone off before the <function>spin_lock_bh()</function>,
+      and it will only get the lock after we
+      <function>spin_unlock_bh()</function>, and then try to free
+      the element (which has already been freed!).
+    </para>
+
+    <para>
+      This can be avoided by checking the result of
+      <function>del_timer()</function>: if it returns
+      <returnvalue>1</returnvalue>, the timer has been deleted.
+      If <returnvalue>0</returnvalue>, it means (in this
+      case) that it is currently running, so we can do:
+    </para>
+
+    <programlisting>
+        retry:
+                spin_lock_bh(&amp;list_lock);
+
+                while (list) {
+                        struct foo *next = list-&gt;next;
+                        if (!del_timer(&amp;list-&gt;timer)) {
+                                /* Give timer a chance to delete this */
+                                spin_unlock_bh(&amp;list_lock);
+                                goto retry;
+                        }
+                        kfree(list);
+                        list = next;
+                }
+
+                spin_unlock_bh(&amp;list_lock);
+    </programlisting>
+
+    <para>
+      Another common problem is deleting timers which restart
+      themselves (by calling <function>add_timer()</function> at the end
+      of their timer function).  Because this is a fairly common case
+      which is prone to races, you should use <function>del_timer_sync()</function>
+      (<filename class="headerfile">include/linux/timer.h</filename>)
+      to handle this case.  It returns the number of times the timer
+      had to be deleted before we finally stopped it from adding itself back
+      in.
+    </para>
+   </sect1>
+
+  </chapter>
+
+ <chapter id="Efficiency">
+    <title>Locking Speed</title>
+
+    <para>
+There are three main things to worry about when considering speed of
+some code which does locking.  First is concurrency: how many things
+are going to be waiting while someone else is holding a lock.  Second
+is the time taken to actually acquire and release an uncontended lock.
+Third is using fewer, or smarter locks.  I'm assuming that the lock is
+used fairly often: otherwise, you wouldn't be concerned about
+efficiency.
+</para>
+    <para>
+Concurrency depends on how long the lock is usually held: you should
+hold the lock for as long as needed, but no longer.  In the cache
+example, we always create the object without the lock held, and then
+grab the lock only when we are ready to insert it in the list.
+</para>
+    <para>
+Acquisition times depend on how much damage the lock operations do to
+the pipeline (pipeline stalls) and how likely it is that this CPU was
+the last one to grab the lock (ie. is the lock cache-hot for this
+CPU): on a machine with more CPUs, this likelihood drops fast.
+Consider a 700MHz Intel Pentium III: an instruction takes about 0.7ns,
+an atomic increment takes about 58ns, a lock which is cache-hot on
+this CPU takes 160ns, and a cacheline transfer from another CPU takes
+an additional 170 to 360ns.  (These figures from Paul McKenney's
+<ulink url="http://www.linuxjournal.com/article.php?sid=6993"> Linux
+Journal RCU article</ulink>).
+</para>
+    <para>
+These two aims conflict: holding a lock for a short time might be done
+by splitting locks into parts (such as in our final per-object-lock
+example), but this increases the number of lock acquisitions, and the
+results are often slower than having a single lock.  This is another
+reason to advocate locking simplicity.
+</para>
+    <para>
+The third concern is addressed below: there are some methods to reduce
+the amount of locking which needs to be done.
+</para>
+
+  <sect1 id="efficiency-rwlocks">
+   <title>Read/Write Lock Variants</title>
+
+   <para>
+      Both spinlocks and mutexes have read/write variants:
+      <type>rwlock_t</type> and <structname>struct rw_semaphore</structname>.
+      These divide users into two classes: the readers and the writers.  If
+      you are only reading the data, you can get a read lock, but to write to
+      the data you need the write lock.  Many people can hold a read lock,
+      but a writer must be sole holder.
+    </para>
+
+   <para>
+      If your code divides neatly along reader/writer lines (as our
+      cache code does), and the lock is held by readers for
+      significant lengths of time, using these locks can help.  They
+      are slightly slower than the normal locks though, so in practice
+      <type>rwlock_t</type> is not usually worthwhile.
+    </para>
+   </sect1>
+
+   <sect1 id="efficiency-read-copy-update">
+    <title>Avoiding Locks: Read Copy Update</title>
+
+    <para>
+      There is a special method of read/write locking called Read Copy
+      Update.  Using RCU, the readers can avoid taking a lock
+      altogether: as we expect our cache to be read more often than
+      updated (otherwise the cache is a waste of time), it is a
+      candidate for this optimization.
+    </para>
+
+    <para>
+      How do we get rid of read locks?  Getting rid of read locks
+      means that writers may be changing the list underneath the
+      readers.  That is actually quite simple: we can read a linked
+      list while an element is being added if the writer adds the
+      element very carefully.  For example, adding
+      <symbol>new</symbol> to a single linked list called
+      <symbol>list</symbol>:
+    </para>
+
+    <programlisting>
+        new-&gt;next = list-&gt;next;
+        wmb();
+        list-&gt;next = new;
+    </programlisting>
+
+    <para>
+      The <function>wmb()</function> is a write memory barrier.  It
+      ensures that the first operation (setting the new element's
+      <symbol>next</symbol> pointer) is complete and will be seen by
+      all CPUs, before the second operation is (putting the new
+      element into the list).  This is important, since modern
+      compilers and modern CPUs can both reorder instructions unless
+      told otherwise: we want a reader to either not see the new
+      element at all, or see the new element with the
+      <symbol>next</symbol> pointer correctly pointing at the rest of
+      the list.
+    </para>
+    <para>
+      Fortunately, there is a function to do this for standard
+      <structname>struct list_head</structname> lists:
+      <function>list_add_rcu()</function>
+      (<filename>include/linux/list.h</filename>).
+    </para>
+    <para>
+      Removing an element from the list is even simpler: we replace
+      the pointer to the old element with a pointer to its successor,
+      and readers will either see it, or skip over it.
+    </para>
+    <programlisting>
+        list-&gt;next = old-&gt;next;
+    </programlisting>
+    <para>
+      There is <function>list_del_rcu()</function>
+      (<filename>include/linux/list.h</filename>) which does this (the
+      normal version poisons the old object, which we don't want).
+    </para>
+    <para>
+      The reader must also be careful: some CPUs can look through the
+      <symbol>next</symbol> pointer to start reading the contents of
+      the next element early, but don't realize that the pre-fetched
+      contents is wrong when the <symbol>next</symbol> pointer changes
+      underneath them.  Once again, there is a
+      <function>list_for_each_entry_rcu()</function>
+      (<filename>include/linux/list.h</filename>) to help you.  Of
+      course, writers can just use
+      <function>list_for_each_entry()</function>, since there cannot
+      be two simultaneous writers.
+    </para>
+    <para>
+      Our final dilemma is this: when can we actually destroy the
+      removed element?  Remember, a reader might be stepping through
+      this element in the list right now: if we free this element and
+      the <symbol>next</symbol> pointer changes, the reader will jump
+      off into garbage and crash.  We need to wait until we know that
+      all the readers who were traversing the list when we deleted the
+      element are finished.  We use <function>call_rcu()</function> to
+      register a callback which will actually destroy the object once
+      the readers are finished.
+    </para>
+    <para>
+      But how does Read Copy Update know when the readers are
+      finished?  The method is this: firstly, the readers always
+      traverse the list inside
+      <function>rcu_read_lock()</function>/<function>rcu_read_unlock()</function>
+      pairs: these simply disable preemption so the reader won't go to
+      sleep while reading the list.
+    </para>
+    <para>
+      RCU then waits until every other CPU has slept at least once:
+      since readers cannot sleep, we know that any readers which were
+      traversing the list during the deletion are finished, and the
+      callback is triggered.  The real Read Copy Update code is a
+      little more optimized than this, but this is the fundamental
+      idea.
+    </para>
+
+<programlisting>
+--- cache.c.perobjectlock	2003-12-11 17:15:03.000000000 +1100
++++ cache.c.rcupdate	2003-12-11 17:55:14.000000000 +1100
+@@ -1,15 +1,18 @@
+ #include &lt;linux/list.h&gt;
+ #include &lt;linux/slab.h&gt;
+ #include &lt;linux/string.h&gt;
++#include &lt;linux/rcupdate.h&gt;
+ #include &lt;linux/mutex.h&gt;
+ #include &lt;asm/errno.h&gt;
+
+ struct object
+ {
+-        /* These two protected by cache_lock. */
++        /* This is protected by RCU */
+         struct list_head list;
+         int popularity;
+
++        struct rcu_head rcu;
++
+         atomic_t refcnt;
+
+         /* Doesn't change once created. */
+@@ -40,7 +43,7 @@
+ {
+         struct object *i;
+
+-        list_for_each_entry(i, &amp;cache, list) {
++        list_for_each_entry_rcu(i, &amp;cache, list) {
+                 if (i-&gt;id == id) {
+                         i-&gt;popularity++;
+                         return i;
+@@ -49,19 +52,25 @@
+         return NULL;
+ }
+
++/* Final discard done once we know no readers are looking. */
++static void cache_delete_rcu(void *arg)
++{
++        object_put(arg);
++}
++
+ /* Must be holding cache_lock */
+ static void __cache_delete(struct object *obj)
+ {
+         BUG_ON(!obj);
+-        list_del(&amp;obj-&gt;list);
+-        object_put(obj);
++        list_del_rcu(&amp;obj-&gt;list);
+         cache_num--;
++        call_rcu(&amp;obj-&gt;rcu, cache_delete_rcu, obj);
+ }
+
+ /* Must be holding cache_lock */
+ static void __cache_add(struct object *obj)
+ {
+-        list_add(&amp;obj-&gt;list, &amp;cache);
++        list_add_rcu(&amp;obj-&gt;list, &amp;cache);
+         if (++cache_num > MAX_CACHE_SIZE) {
+                 struct object *i, *outcast = NULL;
+                 list_for_each_entry(i, &amp;cache, list) {
+@@ -85,6 +94,7 @@
+         obj-&gt;popularity = 0;
+         atomic_set(&amp;obj-&gt;refcnt, 1); /* The cache holds a reference */
+         spin_lock_init(&amp;obj-&gt;lock);
++        INIT_RCU_HEAD(&amp;obj-&gt;rcu);
+
+         spin_lock_irqsave(&amp;cache_lock, flags);
+         __cache_add(obj);
+@@ -104,12 +114,11 @@
+ struct object *cache_find(int id)
+ {
+         struct object *obj;
+-        unsigned long flags;
+
+-        spin_lock_irqsave(&amp;cache_lock, flags);
++        rcu_read_lock();
+         obj = __cache_find(id);
+         if (obj)
+                 object_get(obj);
+-        spin_unlock_irqrestore(&amp;cache_lock, flags);
++        rcu_read_unlock();
+         return obj;
+ }
+</programlisting>
+
+<para>
+Note that the reader will alter the
+<structfield>popularity</structfield> member in
+<function>__cache_find()</function>, and now it doesn't hold a lock.
+One solution would be to make it an <type>atomic_t</type>, but for
+this usage, we don't really care about races: an approximate result is
+good enough, so I didn't change it.
+</para>
+
+<para>
+The result is that <function>cache_find()</function> requires no
+synchronization with any other functions, so is almost as fast on SMP
+as it would be on UP.
+</para>
+
+<para>
+There is a furthur optimization possible here: remember our original
+cache code, where there were no reference counts and the caller simply
+held the lock whenever using the object?  This is still possible: if
+you hold the lock, noone can delete the object, so you don't need to
+get and put the reference count.
+</para>
+
+<para>
+Now, because the 'read lock' in RCU is simply disabling preemption, a
+caller which always has preemption disabled between calling
+<function>cache_find()</function> and
+<function>object_put()</function> does not need to actually get and
+put the reference count: we could expose
+<function>__cache_find()</function> by making it non-static, and
+such callers could simply call that.
+</para>
+<para>
+The benefit here is that the reference count is not written to: the
+object is not altered in any way, which is much faster on SMP
+machines due to caching.
+</para>
+  </sect1>
+
+   <sect1 id="per-cpu">
+    <title>Per-CPU Data</title>
+
+    <para>
+      Another technique for avoiding locking which is used fairly
+      widely is to duplicate information for each CPU.  For example,
+      if you wanted to keep a count of a common condition, you could
+      use a spin lock and a single counter.  Nice and simple.
+    </para>
+
+    <para>
+      If that was too slow (it's usually not, but if you've got a
+      really big machine to test on and can show that it is), you
+      could instead use a counter for each CPU, then none of them need
+      an exclusive lock.  See <function>DEFINE_PER_CPU()</function>,
+      <function>get_cpu_var()</function> and
+      <function>put_cpu_var()</function>
+      (<filename class="headerfile">include/linux/percpu.h</filename>).
+    </para>
+
+    <para>
+      Of particular use for simple per-cpu counters is the
+      <type>local_t</type> type, and the
+      <function>cpu_local_inc()</function> and related functions,
+      which are more efficient than simple code on some architectures
+      (<filename class="headerfile">include/asm/local.h</filename>).
+    </para>
+
+    <para>
+      Note that there is no simple, reliable way of getting an exact
+      value of such a counter, without introducing more locks.  This
+      is not a problem for some uses.
+    </para>
+   </sect1>
+
+   <sect1 id="mostly-hardirq">
+    <title>Data Which Mostly Used By An IRQ Handler</title>
+
+    <para>
+      If data is always accessed from within the same IRQ handler, you
+      don't need a lock at all: the kernel already guarantees that the
+      irq handler will not run simultaneously on multiple CPUs.
+    </para>
+    <para>
+      Manfred Spraul points out that you can still do this, even if
+      the data is very occasionally accessed in user context or
+      softirqs/tasklets.  The irq handler doesn't use a lock, and
+      all other accesses are done as so:
+    </para>
+
+<programlisting>
+	spin_lock(&amp;lock);
+	disable_irq(irq);
+	...
+	enable_irq(irq);
+	spin_unlock(&amp;lock);
+</programlisting>
+    <para>
+      The <function>disable_irq()</function> prevents the irq handler
+      from running (and waits for it to finish if it's currently
+      running on other CPUs).  The spinlock prevents any other
+      accesses happening at the same time.  Naturally, this is slower
+      than just a <function>spin_lock_irq()</function> call, so it
+      only makes sense if this type of access happens extremely
+      rarely.
+    </para>
+   </sect1>
+  </chapter>
+
+ <chapter id="sleeping-things">
+    <title>What Functions Are Safe To Call From Interrupts?</title>
+
+    <para>
+      Many functions in the kernel sleep (ie. call schedule())
+      directly or indirectly: you can never call them while holding a
+      spinlock, or with preemption disabled.  This also means you need
+      to be in user context: calling them from an interrupt is illegal.
+    </para>
+
+   <sect1 id="sleeping">
+    <title>Some Functions Which Sleep</title>
+
+    <para>
+      The most common ones are listed below, but you usually have to
+      read the code to find out if other calls are safe.  If everyone
+      else who calls it can sleep, you probably need to be able to
+      sleep, too.  In particular, registration and deregistration
+      functions usually expect to be called from user context, and can
+      sleep.
+    </para>
+
+    <itemizedlist>
+     <listitem>
+      <para>
+        Accesses to 
+        <firstterm linkend="gloss-userspace">userspace</firstterm>:
+      </para>
+      <itemizedlist>
+       <listitem>
+        <para>
+          <function>copy_from_user()</function>
+        </para>
+       </listitem>
+       <listitem>
+        <para>
+          <function>copy_to_user()</function>
+        </para>
+       </listitem>
+       <listitem>
+        <para>
+          <function>get_user()</function>
+        </para>
+       </listitem>
+       <listitem>
+        <para>
+          <function>put_user()</function>
+        </para>
+       </listitem>
+      </itemizedlist>
+     </listitem>
+
+     <listitem>
+      <para>
+        <function>kmalloc(GFP_KERNEL)</function>
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+      <function>mutex_lock_interruptible()</function> and
+      <function>mutex_lock()</function>
+      </para>
+      <para>
+       There is a <function>mutex_trylock()</function> which can be
+       used inside interrupt context, as it will not sleep.
+       <function>mutex_unlock()</function> will also never sleep.
+      </para>
+     </listitem>
+    </itemizedlist>
+   </sect1>
+
+   <sect1 id="dont-sleep">
+    <title>Some Functions Which Don't Sleep</title>
+
+    <para>
+     Some functions are safe to call from any context, or holding
+     almost any lock.
+    </para>
+
+    <itemizedlist>
+     <listitem>
+      <para>
+	<function>printk()</function>
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+        <function>kfree()</function>
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+	<function>add_timer()</function> and <function>del_timer()</function>
+      </para>
+     </listitem>
+    </itemizedlist>
+   </sect1>
+  </chapter>
+
+  <chapter id="references">
+   <title>Further reading</title>
+
+   <itemizedlist>
+    <listitem>
+     <para>
+       <filename>Documentation/spinlocks.txt</filename>: 
+       Linus Torvalds' spinlocking tutorial in the kernel sources.
+     </para>
+    </listitem>
+
+    <listitem>
+     <para>
+       Unix Systems for Modern Architectures: Symmetric
+       Multiprocessing and Caching for Kernel Programmers:
+     </para>
+
+     <para>
+       Curt Schimmel's very good introduction to kernel level
+       locking (not written for Linux, but nearly everything
+       applies).  The book is expensive, but really worth every
+       penny to understand SMP locking. [ISBN: 0201633388]
+     </para>
+    </listitem>
+   </itemizedlist>
+  </chapter>
+
+  <chapter id="thanks">
+    <title>Thanks</title>
+
+    <para>
+      Thanks to Telsa Gwynne for DocBooking, neatening and adding
+      style.
+    </para>
+
+    <para>
+      Thanks to Martin Pool, Philipp Rumpf, Stephen Rothwell, Paul
+      Mackerras, Ruedi Aschwanden, Alan Cox, Manfred Spraul, Tim
+      Waugh, Pete Zaitcev, James Morris, Robert Love, Paul McKenney,
+      John Ashby for proofreading, correcting, flaming, commenting.
+    </para>
+
+    <para>
+      Thanks to the cabal for having no influence on this document.
+    </para>
+  </chapter>
+
+  <glossary id="glossary">
+   <title>Glossary</title>
+
+   <glossentry id="gloss-preemption">
+    <glossterm>preemption</glossterm>
+     <glossdef>
+      <para>
+        Prior to 2.5, or when <symbol>CONFIG_PREEMPT</symbol> is
+        unset, processes in user context inside the kernel would not
+        preempt each other (ie. you had that CPU until you gave it up,
+        except for interrupts).  With the addition of
+        <symbol>CONFIG_PREEMPT</symbol> in 2.5.4, this changed: when
+        in user context, higher priority tasks can "cut in": spinlocks
+        were changed to disable preemption, even on UP.
+     </para>
+    </glossdef>
+   </glossentry>
+
+   <glossentry id="gloss-bh">
+    <glossterm>bh</glossterm>
+     <glossdef>
+      <para>
+        Bottom Half: for historical reasons, functions with
+        '_bh' in them often now refer to any software interrupt, e.g.
+        <function>spin_lock_bh()</function> blocks any software interrupt 
+        on the current CPU.  Bottom halves are deprecated, and will 
+        eventually be replaced by tasklets.  Only one bottom half will be 
+        running at any time.
+     </para>
+    </glossdef>
+   </glossentry>
+
+   <glossentry id="gloss-hwinterrupt">
+    <glossterm>Hardware Interrupt / Hardware IRQ</glossterm>
+    <glossdef>
+     <para>
+       Hardware interrupt request.  <function>in_irq()</function> returns 
+       <returnvalue>true</returnvalue> in a hardware interrupt handler.
+     </para>
+    </glossdef>
+   </glossentry>
+
+   <glossentry id="gloss-interruptcontext">
+    <glossterm>Interrupt Context</glossterm>
+    <glossdef>
+     <para>
+       Not user context: processing a hardware irq or software irq.
+       Indicated by the <function>in_interrupt()</function> macro 
+       returning <returnvalue>true</returnvalue>.
+     </para>
+    </glossdef>
+   </glossentry>
+
+   <glossentry id="gloss-smp">
+    <glossterm><acronym>SMP</acronym></glossterm>
+    <glossdef>
+     <para>
+       Symmetric Multi-Processor: kernels compiled for multiple-CPU
+       machines.  (CONFIG_SMP=y).
+     </para>
+    </glossdef>
+   </glossentry>
+
+   <glossentry id="gloss-softirq">
+    <glossterm>Software Interrupt / softirq</glossterm>
+    <glossdef>
+     <para>
+       Software interrupt handler.  <function>in_irq()</function> returns
+       <returnvalue>false</returnvalue>; <function>in_softirq()</function>
+       returns <returnvalue>true</returnvalue>.  Tasklets and softirqs
+	both fall into the category of 'software interrupts'.
+     </para>
+     <para>
+       Strictly speaking a softirq is one of up to 32 enumerated software
+       interrupts which can run on multiple CPUs at once.
+       Sometimes used to refer to tasklets as
+       well (ie. all software interrupts).
+     </para>
+    </glossdef>
+   </glossentry>
+
+   <glossentry id="gloss-tasklet">
+    <glossterm>tasklet</glossterm>
+    <glossdef>
+     <para>
+       A dynamically-registrable software interrupt,
+       which is guaranteed to only run on one CPU at a time.
+     </para>
+    </glossdef>
+   </glossentry>
+
+   <glossentry id="gloss-timers">
+    <glossterm>timer</glossterm>
+    <glossdef>
+     <para>
+       A dynamically-registrable software interrupt, which is run at
+       (or close to) a given time.  When running, it is just like a
+       tasklet (in fact, they are called from the TIMER_SOFTIRQ).
+     </para>
+    </glossdef>
+   </glossentry>
+
+   <glossentry id="gloss-up">
+    <glossterm><acronym>UP</acronym></glossterm>
+    <glossdef>
+     <para>
+       Uni-Processor: Non-SMP.  (CONFIG_SMP=n).
+     </para>
+    </glossdef>
+   </glossentry>
+
+   <glossentry id="gloss-usercontext">
+    <glossterm>User Context</glossterm>
+    <glossdef>
+     <para>
+       The kernel executing on behalf of a particular process (ie. a
+       system call or trap) or kernel thread.  You can tell which
+       process with the <symbol>current</symbol> macro.)  Not to
+       be confused with userspace.  Can be interrupted by software or
+       hardware interrupts.
+     </para>
+    </glossdef>
+   </glossentry>
+
+   <glossentry id="gloss-userspace">
+    <glossterm>Userspace</glossterm>
+    <glossdef>
+     <para>
+       A process executing its own code outside the kernel.
+     </para>
+    </glossdef>
+   </glossentry>      
+
+  </glossary>
+</book>
+
diff --git a/Documentation/DocBook/kgdb.tmpl b/Documentation/DocBook/kgdb.tmpl
new file mode 100644
index 0000000..372dec2
--- /dev/null
+++ b/Documentation/DocBook/kgdb.tmpl
@@ -0,0 +1,459 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="kgdbOnLinux">
+ <bookinfo>
+  <title>Using kgdb and the kgdb Internals</title>
+
+  <authorgroup>
+   <author>
+    <firstname>Jason</firstname>
+    <surname>Wessel</surname>
+    <affiliation>
+     <address>
+      <email>jason.wessel@windriver.com</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <authorgroup>
+   <author>
+    <firstname>Tom</firstname>
+    <surname>Rini</surname>
+    <affiliation>
+     <address>
+      <email>trini@kernel.crashing.org</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <authorgroup>
+   <author>
+    <firstname>Amit S.</firstname>
+    <surname>Kale</surname>
+    <affiliation>
+     <address>
+      <email>amitkale@linsyssoft.com</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <copyright>
+   <year>2008</year>
+   <holder>Wind River Systems, Inc.</holder>
+  </copyright>
+  <copyright>
+   <year>2004-2005</year>
+   <holder>MontaVista Software, Inc.</holder>
+  </copyright>
+  <copyright>
+   <year>2004</year>
+   <holder>Amit S. Kale</holder>
+  </copyright>
+
+  <legalnotice>
+   <para>
+   This file is licensed under the terms of the GNU General Public License
+   version 2. This program is licensed "as is" without any warranty of any
+   kind, whether express or implied.
+   </para>
+
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+  <chapter id="Introduction">
+    <title>Introduction</title>
+    <para>
+    kgdb is a source level debugger for linux kernel. It is used along
+    with gdb to debug a linux kernel.  The expectation is that gdb can
+    be used to "break in" to the kernel to inspect memory, variables
+    and look through call stack information similar to what an
+    application developer would use gdb for.  It is possible to place
+    breakpoints in kernel code and perform some limited execution
+    stepping.
+    </para>
+    <para>
+    Two machines are required for using kgdb. One of these machines is a
+    development machine and the other is a test machine.  The kernel
+    to be debugged runs on the test machine. The development machine
+    runs an instance of gdb against the vmlinux file which contains
+    the symbols (not boot image such as bzImage, zImage, uImage...).
+    In gdb the developer specifies the connection parameters and
+    connects to kgdb.  The type of connection a developer makes with
+    gdb depends on the availability of kgdb I/O modules compiled as
+    builtin's or kernel modules in the test machine's kernel.
+    </para>
+  </chapter>
+  <chapter id="CompilingAKernel">
+    <title>Compiling a kernel</title>
+    <para>
+    To enable <symbol>CONFIG_KGDB</symbol> you should first turn on
+    "Prompt for development and/or incomplete code/drivers"
+    (CONFIG_EXPERIMENTAL) in  "General setup", then under the
+    "Kernel debugging" select "KGDB: kernel debugging with remote gdb".
+    </para>
+    <para>
+    It is advised, but not required that you turn on the
+    CONFIG_FRAME_POINTER kernel option.  This option inserts code to
+    into the compiled executable which saves the frame information in
+    registers or on the stack at different points which will allow a
+    debugger such as gdb to more accurately construct stack back traces
+    while debugging the kernel.
+    </para>
+    <para>
+    If the architecture that you are using supports the kernel option
+    CONFIG_DEBUG_RODATA, you should consider turning it off.  This
+    option will prevent the use of software breakpoints because it
+    marks certain regions of the kernel's memory space as read-only.
+    If kgdb supports it for the architecture you are using, you can
+    use hardware breakpoints if you desire to run with the
+    CONFIG_DEBUG_RODATA option turned on, else you need to turn off
+    this option.
+    </para>
+    <para>
+    Next you should choose one of more I/O drivers to interconnect debugging
+    host and debugged target.  Early boot debugging requires a KGDB
+    I/O driver that supports early debugging and the driver must be
+    built into the kernel directly. Kgdb I/O driver configuration
+    takes place via kernel or module parameters, see following
+    chapter.
+    </para>
+    <para>
+    The kgdb test compile options are described in the kgdb test suite chapter.
+    </para>
+
+  </chapter>
+  <chapter id="EnableKGDB">
+   <title>Enable kgdb for debugging</title>
+   <para>
+   In order to use kgdb you must activate it by passing configuration
+   information to one of the kgdb I/O drivers.  If you do not pass any
+   configuration information kgdb will not do anything at all.  Kgdb
+   will only actively hook up to the kernel trap hooks if a kgdb I/O
+   driver is loaded and configured.  If you unconfigure a kgdb I/O
+   driver, kgdb will unregister all the kernel hook points.
+   </para>
+   <para>
+   All drivers can be reconfigured at run time, if
+   <symbol>CONFIG_SYSFS</symbol> and <symbol>CONFIG_MODULES</symbol>
+   are enabled, by echo'ing a new config string to
+   <constant>/sys/module/&lt;driver&gt;/parameter/&lt;option&gt;</constant>.
+   The driver can be unconfigured by passing an empty string.  You cannot
+   change the configuration while the debugger is attached.  Make sure
+   to detach the debugger with the <constant>detach</constant> command
+   prior to trying unconfigure a kgdb I/O driver.
+   </para>
+   <sect1 id="kgdbwait">
+   <title>Kernel parameter: kgdbwait</title>
+   <para>
+   The Kernel command line option <constant>kgdbwait</constant> makes
+   kgdb wait for a debugger connection during booting of a kernel.  You
+   can only use this option you compiled a kgdb I/O driver into the
+   kernel and you specified the I/O driver configuration as a kernel
+   command line option.  The kgdbwait parameter should always follow the
+   configuration parameter for the kgdb I/O driver in the kernel
+   command line else the I/O driver will not be configured prior to
+   asking the kernel to use it to wait.
+   </para>
+   <para>
+   The kernel will stop and wait as early as the I/O driver and
+   architecture will allow when you use this option.  If you build the
+   kgdb I/O driver as a kernel module kgdbwait will not do anything.
+   </para>
+   </sect1>
+  <sect1 id="kgdboc">
+  <title>Kernel parameter: kgdboc</title>
+  <para>
+  The kgdboc driver was originally an abbreviation meant to stand for
+  "kgdb over console".  Kgdboc is designed to work with a single
+  serial port. It was meant to cover the circumstance
+  where you wanted to use a serial console as your primary console as
+  well as using it to perform kernel debugging.  Of course you can
+  also use kgdboc without assigning a console to the same port.
+  </para>
+  <sect2 id="UsingKgdboc">
+  <title>Using kgdboc</title>
+  <para>
+  You can configure kgdboc via sysfs or a module or kernel boot line
+  parameter depending on if you build with CONFIG_KGDBOC as a module
+  or built-in.
+  <orderedlist>
+  <listitem><para>From the module load or build-in</para>
+  <para><constant>kgdboc=&lt;tty-device&gt;,[baud]</constant></para>
+  <para>
+  The example here would be if your console port was typically ttyS0, you would use something like <constant>kgdboc=ttyS0,115200</constant> or on the ARM Versatile AB you would likely use <constant>kgdboc=ttyAMA0,115200</constant>
+  </para>
+  </listitem>
+  <listitem><para>From sysfs</para>
+  <para><constant>echo ttyS0 &gt; /sys/module/kgdboc/parameters/kgdboc</constant></para>
+  </listitem>
+  </orderedlist>
+  </para>
+  <para>
+  NOTE: Kgdboc does not support interrupting the target via the
+  gdb remote protocol.  You must manually send a sysrq-g unless you
+  have a proxy that splits console output to a terminal problem and
+  has a separate port for the debugger to connect to that sends the
+  sysrq-g for you.
+  </para>
+  <para>When using kgdboc with no debugger proxy, you can end up
+  connecting the debugger for one of two entry points.  If an
+  exception occurs after you have loaded kgdboc a message should print
+  on the console stating it is waiting for the debugger.  In case you
+  disconnect your terminal program and then connect the debugger in
+  its place.  If you want to interrupt the target system and forcibly
+  enter a debug session you have to issue a Sysrq sequence and then
+  type the letter <constant>g</constant>.  Then you disconnect the
+  terminal session and connect gdb.  Your options if you don't like
+  this are to hack gdb to send the sysrq-g for you as well as on the
+  initial connect, or to use a debugger proxy that allows an
+  unmodified gdb to do the debugging.
+  </para>
+  </sect2>
+  </sect1>
+  <sect1 id="kgdbcon">
+  <title>Kernel parameter: kgdbcon</title>
+  <para>
+  Kgdb supports using the gdb serial protocol to send console messages
+  to the debugger when the debugger is connected and running.  There
+  are two ways to activate this feature.
+  <orderedlist>
+  <listitem><para>Activate with the kernel command line option:</para>
+  <para><constant>kgdbcon</constant></para>
+  </listitem>
+  <listitem><para>Use sysfs before configuring an io driver</para>
+  <para>
+  <constant>echo 1 &gt; /sys/module/kgdb/parameters/kgdb_use_con</constant>
+  </para>
+  <para>
+  NOTE: If you do this after you configure the kgdb I/O driver, the
+  setting will not take effect until the next point the I/O is
+  reconfigured.
+  </para>
+  </listitem>
+  </orderedlist>
+  </para>
+  <para>
+  IMPORTANT NOTE: Using this option with kgdb over the console
+  (kgdboc) is not supported.
+  </para>
+  </sect1>
+  </chapter>
+  <chapter id="ConnectingGDB">
+  <title>Connecting gdb</title>
+    <para>
+    If you are using kgdboc, you need to have used kgdbwait as a boot
+    argument, issued a sysrq-g, or the system you are going to debug
+    has already taken an exception and is waiting for the debugger to
+    attach before you can connect gdb.
+    </para>
+    <para>
+    If you are not using different kgdb I/O driver other than kgdboc,
+    you should be able to connect and the target will automatically
+    respond.
+    </para>
+    <para>
+    Example (using a serial port):
+    </para>
+    <programlisting>
+    % gdb ./vmlinux
+    (gdb) set remotebaud 115200
+    (gdb) target remote /dev/ttyS0
+    </programlisting>
+    <para>
+    Example (kgdb to a terminal server on tcp port 2012):
+    </para>
+    <programlisting>
+    % gdb ./vmlinux
+    (gdb) target remote 192.168.2.2:2012
+    </programlisting>
+    <para>
+    Once connected, you can debug a kernel the way you would debug an
+    application program.
+    </para>
+    <para>
+    If you are having problems connecting or something is going
+    seriously wrong while debugging, it will most often be the case
+    that you want to enable gdb to be verbose about its target
+    communications.  You do this prior to issuing the <constant>target
+    remote</constant> command by typing in: <constant>set remote debug 1</constant>
+    </para>
+  </chapter>
+  <chapter id="KGDBTestSuite">
+    <title>kgdb Test Suite</title>
+    <para>
+    When kgdb is enabled in the kernel config you can also elect to
+    enable the config parameter KGDB_TESTS.  Turning this on will
+    enable a special kgdb I/O module which is designed to test the
+    kgdb internal functions.
+    </para>
+    <para>
+    The kgdb tests are mainly intended for developers to test the kgdb
+    internals as well as a tool for developing a new kgdb architecture
+    specific implementation.  These tests are not really for end users
+    of the Linux kernel.  The primary source of documentation would be
+    to look in the drivers/misc/kgdbts.c file.
+    </para>
+    <para>
+    The kgdb test suite can also be configured at compile time to run
+    the core set of tests by setting the kernel config parameter
+    KGDB_TESTS_ON_BOOT.  This particular option is aimed at automated
+    regression testing and does not require modifying the kernel boot
+    config arguments.  If this is turned on, the kgdb test suite can
+    be disabled by specifying "kgdbts=" as a kernel boot argument.
+    </para>
+  </chapter>
+  <chapter id="CommonBackEndReq">
+  <title>KGDB Internals</title>
+  <sect1 id="kgdbArchitecture">
+    <title>Architecture Specifics</title>
+      <para>
+      Kgdb is organized into three basic components:
+      <orderedlist>
+      <listitem><para>kgdb core</para>
+      <para>
+      The kgdb core is found in kernel/kgdb.c.  It contains:
+      <itemizedlist>
+      <listitem><para>All the logic to implement the gdb serial protocol</para></listitem>
+      <listitem><para>A generic OS exception handler which includes sync'ing the processors into a stopped state on an multi cpu system.</para></listitem>
+      <listitem><para>The API to talk to the kgdb I/O drivers</para></listitem>
+      <listitem><para>The API to make calls to the arch specific kgdb implementation</para></listitem>
+      <listitem><para>The logic to perform safe memory reads and writes to memory while using the debugger</para></listitem>
+      <listitem><para>A full implementation for software breakpoints unless overridden by the arch</para></listitem>
+      </itemizedlist>
+      </para>
+      </listitem>
+      <listitem><para>kgdb arch specific implementation</para>
+      <para>
+      This implementation is generally found in arch/*/kernel/kgdb.c.
+      As an example, arch/x86/kernel/kgdb.c contains the specifics to
+      implement HW breakpoint as well as the initialization to
+      dynamically register and unregister for the trap handlers on
+      this architecture.  The arch specific portion implements:
+      <itemizedlist>
+      <listitem><para>contains an arch specific trap catcher which
+      invokes kgdb_handle_exception() to start kgdb about doing its
+      work</para></listitem>
+      <listitem><para>translation to and from gdb specific packet format to pt_regs</para></listitem>
+      <listitem><para>Registration and unregistration of architecture specific trap hooks</para></listitem>
+      <listitem><para>Any special exception handling and cleanup</para></listitem>
+      <listitem><para>NMI exception handling and cleanup</para></listitem>
+      <listitem><para>(optional)HW breakpoints</para></listitem>
+      </itemizedlist>
+      </para>
+      </listitem>
+      <listitem><para>kgdb I/O driver</para>
+      <para>
+      Each kgdb I/O driver has to provide an implemenation for the following:
+      <itemizedlist>
+      <listitem><para>configuration via builtin or module</para></listitem>
+      <listitem><para>dynamic configuration and kgdb hook registration calls</para></listitem>
+      <listitem><para>read and write character interface</para></listitem>
+      <listitem><para>A cleanup handler for unconfiguring from the kgdb core</para></listitem>
+      <listitem><para>(optional) Early debug methodology</para></listitem>
+      </itemizedlist>
+      Any given kgdb I/O driver has to operate very closely with the
+      hardware and must do it in such a way that does not enable
+      interrupts or change other parts of the system context without
+      completely restoring them. The kgdb core will repeatedly "poll"
+      a kgdb I/O driver for characters when it needs input.  The I/O
+      driver is expected to return immediately if there is no data
+      available.  Doing so allows for the future possibility to touch
+      watch dog hardware in such a way as to have a target system not
+      reset when these are enabled.
+      </para>
+      </listitem>
+      </orderedlist>
+      </para>
+      <para>
+      If you are intent on adding kgdb architecture specific support
+      for a new architecture, the architecture should define
+      <constant>HAVE_ARCH_KGDB</constant> in the architecture specific
+      Kconfig file.  This will enable kgdb for the architecture, and
+      at that point you must create an architecture specific kgdb
+      implementation.
+      </para>
+      <para>
+      There are a few flags which must be set on every architecture in
+      their &lt;asm/kgdb.h&gt; file.  These are:
+      <itemizedlist>
+        <listitem>
+	  <para>
+	  NUMREGBYTES: The size in bytes of all of the registers, so
+	  that we can ensure they will all fit into a packet.
+	  </para>
+	  <para>
+	  BUFMAX: The size in bytes of the buffer GDB will read into.
+	  This must be larger than NUMREGBYTES.
+	  </para>
+	  <para>
+	  CACHE_FLUSH_IS_SAFE: Set to 1 if it is always safe to call
+	  flush_cache_range or flush_icache_range.  On some architectures,
+	  these functions may not be safe to call on SMP since we keep other
+	  CPUs in a holding pattern.
+	  </para>
+	</listitem>
+      </itemizedlist>
+      </para>
+      <para>
+      There are also the following functions for the common backend,
+      found in kernel/kgdb.c, that must be supplied by the
+      architecture-specific backend unless marked as (optional), in
+      which case a default function maybe used if the architecture
+      does not need to provide a specific implementation.
+      </para>
+!Iinclude/linux/kgdb.h
+  </sect1>
+  <sect1 id="kgdbocDesign">
+  <title>kgdboc internals</title>
+  <para>
+  The kgdboc driver is actually a very thin driver that relies on the
+  underlying low level to the hardware driver having "polling hooks"
+  which the to which the tty driver is attached.  In the initial
+  implementation of kgdboc it the serial_core was changed to expose a
+  low level uart hook for doing polled mode reading and writing of a
+  single character while in an atomic context.  When kgdb makes an I/O
+  request to the debugger, kgdboc invokes a call back in the serial
+  core which in turn uses the call back in the uart driver.  It is
+  certainly possible to extend kgdboc to work with non-uart based
+  consoles in the future.
+  </para>
+  <para>
+  When using kgdboc with a uart, the uart driver must implement two callbacks in the <constant>struct uart_ops</constant>. Example from drivers/8250.c:<programlisting>
+#ifdef CONFIG_CONSOLE_POLL
+	.poll_get_char = serial8250_get_poll_char,
+	.poll_put_char = serial8250_put_poll_char,
+#endif
+  </programlisting>
+  Any implementation specifics around creating a polling driver use the
+  <constant>#ifdef CONFIG_CONSOLE_POLL</constant>, as shown above.
+  Keep in mind that polling hooks have to be implemented in such a way
+  that they can be called from an atomic context and have to restore
+  the state of the uart chip on return such that the system can return
+  to normal when the debugger detaches.  You need to be very careful
+  with any kind of lock you consider, because failing here is most
+  going to mean pressing the reset button.
+  </para>
+  </sect1>
+  </chapter>
+  <chapter id="credits">
+     <title>Credits</title>
+	<para>
+		The following people have contributed to this document:
+		<orderedlist>
+			<listitem><para>Amit Kale<email>amitkale@linsyssoft.com</email></para></listitem>
+			<listitem><para>Tom Rini<email>trini@kernel.crashing.org</email></para></listitem>
+		</orderedlist>
+                In March 2008 this document was completely rewritten by:
+		<itemizedlist>
+		<listitem><para>Jason Wessel<email>jason.wessel@windriver.com</email></para></listitem>
+		</itemizedlist>
+	</para>
+  </chapter>
+</book>
+
diff --git a/Documentation/DocBook/libata.tmpl b/Documentation/DocBook/libata.tmpl
new file mode 100644
index 0000000..ba99757
--- /dev/null
+++ b/Documentation/DocBook/libata.tmpl
@@ -0,0 +1,1632 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="libataDevGuide">
+ <bookinfo>
+  <title>libATA Developer's Guide</title>
+  
+  <authorgroup>
+   <author>
+    <firstname>Jeff</firstname>
+    <surname>Garzik</surname>
+   </author>
+  </authorgroup>
+
+  <copyright>
+   <year>2003-2006</year>
+   <holder>Jeff Garzik</holder>
+  </copyright>
+
+  <legalnotice>
+   <para>
+   The contents of this file are subject to the Open
+   Software License version 1.1 that can be found at
+   <ulink url="http://www.opensource.org/licenses/osl-1.1.txt">http://www.opensource.org/licenses/osl-1.1.txt</ulink> and is included herein
+   by reference.
+   </para>
+
+   <para>
+   Alternatively, the contents of this file may be used under the terms
+   of the GNU General Public License version 2 (the "GPL") as distributed
+   in the kernel source COPYING file, in which case the provisions of
+   the GPL are applicable instead of the above.  If you wish to allow
+   the use of your version of this file only under the terms of the
+   GPL and not to allow others to use your version of this file under
+   the OSL, indicate your decision by deleting the provisions above and
+   replace them with the notice and other provisions required by the GPL.
+   If you do not delete the provisions above, a recipient may use your
+   version of this file under either the OSL or the GPL.
+   </para>
+
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+  <chapter id="libataIntroduction">
+     <title>Introduction</title>
+  <para>
+  libATA is a library used inside the Linux kernel to support ATA host
+  controllers and devices.  libATA provides an ATA driver API, class
+  transports for ATA and ATAPI devices, and SCSI&lt;-&gt;ATA translation
+  for ATA devices according to the T10 SAT specification.
+  </para>
+  <para>
+  This Guide documents the libATA driver API, library functions, library
+  internals, and a couple sample ATA low-level drivers.
+  </para>
+  </chapter>
+
+  <chapter id="libataDriverApi">
+     <title>libata Driver API</title>
+     <para>
+     struct ata_port_operations is defined for every low-level libata
+     hardware driver, and it controls how the low-level driver
+     interfaces with the ATA and SCSI layers.
+     </para>
+     <para>
+     FIS-based drivers will hook into the system with ->qc_prep() and
+     ->qc_issue() high-level hooks.  Hardware which behaves in a manner
+     similar to PCI IDE hardware may utilize several generic helpers,
+     defining at a bare minimum the bus I/O addresses of the ATA shadow
+     register blocks.
+     </para>
+     <sect1>
+        <title>struct ata_port_operations</title>
+
+	<sect2><title>Disable ATA port</title>
+	<programlisting>
+void (*port_disable) (struct ata_port *);
+	</programlisting>
+
+	<para>
+	Called from ata_bus_probe() and ata_bus_reset() error paths,
+	as well as when unregistering from the SCSI module (rmmod, hot
+	unplug).
+	This function should do whatever needs to be done to take the
+	port out of use.  In most cases, ata_port_disable() can be used
+	as this hook.
+	</para>
+	<para>
+	Called from ata_bus_probe() on a failed probe.
+	Called from ata_bus_reset() on a failed bus reset.
+	Called from ata_scsi_release().
+	</para>
+
+	</sect2>
+
+	<sect2><title>Post-IDENTIFY device configuration</title>
+	<programlisting>
+void (*dev_config) (struct ata_port *, struct ata_device *);
+	</programlisting>
+
+	<para>
+	Called after IDENTIFY [PACKET] DEVICE is issued to each device
+	found.  Typically used to apply device-specific fixups prior to
+	issue of SET FEATURES - XFER MODE, and prior to operation.
+	</para>
+	<para>
+	Called by ata_device_add() after ata_dev_identify() determines
+	a device is present.
+	</para>
+	<para>
+	This entry may be specified as NULL in ata_port_operations.
+	</para>
+
+	</sect2>
+
+	<sect2><title>Set PIO/DMA mode</title>
+	<programlisting>
+void (*set_piomode) (struct ata_port *, struct ata_device *);
+void (*set_dmamode) (struct ata_port *, struct ata_device *);
+void (*post_set_mode) (struct ata_port *);
+unsigned int (*mode_filter) (struct ata_port *, struct ata_device *, unsigned int);
+	</programlisting>
+
+	<para>
+	Hooks called prior to the issue of SET FEATURES - XFER MODE
+	command.  The optional ->mode_filter() hook is called when libata
+	has built a mask of the possible modes. This is passed to the 
+	->mode_filter() function which should return a mask of valid modes
+	after filtering those unsuitable due to hardware limits. It is not
+	valid to use this interface to add modes.
+	</para>
+	<para>
+	dev->pio_mode and dev->dma_mode are guaranteed to be valid when
+	->set_piomode() and when ->set_dmamode() is called. The timings for
+	any other drive sharing the cable will also be valid at this point.
+	That is the library records the decisions for the modes of each
+	drive on a channel before it attempts to set any of them.
+	</para>
+	<para>
+	->post_set_mode() is
+	called unconditionally, after the SET FEATURES - XFER MODE
+	command completes successfully.
+	</para>
+
+	<para>
+	->set_piomode() is always called (if present), but
+	->set_dma_mode() is only called if DMA is possible.
+	</para>
+
+	</sect2>
+
+	<sect2><title>Taskfile read/write</title>
+	<programlisting>
+void (*tf_load) (struct ata_port *ap, struct ata_taskfile *tf);
+void (*tf_read) (struct ata_port *ap, struct ata_taskfile *tf);
+	</programlisting>
+
+	<para>
+	->tf_load() is called to load the given taskfile into hardware
+	registers / DMA buffers.  ->tf_read() is called to read the
+	hardware registers / DMA buffers, to obtain the current set of
+	taskfile register values.
+	Most drivers for taskfile-based hardware (PIO or MMIO) use
+	ata_tf_load() and ata_tf_read() for these hooks.
+	</para>
+
+	</sect2>
+
+	<sect2><title>PIO data read/write</title>
+	<programlisting>
+void (*data_xfer) (struct ata_device *, unsigned char *, unsigned int, int);
+	</programlisting>
+
+	<para>
+All bmdma-style drivers must implement this hook.  This is the low-level
+operation that actually copies the data bytes during a PIO data
+transfer.
+Typically the driver
+will choose one of ata_pio_data_xfer_noirq(), ata_pio_data_xfer(), or
+ata_mmio_data_xfer().
+	</para>
+
+	</sect2>
+
+	<sect2><title>ATA command execute</title>
+	<programlisting>
+void (*exec_command)(struct ata_port *ap, struct ata_taskfile *tf);
+	</programlisting>
+
+	<para>
+	causes an ATA command, previously loaded with
+	->tf_load(), to be initiated in hardware.
+	Most drivers for taskfile-based hardware use ata_exec_command()
+	for this hook.
+	</para>
+
+	</sect2>
+
+	<sect2><title>Per-cmd ATAPI DMA capabilities filter</title>
+	<programlisting>
+int (*check_atapi_dma) (struct ata_queued_cmd *qc);
+	</programlisting>
+
+	<para>
+Allow low-level driver to filter ATA PACKET commands, returning a status
+indicating whether or not it is OK to use DMA for the supplied PACKET
+command.
+	</para>
+	<para>
+	This hook may be specified as NULL, in which case libata will
+	assume that atapi dma can be supported.
+	</para>
+
+	</sect2>
+
+	<sect2><title>Read specific ATA shadow registers</title>
+	<programlisting>
+u8   (*check_status)(struct ata_port *ap);
+u8   (*check_altstatus)(struct ata_port *ap);
+	</programlisting>
+
+	<para>
+	Reads the Status/AltStatus ATA shadow register from
+	hardware.  On some hardware, reading the Status register has
+	the side effect of clearing the interrupt condition.
+	Most drivers for taskfile-based hardware use
+	ata_check_status() for this hook.
+	</para>
+	<para>
+	Note that because this is called from ata_device_add(), at
+	least a dummy function that clears device interrupts must be
+	provided for all drivers, even if the controller doesn't
+	actually have a taskfile status register.
+	</para>
+
+	</sect2>
+
+	<sect2><title>Select ATA device on bus</title>
+	<programlisting>
+void (*dev_select)(struct ata_port *ap, unsigned int device);
+	</programlisting>
+
+	<para>
+	Issues the low-level hardware command(s) that causes one of N
+	hardware devices to be considered 'selected' (active and
+	available for use) on the ATA bus.  This generally has no
+	meaning on FIS-based devices.
+	</para>
+	<para>
+	Most drivers for taskfile-based hardware use
+	ata_std_dev_select() for this hook.  Controllers which do not
+	support second drives on a port (such as SATA contollers) will
+	use ata_noop_dev_select().
+	</para>
+
+	</sect2>
+
+	<sect2><title>Private tuning method</title>
+	<programlisting>
+void (*set_mode) (struct ata_port *ap);
+	</programlisting>
+
+	<para>
+	By default libata performs drive and controller tuning in
+	accordance with the ATA timing rules and also applies blacklists
+	and cable limits. Some controllers need special handling and have
+	custom tuning rules, typically raid controllers that use ATA
+	commands but do not actually do drive timing.
+	</para>
+
+	<warning>
+	<para>
+	This hook should not be used to replace the standard controller
+	tuning logic when a controller has quirks. Replacing the default
+	tuning logic in that case would bypass handling for drive and
+	bridge quirks that may be important to data reliability. If a
+	controller needs to filter the mode selection it should use the
+	mode_filter hook instead.
+	</para>
+	</warning>
+
+	</sect2>
+
+	<sect2><title>Control PCI IDE BMDMA engine</title>
+	<programlisting>
+void (*bmdma_setup) (struct ata_queued_cmd *qc);
+void (*bmdma_start) (struct ata_queued_cmd *qc);
+void (*bmdma_stop) (struct ata_port *ap);
+u8   (*bmdma_status) (struct ata_port *ap);
+	</programlisting>
+
+	<para>
+When setting up an IDE BMDMA transaction, these hooks arm
+(->bmdma_setup), fire (->bmdma_start), and halt (->bmdma_stop)
+the hardware's DMA engine.  ->bmdma_status is used to read the standard
+PCI IDE DMA Status register.
+	</para>
+
+	<para>
+These hooks are typically either no-ops, or simply not implemented, in
+FIS-based drivers.
+	</para>
+	<para>
+Most legacy IDE drivers use ata_bmdma_setup() for the bmdma_setup()
+hook.  ata_bmdma_setup() will write the pointer to the PRD table to
+the IDE PRD Table Address register, enable DMA in the DMA Command
+register, and call exec_command() to begin the transfer.
+	</para>
+	<para>
+Most legacy IDE drivers use ata_bmdma_start() for the bmdma_start()
+hook.  ata_bmdma_start() will write the ATA_DMA_START flag to the DMA
+Command register.
+	</para>
+	<para>
+Many legacy IDE drivers use ata_bmdma_stop() for the bmdma_stop()
+hook.  ata_bmdma_stop() clears the ATA_DMA_START flag in the DMA
+command register.
+	</para>
+	<para>
+Many legacy IDE drivers use ata_bmdma_status() as the bmdma_status() hook.
+	</para>
+
+	</sect2>
+
+	<sect2><title>High-level taskfile hooks</title>
+	<programlisting>
+void (*qc_prep) (struct ata_queued_cmd *qc);
+int (*qc_issue) (struct ata_queued_cmd *qc);
+	</programlisting>
+
+	<para>
+	Higher-level hooks, these two hooks can potentially supercede
+	several of the above taskfile/DMA engine hooks.  ->qc_prep is
+	called after the buffers have been DMA-mapped, and is typically
+	used to populate the hardware's DMA scatter-gather table.
+	Most drivers use the standard ata_qc_prep() helper function, but
+	more advanced drivers roll their own.
+	</para>
+	<para>
+	->qc_issue is used to make a command active, once the hardware
+	and S/G tables have been prepared.  IDE BMDMA drivers use the
+	helper function ata_qc_issue_prot() for taskfile protocol-based
+	dispatch.  More advanced drivers implement their own ->qc_issue.
+	</para>
+	<para>
+	ata_qc_issue_prot() calls ->tf_load(), ->bmdma_setup(), and
+	->bmdma_start() as necessary to initiate a transfer.
+	</para>
+
+	</sect2>
+
+	<sect2><title>Exception and probe handling (EH)</title>
+	<programlisting>
+void (*eng_timeout) (struct ata_port *ap);
+void (*phy_reset) (struct ata_port *ap);
+	</programlisting>
+
+	<para>
+Deprecated.  Use ->error_handler() instead.
+	</para>
+
+	<programlisting>
+void (*freeze) (struct ata_port *ap);
+void (*thaw) (struct ata_port *ap);
+	</programlisting>
+
+	<para>
+ata_port_freeze() is called when HSM violations or some other
+condition disrupts normal operation of the port.  A frozen port
+is not allowed to perform any operation until the port is
+thawed, which usually follows a successful reset.
+	</para>
+
+	<para>
+The optional ->freeze() callback can be used for freezing the port
+hardware-wise (e.g. mask interrupt and stop DMA engine).  If a
+port cannot be frozen hardware-wise, the interrupt handler
+must ack and clear interrupts unconditionally while the port
+is frozen.
+	</para>
+	<para>
+The optional ->thaw() callback is called to perform the opposite of ->freeze():
+prepare the port for normal operation once again.  Unmask interrupts,
+start DMA engine, etc.
+	</para>
+
+	<programlisting>
+void (*error_handler) (struct ata_port *ap);
+	</programlisting>
+
+	<para>
+->error_handler() is a driver's hook into probe, hotplug, and recovery
+and other exceptional conditions.  The primary responsibility of an
+implementation is to call ata_do_eh() or ata_bmdma_drive_eh() with a set
+of EH hooks as arguments:
+	</para>
+
+	<para>
+'prereset' hook (may be NULL) is called during an EH reset, before any other actions
+are taken.
+	</para>
+
+	<para>
+'postreset' hook (may be NULL) is called after the EH reset is performed.  Based on
+existing conditions, severity of the problem, and hardware capabilities,
+	</para>
+
+	<para>
+Either 'softreset' (may be NULL) or 'hardreset' (may be NULL) will be
+called to perform the low-level EH reset.
+	</para>
+
+	<programlisting>
+void (*post_internal_cmd) (struct ata_queued_cmd *qc);
+	</programlisting>
+
+	<para>
+Perform any hardware-specific actions necessary to finish processing
+after executing a probe-time or EH-time command via ata_exec_internal().
+	</para>
+
+	</sect2>
+
+	<sect2><title>Hardware interrupt handling</title>
+	<programlisting>
+irqreturn_t (*irq_handler)(int, void *, struct pt_regs *);
+void (*irq_clear) (struct ata_port *);
+	</programlisting>
+
+	<para>
+	->irq_handler is the interrupt handling routine registered with
+	the system, by libata.  ->irq_clear is called during probe just
+	before the interrupt handler is registered, to be sure hardware
+	is quiet.
+	</para>
+	<para>
+	The second argument, dev_instance, should be cast to a pointer
+	to struct ata_host_set.
+	</para>
+	<para>
+	Most legacy IDE drivers use ata_interrupt() for the
+	irq_handler hook, which scans all ports in the host_set,
+	determines which queued command was active (if any), and calls
+	ata_host_intr(ap,qc).
+	</para>
+	<para>
+	Most legacy IDE drivers use ata_bmdma_irq_clear() for the
+	irq_clear() hook, which simply clears the interrupt and error
+	flags in the DMA status register.
+	</para>
+
+	</sect2>
+
+	<sect2><title>SATA phy read/write</title>
+	<programlisting>
+int (*scr_read) (struct ata_port *ap, unsigned int sc_reg,
+		 u32 *val);
+int (*scr_write) (struct ata_port *ap, unsigned int sc_reg,
+                   u32 val);
+	</programlisting>
+
+	<para>
+	Read and write standard SATA phy registers.  Currently only used
+	if ->phy_reset hook called the sata_phy_reset() helper function.
+	sc_reg is one of SCR_STATUS, SCR_CONTROL, SCR_ERROR, or SCR_ACTIVE.
+	</para>
+
+	</sect2>
+
+	<sect2><title>Init and shutdown</title>
+	<programlisting>
+int (*port_start) (struct ata_port *ap);
+void (*port_stop) (struct ata_port *ap);
+void (*host_stop) (struct ata_host_set *host_set);
+	</programlisting>
+
+	<para>
+	->port_start() is called just after the data structures for each
+	port are initialized.  Typically this is used to alloc per-port
+	DMA buffers / tables / rings, enable DMA engines, and similar
+	tasks.  Some drivers also use this entry point as a chance to
+	allocate driver-private memory for ap->private_data.
+	</para>
+	<para>
+	Many drivers use ata_port_start() as this hook or call
+	it from their own port_start() hooks.  ata_port_start()
+	allocates space for a legacy IDE PRD table and returns.
+	</para>
+	<para>
+	->port_stop() is called after ->host_stop().  It's sole function
+	is to release DMA/memory resources, now that they are no longer
+	actively being used.  Many drivers also free driver-private
+	data from port at this time.
+	</para>
+	<para>
+	Many drivers use ata_port_stop() as this hook, which frees the
+	PRD table.
+	</para>
+	<para>
+	->host_stop() is called after all ->port_stop() calls
+have completed.  The hook must finalize hardware shutdown, release DMA
+and other resources, etc.
+	This hook may be specified as NULL, in which case it is not called.
+	</para>
+
+	</sect2>
+
+     </sect1>
+  </chapter>
+
+  <chapter id="libataEH">
+        <title>Error handling</title>
+
+	<para>
+	This chapter describes how errors are handled under libata.
+	Readers are advised to read SCSI EH
+	(Documentation/scsi/scsi_eh.txt) and ATA exceptions doc first.
+	</para>
+
+	<sect1><title>Origins of commands</title>
+	<para>
+	In libata, a command is represented with struct ata_queued_cmd
+	or qc.  qc's are preallocated during port initialization and
+	repetitively used for command executions.  Currently only one
+	qc is allocated per port but yet-to-be-merged NCQ branch
+	allocates one for each tag and maps each qc to NCQ tag 1-to-1.
+	</para>
+	<para>
+	libata commands can originate from two sources - libata itself
+	and SCSI midlayer.  libata internal commands are used for
+	initialization and error handling.  All normal blk requests
+	and commands for SCSI emulation are passed as SCSI commands
+	through queuecommand callback of SCSI host template.
+	</para>
+	</sect1>
+
+	<sect1><title>How commands are issued</title>
+
+	<variablelist>
+
+	<varlistentry><term>Internal commands</term>
+	<listitem>
+	<para>
+	First, qc is allocated and initialized using
+	ata_qc_new_init().  Although ata_qc_new_init() doesn't
+	implement any wait or retry mechanism when qc is not
+	available, internal commands are currently issued only during
+	initialization and error recovery, so no other command is
+	active and allocation is guaranteed to succeed.
+	</para>
+	<para>
+	Once allocated qc's taskfile is initialized for the command to
+	be executed.  qc currently has two mechanisms to notify
+	completion.  One is via qc->complete_fn() callback and the
+	other is completion qc->waiting.  qc->complete_fn() callback
+	is the asynchronous path used by normal SCSI translated
+	commands and qc->waiting is the synchronous (issuer sleeps in
+	process context) path used by internal commands.
+	</para>
+	<para>
+	Once initialization is complete, host_set lock is acquired
+	and the qc is issued.
+	</para>
+	</listitem>
+	</varlistentry>
+
+	<varlistentry><term>SCSI commands</term>
+	<listitem>
+	<para>
+	All libata drivers use ata_scsi_queuecmd() as
+	hostt->queuecommand callback.  scmds can either be simulated
+	or translated.  No qc is involved in processing a simulated
+	scmd.  The result is computed right away and the scmd is
+	completed.
+	</para>
+	<para>
+	For a translated scmd, ata_qc_new_init() is invoked to
+	allocate a qc and the scmd is translated into the qc.  SCSI
+	midlayer's completion notification function pointer is stored
+	into qc->scsidone.
+	</para>
+	<para>
+	qc->complete_fn() callback is used for completion
+	notification.  ATA commands use ata_scsi_qc_complete() while
+	ATAPI commands use atapi_qc_complete().  Both functions end up
+	calling qc->scsidone to notify upper layer when the qc is
+	finished.  After translation is completed, the qc is issued
+	with ata_qc_issue().
+	</para>
+	<para>
+	Note that SCSI midlayer invokes hostt->queuecommand while
+	holding host_set lock, so all above occur while holding
+	host_set lock.
+	</para>
+	</listitem>
+	</varlistentry>
+
+	</variablelist>
+	</sect1>
+
+	<sect1><title>How commands are processed</title>
+	<para>
+	Depending on which protocol and which controller are used,
+	commands are processed differently.  For the purpose of
+	discussion, a controller which uses taskfile interface and all
+	standard callbacks is assumed.
+	</para>
+	<para>
+	Currently 6 ATA command protocols are used.  They can be
+	sorted into the following four categories according to how
+	they are processed.
+	</para>
+
+	<variablelist>
+	   <varlistentry><term>ATA NO DATA or DMA</term>
+	   <listitem>
+	   <para>
+	   ATA_PROT_NODATA and ATA_PROT_DMA fall into this category.
+	   These types of commands don't require any software
+	   intervention once issued.  Device will raise interrupt on
+	   completion.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry><term>ATA PIO</term>
+	   <listitem>
+	   <para>
+	   ATA_PROT_PIO is in this category.  libata currently
+	   implements PIO with polling.  ATA_NIEN bit is set to turn
+	   off interrupt and pio_task on ata_wq performs polling and
+	   IO.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry><term>ATAPI NODATA or DMA</term>
+	   <listitem>
+	   <para>
+	   ATA_PROT_ATAPI_NODATA and ATA_PROT_ATAPI_DMA are in this
+	   category.  packet_task is used to poll BSY bit after
+	   issuing PACKET command.  Once BSY is turned off by the
+	   device, packet_task transfers CDB and hands off processing
+	   to interrupt handler.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry><term>ATAPI PIO</term>
+	   <listitem>
+	   <para>
+	   ATA_PROT_ATAPI is in this category.  ATA_NIEN bit is set
+	   and, as in ATAPI NODATA or DMA, packet_task submits cdb.
+	   However, after submitting cdb, further processing (data
+	   transfer) is handed off to pio_task.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+	</variablelist>
+        </sect1>
+
+	<sect1><title>How commands are completed</title>
+	<para>
+	Once issued, all qc's are either completed with
+	ata_qc_complete() or time out.  For commands which are handled
+	by interrupts, ata_host_intr() invokes ata_qc_complete(), and,
+	for PIO tasks, pio_task invokes ata_qc_complete().  In error
+	cases, packet_task may also complete commands.
+	</para>
+	<para>
+	ata_qc_complete() does the following.
+	</para>
+
+	<orderedlist>
+
+	<listitem>
+	<para>
+	DMA memory is unmapped.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	ATA_QCFLAG_ACTIVE is clared from qc->flags.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	qc->complete_fn() callback is invoked.  If the return value of
+	the callback is not zero.  Completion is short circuited and
+	ata_qc_complete() returns.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	__ata_qc_complete() is called, which does
+	   <orderedlist>
+
+	   <listitem>
+	   <para>
+	   qc->flags is cleared to zero.
+	   </para>
+	   </listitem>
+
+	   <listitem>
+	   <para>
+	   ap->active_tag and qc->tag are poisoned.
+	   </para>
+	   </listitem>
+
+	   <listitem>
+	   <para>
+	   qc->waiting is claread &amp; completed (in that order).
+	   </para>
+	   </listitem>
+
+	   <listitem>
+	   <para>
+	   qc is deallocated by clearing appropriate bit in ap->qactive.
+	   </para>
+	   </listitem>
+
+	   </orderedlist>
+	</para>
+	</listitem>
+
+	</orderedlist>
+
+	<para>
+	So, it basically notifies upper layer and deallocates qc.  One
+	exception is short-circuit path in #3 which is used by
+	atapi_qc_complete().
+	</para>
+	<para>
+	For all non-ATAPI commands, whether it fails or not, almost
+	the same code path is taken and very little error handling
+	takes place.  A qc is completed with success status if it
+	succeeded, with failed status otherwise.
+	</para>
+	<para>
+	However, failed ATAPI commands require more handling as
+	REQUEST SENSE is needed to acquire sense data.  If an ATAPI
+	command fails, ata_qc_complete() is invoked with error status,
+	which in turn invokes atapi_qc_complete() via
+	qc->complete_fn() callback.
+	</para>
+	<para>
+	This makes atapi_qc_complete() set scmd->result to
+	SAM_STAT_CHECK_CONDITION, complete the scmd and return 1.  As
+	the sense data is empty but scmd->result is CHECK CONDITION,
+	SCSI midlayer will invoke EH for the scmd, and returning 1
+	makes ata_qc_complete() to return without deallocating the qc.
+	This leads us to ata_scsi_error() with partially completed qc.
+	</para>
+
+	</sect1>
+
+	<sect1><title>ata_scsi_error()</title>
+	<para>
+	ata_scsi_error() is the current transportt->eh_strategy_handler()
+	for libata.  As discussed above, this will be entered in two
+	cases - timeout and ATAPI error completion.  This function
+	calls low level libata driver's eng_timeout() callback, the
+	standard callback for which is ata_eng_timeout().  It checks
+	if a qc is active and calls ata_qc_timeout() on the qc if so.
+	Actual error handling occurs in ata_qc_timeout().
+	</para>
+	<para>
+	If EH is invoked for timeout, ata_qc_timeout() stops BMDMA and
+	completes the qc.  Note that as we're currently in EH, we
+	cannot call scsi_done.  As described in SCSI EH doc, a
+	recovered scmd should be either retried with
+	scsi_queue_insert() or finished with scsi_finish_command().
+	Here, we override qc->scsidone with scsi_finish_command() and
+	calls ata_qc_complete().
+	</para>
+	<para>
+	If EH is invoked due to a failed ATAPI qc, the qc here is
+	completed but not deallocated.  The purpose of this
+	half-completion is to use the qc as place holder to make EH
+	code reach this place.  This is a bit hackish, but it works.
+	</para>
+	<para>
+	Once control reaches here, the qc is deallocated by invoking
+	__ata_qc_complete() explicitly.  Then, internal qc for REQUEST
+	SENSE is issued.  Once sense data is acquired, scmd is
+	finished by directly invoking scsi_finish_command() on the
+	scmd.  Note that as we already have completed and deallocated
+	the qc which was associated with the scmd, we don't need
+	to/cannot call ata_qc_complete() again.
+	</para>
+
+	</sect1>
+
+	<sect1><title>Problems with the current EH</title>
+
+	<itemizedlist>
+
+	<listitem>
+	<para>
+	Error representation is too crude.  Currently any and all
+	error conditions are represented with ATA STATUS and ERROR
+	registers.  Errors which aren't ATA device errors are treated
+	as ATA device errors by setting ATA_ERR bit.  Better error
+	descriptor which can properly represent ATA and other
+	errors/exceptions is needed.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	When handling timeouts, no action is taken to make device
+	forget about the timed out command and ready for new commands.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	EH handling via ata_scsi_error() is not properly protected
+	from usual command processing.  On EH entrance, the device is
+	not in quiescent state.  Timed out commands may succeed or
+	fail any time.  pio_task and atapi_task may still be running.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	Too weak error recovery.  Devices / controllers causing HSM
+	mismatch errors and other errors quite often require reset to
+	return to known state.  Also, advanced error handling is
+	necessary to support features like NCQ and hotplug.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	ATA errors are directly handled in the interrupt handler and
+	PIO errors in pio_task.  This is problematic for advanced
+	error handling for the following reasons.
+	</para>
+	<para>
+	First, advanced error handling often requires context and
+	internal qc execution.
+	</para>
+	<para>
+	Second, even a simple failure (say, CRC error) needs
+	information gathering and could trigger complex error handling
+	(say, resetting &amp; reconfiguring).  Having multiple code
+	paths to gather information, enter EH and trigger actions
+	makes life painful.
+	</para>
+	<para>
+	Third, scattered EH code makes implementing low level drivers
+	difficult.  Low level drivers override libata callbacks.  If
+	EH is scattered over several places, each affected callbacks
+	should perform its part of error handling.  This can be error
+	prone and painful.
+	</para>
+	</listitem>
+
+	</itemizedlist>
+	</sect1>
+  </chapter>
+
+  <chapter id="libataExt">
+     <title>libata Library</title>
+!Edrivers/ata/libata-core.c
+  </chapter>
+
+  <chapter id="libataInt">
+     <title>libata Core Internals</title>
+!Idrivers/ata/libata-core.c
+  </chapter>
+
+  <chapter id="libataScsiInt">
+     <title>libata SCSI translation/emulation</title>
+!Edrivers/ata/libata-scsi.c
+!Idrivers/ata/libata-scsi.c
+  </chapter>
+
+  <chapter id="ataExceptions">
+     <title>ATA errors and exceptions</title>
+
+  <para>
+  This chapter tries to identify what error/exception conditions exist
+  for ATA/ATAPI devices and describe how they should be handled in
+  implementation-neutral way.
+  </para>
+
+  <para>
+  The term 'error' is used to describe conditions where either an
+  explicit error condition is reported from device or a command has
+  timed out.
+  </para>
+
+  <para>
+  The term 'exception' is either used to describe exceptional
+  conditions which are not errors (say, power or hotplug events), or
+  to describe both errors and non-error exceptional conditions.  Where
+  explicit distinction between error and exception is necessary, the
+  term 'non-error exception' is used.
+  </para>
+
+  <sect1 id="excat">
+     <title>Exception categories</title>
+     <para>
+     Exceptions are described primarily with respect to legacy
+     taskfile + bus master IDE interface.  If a controller provides
+     other better mechanism for error reporting, mapping those into
+     categories described below shouldn't be difficult.
+     </para>
+
+     <para>
+     In the following sections, two recovery actions - reset and
+     reconfiguring transport - are mentioned.  These are described
+     further in <xref linkend="exrec"/>.
+     </para>
+
+     <sect2 id="excatHSMviolation">
+        <title>HSM violation</title>
+        <para>
+        This error is indicated when STATUS value doesn't match HSM
+        requirement during issuing or excution any ATA/ATAPI command.
+        </para>
+
+	<itemizedlist>
+	<title>Examples</title>
+
+        <listitem>
+	<para>
+	ATA_STATUS doesn't contain !BSY &amp;&amp; DRDY &amp;&amp; !DRQ while trying
+	to issue a command.
+        </para>
+	</listitem>
+
+        <listitem>
+	<para>
+	!BSY &amp;&amp; !DRQ during PIO data transfer.
+        </para>
+	</listitem>
+
+        <listitem>
+	<para>
+	DRQ on command completion.
+        </para>
+	</listitem>
+
+        <listitem>
+	<para>
+	!BSY &amp;&amp; ERR after CDB tranfer starts but before the
+        last byte of CDB is transferred.  ATA/ATAPI standard states
+        that &quot;The device shall not terminate the PACKET command
+        with an error before the last byte of the command packet has
+        been written&quot; in the error outputs description of PACKET
+        command and the state diagram doesn't include such
+        transitions.
+	</para>
+	</listitem>
+
+	</itemizedlist>
+
+	<para>
+	In these cases, HSM is violated and not much information
+	regarding the error can be acquired from STATUS or ERROR
+	register.  IOW, this error can be anything - driver bug,
+	faulty device, controller and/or cable.
+	</para>
+
+	<para>
+	As HSM is violated, reset is necessary to restore known state.
+	Reconfiguring transport for lower speed might be helpful too
+	as transmission errors sometimes cause this kind of errors.
+	</para>
+     </sect2>
+     
+     <sect2 id="excatDevErr">
+        <title>ATA/ATAPI device error (non-NCQ / non-CHECK CONDITION)</title>
+
+	<para>
+	These are errors detected and reported by ATA/ATAPI devices
+	indicating device problems.  For this type of errors, STATUS
+	and ERROR register values are valid and describe error
+	condition.  Note that some of ATA bus errors are detected by
+	ATA/ATAPI devices and reported using the same mechanism as
+	device errors.  Those cases are described later in this
+	section.
+	</para>
+
+	<para>
+	For ATA commands, this type of errors are indicated by !BSY
+	&amp;&amp; ERR during command execution and on completion.
+	</para>
+
+	<para>For ATAPI commands,</para>
+
+	<itemizedlist>
+
+	<listitem>
+	<para>
+	!BSY &amp;&amp; ERR &amp;&amp; ABRT right after issuing PACKET
+	indicates that PACKET command is not supported and falls in
+	this category.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	!BSY &amp;&amp; ERR(==CHK) &amp;&amp; !ABRT after the last
+	byte of CDB is transferred indicates CHECK CONDITION and
+	doesn't fall in this category.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	!BSY &amp;&amp; ERR(==CHK) &amp;&amp; ABRT after the last byte
+        of CDB is transferred *probably* indicates CHECK CONDITION and
+        doesn't fall in this category.
+	</para>
+	</listitem>
+
+	</itemizedlist>
+
+	<para>
+	Of errors detected as above, the followings are not ATA/ATAPI
+	device errors but ATA bus errors and should be handled
+	according to <xref linkend="excatATAbusErr"/>.
+	</para>
+
+	<variablelist>
+
+	   <varlistentry>
+	   <term>CRC error during data transfer</term>
+	   <listitem>
+	   <para>
+	   This is indicated by ICRC bit in the ERROR register and
+	   means that corruption occurred during data transfer.  Upto
+	   ATA/ATAPI-7, the standard specifies that this bit is only
+	   applicable to UDMA transfers but ATA/ATAPI-8 draft revision
+	   1f says that the bit may be applicable to multiword DMA and
+	   PIO.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry>
+	   <term>ABRT error during data transfer or on completion</term>
+	   <listitem>
+	   <para>
+	   Upto ATA/ATAPI-7, the standard specifies that ABRT could be
+	   set on ICRC errors and on cases where a device is not able
+	   to complete a command.  Combined with the fact that MWDMA
+	   and PIO transfer errors aren't allowed to use ICRC bit upto
+	   ATA/ATAPI-7, it seems to imply that ABRT bit alone could
+	   indicate tranfer errors.
+	   </para>
+	   <para>
+	   However, ATA/ATAPI-8 draft revision 1f removes the part
+	   that ICRC errors can turn on ABRT.  So, this is kind of
+	   gray area.  Some heuristics are needed here.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	</variablelist>
+
+	<para>
+	ATA/ATAPI device errors can be further categorized as follows.
+	</para>
+
+	<variablelist>
+
+	   <varlistentry>
+	   <term>Media errors</term>
+	   <listitem>
+	   <para>
+	   This is indicated by UNC bit in the ERROR register.  ATA
+	   devices reports UNC error only after certain number of
+	   retries cannot recover the data, so there's nothing much
+	   else to do other than notifying upper layer.
+	   </para>
+	   <para>
+	   READ and WRITE commands report CHS or LBA of the first
+	   failed sector but ATA/ATAPI standard specifies that the
+	   amount of transferred data on error completion is
+	   indeterminate, so we cannot assume that sectors preceding
+	   the failed sector have been transferred and thus cannot
+	   complete those sectors successfully as SCSI does.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry>
+	   <term>Media changed / media change requested error</term>
+	   <listitem>
+	   <para>
+	   &lt;&lt;TODO: fill here&gt;&gt;
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry><term>Address error</term>
+	   <listitem>
+	   <para>
+	   This is indicated by IDNF bit in the ERROR register.
+	   Report to upper layer.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry><term>Other errors</term>
+	   <listitem>
+	   <para>
+	   This can be invalid command or parameter indicated by ABRT
+	   ERROR bit or some other error condition.  Note that ABRT
+	   bit can indicate a lot of things including ICRC and Address
+	   errors.  Heuristics needed.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	</variablelist>
+
+	<para>
+	Depending on commands, not all STATUS/ERROR bits are
+	applicable.  These non-applicable bits are marked with
+	&quot;na&quot; in the output descriptions but upto ATA/ATAPI-7
+	no definition of &quot;na&quot; can be found.  However,
+	ATA/ATAPI-8 draft revision 1f describes &quot;N/A&quot; as
+	follows.
+	</para>
+
+	<blockquote>
+	<variablelist>
+	   <varlistentry><term>3.2.3.3a N/A</term>
+	   <listitem>
+	   <para>
+	   A keyword the indicates a field has no defined value in
+	   this standard and should not be checked by the host or
+	   device. N/A fields should be cleared to zero.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+	</variablelist>
+	</blockquote>
+
+	<para>
+	So, it seems reasonable to assume that &quot;na&quot; bits are
+	cleared to zero by devices and thus need no explicit masking.
+	</para>
+
+     </sect2>
+
+     <sect2 id="excatATAPIcc">
+        <title>ATAPI device CHECK CONDITION</title>
+
+	<para>
+	ATAPI device CHECK CONDITION error is indicated by set CHK bit
+	(ERR bit) in the STATUS register after the last byte of CDB is
+	transferred for a PACKET command.  For this kind of errors,
+	sense data should be acquired to gather information regarding
+	the errors.  REQUEST SENSE packet command should be used to
+	acquire sense data.
+	</para>
+
+	<para>
+	Once sense data is acquired, this type of errors can be
+	handled similary to other SCSI errors.  Note that sense data
+	may indicate ATA bus error (e.g. Sense Key 04h HARDWARE ERROR
+	&amp;&amp; ASC/ASCQ 47h/00h SCSI PARITY ERROR).  In such
+	cases, the error should be considered as an ATA bus error and
+	handled according to <xref linkend="excatATAbusErr"/>.
+	</para>
+
+     </sect2>
+
+     <sect2 id="excatNCQerr">
+        <title>ATA device error (NCQ)</title>
+
+	<para>
+	NCQ command error is indicated by cleared BSY and set ERR bit
+	during NCQ command phase (one or more NCQ commands
+	outstanding).  Although STATUS and ERROR registers will
+	contain valid values describing the error, READ LOG EXT is
+	required to clear the error condition, determine which command
+	has failed and acquire more information.
+	</para>
+
+	<para>
+	READ LOG EXT Log Page 10h reports which tag has failed and
+	taskfile register values describing the error.  With this
+	information the failed command can be handled as a normal ATA
+	command error as in <xref linkend="excatDevErr"/> and all
+	other in-flight commands must be retried.  Note that this
+	retry should not be counted - it's likely that commands
+	retried this way would have completed normally if it were not
+	for the failed command.
+	</para>
+
+	<para>
+	Note that ATA bus errors can be reported as ATA device NCQ
+	errors.  This should be handled as described in <xref
+	linkend="excatATAbusErr"/>.
+	</para>
+
+	<para>
+	If READ LOG EXT Log Page 10h fails or reports NQ, we're
+	thoroughly screwed.  This condition should be treated
+	according to <xref linkend="excatHSMviolation"/>.
+	</para>
+
+     </sect2>
+
+     <sect2 id="excatATAbusErr">
+        <title>ATA bus error</title>
+
+	<para>
+	ATA bus error means that data corruption occurred during
+	transmission over ATA bus (SATA or PATA).  This type of errors
+	can be indicated by
+	</para>
+
+	<itemizedlist>
+
+	<listitem>
+	<para>
+	ICRC or ABRT error as described in <xref linkend="excatDevErr"/>.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	Controller-specific error completion with error information
+	indicating transmission error.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	On some controllers, command timeout.  In this case, there may
+	be a mechanism to determine that the timeout is due to
+	transmission error.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	Unknown/random errors, timeouts and all sorts of weirdities.
+	</para>
+	</listitem>
+
+	</itemizedlist>
+
+	<para>
+	As described above, transmission errors can cause wide variety
+	of symptoms ranging from device ICRC error to random device
+	lockup, and, for many cases, there is no way to tell if an
+	error condition is due to transmission error or not;
+	therefore, it's necessary to employ some kind of heuristic
+	when dealing with errors and timeouts.  For example,
+	encountering repetitive ABRT errors for known supported
+	command is likely to indicate ATA bus error.
+	</para>
+
+	<para>
+	Once it's determined that ATA bus errors have possibly
+	occurred, lowering ATA bus transmission speed is one of
+	actions which may alleviate the problem.  See <xref
+	linkend="exrecReconf"/> for more information.
+	</para>
+
+     </sect2>
+
+     <sect2 id="excatPCIbusErr">
+        <title>PCI bus error</title>
+
+	<para>
+	Data corruption or other failures during transmission over PCI
+	(or other system bus).  For standard BMDMA, this is indicated
+	by Error bit in the BMDMA Status register.  This type of
+	errors must be logged as it indicates something is very wrong
+	with the system.  Resetting host controller is recommended.
+	</para>
+
+     </sect2>
+
+     <sect2 id="excatLateCompletion">
+        <title>Late completion</title>
+
+	<para>
+	This occurs when timeout occurs and the timeout handler finds
+	out that the timed out command has completed successfully or
+	with error.  This is usually caused by lost interrupts.  This
+	type of errors must be logged.  Resetting host controller is
+	recommended.
+	</para>
+
+     </sect2>
+
+     <sect2 id="excatUnknown">
+        <title>Unknown error (timeout)</title>
+
+	<para>
+	This is when timeout occurs and the command is still
+	processing or the host and device are in unknown state.  When
+	this occurs, HSM could be in any valid or invalid state.  To
+	bring the device to known state and make it forget about the
+	timed out command, resetting is necessary.  The timed out
+	command may be retried.
+	</para>
+
+	<para>
+	Timeouts can also be caused by transmission errors.  Refer to
+	<xref linkend="excatATAbusErr"/> for more details.
+	</para>
+
+     </sect2>
+
+     <sect2 id="excatHoplugPM">
+        <title>Hotplug and power management exceptions</title>
+
+	<para>
+	&lt;&lt;TODO: fill here&gt;&gt;
+	</para>
+
+     </sect2>
+
+  </sect1>
+
+  <sect1 id="exrec">
+     <title>EH recovery actions</title>
+
+     <para>
+     This section discusses several important recovery actions.
+     </para>
+
+     <sect2 id="exrecClr">
+        <title>Clearing error condition</title>
+
+	<para>
+	Many controllers require its error registers to be cleared by
+	error handler.  Different controllers may have different
+	requirements.
+	</para>
+
+	<para>
+	For SATA, it's strongly recommended to clear at least SError
+	register during error handling.
+	</para>
+     </sect2>
+
+     <sect2 id="exrecRst">
+        <title>Reset</title>
+
+	<para>
+	During EH, resetting is necessary in the following cases.
+	</para>
+
+	<itemizedlist>
+
+	<listitem>
+	<para>
+	HSM is in unknown or invalid state
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	HBA is in unknown or invalid state
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	EH needs to make HBA/device forget about in-flight commands
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	HBA/device behaves weirdly
+	</para>
+	</listitem>
+
+	</itemizedlist>
+
+	<para>
+	Resetting during EH might be a good idea regardless of error
+	condition to improve EH robustness.  Whether to reset both or
+	either one of HBA and device depends on situation but the
+	following scheme is recommended.
+	</para>
+
+	<itemizedlist>
+
+	<listitem>
+	<para>
+	When it's known that HBA is in ready state but ATA/ATAPI
+	device is in unknown state, reset only device.
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	If HBA is in unknown state, reset both HBA and device.
+	</para>
+	</listitem>
+
+	</itemizedlist>
+
+	<para>
+	HBA resetting is implementation specific.  For a controller
+	complying to taskfile/BMDMA PCI IDE, stopping active DMA
+	transaction may be sufficient iff BMDMA state is the only HBA
+	context.  But even mostly taskfile/BMDMA PCI IDE complying
+	controllers may have implementation specific requirements and
+	mechanism to reset themselves.  This must be addressed by
+	specific drivers.
+	</para>
+
+	<para>
+	OTOH, ATA/ATAPI standard describes in detail ways to reset
+	ATA/ATAPI devices.
+	</para>
+
+	<variablelist>
+
+	   <varlistentry><term>PATA hardware reset</term>
+	   <listitem>
+	   <para>
+	   This is hardware initiated device reset signalled with
+	   asserted PATA RESET- signal.  There is no standard way to
+	   initiate hardware reset from software although some
+	   hardware provides registers that allow driver to directly
+	   tweak the RESET- signal.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry><term>Software reset</term>
+	   <listitem>
+	   <para>
+	   This is achieved by turning CONTROL SRST bit on for at
+	   least 5us.  Both PATA and SATA support it but, in case of
+	   SATA, this may require controller-specific support as the
+	   second Register FIS to clear SRST should be transmitted
+	   while BSY bit is still set.  Note that on PATA, this resets
+	   both master and slave devices on a channel.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry><term>EXECUTE DEVICE DIAGNOSTIC command</term>
+	   <listitem>
+	   <para>
+	   Although ATA/ATAPI standard doesn't describe exactly, EDD
+	   implies some level of resetting, possibly similar level
+	   with software reset.  Host-side EDD protocol can be handled
+	   with normal command processing and most SATA controllers
+	   should be able to handle EDD's just like other commands.
+	   As in software reset, EDD affects both devices on a PATA
+	   bus.
+	   </para>
+	   <para>
+	   Although EDD does reset devices, this doesn't suit error
+	   handling as EDD cannot be issued while BSY is set and it's
+	   unclear how it will act when device is in unknown/weird
+	   state.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry><term>ATAPI DEVICE RESET command</term>
+	   <listitem>
+	   <para>
+	   This is very similar to software reset except that reset
+	   can be restricted to the selected device without affecting
+	   the other device sharing the cable.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	   <varlistentry><term>SATA phy reset</term>
+	   <listitem>
+	   <para>
+	   This is the preferred way of resetting a SATA device.  In
+	   effect, it's identical to PATA hardware reset.  Note that
+	   this can be done with the standard SCR Control register.
+	   As such, it's usually easier to implement than software
+	   reset.
+	   </para>
+	   </listitem>
+	   </varlistentry>
+
+	</variablelist>
+
+	<para>
+	One more thing to consider when resetting devices is that
+	resetting clears certain configuration parameters and they
+	need to be set to their previous or newly adjusted values
+	after reset.
+	</para>
+
+	<para>
+	Parameters affected are.
+	</para>
+
+	<itemizedlist>
+
+	<listitem>
+	<para>
+	CHS set up with INITIALIZE DEVICE PARAMETERS (seldomly used)
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	Parameters set with SET FEATURES including transfer mode setting
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	Block count set with SET MULTIPLE MODE
+	</para>
+	</listitem>
+
+	<listitem>
+	<para>
+	Other parameters (SET MAX, MEDIA LOCK...)
+	</para>
+	</listitem>
+
+	</itemizedlist>
+
+	<para>
+	ATA/ATAPI standard specifies that some parameters must be
+	maintained across hardware or software reset, but doesn't
+	strictly specify all of them.  Always reconfiguring needed
+	parameters after reset is required for robustness.  Note that
+	this also applies when resuming from deep sleep (power-off).
+	</para>
+
+	<para>
+	Also, ATA/ATAPI standard requires that IDENTIFY DEVICE /
+	IDENTIFY PACKET DEVICE is issued after any configuration
+	parameter is updated or a hardware reset and the result used
+	for further operation.  OS driver is required to implement
+	revalidation mechanism to support this.
+	</para>
+
+     </sect2>
+
+     <sect2 id="exrecReconf">
+        <title>Reconfigure transport</title>
+
+	<para>
+	For both PATA and SATA, a lot of corners are cut for cheap
+	connectors, cables or controllers and it's quite common to see
+	high transmission error rate.  This can be mitigated by
+	lowering transmission speed.
+	</para>
+
+	<para>
+	The following is a possible scheme Jeff Garzik suggested.
+	</para>
+
+	<blockquote>
+	<para>
+	If more than $N (3?) transmission errors happen in 15 minutes,
+	</para>	
+	<itemizedlist>
+	<listitem>
+	<para>
+	if SATA, decrease SATA PHY speed.  if speed cannot be decreased,
+	</para>
+	</listitem>
+	<listitem>
+	<para>
+	decrease UDMA xfer speed.  if at UDMA0, switch to PIO4,
+	</para>
+	</listitem>
+	<listitem>
+	<para>
+	decrease PIO xfer speed.  if at PIO3, complain, but continue
+	</para>
+	</listitem>
+	</itemizedlist>
+	</blockquote>
+
+     </sect2>
+
+  </sect1>
+
+  </chapter>
+
+  <chapter id="PiixInt">
+     <title>ata_piix Internals</title>
+!Idrivers/ata/ata_piix.c
+  </chapter>
+
+  <chapter id="SILInt">
+     <title>sata_sil Internals</title>
+!Idrivers/ata/sata_sil.c
+  </chapter>
+
+  <chapter id="libataThanks">
+     <title>Thanks</title>
+  <para>
+  The bulk of the ATA knowledge comes thanks to long conversations with
+  Andre Hedrick (www.linux-ide.org), and long hours pondering the ATA
+  and SCSI specifications.
+  </para>
+  <para>
+  Thanks to Alan Cox for pointing out similarities 
+  between SATA and SCSI, and in general for motivation to hack on
+  libata.
+  </para>
+  <para>
+  libata's device detection
+  method, ata_pio_devchk, and in general all the early probing was
+  based on extensive study of Hale Landis's probe/reset code in his
+  ATADRVR driver (www.ata-atapi.com).
+  </para>
+  </chapter>
+
+</book>
diff --git a/Documentation/DocBook/librs.tmpl b/Documentation/DocBook/librs.tmpl
new file mode 100644
index 0000000..94f2136
--- /dev/null
+++ b/Documentation/DocBook/librs.tmpl
@@ -0,0 +1,289 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="Reed-Solomon-Library-Guide">
+ <bookinfo>
+  <title>Reed-Solomon Library Programming Interface</title>
+  
+  <authorgroup>
+   <author>
+    <firstname>Thomas</firstname>
+    <surname>Gleixner</surname>
+    <affiliation>
+     <address>
+      <email>tglx@linutronix.de</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <copyright>
+   <year>2004</year>
+   <holder>Thomas Gleixner</holder>
+  </copyright>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License version 2 as published by the Free Software Foundation.
+   </para>
+      
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+      
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+      
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+  <chapter id="intro">
+      <title>Introduction</title>
+  <para>
+  	The generic Reed-Solomon Library provides encoding, decoding
+	and error correction functions.
+  </para>
+  <para>
+  	Reed-Solomon codes are used in communication and storage
+	applications to ensure data integrity. 
+  </para>
+  <para>
+  	This documentation is provided for developers who want to utilize
+	the functions provided by the library.
+  </para>
+  </chapter>
+  
+  <chapter id="bugs">
+     <title>Known Bugs And Assumptions</title>
+  <para>
+	None.	
+  </para>
+  </chapter>
+
+  <chapter id="usage">
+     	<title>Usage</title>
+	<para>
+		This chapter provides examples of how to use the library.
+	</para>
+	<sect1>
+		<title>Initializing</title>
+		<para>
+			The init function init_rs returns a pointer to an
+			rs decoder structure, which holds the necessary
+			information for encoding, decoding and error correction
+			with the given polynomial. It either uses an existing
+			matching decoder or creates a new one. On creation all
+			the lookup tables for fast en/decoding are created.
+			The function may take a while, so make sure not to 
+			call it in critical code paths.
+		</para>
+		<programlisting>
+/* the Reed Solomon control structure */
+static struct rs_control *rs_decoder;
+
+/* Symbolsize is 10 (bits)
+ * Primitive polynomial is x^10+x^3+1
+ * first consecutive root is 0
+ * primitive element to generate roots = 1
+ * generator polynomial degree (number of roots) = 6
+ */
+rs_decoder = init_rs (10, 0x409, 0, 1, 6);
+		</programlisting>
+	</sect1>
+	<sect1>
+		<title>Encoding</title>
+		<para>
+			The encoder calculates the Reed-Solomon code over
+			the given data length and stores the result in 
+			the parity buffer. Note that the parity buffer must
+			be initialized before calling the encoder.
+		</para>
+		<para>
+			The expanded data can be inverted on the fly by
+			providing a non-zero inversion mask. The expanded data is
+			XOR'ed with the mask. This is used e.g. for FLASH
+			ECC, where the all 0xFF is inverted to an all 0x00.
+			The Reed-Solomon code for all 0x00 is all 0x00. The
+			code is inverted before storing to FLASH so it is 0xFF
+			too. This prevents that reading from an erased FLASH
+			results in ECC errors.
+		</para>
+		<para>
+			The databytes are expanded to the given symbol size
+			on the fly. There is no support for encoding continuous
+			bitstreams with a symbol size != 8 at the moment. If
+			it is necessary it should be not a big deal to implement
+			such functionality.
+		</para>
+		<programlisting>
+/* Parity buffer. Size = number of roots */
+uint16_t par[6];
+/* Initialize the parity buffer */
+memset(par, 0, sizeof(par));
+/* Encode 512 byte in data8. Store parity in buffer par */
+encode_rs8 (rs_decoder, data8, 512, par, 0);
+		</programlisting>
+	</sect1>
+	<sect1>
+		<title>Decoding</title>
+		<para>
+			The decoder calculates the syndrome over
+			the given data length and the received parity symbols
+			and corrects errors in the data.
+		</para>
+		<para>
+			If a syndrome is available from a hardware decoder
+			then the syndrome calculation is skipped.
+		</para>
+		<para>
+			The correction of the data buffer can be suppressed
+			by providing a correction pattern buffer and an error
+			location buffer to the decoder. The decoder stores the
+			calculated error location and the correction bitmask
+			in the given buffers. This is useful for hardware
+			decoders which use a weird bit ordering scheme.
+		</para>
+		<para>
+			The databytes are expanded to the given symbol size
+			on the fly. There is no support for decoding continuous
+			bitstreams with a symbolsize != 8 at the moment. If
+			it is necessary it should be not a big deal to implement
+			such functionality.
+		</para>
+		
+		<sect2>
+		<title>
+			Decoding with syndrome calculation, direct data correction
+		</title>
+		<programlisting>
+/* Parity buffer. Size = number of roots */
+uint16_t par[6];
+uint8_t  data[512];
+int numerr;
+/* Receive data */
+.....
+/* Receive parity */
+.....
+/* Decode 512 byte in data8.*/
+numerr = decode_rs8 (rs_decoder, data8, par, 512, NULL, 0, NULL, 0, NULL);
+		</programlisting>
+		</sect2>
+
+		<sect2>
+		<title>
+			Decoding with syndrome given by hardware decoder, direct data correction
+		</title>
+		<programlisting>
+/* Parity buffer. Size = number of roots */
+uint16_t par[6], syn[6];
+uint8_t  data[512];
+int numerr;
+/* Receive data */
+.....
+/* Receive parity */
+.....
+/* Get syndrome from hardware decoder */
+.....
+/* Decode 512 byte in data8.*/
+numerr = decode_rs8 (rs_decoder, data8, par, 512, syn, 0, NULL, 0, NULL);
+		</programlisting>
+		</sect2>
+
+		<sect2>
+		<title>
+			Decoding with syndrome given by hardware decoder, no direct data correction.
+		</title>
+		<para>
+			Note: It's not necessary to give data and received parity to the decoder.
+		</para>
+		<programlisting>
+/* Parity buffer. Size = number of roots */
+uint16_t par[6], syn[6], corr[8];
+uint8_t  data[512];
+int numerr, errpos[8];
+/* Receive data */
+.....
+/* Receive parity */
+.....
+/* Get syndrome from hardware decoder */
+.....
+/* Decode 512 byte in data8.*/
+numerr = decode_rs8 (rs_decoder, NULL, NULL, 512, syn, 0, errpos, 0, corr);
+for (i = 0; i &lt; numerr; i++) {
+	do_error_correction_in_your_buffer(errpos[i], corr[i]);
+}
+		</programlisting>
+		</sect2>
+	</sect1>
+	<sect1>
+		<title>Cleanup</title>
+		<para>
+			The function free_rs frees the allocated resources,
+			if the caller is the last user of the decoder.
+		</para>
+		<programlisting>
+/* Release resources */
+free_rs(rs_decoder);
+		</programlisting>
+	</sect1>
+
+  </chapter>
+	
+  <chapter id="structs">
+     <title>Structures</title>
+     <para>
+     This chapter contains the autogenerated documentation of the structures which are
+     used in the Reed-Solomon Library and are relevant for a developer.
+     </para>
+!Iinclude/linux/rslib.h
+  </chapter>
+
+  <chapter id="pubfunctions">
+     <title>Public Functions Provided</title>
+     <para>
+     This chapter contains the autogenerated documentation of the Reed-Solomon functions
+     which are exported.
+     </para>
+!Elib/reed_solomon/reed_solomon.c
+  </chapter>
+  
+  <chapter id="credits">
+     <title>Credits</title>
+	<para>
+		The library code for encoding and decoding was written by Phil Karn.
+	</para>
+	<programlisting>
+		Copyright 2002, Phil Karn, KA9Q
+ 		May be used under the terms of the GNU General Public License (GPL)
+	</programlisting>
+	<para>
+		The wrapper functions and interfaces are written by Thomas Gleixner.
+	</para>
+	<para>
+		Many users have provided bugfixes, improvements and helping hands for testing.
+		Thanks a lot.
+	</para>
+	<para>
+		The following people have contributed to this document:
+	</para>
+	<para>
+		Thomas Gleixner<email>tglx@linutronix.de</email>
+	</para>
+  </chapter>
+</book>
diff --git a/Documentation/DocBook/lsm.tmpl b/Documentation/DocBook/lsm.tmpl
new file mode 100644
index 0000000..fe7664c
--- /dev/null
+++ b/Documentation/DocBook/lsm.tmpl
@@ -0,0 +1,265 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<article class="whitepaper" id="LinuxSecurityModule" lang="en">
+ <articleinfo>
+ <title>Linux Security Modules:  General Security Hooks for Linux</title>
+ <authorgroup>
+ <author>
+ <firstname>Stephen</firstname> 
+ <surname>Smalley</surname>
+ <affiliation>
+ <orgname>NAI Labs</orgname>
+ <address><email>ssmalley@nai.com</email></address>
+ </affiliation>
+ </author>
+ <author>
+ <firstname>Timothy</firstname> 
+ <surname>Fraser</surname>
+ <affiliation>
+ <orgname>NAI Labs</orgname>
+ <address><email>tfraser@nai.com</email></address>
+ </affiliation>
+ </author>
+ <author>
+ <firstname>Chris</firstname> 
+ <surname>Vance</surname>
+ <affiliation>
+ <orgname>NAI Labs</orgname>
+ <address><email>cvance@nai.com</email></address>
+ </affiliation>
+ </author>
+ </authorgroup>
+ </articleinfo>
+
+<sect1 id="Introduction"><title>Introduction</title>
+
+<para>
+In March 2001, the National Security Agency (NSA) gave a presentation
+about Security-Enhanced Linux (SELinux) at the 2.5 Linux Kernel
+Summit.  SELinux is an implementation of flexible and fine-grained
+nondiscretionary access controls in the Linux kernel, originally
+implemented as its own particular kernel patch.  Several other
+security projects (e.g. RSBAC, Medusa) have also developed flexible
+access control architectures for the Linux kernel, and various
+projects have developed particular access control models for Linux
+(e.g. LIDS, DTE, SubDomain).  Each project has developed and
+maintained its own kernel patch to support its security needs.
+</para>
+
+<para>
+In response to the NSA presentation, Linus Torvalds made a set of
+remarks that described a security framework he would be willing to
+consider for inclusion in the mainstream Linux kernel.  He described a
+general framework that would provide a set of security hooks to
+control operations on kernel objects and a set of opaque security
+fields in kernel data structures for maintaining security attributes.
+This framework could then be used by loadable kernel modules to
+implement any desired model of security.  Linus also suggested the
+possibility of migrating the Linux capabilities code into such a
+module.
+</para>
+
+<para>
+The Linux Security Modules (LSM) project was started by WireX to
+develop such a framework.  LSM is a joint development effort by
+several security projects, including Immunix, SELinux, SGI and Janus,
+and several individuals, including Greg Kroah-Hartman and James
+Morris, to develop a Linux kernel patch that implements this
+framework.  The patch is currently tracking the 2.4 series and is
+targeted for integration into the 2.5 development series.  This
+technical report provides an overview of the framework and the example
+capabilities security module provided by the LSM kernel patch.
+</para>
+
+</sect1>
+
+<sect1 id="framework"><title>LSM Framework</title>
+
+<para>
+The LSM kernel patch provides a general kernel framework to support
+security modules.  In particular, the LSM framework is primarily
+focused on supporting access control modules, although future
+development is likely to address other security needs such as
+auditing.  By itself, the framework does not provide any additional
+security; it merely provides the infrastructure to support security
+modules.  The LSM kernel patch also moves most of the capabilities
+logic into an optional security module, with the system defaulting
+to the traditional superuser logic.  This capabilities module
+is discussed further in <xref linkend="cap"/>.
+</para>
+
+<para>
+The LSM kernel patch adds security fields to kernel data structures
+and inserts calls to hook functions at critical points in the kernel
+code to manage the security fields and to perform access control.  It
+also adds functions for registering and unregistering security
+modules, and adds a general <function>security</function> system call
+to support new system calls for security-aware applications.
+</para>
+
+<para>
+The LSM security fields are simply <type>void*</type> pointers.  For
+process and program execution security information, security fields
+were added to <structname>struct task_struct</structname> and 
+<structname>struct linux_binprm</structname>.  For filesystem security
+information, a security field was added to 
+<structname>struct super_block</structname>.  For pipe, file, and socket
+security information, security fields were added to 
+<structname>struct inode</structname> and 
+<structname>struct file</structname>.  For packet and network device security
+information, security fields were added to
+<structname>struct sk_buff</structname> and
+<structname>struct net_device</structname>.  For System V IPC security
+information, security fields were added to
+<structname>struct kern_ipc_perm</structname> and
+<structname>struct msg_msg</structname>; additionally, the definitions
+for <structname>struct msg_msg</structname>, <structname>struct 
+msg_queue</structname>, and <structname>struct 
+shmid_kernel</structname> were moved to header files
+(<filename>include/linux/msg.h</filename> and
+<filename>include/linux/shm.h</filename> as appropriate) to allow
+the security modules to use these definitions.
+</para>
+
+<para>
+Each LSM hook is a function pointer in a global table,
+security_ops. This table is a
+<structname>security_operations</structname> structure as defined by
+<filename>include/linux/security.h</filename>.  Detailed documentation
+for each hook is included in this header file.  At present, this
+structure consists of a collection of substructures that group related
+hooks based on the kernel object (e.g. task, inode, file, sk_buff,
+etc) as well as some top-level hook function pointers for system
+operations.  This structure is likely to be flattened in the future
+for performance.  The placement of the hook calls in the kernel code
+is described by the "called:" lines in the per-hook documentation in
+the header file.  The hook calls can also be easily found in the
+kernel code by looking for the string "security_ops->".
+
+</para>
+
+<para>
+Linus mentioned per-process security hooks in his original remarks as a
+possible alternative to global security hooks.  However, if LSM were
+to start from the perspective of per-process hooks, then the base
+framework would have to deal with how to handle operations that
+involve multiple processes (e.g. kill), since each process might have
+its own hook for controlling the operation.  This would require a
+general mechanism for composing hooks in the base framework.
+Additionally, LSM would still need global hooks for operations that
+have no process context (e.g. network input operations).
+Consequently, LSM provides global security hooks, but a security
+module is free to implement per-process hooks (where that makes sense)
+by storing a security_ops table in each process' security field and
+then invoking these per-process hooks from the global hooks.
+The problem of composition is thus deferred to the module.
+</para>
+
+<para>
+The global security_ops table is initialized to a set of hook
+functions provided by a dummy security module that provides
+traditional superuser logic.  A <function>register_security</function>
+function (in <filename>security/security.c</filename>) is provided to
+allow a security module to set security_ops to refer to its own hook
+functions, and an <function>unregister_security</function> function is
+provided to revert security_ops to the dummy module hooks.  This
+mechanism is used to set the primary security module, which is
+responsible for making the final decision for each hook.
+</para>
+
+<para>
+LSM also provides a simple mechanism for stacking additional security
+modules with the primary security module.  It defines
+<function>register_security</function> and
+<function>unregister_security</function> hooks in the
+<structname>security_operations</structname> structure and provides
+<function>mod_reg_security</function> and
+<function>mod_unreg_security</function> functions that invoke these
+hooks after performing some sanity checking.  A security module can
+call these functions in order to stack with other modules.  However,
+the actual details of how this stacking is handled are deferred to the
+module, which can implement these hooks in any way it wishes
+(including always returning an error if it does not wish to support
+stacking).  In this manner, LSM again defers the problem of
+composition to the module.
+</para>
+
+<para>
+Although the LSM hooks are organized into substructures based on
+kernel object, all of the hooks can be viewed as falling into two
+major categories: hooks that are used to manage the security fields
+and hooks that are used to perform access control.  Examples of the
+first category of hooks include the
+<function>alloc_security</function> and
+<function>free_security</function> hooks defined for each kernel data
+structure that has a security field.  These hooks are used to allocate
+and free security structures for kernel objects.  The first category
+of hooks also includes hooks that set information in the security
+field after allocation, such as the <function>post_lookup</function>
+hook in <structname>struct inode_security_ops</structname>.  This hook
+is used to set security information for inodes after successful lookup
+operations.  An example of the second category of hooks is the
+<function>permission</function> hook in 
+<structname>struct inode_security_ops</structname>.  This hook checks
+permission when accessing an inode.
+</para>
+
+</sect1>
+
+<sect1 id="cap"><title>LSM Capabilities Module</title>
+
+<para>
+The LSM kernel patch moves most of the existing POSIX.1e capabilities
+logic into an optional security module stored in the file
+<filename>security/capability.c</filename>.  This change allows
+users who do not want to use capabilities to omit this code entirely
+from their kernel, instead using the dummy module for traditional
+superuser logic or any other module that they desire.  This change
+also allows the developers of the capabilities logic to maintain and
+enhance their code more freely, without needing to integrate patches
+back into the base kernel.
+</para>
+
+<para>
+In addition to moving the capabilities logic, the LSM kernel patch
+could move the capability-related fields from the kernel data
+structures into the new security fields managed by the security
+modules.  However, at present, the LSM kernel patch leaves the
+capability fields in the kernel data structures.  In his original
+remarks, Linus suggested that this might be preferable so that other
+security modules can be easily stacked with the capabilities module
+without needing to chain multiple security structures on the security field.
+It also avoids imposing extra overhead on the capabilities module
+to manage the security fields.  However, the LSM framework could
+certainly support such a move if it is determined to be desirable,
+with only a few additional changes described below.
+</para>
+
+<para>
+At present, the capabilities logic for computing process capabilities
+on <function>execve</function> and <function>set*uid</function>,
+checking capabilities for a particular process, saving and checking
+capabilities for netlink messages, and handling the
+<function>capget</function> and <function>capset</function> system
+calls have been moved into the capabilities module.  There are still a
+few locations in the base kernel where capability-related fields are
+directly examined or modified, but the current version of the LSM
+patch does allow a security module to completely replace the
+assignment and testing of capabilities.  These few locations would
+need to be changed if the capability-related fields were moved into
+the security field.  The following is a list of known locations that
+still perform such direct examination or modification of
+capability-related fields:
+<itemizedlist>
+<listitem><para><filename>fs/open.c</filename>:<function>sys_access</function></para></listitem>
+<listitem><para><filename>fs/lockd/host.c</filename>:<function>nlm_bind_host</function></para></listitem>
+<listitem><para><filename>fs/nfsd/auth.c</filename>:<function>nfsd_setuser</function></para></listitem>
+<listitem><para><filename>fs/proc/array.c</filename>:<function>task_cap</function></para></listitem>
+</itemizedlist>
+</para>
+
+</sect1>
+
+</article>
diff --git a/Documentation/DocBook/mac80211.tmpl b/Documentation/DocBook/mac80211.tmpl
new file mode 100644
index 0000000..77c3c20
--- /dev/null
+++ b/Documentation/DocBook/mac80211.tmpl
@@ -0,0 +1,331 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="mac80211-developers-guide">
+  <bookinfo>
+    <title>The mac80211 subsystem for kernel developers</title>
+
+    <authorgroup>
+      <author>
+        <firstname>Johannes</firstname>
+        <surname>Berg</surname>
+        <affiliation>
+          <address><email>johannes@sipsolutions.net</email></address>
+        </affiliation>
+      </author>
+    </authorgroup>
+
+    <copyright>
+      <year>2007</year>
+      <year>2008</year>
+      <holder>Johannes Berg</holder>
+    </copyright>
+
+    <legalnotice>
+      <para>
+        This documentation is free software; you can redistribute
+        it and/or modify it under the terms of the GNU General Public
+        License version 2 as published by the Free Software Foundation.
+      </para>
+
+      <para>
+        This documentation is distributed in the hope that it will be
+        useful, but WITHOUT ANY WARRANTY; without even the implied
+        warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+        See the GNU General Public License for more details.
+      </para>
+
+      <para>
+        You should have received a copy of the GNU General Public
+        License along with this documentation; if not, write to the Free
+        Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+        MA 02111-1307 USA
+      </para>
+
+      <para>
+        For more details see the file COPYING in the source
+        distribution of Linux.
+      </para>
+    </legalnotice>
+
+    <abstract>
+!Pinclude/net/mac80211.h Introduction
+!Pinclude/net/mac80211.h Warning
+    </abstract>
+  </bookinfo>
+
+  <toc></toc>
+
+<!--
+Generally, this document shall be ordered by increasing complexity.
+It is important to note that readers should be able to read only
+the first few sections to get a working driver and only advanced
+usage should require reading the full document.
+-->
+
+  <part>
+    <title>The basic mac80211 driver interface</title>
+    <partintro>
+      <para>
+        You should read and understand the information contained
+        within this part of the book while implementing a driver.
+        In some chapters, advanced usage is noted, that may be
+        skipped at first.
+      </para>
+      <para>
+        This part of the book only covers station and monitor mode
+        functionality, additional information required to implement
+        the other modes is covered in the second part of the book.
+      </para>
+    </partintro>
+
+    <chapter id="basics">
+      <title>Basic hardware handling</title>
+      <para>TBD</para>
+      <para>
+        This chapter shall contain information on getting a hw
+        struct allocated and registered with mac80211.
+      </para>
+      <para>
+        Since it is required to allocate rates/modes before registering
+        a hw struct, this chapter shall also contain information on setting
+        up the rate/mode structs.
+      </para>
+      <para>
+        Additionally, some discussion about the callbacks and
+        the general programming model should be in here, including
+        the definition of ieee80211_ops which will be referred to
+        a lot.
+      </para>
+      <para>
+        Finally, a discussion of hardware capabilities should be done
+        with references to other parts of the book.
+      </para>
+<!-- intentionally multiple !F lines to get proper order -->
+!Finclude/net/mac80211.h ieee80211_hw
+!Finclude/net/mac80211.h ieee80211_hw_flags
+!Finclude/net/mac80211.h SET_IEEE80211_DEV
+!Finclude/net/mac80211.h SET_IEEE80211_PERM_ADDR
+!Finclude/net/mac80211.h ieee80211_ops
+!Finclude/net/mac80211.h ieee80211_alloc_hw
+!Finclude/net/mac80211.h ieee80211_register_hw
+!Finclude/net/mac80211.h ieee80211_get_tx_led_name
+!Finclude/net/mac80211.h ieee80211_get_rx_led_name
+!Finclude/net/mac80211.h ieee80211_get_assoc_led_name
+!Finclude/net/mac80211.h ieee80211_get_radio_led_name
+!Finclude/net/mac80211.h ieee80211_unregister_hw
+!Finclude/net/mac80211.h ieee80211_free_hw
+    </chapter>
+
+    <chapter id="phy-handling">
+      <title>PHY configuration</title>
+      <para>TBD</para>
+      <para>
+        This chapter should describe PHY handling including
+        start/stop callbacks and the various structures used.
+      </para>
+!Finclude/net/mac80211.h ieee80211_conf
+!Finclude/net/mac80211.h ieee80211_conf_flags
+    </chapter>
+
+    <chapter id="iface-handling">
+      <title>Virtual interfaces</title>
+      <para>TBD</para>
+      <para>
+        This chapter should describe virtual interface basics
+        that are relevant to the driver (VLANs, MGMT etc are not.)
+        It should explain the use of the add_iface/remove_iface
+        callbacks as well as the interface configuration callbacks.
+      </para>
+      <para>Things related to AP mode should be discussed there.</para>
+      <para>
+        Things related to supporting multiple interfaces should be
+        in the appropriate chapter, a BIG FAT note should be here about
+        this though and the recommendation to allow only a single
+        interface in STA mode at first!
+      </para>
+!Finclude/net/mac80211.h ieee80211_if_init_conf
+!Finclude/net/mac80211.h ieee80211_if_conf
+    </chapter>
+
+    <chapter id="rx-tx">
+      <title>Receive and transmit processing</title>
+      <sect1>
+        <title>what should be here</title>
+        <para>TBD</para>
+        <para>
+          This should describe the receive and transmit
+          paths in mac80211/the drivers as well as
+          transmit status handling.
+        </para>
+      </sect1>
+      <sect1>
+        <title>Frame format</title>
+!Pinclude/net/mac80211.h Frame format
+      </sect1>
+      <sect1>
+        <title>Alignment issues</title>
+        <para>TBD</para>
+      </sect1>
+      <sect1>
+        <title>Calling into mac80211 from interrupts</title>
+!Pinclude/net/mac80211.h Calling mac80211 from interrupts
+      </sect1>
+      <sect1>
+        <title>functions/definitions</title>
+!Finclude/net/mac80211.h ieee80211_rx_status
+!Finclude/net/mac80211.h mac80211_rx_flags
+!Finclude/net/mac80211.h ieee80211_tx_info
+!Finclude/net/mac80211.h ieee80211_rx
+!Finclude/net/mac80211.h ieee80211_rx_irqsafe
+!Finclude/net/mac80211.h ieee80211_tx_status
+!Finclude/net/mac80211.h ieee80211_tx_status_irqsafe
+!Finclude/net/mac80211.h ieee80211_rts_get
+!Finclude/net/mac80211.h ieee80211_rts_duration
+!Finclude/net/mac80211.h ieee80211_ctstoself_get
+!Finclude/net/mac80211.h ieee80211_ctstoself_duration
+!Finclude/net/mac80211.h ieee80211_generic_frame_duration
+!Finclude/net/mac80211.h ieee80211_get_hdrlen_from_skb
+!Finclude/net/mac80211.h ieee80211_hdrlen
+!Finclude/net/mac80211.h ieee80211_wake_queue
+!Finclude/net/mac80211.h ieee80211_stop_queue
+!Finclude/net/mac80211.h ieee80211_wake_queues
+!Finclude/net/mac80211.h ieee80211_stop_queues
+      </sect1>
+    </chapter>
+
+    <chapter id="filters">
+      <title>Frame filtering</title>
+!Pinclude/net/mac80211.h Frame filtering
+!Finclude/net/mac80211.h ieee80211_filter_flags
+    </chapter>
+  </part>
+
+  <part id="advanced">
+    <title>Advanced driver interface</title>
+    <partintro>
+      <para>
+       Information contained within this part of the book is
+       of interest only for advanced interaction of mac80211
+       with drivers to exploit more hardware capabilities and
+       improve performance.
+      </para>
+    </partintro>
+
+    <chapter id="hardware-crypto-offload">
+      <title>Hardware crypto acceleration</title>
+!Pinclude/net/mac80211.h Hardware crypto acceleration
+<!-- intentionally multiple !F lines to get proper order -->
+!Finclude/net/mac80211.h set_key_cmd
+!Finclude/net/mac80211.h ieee80211_key_conf
+!Finclude/net/mac80211.h ieee80211_key_alg
+!Finclude/net/mac80211.h ieee80211_key_flags
+    </chapter>
+
+    <chapter id="qos">
+      <title>Multiple queues and QoS support</title>
+      <para>TBD</para>
+!Finclude/net/mac80211.h ieee80211_tx_queue_params
+!Finclude/net/mac80211.h ieee80211_tx_queue_stats
+    </chapter>
+
+    <chapter id="AP">
+      <title>Access point mode support</title>
+      <para>TBD</para>
+      <para>Some parts of the if_conf should be discussed here instead</para>
+      <para>
+        Insert notes about VLAN interfaces with hw crypto here or
+        in the hw crypto chapter.
+      </para>
+!Finclude/net/mac80211.h ieee80211_get_buffered_bc
+!Finclude/net/mac80211.h ieee80211_beacon_get
+    </chapter>
+
+    <chapter id="multi-iface">
+      <title>Supporting multiple virtual interfaces</title>
+      <para>TBD</para>
+      <para>
+        Note: WDS with identical MAC address should almost always be OK
+      </para>
+      <para>
+        Insert notes about having multiple virtual interfaces with
+        different MAC addresses here, note which configurations are
+        supported by mac80211, add notes about supporting hw crypto
+        with it.
+      </para>
+    </chapter>
+
+    <chapter id="hardware-scan-offload">
+      <title>Hardware scan offload</title>
+      <para>TBD</para>
+!Finclude/net/mac80211.h ieee80211_scan_completed
+    </chapter>
+  </part>
+
+  <part id="rate-control">
+    <title>Rate control interface</title>
+    <partintro>
+      <para>TBD</para>
+      <para>
+       This part of the book describes the rate control algorithm
+       interface and how it relates to mac80211 and drivers.
+      </para>
+    </partintro>
+    <chapter id="dummy">
+      <title>dummy chapter</title>
+      <para>TBD</para>
+    </chapter>
+  </part>
+
+  <part id="internal">
+    <title>Internals</title>
+    <partintro>
+      <para>TBD</para>
+      <para>
+       This part of the book describes mac80211 internals.
+      </para>
+    </partintro>
+
+    <chapter id="key-handling">
+      <title>Key handling</title>
+      <sect1>
+        <title>Key handling basics</title>
+!Pnet/mac80211/key.c Key handling basics
+      </sect1>
+      <sect1>
+        <title>MORE TBD</title>
+        <para>TBD</para>
+      </sect1>
+    </chapter>
+
+    <chapter id="rx-processing">
+      <title>Receive processing</title>
+      <para>TBD</para>
+    </chapter>
+
+    <chapter id="tx-processing">
+      <title>Transmit processing</title>
+      <para>TBD</para>
+    </chapter>
+
+    <chapter id="sta-info">
+      <title>Station info handling</title>
+      <sect1>
+        <title>Programming information</title>
+!Fnet/mac80211/sta_info.h sta_info
+!Fnet/mac80211/sta_info.h ieee80211_sta_info_flags
+      </sect1>
+      <sect1>
+        <title>STA information lifetime rules</title>
+!Pnet/mac80211/sta_info.c STA information lifetime rules
+      </sect1>
+    </chapter>
+
+    <chapter id="synchronisation">
+      <title>Synchronisation</title>
+      <para>TBD</para>
+      <para>Locking, lots of RCU</para>
+    </chapter>
+  </part>
+</book>
diff --git a/Documentation/DocBook/mcabook.tmpl b/Documentation/DocBook/mcabook.tmpl
new file mode 100644
index 0000000..467ccac
--- /dev/null
+++ b/Documentation/DocBook/mcabook.tmpl
@@ -0,0 +1,107 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="MCAGuide">
+ <bookinfo>
+  <title>MCA Driver Programming Interface</title>
+  
+  <authorgroup>
+   <author>
+    <firstname>Alan</firstname>
+    <surname>Cox</surname>
+    <affiliation>
+     <address>
+      <email>alan@lxorguk.ukuu.org.uk</email>
+     </address>
+    </affiliation>
+   </author>
+   <author>
+    <firstname>David</firstname>
+    <surname>Weinehall</surname>
+   </author>
+   <author>
+    <firstname>Chris</firstname>
+    <surname>Beauregard</surname>
+   </author>
+  </authorgroup>
+
+  <copyright>
+   <year>2000</year>
+   <holder>Alan Cox</holder>
+   <holder>David Weinehall</holder>
+   <holder>Chris Beauregard</holder>
+  </copyright>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License as published by the Free Software Foundation; either
+     version 2 of the License, or (at your option) any later
+     version.
+   </para>
+      
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+      
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+      
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+  <chapter id="intro">
+      <title>Introduction</title>
+  <para>
+	The MCA bus functions provide a generalised interface to find MCA
+	bus cards, to claim them for a driver, and to read and manipulate POS 
+	registers without being aware of the motherboard internals or 
+	certain deep magic specific to onboard devices.
+  </para>
+  <para>
+	The basic interface to the MCA bus devices is the slot. Each slot
+	is numbered and virtual slot numbers are assigned to the internal
+	devices. Using a pci_dev as other busses do does not really make
+	sense in the MCA context as the MCA bus resources require card
+	specific interpretation.
+  </para>
+  <para>
+	Finally the MCA bus functions provide a parallel set of DMA
+	functions mimicing the ISA bus DMA functions as closely as possible,
+	although also supporting the additional DMA functionality on the
+	MCA bus controllers.
+  </para>
+  </chapter>
+  <chapter id="bugs">
+     <title>Known Bugs And Assumptions</title>
+  <para>
+	None.	
+  </para>
+  </chapter>
+
+  <chapter id="pubfunctions">
+     <title>Public Functions Provided</title>
+!Edrivers/mca/mca-legacy.c
+  </chapter>
+
+  <chapter id="dmafunctions">
+     <title>DMA Functions Provided</title>
+!Iarch/x86/include/asm/mca_dma.h
+  </chapter>
+
+</book>
diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl
new file mode 100644
index 0000000..8e14585
--- /dev/null
+++ b/Documentation/DocBook/mtdnand.tmpl
@@ -0,0 +1,1318 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="MTD-NAND-Guide">
+ <bookinfo>
+  <title>MTD NAND Driver Programming Interface</title>
+  
+  <authorgroup>
+   <author>
+    <firstname>Thomas</firstname>
+    <surname>Gleixner</surname>
+    <affiliation>
+     <address>
+      <email>tglx@linutronix.de</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <copyright>
+   <year>2004</year>
+   <holder>Thomas Gleixner</holder>
+  </copyright>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License version 2 as published by the Free Software Foundation.
+   </para>
+      
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+      
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+      
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+  <chapter id="intro">
+      <title>Introduction</title>
+  <para>
+  	The generic NAND driver supports almost all NAND and AG-AND based
+	chips and connects them to the Memory Technology Devices (MTD)
+	subsystem of the Linux Kernel.
+  </para>
+  <para>
+  	This documentation is provided for developers who want to implement
+	board drivers or filesystem drivers suitable for NAND devices.
+  </para>
+  </chapter>
+  
+  <chapter id="bugs">
+     <title>Known Bugs And Assumptions</title>
+  <para>
+	None.	
+  </para>
+  </chapter>
+
+  <chapter id="dochints">
+     <title>Documentation hints</title>
+     <para>
+     The function and structure docs are autogenerated. Each function and 
+     struct member has a short description which is marked with an [XXX] identifier.
+     The following chapters explain the meaning of those identifiers.
+     </para>
+     <sect1 id="Function_identifiers_XXX">
+	<title>Function identifiers [XXX]</title>
+     	<para>
+	The functions are marked with [XXX] identifiers in the short
+	comment. The identifiers explain the usage and scope of the
+	functions. Following identifiers are used:
+     	</para>
+	<itemizedlist>
+		<listitem><para>
+	  	[MTD Interface]</para><para>
+		These functions provide the interface to the MTD kernel API. 
+		They are not replacable and provide functionality
+		which is complete hardware independent.
+		</para></listitem>
+		<listitem><para>
+	  	[NAND Interface]</para><para>
+		These functions are exported and provide the interface to the NAND kernel API. 
+		</para></listitem>
+		<listitem><para>
+	  	[GENERIC]</para><para>
+		Generic functions are not replacable and provide functionality
+		which is complete hardware independent.
+		</para></listitem>
+		<listitem><para>
+	  	[DEFAULT]</para><para>
+		Default functions provide hardware related functionality which is suitable
+		for most of the implementations. These functions can be replaced by the
+		board driver if neccecary. Those functions are called via pointers in the
+		NAND chip description structure. The board driver can set the functions which
+		should be replaced by board dependent functions before calling nand_scan().
+		If the function pointer is NULL on entry to nand_scan() then the pointer
+		is set to the default function which is suitable for the detected chip type.
+		</para></listitem>
+	</itemizedlist>
+     </sect1>
+     <sect1 id="Struct_member_identifiers_XXX">
+	<title>Struct member identifiers [XXX]</title>
+     	<para>
+	The struct members are marked with [XXX] identifiers in the 
+	comment. The identifiers explain the usage and scope of the
+	members. Following identifiers are used:
+     	</para>
+	<itemizedlist>
+		<listitem><para>
+	  	[INTERN]</para><para>
+		These members are for NAND driver internal use only and must not be
+		modified. Most of these values are calculated from the chip geometry
+		information which is evaluated during nand_scan().
+		</para></listitem>
+		<listitem><para>
+	  	[REPLACEABLE]</para><para>
+		Replaceable members hold hardware related functions which can be 
+		provided by the board driver. The board driver can set the functions which
+		should be replaced by board dependent functions before calling nand_scan().
+		If the function pointer is NULL on entry to nand_scan() then the pointer
+		is set to the default function which is suitable for the detected chip type.
+		</para></listitem>
+		<listitem><para>
+	  	[BOARDSPECIFIC]</para><para>
+		Board specific members hold hardware related information which must
+		be provided by the board driver. The board driver must set the function
+		pointers and datafields before calling nand_scan().
+		</para></listitem>
+		<listitem><para>
+	  	[OPTIONAL]</para><para>
+		Optional members can hold information relevant for the board driver. The
+		generic NAND driver code does not use this information.
+		</para></listitem>
+	</itemizedlist>
+     </sect1>
+  </chapter>   
+
+  <chapter id="basicboarddriver">
+     	<title>Basic board driver</title>
+	<para>
+		For most boards it will be sufficient to provide just the
+		basic functions and fill out some really board dependent
+		members in the nand chip description structure.
+	</para>
+	<sect1 id="Basic_defines">
+		<title>Basic defines</title>
+		<para>
+			At least you have to provide a mtd structure and
+			a storage for the ioremap'ed chip address.
+			You can allocate the mtd structure using kmalloc
+			or you can allocate it statically.
+			In case of static allocation you have to allocate
+			a nand_chip structure too.
+		</para>
+		<para>
+			Kmalloc based example
+		</para>
+		<programlisting>
+static struct mtd_info *board_mtd;
+static unsigned long baseaddr;
+		</programlisting>
+		<para>
+			Static example
+		</para>
+		<programlisting>
+static struct mtd_info board_mtd;
+static struct nand_chip board_chip;
+static unsigned long baseaddr;
+		</programlisting>
+	</sect1>
+	<sect1 id="Partition_defines">
+		<title>Partition defines</title>
+		<para>
+			If you want to divide your device into partitions, then
+			enable the configuration switch CONFIG_MTD_PARTITIONS and define
+			a partitioning scheme suitable to your board.
+		</para>
+		<programlisting>
+#define NUM_PARTITIONS 2
+static struct mtd_partition partition_info[] = {
+	{ .name = "Flash partition 1",
+	  .offset =  0,
+	  .size =    8 * 1024 * 1024 },
+	{ .name = "Flash partition 2",
+	  .offset =  MTDPART_OFS_NEXT,
+	  .size =    MTDPART_SIZ_FULL },
+};
+		</programlisting>
+	</sect1>
+	<sect1 id="Hardware_control_functions">
+		<title>Hardware control function</title>
+		<para>
+			The hardware control function provides access to the 
+			control pins of the NAND chip(s). 
+			The access can be done by GPIO pins or by address lines.
+			If you use address lines, make sure that the timing
+			requirements are met.
+		</para>
+		<para>
+			<emphasis>GPIO based example</emphasis>
+		</para>
+		<programlisting>
+static void board_hwcontrol(struct mtd_info *mtd, int cmd)
+{
+	switch(cmd){
+		case NAND_CTL_SETCLE: /* Set CLE pin high */ break;
+		case NAND_CTL_CLRCLE: /* Set CLE pin low */ break;
+		case NAND_CTL_SETALE: /* Set ALE pin high */ break;
+		case NAND_CTL_CLRALE: /* Set ALE pin low */ break;
+		case NAND_CTL_SETNCE: /* Set nCE pin low */ break;
+		case NAND_CTL_CLRNCE: /* Set nCE pin high */ break;
+	}
+}
+		</programlisting>
+		<para>
+			<emphasis>Address lines based example.</emphasis> It's assumed that the
+			nCE pin is driven by a chip select decoder.
+		</para>
+		<programlisting>
+static void board_hwcontrol(struct mtd_info *mtd, int cmd)
+{
+	struct nand_chip *this = (struct nand_chip *) mtd->priv;
+	switch(cmd){
+		case NAND_CTL_SETCLE: this->IO_ADDR_W |= CLE_ADRR_BIT;  break;
+		case NAND_CTL_CLRCLE: this->IO_ADDR_W &amp;= ~CLE_ADRR_BIT; break;
+		case NAND_CTL_SETALE: this->IO_ADDR_W |= ALE_ADRR_BIT;  break;
+		case NAND_CTL_CLRALE: this->IO_ADDR_W &amp;= ~ALE_ADRR_BIT; break;
+	}
+}
+		</programlisting>
+	</sect1>
+	<sect1 id="Device_ready_function">
+		<title>Device ready function</title>
+		<para>
+			If the hardware interface has the ready busy pin of the NAND chip connected to a
+			GPIO or other accesible I/O pin, this function is used to read back the state of the
+			pin. The function has no arguments and should return 0, if the device is busy (R/B pin 
+			is low) and 1, if the device is ready (R/B pin is high).
+			If the hardware interface does not give access to the ready busy pin, then
+			the function must not be defined and the function pointer this->dev_ready is set to NULL.		
+		</para>
+	</sect1>
+	<sect1 id="Init_function">
+		<title>Init function</title>
+		<para>
+			The init function allocates memory and sets up all the board
+			specific parameters and function pointers. When everything
+			is set up nand_scan() is called. This function tries to
+			detect and identify then chip. If a chip is found all the
+			internal data fields are initialized accordingly.
+			The structure(s) have to be zeroed out first and then filled with the neccecary 
+			information about the device.
+		</para>
+		<programlisting>
+int __init board_init (void)
+{
+	struct nand_chip *this;
+	int err = 0;
+
+	/* Allocate memory for MTD device structure and private data */
+	board_mtd = kzalloc(sizeof(struct mtd_info) + sizeof(struct nand_chip), GFP_KERNEL);
+	if (!board_mtd) {
+		printk ("Unable to allocate NAND MTD device structure.\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* map physical address */
+	baseaddr = (unsigned long)ioremap(CHIP_PHYSICAL_ADDRESS, 1024);
+	if(!baseaddr){
+		printk("Ioremap to access NAND chip failed\n");
+		err = -EIO;
+		goto out_mtd;
+	}
+
+	/* Get pointer to private data */
+	this = (struct nand_chip *) ();
+	/* Link the private data with the MTD structure */
+	board_mtd->priv = this;
+
+	/* Set address of NAND IO lines */
+	this->IO_ADDR_R = baseaddr;
+	this->IO_ADDR_W = baseaddr;
+	/* Reference hardware control function */
+	this->hwcontrol = board_hwcontrol;
+	/* Set command delay time, see datasheet for correct value */
+	this->chip_delay = CHIP_DEPENDEND_COMMAND_DELAY;
+	/* Assign the device ready function, if available */
+	this->dev_ready = board_dev_ready;
+	this->eccmode = NAND_ECC_SOFT;
+
+	/* Scan to find existence of the device */
+	if (nand_scan (board_mtd, 1)) {
+		err = -ENXIO;
+		goto out_ior;
+	}
+	
+	add_mtd_partitions(board_mtd, partition_info, NUM_PARTITIONS);
+	goto out;
+
+out_ior:
+	iounmap((void *)baseaddr);
+out_mtd:
+	kfree (board_mtd);
+out:
+	return err;
+}
+module_init(board_init);
+		</programlisting>
+	</sect1>
+	<sect1 id="Exit_function">
+		<title>Exit function</title>
+		<para>
+			The exit function is only neccecary if the driver is
+			compiled as a module. It releases all resources which
+			are held by the chip driver and unregisters the partitions
+			in the MTD layer.
+		</para>
+		<programlisting>
+#ifdef MODULE
+static void __exit board_cleanup (void)
+{
+	/* Release resources, unregister device */
+	nand_release (board_mtd);
+
+	/* unmap physical address */
+	iounmap((void *)baseaddr);
+	
+	/* Free the MTD device structure */
+	kfree (board_mtd);
+}
+module_exit(board_cleanup);
+#endif
+		</programlisting>
+	</sect1>
+  </chapter>
+
+  <chapter id="boarddriversadvanced">
+     	<title>Advanced board driver functions</title>
+	<para>
+		This chapter describes the advanced functionality of the NAND
+		driver. For a list of functions which can be overridden by the board
+		driver see the documentation of the nand_chip structure.
+	</para>
+	<sect1 id="Multiple_chip_control">
+		<title>Multiple chip control</title>
+		<para>
+			The nand driver can control chip arrays. Therefor the
+			board driver must provide an own select_chip function. This
+			function must (de)select the requested chip.
+			The function pointer in the nand_chip structure must
+			be set before calling nand_scan(). The maxchip parameter
+			of nand_scan() defines the maximum number of chips to
+			scan for. Make sure that the select_chip function can
+			handle the requested number of chips.
+		</para>
+		<para>
+			The nand driver concatenates the chips to one virtual
+			chip and provides this virtual chip to the MTD layer.
+		</para>
+		<para>
+			<emphasis>Note: The driver can only handle linear chip arrays
+			of equally sized chips. There is no support for
+			parallel arrays which extend the buswidth.</emphasis>
+		</para>
+		<para>
+			<emphasis>GPIO based example</emphasis>
+		</para>
+		<programlisting>
+static void board_select_chip (struct mtd_info *mtd, int chip)
+{
+	/* Deselect all chips, set all nCE pins high */
+	GPIO(BOARD_NAND_NCE) |= 0xff;	
+	if (chip >= 0)
+		GPIO(BOARD_NAND_NCE) &amp;= ~ (1 &lt;&lt; chip);
+}
+		</programlisting>
+		<para>
+			<emphasis>Address lines based example.</emphasis>
+			Its assumed that the nCE pins are connected to an
+			address decoder.
+		</para>
+		<programlisting>
+static void board_select_chip (struct mtd_info *mtd, int chip)
+{
+	struct nand_chip *this = (struct nand_chip *) mtd->priv;
+	
+	/* Deselect all chips */
+	this->IO_ADDR_R &amp;= ~BOARD_NAND_ADDR_MASK;
+	this->IO_ADDR_W &amp;= ~BOARD_NAND_ADDR_MASK;
+	switch (chip) {
+	case 0:
+		this->IO_ADDR_R |= BOARD_NAND_ADDR_CHIP0;
+		this->IO_ADDR_W |= BOARD_NAND_ADDR_CHIP0;
+		break;
+	....	
+	case n:
+		this->IO_ADDR_R |= BOARD_NAND_ADDR_CHIPn;
+		this->IO_ADDR_W |= BOARD_NAND_ADDR_CHIPn;
+		break;
+	}	
+}
+		</programlisting>
+	</sect1>
+	<sect1 id="Hardware_ECC_support">
+		<title>Hardware ECC support</title>
+		<sect2 id="Functions_and_constants">
+			<title>Functions and constants</title>
+			<para>
+				The nand driver supports three different types of
+				hardware ECC.
+				<itemizedlist>
+				<listitem><para>NAND_ECC_HW3_256</para><para>
+				Hardware ECC generator providing 3 bytes ECC per
+				256 byte.
+				</para>	</listitem>
+				<listitem><para>NAND_ECC_HW3_512</para><para>
+				Hardware ECC generator providing 3 bytes ECC per
+				512 byte.
+				</para>	</listitem>
+				<listitem><para>NAND_ECC_HW6_512</para><para>
+				Hardware ECC generator providing 6 bytes ECC per
+				512 byte.
+				</para>	</listitem>
+				<listitem><para>NAND_ECC_HW8_512</para><para>
+				Hardware ECC generator providing 6 bytes ECC per
+				512 byte.
+				</para>	</listitem>
+				</itemizedlist>
+				If your hardware generator has a different functionality
+				add it at the appropriate place in nand_base.c
+			</para>
+			<para>
+				The board driver must provide following functions:
+				<itemizedlist>
+				<listitem><para>enable_hwecc</para><para>
+				This function is called before reading / writing to
+				the chip. Reset or initialize the hardware generator
+				in this function. The function is called with an
+				argument which let you distinguish between read 
+				and write operations.
+				</para>	</listitem>
+				<listitem><para>calculate_ecc</para><para>
+				This function is called after read / write from / to
+				the chip. Transfer the ECC from the hardware to
+				the buffer. If the option NAND_HWECC_SYNDROME is set
+				then the function is only called on write. See below.
+				</para>	</listitem>
+				<listitem><para>correct_data</para><para>
+				In case of an ECC error this function is called for
+				error detection and correction. Return 1 respectively 2
+				in case the error can be corrected. If the error is
+				not correctable return -1. If your hardware generator
+				matches the default algorithm of the nand_ecc software
+				generator then use the correction function provided
+				by nand_ecc instead of implementing duplicated code.
+				</para>	</listitem>
+				</itemizedlist>
+			</para>
+		</sect2>
+		<sect2 id="Hardware_ECC_with_syndrome_calculation">
+		<title>Hardware ECC with syndrome calculation</title>
+			<para>
+				Many hardware ECC implementations provide Reed-Solomon
+				codes and calculate an error syndrome on read. The syndrome
+				must be converted to a standard Reed-Solomon syndrome
+				before calling the error correction code in the generic
+				Reed-Solomon library.
+			</para>
+			<para>
+				The ECC bytes must be placed immidiately after the data
+				bytes in order to make the syndrome generator work. This
+				is contrary to the usual layout used by software ECC. The
+				seperation of data and out of band area is not longer
+				possible. The nand driver code handles this layout and
+				the remaining free bytes in the oob area are managed by 
+				the autoplacement code. Provide a matching oob-layout
+				in this case. See rts_from4.c and diskonchip.c for 
+				implementation reference. In those cases we must also
+				use bad block tables on FLASH, because the ECC layout is
+				interferring with the bad block marker positions.
+				See bad block table support for details.
+			</para>
+		</sect2>
+	</sect1>
+	<sect1 id="Bad_Block_table_support">
+		<title>Bad block table support</title>
+		<para>
+			Most NAND chips mark the bad blocks at a defined
+			position in the spare area. Those blocks must 
+			not be erased under any circumstances as the bad 
+			block information would be lost.
+			It is possible to check the bad block mark each
+			time when the blocks are accessed by reading the
+			spare area of the first page in the block. This
+			is time consuming so a bad block table is used.
+		</para>
+		<para>
+			The nand driver supports various types of bad block
+			tables.
+			<itemizedlist>
+			<listitem><para>Per device</para><para>
+			The bad block table contains all bad block information
+			of the device which can consist of multiple chips.
+			</para>	</listitem>
+			<listitem><para>Per chip</para><para>
+			A bad block table is used per chip and contains the
+			bad block information for this particular chip.
+			</para>	</listitem>
+			<listitem><para>Fixed offset</para><para>
+			The bad block table is located at a fixed offset
+			in the chip (device). This applies to various
+			DiskOnChip devices.
+			</para>	</listitem>
+			<listitem><para>Automatic placed</para><para>
+			The bad block table is automatically placed and
+			detected either at the end or at the beginning
+			of a chip (device)
+			</para>	</listitem>
+			<listitem><para>Mirrored tables</para><para>
+			The bad block table is mirrored on the chip (device) to
+			allow updates of the bad block table without data loss.
+			</para>	</listitem>
+			</itemizedlist>
+		</para>
+		<para>	
+			nand_scan() calls the function nand_default_bbt(). 
+			nand_default_bbt() selects appropriate default
+			bad block table desriptors depending on the chip information
+			which was retrieved by nand_scan().
+		</para>
+		<para>
+			The standard policy is scanning the device for bad 
+			blocks and build a ram based bad block table which
+			allows faster access than always checking the
+			bad block information on the flash chip itself.
+		</para>
+		<sect2 id="Flash_based_tables">
+			<title>Flash based tables</title>
+			<para>
+				It may be desired or neccecary to keep a bad block table in FLASH. 
+				For AG-AND chips this is mandatory, as they have no factory marked
+				bad blocks. They have factory marked good blocks. The marker pattern
+				is erased when the block is erased to be reused. So in case of
+				powerloss before writing the pattern back to the chip this block 
+				would be lost and added to the bad blocks. Therefor we scan the 
+				chip(s) when we detect them the first time for good blocks and 
+				store this information in a bad block table before erasing any 
+				of the blocks.
+			</para>
+			<para>
+				The blocks in which the tables are stored are procteted against
+				accidental access by marking them bad in the memory bad block
+				table. The bad block table managment functions are allowed
+				to circumvernt this protection.
+			</para>
+			<para>
+				The simplest way to activate the FLASH based bad block table support 
+				is to set the option NAND_USE_FLASH_BBT in the option field of
+				the nand chip structure before calling nand_scan(). For AG-AND
+				chips is this done by default.
+				This activates the default FLASH based bad block table functionality 
+				of the NAND driver. The default bad block table options are
+				<itemizedlist>
+				<listitem><para>Store bad block table per chip</para></listitem>
+				<listitem><para>Use 2 bits per block</para></listitem>
+				<listitem><para>Automatic placement at the end of the chip</para></listitem>
+				<listitem><para>Use mirrored tables with version numbers</para></listitem>
+				<listitem><para>Reserve 4 blocks at the end of the chip</para></listitem>
+				</itemizedlist>
+			</para>
+		</sect2>
+		<sect2 id="User_defined_tables">
+			<title>User defined tables</title>
+			<para>
+				User defined tables are created by filling out a 
+				nand_bbt_descr structure and storing the pointer in the
+				nand_chip structure member bbt_td before calling nand_scan(). 
+				If a mirror table is neccecary a second structure must be
+				created and a pointer to this structure must be stored
+				in bbt_md inside the nand_chip structure. If the bbt_md 
+				member is set to NULL then only the main table is used
+				and no scan for the mirrored table is performed.
+			</para>
+			<para>
+				The most important field in the nand_bbt_descr structure
+				is the options field. The options define most of the 
+				table properties. Use the predefined constants from
+				nand.h to define the options.
+				<itemizedlist>
+				<listitem><para>Number of bits per block</para>
+				<para>The supported number of bits is 1, 2, 4, 8.</para></listitem>
+				<listitem><para>Table per chip</para>
+				<para>Setting the constant NAND_BBT_PERCHIP selects that
+				a bad block table is managed for each chip in a chip array.
+				If this option is not set then a per device bad block table
+				is used.</para></listitem>
+				<listitem><para>Table location is absolute</para>
+				<para>Use the option constant NAND_BBT_ABSPAGE and
+				define the absolute page number where the bad block
+				table starts in the field pages. If you have selected bad block
+				tables per chip and you have a multi chip array then the start page
+				must be given for each chip in the chip array. Note: there is no scan
+				for a table ident pattern performed, so the fields 
+				pattern, veroffs, offs, len can be left uninitialized</para></listitem>
+				<listitem><para>Table location is automatically detected</para>
+				<para>The table can either be located in the first or the last good
+				blocks of the chip (device). Set NAND_BBT_LASTBLOCK to place
+				the bad block table at the end of the chip (device). The
+				bad block tables are marked and identified by a pattern which
+				is stored in the spare area of the first page in the block which
+				holds the bad block table. Store a pointer to the pattern  
+				in the pattern field. Further the length of the pattern has to be 
+				stored in len and the offset in the spare area must be given
+				in the offs member of the nand_bbt_descr stucture. For mirrored
+				bad block tables different patterns are mandatory.</para></listitem>
+				<listitem><para>Table creation</para>
+				<para>Set the option NAND_BBT_CREATE to enable the table creation
+				if no table can be found during the scan. Usually this is done only 
+				once if a new chip is found. </para></listitem>
+				<listitem><para>Table write support</para>
+				<para>Set the option NAND_BBT_WRITE to enable the table write support.
+				This allows the update of the bad block table(s) in case a block has
+				to be marked bad due to wear. The MTD interface function block_markbad
+				is calling the update function of the bad block table. If the write
+				support is enabled then the table is updated on FLASH.</para>
+				<para>
+				Note: Write support should only be enabled for mirrored tables with
+				version control.
+				</para></listitem>
+				<listitem><para>Table version control</para>
+				<para>Set the option NAND_BBT_VERSION to enable the table version control.
+				It's highly recommended to enable this for mirrored tables with write
+				support. It makes sure that the risk of loosing the bad block
+				table information is reduced to the loss of the information about the
+				one worn out block which should be marked bad. The version is stored in
+				4 consecutive bytes in the spare area of the device. The position of
+				the version number is defined by the member veroffs in the bad block table
+				descriptor.</para></listitem>
+				<listitem><para>Save block contents on write</para>
+				<para>
+				In case that the block which holds the bad block table does contain
+				other useful information, set the option NAND_BBT_SAVECONTENT. When
+				the bad block table is written then the whole block is read the bad
+				block table is updated and the block is erased and everything is 
+				written back. If this option is not set only the bad block table
+				is written and everything else in the block is ignored and erased.
+				</para></listitem>
+				<listitem><para>Number of reserved blocks</para>
+				<para>
+				For automatic placement some blocks must be reserved for
+				bad block table storage. The number of reserved blocks is defined 
+				in the maxblocks member of the babd block table description structure.
+				Reserving 4 blocks for mirrored tables should be a reasonable number. 
+				This also limits the number of blocks which are scanned for the bad
+				block table ident pattern.
+				</para></listitem>
+				</itemizedlist>
+			</para>
+		</sect2>
+	</sect1>
+	<sect1 id="Spare_area_placement">
+		<title>Spare area (auto)placement</title>
+		<para>
+			The nand driver implements different possibilities for
+			placement of filesystem data in the spare area, 
+			<itemizedlist>
+			<listitem><para>Placement defined by fs driver</para></listitem>
+			<listitem><para>Automatic placement</para></listitem>
+			</itemizedlist>
+			The default placement function is automatic placement. The
+			nand driver has built in default placement schemes for the
+			various chiptypes. If due to hardware ECC functionality the
+			default placement does not fit then the board driver can
+			provide a own placement scheme.
+		</para>
+		<para>
+			File system drivers can provide a own placement scheme which
+			is used instead of the default placement scheme.
+		</para>
+		<para>
+			Placement schemes are defined by a nand_oobinfo structure
+	     		<programlisting>
+struct nand_oobinfo {
+	int	useecc;
+	int	eccbytes;
+	int	eccpos[24];
+	int	oobfree[8][2];
+};
+	     		</programlisting>
+			<itemizedlist>
+			<listitem><para>useecc</para><para>
+				The useecc member controls the ecc and placement function. The header
+				file include/mtd/mtd-abi.h contains constants to select ecc and
+				placement. MTD_NANDECC_OFF switches off the ecc complete. This is
+				not recommended and available for testing and diagnosis only.
+				MTD_NANDECC_PLACE selects caller defined placement, MTD_NANDECC_AUTOPLACE
+				selects automatic placement.
+			</para></listitem>
+			<listitem><para>eccbytes</para><para>
+				The eccbytes member defines the number of ecc bytes per page.
+			</para></listitem>
+			<listitem><para>eccpos</para><para>
+				The eccpos array holds the byte offsets in the spare area where
+				the ecc codes are placed.
+			</para></listitem>
+			<listitem><para>oobfree</para><para>
+				The oobfree array defines the areas in the spare area which can be
+				used for automatic placement. The information is given in the format
+				{offset, size}. offset defines the start of the usable area, size the
+				length in bytes. More than one area can be defined. The list is terminated
+				by an {0, 0} entry.
+			</para></listitem>
+			</itemizedlist>
+		</para>
+		<sect2 id="Placement_defined_by_fs_driver">
+			<title>Placement defined by fs driver</title>
+			<para>
+				The calling function provides a pointer to a nand_oobinfo
+				structure which defines the ecc placement. For writes the
+				caller must provide a spare area buffer along with the
+				data buffer. The spare area buffer size is (number of pages) *
+				(size of spare area). For reads the buffer size is
+				(number of pages) * ((size of spare area) + (number of ecc
+				steps per page) * sizeof (int)). The driver stores the
+				result of the ecc check for each tuple in the spare buffer.
+				The storage sequence is 
+			</para>
+			<para>
+				&lt;spare data page 0&gt;&lt;ecc result 0&gt;...&lt;ecc result n&gt;
+			</para>
+			<para>
+				...
+			</para>
+			<para>
+				&lt;spare data page n&gt;&lt;ecc result 0&gt;...&lt;ecc result n&gt;
+			</para>
+			<para>
+				This is a legacy mode used by YAFFS1.
+			</para>
+			<para>
+				If the spare area buffer is NULL then only the ECC placement is
+				done according to the given scheme in the nand_oobinfo structure.
+			</para>
+		</sect2>
+		<sect2 id="Automatic_placement">
+			<title>Automatic placement</title>
+			<para>
+				Automatic placement uses the built in defaults to place the
+				ecc bytes in the spare area. If filesystem data have to be stored /
+				read into the spare area then the calling function must provide a
+				buffer. The buffer size per page is determined by the oobfree array in
+				the nand_oobinfo structure.
+			</para>
+			<para>
+				If the spare area buffer is NULL then only the ECC placement is
+				done according to the default builtin scheme.
+			</para>
+		</sect2>
+		<sect2 id="User_space_placement_selection">
+			<title>User space placement selection</title>
+		<para>
+			All non ecc functions like mtd->read and mtd->write use an internal 
+			structure, which can be set by an ioctl. This structure is preset 
+			to the autoplacement default.
+	     		<programlisting>
+	ioctl (fd, MEMSETOOBSEL, oobsel);
+	     		</programlisting>
+			oobsel is a pointer to a user supplied structure of type
+			nand_oobconfig. The contents of this structure must match the 
+			criteria of the filesystem, which will be used. See an example in utils/nandwrite.c.
+		</para>
+		</sect2>
+	</sect1>	
+	<sect1 id="Spare_area_autoplacement_default">
+		<title>Spare area autoplacement default schemes</title>
+		<sect2 id="pagesize_256">
+			<title>256 byte pagesize</title>
+<informaltable><tgroup cols="3"><tbody>
+<row>
+<entry>Offset</entry>
+<entry>Content</entry>
+<entry>Comment</entry>
+</row>
+<row>
+<entry>0x00</entry>
+<entry>ECC byte 0</entry>
+<entry>Error correction code byte 0</entry>
+</row>
+<row>
+<entry>0x01</entry>
+<entry>ECC byte 1</entry>
+<entry>Error correction code byte 1</entry>
+</row>
+<row>
+<entry>0x02</entry>
+<entry>ECC byte 2</entry>
+<entry>Error correction code byte 2</entry>
+</row>
+<row>
+<entry>0x03</entry>
+<entry>Autoplace 0</entry>
+<entry></entry>
+</row>
+<row>
+<entry>0x04</entry>
+<entry>Autoplace 1</entry>
+<entry></entry>
+</row>
+<row>
+<entry>0x05</entry>
+<entry>Bad block marker</entry>
+<entry>If any bit in this byte is zero, then this block is bad.
+This applies only to the first page in a block. In the remaining
+pages this byte is reserved</entry>
+</row>
+<row>
+<entry>0x06</entry>
+<entry>Autoplace 2</entry>
+<entry></entry>
+</row>
+<row>
+<entry>0x07</entry>
+<entry>Autoplace 3</entry>
+<entry></entry>
+</row>
+</tbody></tgroup></informaltable>
+		</sect2>
+		<sect2 id="pagesize_512">
+			<title>512 byte pagesize</title>
+<informaltable><tgroup cols="3"><tbody>
+<row>
+<entry>Offset</entry>
+<entry>Content</entry>
+<entry>Comment</entry>
+</row>
+<row>
+<entry>0x00</entry>
+<entry>ECC byte 0</entry>
+<entry>Error correction code byte 0 of the lower 256 Byte data in
+this page</entry>
+</row>
+<row>
+<entry>0x01</entry>
+<entry>ECC byte 1</entry>
+<entry>Error correction code byte 1 of the lower 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x02</entry>
+<entry>ECC byte 2</entry>
+<entry>Error correction code byte 2 of the lower 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x03</entry>
+<entry>ECC byte 3</entry>
+<entry>Error correction code byte 0 of the upper 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x04</entry>
+<entry>reserved</entry>
+<entry>reserved</entry>
+</row>
+<row>
+<entry>0x05</entry>
+<entry>Bad block marker</entry>
+<entry>If any bit in this byte is zero, then this block is bad.
+This applies only to the first page in a block. In the remaining
+pages this byte is reserved</entry>
+</row>
+<row>
+<entry>0x06</entry>
+<entry>ECC byte 4</entry>
+<entry>Error correction code byte 1 of the upper 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x07</entry>
+<entry>ECC byte 5</entry>
+<entry>Error correction code byte 2 of the upper 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x08 - 0x0F</entry>
+<entry>Autoplace 0 - 7</entry>
+<entry></entry>
+</row>
+</tbody></tgroup></informaltable>
+		</sect2>
+		<sect2 id="pagesize_2048">
+			<title>2048 byte pagesize</title>
+<informaltable><tgroup cols="3"><tbody>
+<row>
+<entry>Offset</entry>
+<entry>Content</entry>
+<entry>Comment</entry>
+</row>
+<row>
+<entry>0x00</entry>
+<entry>Bad block marker</entry>
+<entry>If any bit in this byte is zero, then this block is bad.
+This applies only to the first page in a block. In the remaining
+pages this byte is reserved</entry>
+</row>
+<row>
+<entry>0x01</entry>
+<entry>Reserved</entry>
+<entry>Reserved</entry>
+</row>
+<row>
+<entry>0x02-0x27</entry>
+<entry>Autoplace 0 - 37</entry>
+<entry></entry>
+</row>
+<row>
+<entry>0x28</entry>
+<entry>ECC byte 0</entry>
+<entry>Error correction code byte 0 of the first 256 Byte data in
+this page</entry>
+</row>
+<row>
+<entry>0x29</entry>
+<entry>ECC byte 1</entry>
+<entry>Error correction code byte 1 of the first 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x2A</entry>
+<entry>ECC byte 2</entry>
+<entry>Error correction code byte 2 of the first 256 Bytes data in
+this page</entry>
+</row>
+<row>
+<entry>0x2B</entry>
+<entry>ECC byte 3</entry>
+<entry>Error correction code byte 0 of the second 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x2C</entry>
+<entry>ECC byte 4</entry>
+<entry>Error correction code byte 1 of the second 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x2D</entry>
+<entry>ECC byte 5</entry>
+<entry>Error correction code byte 2 of the second 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x2E</entry>
+<entry>ECC byte 6</entry>
+<entry>Error correction code byte 0 of the third 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x2F</entry>
+<entry>ECC byte 7</entry>
+<entry>Error correction code byte 1 of the third 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x30</entry>
+<entry>ECC byte 8</entry>
+<entry>Error correction code byte 2 of the third 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x31</entry>
+<entry>ECC byte 9</entry>
+<entry>Error correction code byte 0 of the fourth 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x32</entry>
+<entry>ECC byte 10</entry>
+<entry>Error correction code byte 1 of the fourth 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x33</entry>
+<entry>ECC byte 11</entry>
+<entry>Error correction code byte 2 of the fourth 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x34</entry>
+<entry>ECC byte 12</entry>
+<entry>Error correction code byte 0 of the fifth 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x35</entry>
+<entry>ECC byte 13</entry>
+<entry>Error correction code byte 1 of the fifth 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x36</entry>
+<entry>ECC byte 14</entry>
+<entry>Error correction code byte 2 of the fifth 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x37</entry>
+<entry>ECC byte 15</entry>
+<entry>Error correction code byte 0 of the sixt 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x38</entry>
+<entry>ECC byte 16</entry>
+<entry>Error correction code byte 1 of the sixt 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x39</entry>
+<entry>ECC byte 17</entry>
+<entry>Error correction code byte 2 of the sixt 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x3A</entry>
+<entry>ECC byte 18</entry>
+<entry>Error correction code byte 0 of the seventh 256 Bytes of
+data in this page</entry>
+</row>
+<row>
+<entry>0x3B</entry>
+<entry>ECC byte 19</entry>
+<entry>Error correction code byte 1 of the seventh 256 Bytes of
+data in this page</entry>
+</row>
+<row>
+<entry>0x3C</entry>
+<entry>ECC byte 20</entry>
+<entry>Error correction code byte 2 of the seventh 256 Bytes of
+data in this page</entry>
+</row>
+<row>
+<entry>0x3D</entry>
+<entry>ECC byte 21</entry>
+<entry>Error correction code byte 0 of the eigth 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x3E</entry>
+<entry>ECC byte 22</entry>
+<entry>Error correction code byte 1 of the eigth 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x3F</entry>
+<entry>ECC byte 23</entry>
+<entry>Error correction code byte 2 of the eigth 256 Bytes of data
+in this page</entry>
+</row>
+</tbody></tgroup></informaltable>
+		</sect2>
+     	</sect1>
+  </chapter>
+
+  <chapter id="filesystems">
+     	<title>Filesystem support</title>
+	<para>
+		The NAND driver provides all neccecary functions for a
+		filesystem via the MTD interface.
+	</para>
+	<para>
+		Filesystems must be aware of the NAND pecularities and
+		restrictions. One major restrictions of NAND Flash is, that you cannot 
+		write as often as you want to a page. The consecutive writes to a page, 
+		before erasing it again, are restricted to 1-3 writes, depending on the 
+		manufacturers specifications. This applies similar to the spare area. 
+	</para>
+	<para>
+		Therefor NAND aware filesystems must either write in page size chunks
+		or hold a writebuffer to collect smaller writes until they sum up to 
+		pagesize. Available NAND aware filesystems: JFFS2, YAFFS. 		
+	</para>
+	<para>
+		The spare area usage to store filesystem data is controlled by
+		the spare area placement functionality which is described in one
+		of the earlier chapters.
+	</para>
+  </chapter>	
+  <chapter id="tools">
+     	<title>Tools</title>
+	<para>
+		The MTD project provides a couple of helpful tools to handle NAND Flash.
+		<itemizedlist>
+		<listitem><para>flasherase, flasheraseall: Erase and format FLASH partitions</para></listitem>
+		<listitem><para>nandwrite: write filesystem images to NAND FLASH</para></listitem>
+		<listitem><para>nanddump: dump the contents of a NAND FLASH partitions</para></listitem>
+		</itemizedlist>
+	</para>
+	<para>
+		These tools are aware of the NAND restrictions. Please use those tools
+		instead of complaining about errors which are caused by non NAND aware
+		access methods.
+	</para>
+  </chapter>	
+
+  <chapter id="defines">
+     <title>Constants</title>
+     <para>
+     This chapter describes the constants which might be relevant for a driver developer.
+     </para>
+     <sect1 id="Chip_option_constants">
+	<title>Chip option constants</title>
+     	<sect2 id="Constants_for_chip_id_table">
+		<title>Constants for chip id table</title>
+     		<para>
+		These constants are defined in nand.h. They are ored together to describe
+		the chip functionality.
+     		<programlisting>
+/* Chip can not auto increment pages */
+#define NAND_NO_AUTOINCR	0x00000001
+/* Buswitdh is 16 bit */
+#define NAND_BUSWIDTH_16	0x00000002
+/* Device supports partial programming without padding */
+#define NAND_NO_PADDING		0x00000004
+/* Chip has cache program function */
+#define NAND_CACHEPRG		0x00000008
+/* Chip has copy back function */
+#define NAND_COPYBACK		0x00000010
+/* AND Chip which has 4 banks and a confusing page / block 
+ * assignment. See Renesas datasheet for further information */
+#define NAND_IS_AND		0x00000020
+/* Chip has a array of 4 pages which can be read without
+ * additional ready /busy waits */
+#define NAND_4PAGE_ARRAY	0x00000040 
+		</programlisting>
+     		</para>
+     	</sect2>
+     	<sect2 id="Constants_for_runtime_options">
+		<title>Constants for runtime options</title>
+     		<para>
+		These constants are defined in nand.h. They are ored together to describe
+		the functionality.
+     		<programlisting>
+/* Use a flash based bad block table. This option is parsed by the
+ * default bad block table function (nand_default_bbt). */
+#define NAND_USE_FLASH_BBT	0x00010000
+/* The hw ecc generator provides a syndrome instead a ecc value on read 
+ * This can only work if we have the ecc bytes directly behind the 
+ * data bytes. Applies for DOC and AG-AND Renesas HW Reed Solomon generators */
+#define NAND_HWECC_SYNDROME	0x00020000
+		</programlisting>
+     		</para>
+     	</sect2>
+     </sect1>	
+
+     <sect1 id="EEC_selection_constants">
+	<title>ECC selection constants</title>
+	<para>
+	Use these constants to select the ECC algorithm.
+  	<programlisting>
+/* No ECC. Usage is not recommended ! */
+#define NAND_ECC_NONE		0
+/* Software ECC 3 byte ECC per 256 Byte data */
+#define NAND_ECC_SOFT		1
+/* Hardware ECC 3 byte ECC per 256 Byte data */
+#define NAND_ECC_HW3_256	2
+/* Hardware ECC 3 byte ECC per 512 Byte data */
+#define NAND_ECC_HW3_512	3
+/* Hardware ECC 6 byte ECC per 512 Byte data */
+#define NAND_ECC_HW6_512	4
+/* Hardware ECC 6 byte ECC per 512 Byte data */
+#define NAND_ECC_HW8_512	6
+	</programlisting>
+	</para>
+     </sect1>	
+
+     <sect1 id="Hardware_control_related_constants">
+	<title>Hardware control related constants</title>
+	<para>
+	These constants describe the requested hardware access function when
+	the boardspecific hardware control function is called
+  	<programlisting>
+/* Select the chip by setting nCE to low */
+#define NAND_CTL_SETNCE 	1
+/* Deselect the chip by setting nCE to high */
+#define NAND_CTL_CLRNCE		2
+/* Select the command latch by setting CLE to high */
+#define NAND_CTL_SETCLE		3
+/* Deselect the command latch by setting CLE to low */
+#define NAND_CTL_CLRCLE		4
+/* Select the address latch by setting ALE to high */
+#define NAND_CTL_SETALE		5
+/* Deselect the address latch by setting ALE to low */
+#define NAND_CTL_CLRALE		6
+/* Set write protection by setting WP to high. Not used! */
+#define NAND_CTL_SETWP		7
+/* Clear write protection by setting WP to low. Not used! */
+#define NAND_CTL_CLRWP		8
+	</programlisting>
+	</para>
+     </sect1>	
+
+     <sect1 id="Bad_block_table_constants">
+	<title>Bad block table related constants</title>
+	<para>
+	These constants describe the options used for bad block
+	table descriptors.
+  	<programlisting>
+/* Options for the bad block table descriptors */
+
+/* The number of bits used per block in the bbt on the device */
+#define NAND_BBT_NRBITS_MSK	0x0000000F
+#define NAND_BBT_1BIT		0x00000001
+#define NAND_BBT_2BIT		0x00000002
+#define NAND_BBT_4BIT		0x00000004
+#define NAND_BBT_8BIT		0x00000008
+/* The bad block table is in the last good block of the device */
+#define	NAND_BBT_LASTBLOCK	0x00000010
+/* The bbt is at the given page, else we must scan for the bbt */
+#define NAND_BBT_ABSPAGE	0x00000020
+/* The bbt is at the given page, else we must scan for the bbt */
+#define NAND_BBT_SEARCH		0x00000040
+/* bbt is stored per chip on multichip devices */
+#define NAND_BBT_PERCHIP	0x00000080
+/* bbt has a version counter at offset veroffs */
+#define NAND_BBT_VERSION	0x00000100
+/* Create a bbt if none axists */
+#define NAND_BBT_CREATE		0x00000200
+/* Search good / bad pattern through all pages of a block */
+#define NAND_BBT_SCANALLPAGES	0x00000400
+/* Scan block empty during good / bad block scan */
+#define NAND_BBT_SCANEMPTY	0x00000800
+/* Write bbt if neccecary */
+#define NAND_BBT_WRITE		0x00001000
+/* Read and write back block contents when writing bbt */
+#define NAND_BBT_SAVECONTENT	0x00002000
+	</programlisting>
+	</para>
+     </sect1>	
+
+  </chapter>
+  	
+  <chapter id="structs">
+     <title>Structures</title>
+     <para>
+     This chapter contains the autogenerated documentation of the structures which are
+     used in the NAND driver and might be relevant for a driver developer. Each  
+     struct member has a short description which is marked with an [XXX] identifier.
+     See the chapter "Documentation hints" for an explanation.
+     </para>
+!Iinclude/linux/mtd/nand.h
+  </chapter>
+
+  <chapter id="pubfunctions">
+     <title>Public Functions Provided</title>
+     <para>
+     This chapter contains the autogenerated documentation of the NAND kernel API functions
+      which are exported. Each function has a short description which is marked with an [XXX] identifier.
+     See the chapter "Documentation hints" for an explanation.
+     </para>
+!Edrivers/mtd/nand/nand_base.c
+!Edrivers/mtd/nand/nand_bbt.c
+!Edrivers/mtd/nand/nand_ecc.c
+  </chapter>
+  
+  <chapter id="intfunctions">
+     <title>Internal Functions Provided</title>
+     <para>
+     This chapter contains the autogenerated documentation of the NAND driver internal functions.
+     Each function has a short description which is marked with an [XXX] identifier.
+     See the chapter "Documentation hints" for an explanation.
+     The functions marked with [DEFAULT] might be relevant for a board driver developer.
+     </para>
+!Idrivers/mtd/nand/nand_base.c
+!Idrivers/mtd/nand/nand_bbt.c
+<!-- No internal functions for kernel-doc:
+X!Idrivers/mtd/nand/nand_ecc.c
+-->
+  </chapter>
+
+  <chapter id="credits">
+     <title>Credits</title>
+	<para>
+		The following people have contributed to the NAND driver:
+		<orderedlist>
+			<listitem><para>Steven J. Hill<email>sjhill@realitydiluted.com</email></para></listitem>
+			<listitem><para>David Woodhouse<email>dwmw2@infradead.org</email></para></listitem>
+			<listitem><para>Thomas Gleixner<email>tglx@linutronix.de</email></para></listitem>
+		</orderedlist>
+		A lot of users have provided bugfixes, improvements and helping hands for testing.
+		Thanks a lot.
+	</para>
+	<para>
+		The following people have contributed to this document:
+		<orderedlist>
+			<listitem><para>Thomas Gleixner<email>tglx@linutronix.de</email></para></listitem>
+		</orderedlist>
+	</para>
+  </chapter>
+</book>
diff --git a/Documentation/DocBook/networking.tmpl b/Documentation/DocBook/networking.tmpl
new file mode 100644
index 0000000..f24f9e8
--- /dev/null
+++ b/Documentation/DocBook/networking.tmpl
@@ -0,0 +1,106 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="LinuxNetworking">
+ <bookinfo>
+  <title>Linux Networking and Network Devices APIs</title>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License as published by the Free Software Foundation; either
+     version 2 of the License, or (at your option) any later
+     version.
+   </para>
+
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+  <chapter id="netcore">
+     <title>Linux Networking</title>
+     <sect1><title>Networking Base Types</title>
+!Iinclude/linux/net.h
+     </sect1>
+     <sect1><title>Socket Buffer Functions</title>
+!Iinclude/linux/skbuff.h
+!Iinclude/net/sock.h
+!Enet/socket.c
+!Enet/core/skbuff.c
+!Enet/core/sock.c
+!Enet/core/datagram.c
+!Enet/core/stream.c
+     </sect1>
+     <sect1><title>Socket Filter</title>
+!Enet/core/filter.c
+     </sect1>
+     <sect1><title>Generic Network Statistics</title>
+!Iinclude/linux/gen_stats.h
+!Enet/core/gen_stats.c
+!Enet/core/gen_estimator.c
+     </sect1>
+     <sect1><title>SUN RPC subsystem</title>
+<!-- The !D functionality is not perfect, garbage has to be protected by comments
+!Dnet/sunrpc/sunrpc_syms.c
+-->
+!Enet/sunrpc/xdr.c
+!Enet/sunrpc/svc_xprt.c
+!Enet/sunrpc/xprt.c
+!Enet/sunrpc/sched.c
+!Enet/sunrpc/socklib.c
+!Enet/sunrpc/stats.c
+!Enet/sunrpc/rpc_pipe.c
+!Enet/sunrpc/rpcb_clnt.c
+!Enet/sunrpc/clnt.c
+     </sect1>
+  </chapter>
+
+  <chapter id="netdev">
+     <title>Network device support</title>
+     <sect1><title>Driver Support</title>
+!Enet/core/dev.c
+!Enet/ethernet/eth.c
+!Enet/sched/sch_generic.c
+!Iinclude/linux/etherdevice.h
+!Iinclude/linux/netdevice.h
+     </sect1>
+     <sect1><title>PHY Support</title>
+!Edrivers/net/phy/phy.c
+!Idrivers/net/phy/phy.c
+!Edrivers/net/phy/phy_device.c
+!Idrivers/net/phy/phy_device.c
+!Edrivers/net/phy/mdio_bus.c
+!Idrivers/net/phy/mdio_bus.c
+     </sect1>
+<!-- FIXME: Removed for now since no structured comments in source
+     <sect1><title>Wireless</title>
+X!Enet/core/wireless.c
+     </sect1>
+-->
+     <sect1><title>Synchronous PPP</title>
+!Edrivers/net/wan/syncppp.c
+     </sect1>
+  </chapter>
+
+</book>
diff --git a/Documentation/DocBook/procfs-guide.tmpl b/Documentation/DocBook/procfs-guide.tmpl
new file mode 100644
index 0000000..9eba4b7
--- /dev/null
+++ b/Documentation/DocBook/procfs-guide.tmpl
@@ -0,0 +1,626 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" [
+<!ENTITY procfsexample SYSTEM "procfs_example.xml">
+]>
+
+<book id="LKProcfsGuide">
+  <bookinfo>
+    <title>Linux Kernel Procfs Guide</title>
+
+    <authorgroup>
+      <author>
+	<firstname>Erik</firstname>
+	<othername>(J.A.K.)</othername>
+	<surname>Mouw</surname>
+	<affiliation>
+	  <address>
+            <email>mouw@nl.linux.org</email>
+          </address>
+	</affiliation>
+      </author>
+      <othercredit>
+	<contrib>
+	This software and documentation were written while working on the
+	LART computing board
+	(<ulink url="http://www.lartmaker.nl/">http://www.lartmaker.nl/</ulink>),
+	which was sponsored by the Delt University of Technology projects
+	Mobile Multi-media Communications and Ubiquitous Communications.
+	</contrib>
+      </othercredit>
+    </authorgroup>
+
+    <revhistory>
+      <revision>
+	<revnumber>1.0</revnumber>
+	<date>May 30, 2001</date>
+	<revremark>Initial revision posted to linux-kernel</revremark>
+      </revision>
+      <revision>
+	<revnumber>1.1</revnumber>
+	<date>June 3, 2001</date>
+	<revremark>Revised after comments from linux-kernel</revremark>
+      </revision>
+    </revhistory>
+
+    <copyright>
+      <year>2001</year>
+      <holder>Erik Mouw</holder>
+    </copyright>
+
+
+    <legalnotice>
+      <para>
+        This documentation is free software; you can redistribute it
+        and/or modify it under the terms of the GNU General Public
+        License as published by the Free Software Foundation; either
+        version 2 of the License, or (at your option) any later
+        version.
+      </para>
+      
+      <para>
+        This documentation is distributed in the hope that it will be
+        useful, but WITHOUT ANY WARRANTY; without even the implied
+        warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+        PURPOSE.  See the GNU General Public License for more details.
+      </para>
+      
+      <para>
+        You should have received a copy of the GNU General Public
+        License along with this program; if not, write to the Free
+        Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+        MA 02111-1307 USA
+      </para>
+      
+      <para>
+        For more details see the file COPYING in the source
+        distribution of Linux.
+      </para>
+    </legalnotice>
+  </bookinfo>
+
+
+
+
+  <toc>
+  </toc>
+
+
+
+
+  <preface id="Preface">
+    <title>Preface</title>
+
+    <para>
+      This guide describes the use of the procfs file system from
+      within the Linux kernel. The idea to write this guide came up on
+      the #kernelnewbies IRC channel (see <ulink
+      url="http://www.kernelnewbies.org/">http://www.kernelnewbies.org/</ulink>),
+      when Jeff Garzik explained the use of procfs and forwarded me a
+      message Alexander Viro wrote to the linux-kernel mailing list. I
+      agreed to write it up nicely, so here it is.
+    </para>
+
+    <para>
+      I'd like to thank Jeff Garzik
+      <email>jgarzik@pobox.com</email> and Alexander Viro
+      <email>viro@parcelfarce.linux.theplanet.co.uk</email> for their input,
+      Tim Waugh <email>twaugh@redhat.com</email> for his <ulink
+      url="http://people.redhat.com/twaugh/docbook/selfdocbook/">Selfdocbook</ulink>,
+      and Marc Joosen <email>marcj@historia.et.tudelft.nl</email> for
+      proofreading.
+    </para>
+
+    <para>
+      Erik
+    </para>
+  </preface>
+
+
+
+
+  <chapter id="intro">
+    <title>Introduction</title>
+
+    <para>
+      The <filename class="directory">/proc</filename> file system
+      (procfs) is a special file system in the linux kernel. It's a
+      virtual file system: it is not associated with a block device
+      but exists only in memory. The files in the procfs are there to
+      allow userland programs access to certain information from the
+      kernel (like process information in <filename
+      class="directory">/proc/[0-9]+/</filename>), but also for debug
+      purposes (like <filename>/proc/ksyms</filename>).
+    </para>
+
+    <para>
+      This guide describes the use of the procfs file system from
+      within the Linux kernel. It starts by introducing all relevant
+      functions to manage the files within the file system. After that
+      it shows how to communicate with userland, and some tips and
+      tricks will be pointed out. Finally a complete example will be
+      shown.
+    </para>
+
+    <para>
+      Note that the files in <filename
+      class="directory">/proc/sys</filename> are sysctl files: they
+      don't belong to procfs and are governed by a completely
+      different API described in the Kernel API book.
+    </para>
+  </chapter>
+
+
+
+
+  <chapter id="managing">
+    <title>Managing procfs entries</title>
+    
+    <para>
+      This chapter describes the functions that various kernel
+      components use to populate the procfs with files, symlinks,
+      device nodes, and directories.
+    </para>
+
+    <para>
+      A minor note before we start: if you want to use any of the
+      procfs functions, be sure to include the correct header file! 
+      This should be one of the first lines in your code:
+    </para>
+
+    <programlisting>
+#include &lt;linux/proc_fs.h&gt;
+    </programlisting>
+
+
+
+
+    <sect1 id="regularfile">
+      <title>Creating a regular file</title>
+      
+      <funcsynopsis>
+	<funcprototype>
+	  <funcdef>struct proc_dir_entry* <function>create_proc_entry</function></funcdef>
+	  <paramdef>const char* <parameter>name</parameter></paramdef>
+	  <paramdef>mode_t <parameter>mode</parameter></paramdef>
+	  <paramdef>struct proc_dir_entry* <parameter>parent</parameter></paramdef>
+	</funcprototype>
+      </funcsynopsis>
+
+      <para>
+        This function creates a regular file with the name
+        <parameter>name</parameter>, file mode
+        <parameter>mode</parameter> in the directory
+        <parameter>parent</parameter>. To create a file in the root of
+        the procfs, use <constant>NULL</constant> as
+        <parameter>parent</parameter> parameter. When successful, the
+        function will return a pointer to the freshly created
+        <structname>struct proc_dir_entry</structname>; otherwise it
+        will return <constant>NULL</constant>. <xref
+        linkend="userland"/> describes how to do something useful with
+        regular files.
+      </para>
+
+      <para>
+        Note that it is specifically supported that you can pass a
+        path that spans multiple directories. For example
+        <function>create_proc_entry</function>(<parameter>"drivers/via0/info"</parameter>)
+        will create the <filename class="directory">via0</filename>
+        directory if necessary, with standard
+        <constant>0755</constant> permissions.
+      </para>
+
+    <para>
+      If you only want to be able to read the file, the function
+      <function>create_proc_read_entry</function> described in <xref
+      linkend="convenience"/> may be used to create and initialise
+      the procfs entry in one single call.
+    </para>
+    </sect1>
+
+
+
+
+    <sect1 id="Creating_a_symlink">
+      <title>Creating a symlink</title>
+
+      <funcsynopsis>
+	<funcprototype>
+	  <funcdef>struct proc_dir_entry*
+	  <function>proc_symlink</function></funcdef> <paramdef>const
+	  char* <parameter>name</parameter></paramdef>
+	  <paramdef>struct proc_dir_entry*
+	  <parameter>parent</parameter></paramdef> <paramdef>const
+	  char* <parameter>dest</parameter></paramdef>
+	</funcprototype>
+      </funcsynopsis>
+      
+      <para>
+        This creates a symlink in the procfs directory
+        <parameter>parent</parameter> that points from
+        <parameter>name</parameter> to
+        <parameter>dest</parameter>. This translates in userland to
+        <literal>ln -s</literal> <parameter>dest</parameter>
+        <parameter>name</parameter>.
+      </para>
+    </sect1>
+
+    <sect1 id="Creating_a_directory">
+      <title>Creating a directory</title>
+      
+      <funcsynopsis>
+	<funcprototype>
+	  <funcdef>struct proc_dir_entry* <function>proc_mkdir</function></funcdef>
+	  <paramdef>const char* <parameter>name</parameter></paramdef>
+	  <paramdef>struct proc_dir_entry* <parameter>parent</parameter></paramdef>
+	</funcprototype>
+      </funcsynopsis>
+
+      <para>
+        Create a directory <parameter>name</parameter> in the procfs
+        directory <parameter>parent</parameter>.
+      </para>
+    </sect1>
+
+
+
+
+    <sect1 id="Removing_an_entry">
+      <title>Removing an entry</title>
+      
+      <funcsynopsis>
+	<funcprototype>
+	  <funcdef>void <function>remove_proc_entry</function></funcdef>
+	  <paramdef>const char* <parameter>name</parameter></paramdef>
+	  <paramdef>struct proc_dir_entry* <parameter>parent</parameter></paramdef>
+	</funcprototype>
+      </funcsynopsis>
+
+      <para>
+        Removes the entry <parameter>name</parameter> in the directory
+        <parameter>parent</parameter> from the procfs. Entries are
+        removed by their <emphasis>name</emphasis>, not by the
+        <structname>struct proc_dir_entry</structname> returned by the
+        various create functions. Note that this function doesn't
+        recursively remove entries.
+      </para>
+
+      <para>
+        Be sure to free the <structfield>data</structfield> entry from
+        the <structname>struct proc_dir_entry</structname> before
+        <function>remove_proc_entry</function> is called (that is: if
+        there was some <structfield>data</structfield> allocated, of
+        course). See <xref linkend="usingdata"/> for more information
+        on using the <structfield>data</structfield> entry.
+      </para>
+    </sect1>
+  </chapter>
+
+
+
+
+  <chapter id="userland">
+    <title>Communicating with userland</title>
+    
+    <para>
+       Instead of reading (or writing) information directly from
+       kernel memory, procfs works with <emphasis>call back
+       functions</emphasis> for files: functions that are called when
+       a specific file is being read or written. Such functions have
+       to be initialised after the procfs file is created by setting
+       the <structfield>read_proc</structfield> and/or
+       <structfield>write_proc</structfield> fields in the
+       <structname>struct proc_dir_entry*</structname> that the
+       function <function>create_proc_entry</function> returned:
+    </para>
+
+    <programlisting>
+struct proc_dir_entry* entry;
+
+entry->read_proc = read_proc_foo;
+entry->write_proc = write_proc_foo;
+    </programlisting>
+
+    <para>
+      If you only want to use a the
+      <structfield>read_proc</structfield>, the function
+      <function>create_proc_read_entry</function> described in <xref
+      linkend="convenience"/> may be used to create and initialise the
+      procfs entry in one single call.
+    </para>
+
+
+
+    <sect1 id="Reading_data">
+      <title>Reading data</title>
+
+      <para>
+        The read function is a call back function that allows userland
+        processes to read data from the kernel. The read function
+        should have the following format:
+      </para>
+
+      <funcsynopsis>
+	<funcprototype>
+	  <funcdef>int <function>read_func</function></funcdef>
+	  <paramdef>char* <parameter>buffer</parameter></paramdef>
+	  <paramdef>char** <parameter>start</parameter></paramdef>
+	  <paramdef>off_t <parameter>off</parameter></paramdef>
+	  <paramdef>int <parameter>count</parameter></paramdef>
+	  <paramdef>int* <parameter>peof</parameter></paramdef>
+	  <paramdef>void* <parameter>data</parameter></paramdef>
+	</funcprototype>
+      </funcsynopsis>
+
+      <para>
+        The read function should write its information into the
+        <parameter>buffer</parameter>, which will be exactly
+        <literal>PAGE_SIZE</literal> bytes long.
+      </para>
+
+      <para>
+        The parameter
+        <parameter>peof</parameter> should be used to signal that the
+        end of the file has been reached by writing
+        <literal>1</literal> to the memory location
+        <parameter>peof</parameter> points to.
+      </para>
+
+      <para>
+        The <parameter>data</parameter>
+        parameter can be used to create a single call back function for
+        several files, see <xref linkend="usingdata"/>.
+      </para>
+
+      <para>
+        The rest of the parameters and the return value are described
+	by a comment in <filename>fs/proc/generic.c</filename> as follows:
+      </para>
+
+      <blockquote>
+        <para>
+	You have three ways to return data:
+       	</para>
+        <orderedlist>
+          <listitem>
+            <para>
+	      Leave <literal>*start = NULL</literal>.  (This is the default.)
+	      Put the data of the requested offset at that
+	      offset within the buffer.  Return the number (<literal>n</literal>)
+	      of bytes there are from the beginning of the
+	      buffer up to the last byte of data.  If the
+	      number of supplied bytes (<literal>= n - offset</literal>) is
+	      greater than zero and you didn't signal eof
+	      and the reader is prepared to take more data
+	      you will be called again with the requested
+	      offset advanced by the number of bytes
+	      absorbed.  This interface is useful for files
+	      no larger than the buffer.
+	    </para>
+	  </listitem>
+	  <listitem>
+            <para>
+	      Set <literal>*start</literal> to an unsigned long value less than
+	      the buffer address but greater than zero.
+	      Put the data of the requested offset at the
+	      beginning of the buffer.  Return the number of
+	      bytes of data placed there.  If this number is
+	      greater than zero and you didn't signal eof
+	      and the reader is prepared to take more data
+	      you will be called again with the requested
+	      offset advanced by <literal>*start</literal>.  This interface is
+	      useful when you have a large file consisting
+	      of a series of blocks which you want to count
+	      and return as wholes.
+	      (Hack by Paul.Russell@rustcorp.com.au)
+	    </para>
+	  </listitem>
+	  <listitem>
+            <para>
+	      Set <literal>*start</literal> to an address within the buffer.
+	      Put the data of the requested offset at <literal>*start</literal>.
+	      Return the number of bytes of data placed there.
+	      If this number is greater than zero and you
+	      didn't signal eof and the reader is prepared to
+	      take more data you will be called again with the
+	      requested offset advanced by the number of bytes
+	      absorbed.
+	    </para>
+	  </listitem>
+	</orderedlist>
+      </blockquote>
+
+      <para>
+        <xref linkend="example"/> shows how to use a read call back
+        function.
+      </para>
+    </sect1>
+
+
+
+
+    <sect1 id="Writing_data">
+      <title>Writing data</title>
+
+      <para>
+        The write call back function allows a userland process to write
+        data to the kernel, so it has some kind of control over the
+        kernel. The write function should have the following format:
+      </para>
+
+      <funcsynopsis>
+	<funcprototype>
+	  <funcdef>int <function>write_func</function></funcdef>
+	  <paramdef>struct file* <parameter>file</parameter></paramdef>
+	  <paramdef>const char* <parameter>buffer</parameter></paramdef>
+	  <paramdef>unsigned long <parameter>count</parameter></paramdef>
+	  <paramdef>void* <parameter>data</parameter></paramdef>
+	</funcprototype>
+      </funcsynopsis>
+
+      <para>
+        The write function should read <parameter>count</parameter>
+        bytes at maximum from the <parameter>buffer</parameter>. Note
+        that the <parameter>buffer</parameter> doesn't live in the
+        kernel's memory space, so it should first be copied to kernel
+        space with <function>copy_from_user</function>. The
+        <parameter>file</parameter> parameter is usually
+        ignored. <xref linkend="usingdata"/> shows how to use the
+        <parameter>data</parameter> parameter.
+      </para>
+
+      <para>
+        Again, <xref linkend="example"/> shows how to use this call back
+        function.
+      </para>
+    </sect1>
+
+
+
+
+    <sect1 id="usingdata">
+      <title>A single call back for many files</title>
+
+      <para>
+         When a large number of almost identical files is used, it's
+         quite inconvenient to use a separate call back function for
+         each file. A better approach is to have a single call back
+         function that distinguishes between the files by using the
+         <structfield>data</structfield> field in <structname>struct
+         proc_dir_entry</structname>. First of all, the
+         <structfield>data</structfield> field has to be initialised:
+      </para>
+
+      <programlisting>
+struct proc_dir_entry* entry;
+struct my_file_data *file_data;
+
+file_data = kmalloc(sizeof(struct my_file_data), GFP_KERNEL);
+entry->data = file_data;
+      </programlisting>
+     
+      <para>
+          The <structfield>data</structfield> field is a <type>void
+          *</type>, so it can be initialised with anything.
+      </para>
+
+      <para>
+        Now that the <structfield>data</structfield> field is set, the
+        <function>read_proc</function> and
+        <function>write_proc</function> can use it to distinguish
+        between files because they get it passed into their
+        <parameter>data</parameter> parameter:
+      </para>
+
+      <programlisting>
+int foo_read_func(char *page, char **start, off_t off,
+                  int count, int *eof, void *data)
+{
+        int len;
+
+        if(data == file_data) {
+                /* special case for this file */
+        } else {
+                /* normal processing */
+        }
+
+        return len;
+}
+      </programlisting>
+
+      <para>
+        Be sure to free the <structfield>data</structfield> data field
+        when removing the procfs entry.
+      </para>
+    </sect1>
+  </chapter>
+
+
+
+
+  <chapter id="tips">
+    <title>Tips and tricks</title>
+
+
+
+
+    <sect1 id="convenience">
+      <title>Convenience functions</title>
+
+      <funcsynopsis>
+	<funcprototype>
+	  <funcdef>struct proc_dir_entry* <function>create_proc_read_entry</function></funcdef>
+	  <paramdef>const char* <parameter>name</parameter></paramdef>
+	  <paramdef>mode_t <parameter>mode</parameter></paramdef>
+	  <paramdef>struct proc_dir_entry* <parameter>parent</parameter></paramdef>
+	  <paramdef>read_proc_t* <parameter>read_proc</parameter></paramdef>
+	  <paramdef>void* <parameter>data</parameter></paramdef>
+	</funcprototype>
+      </funcsynopsis>
+      
+      <para>
+        This function creates a regular file in exactly the same way
+        as <function>create_proc_entry</function> from <xref
+        linkend="regularfile"/> does, but also allows to set the read
+        function <parameter>read_proc</parameter> in one call. This
+        function can set the <parameter>data</parameter> as well, like
+        explained in <xref linkend="usingdata"/>.
+      </para>
+    </sect1>
+
+
+
+    <sect1 id="Modules">
+      <title>Modules</title>
+
+      <para>
+        If procfs is being used from within a module, be sure to set
+        the <structfield>owner</structfield> field in the
+        <structname>struct proc_dir_entry</structname> to
+        <constant>THIS_MODULE</constant>.
+      </para>
+
+      <programlisting>
+struct proc_dir_entry* entry;
+
+entry->owner = THIS_MODULE;
+      </programlisting>
+    </sect1>
+
+
+
+
+    <sect1 id="Mode_and_ownership">
+      <title>Mode and ownership</title>
+
+      <para>
+        Sometimes it is useful to change the mode and/or ownership of
+        a procfs entry. Here is an example that shows how to achieve
+        that:
+      </para>
+
+      <programlisting>
+struct proc_dir_entry* entry;
+
+entry->mode =  S_IWUSR |S_IRUSR | S_IRGRP | S_IROTH;
+entry->uid = 0;
+entry->gid = 100;
+      </programlisting>
+
+    </sect1>
+  </chapter>
+
+
+
+
+  <chapter id="example">
+    <title>Example</title>
+
+    <!-- be careful with the example code: it shouldn't be wider than
+    approx. 60 columns, or otherwise it won't fit properly on a page
+    -->
+
+&procfsexample;
+
+  </chapter>
+</book>
diff --git a/Documentation/DocBook/procfs_example.c b/Documentation/DocBook/procfs_example.c
new file mode 100644
index 0000000..8c6396e
--- /dev/null
+++ b/Documentation/DocBook/procfs_example.c
@@ -0,0 +1,210 @@
+/*
+ * procfs_example.c: an example proc interface
+ *
+ * Copyright (C) 2001, Erik Mouw (mouw@nl.linux.org)
+ *
+ * This file accompanies the procfs-guide in the Linux kernel
+ * source. Its main use is to demonstrate the concepts and
+ * functions described in the guide.
+ *
+ * This software has been developed while working on the LART
+ * computing board (http://www.lartmaker.nl), which was sponsored
+ * by the Delt University of Technology projects Mobile Multi-media
+ * Communications and Ubiquitous Communications.
+ *
+ * This program is free software; you can redistribute
+ * it and/or modify it under the terms of the GNU General
+ * Public License as published by the Free Software
+ * Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE.  See the GNU General Public License for more
+ * details.
+ * 
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/jiffies.h>
+#include <asm/uaccess.h>
+
+
+#define MODULE_VERS "1.0"
+#define MODULE_NAME "procfs_example"
+
+#define FOOBAR_LEN 8
+
+struct fb_data_t {
+	char name[FOOBAR_LEN + 1];
+	char value[FOOBAR_LEN + 1];
+};
+
+
+static struct proc_dir_entry *example_dir, *foo_file,
+	*bar_file, *jiffies_file, *symlink;
+
+
+struct fb_data_t foo_data, bar_data;
+
+
+static int proc_read_jiffies(char *page, char **start,
+			     off_t off, int count,
+			     int *eof, void *data)
+{
+	int len;
+
+	len = sprintf(page, "jiffies = %ld\n",
+                      jiffies);
+
+	return len;
+}
+
+
+static int proc_read_foobar(char *page, char **start,
+			    off_t off, int count, 
+			    int *eof, void *data)
+{
+	int len;
+	struct fb_data_t *fb_data = (struct fb_data_t *)data;
+
+	/* DON'T DO THAT - buffer overruns are bad */
+	len = sprintf(page, "%s = '%s'\n", 
+		      fb_data->name, fb_data->value);
+
+	return len;
+}
+
+
+static int proc_write_foobar(struct file *file,
+			     const char *buffer,
+			     unsigned long count, 
+			     void *data)
+{
+	int len;
+	struct fb_data_t *fb_data = (struct fb_data_t *)data;
+
+	if(count > FOOBAR_LEN)
+		len = FOOBAR_LEN;
+	else
+		len = count;
+
+	if(copy_from_user(fb_data->value, buffer, len))
+		return -EFAULT;
+
+	fb_data->value[len] = '\0';
+
+	return len;
+}
+
+
+static int __init init_procfs_example(void)
+{
+	int rv = 0;
+
+	/* create directory */
+	example_dir = proc_mkdir(MODULE_NAME, NULL);
+	if(example_dir == NULL) {
+		rv = -ENOMEM;
+		goto out;
+	}
+	
+	example_dir->owner = THIS_MODULE;
+	
+	/* create jiffies using convenience function */
+	jiffies_file = create_proc_read_entry("jiffies", 
+					      0444, example_dir, 
+					      proc_read_jiffies,
+					      NULL);
+	if(jiffies_file == NULL) {
+		rv  = -ENOMEM;
+		goto no_jiffies;
+	}
+
+	jiffies_file->owner = THIS_MODULE;
+
+	/* create foo and bar files using same callback
+	 * functions 
+	 */
+	foo_file = create_proc_entry("foo", 0644, example_dir);
+	if(foo_file == NULL) {
+		rv = -ENOMEM;
+		goto no_foo;
+	}
+
+	strcpy(foo_data.name, "foo");
+	strcpy(foo_data.value, "foo");
+	foo_file->data = &foo_data;
+	foo_file->read_proc = proc_read_foobar;
+	foo_file->write_proc = proc_write_foobar;
+	foo_file->owner = THIS_MODULE;
+		
+	bar_file = create_proc_entry("bar", 0644, example_dir);
+	if(bar_file == NULL) {
+		rv = -ENOMEM;
+		goto no_bar;
+	}
+
+	strcpy(bar_data.name, "bar");
+	strcpy(bar_data.value, "bar");
+	bar_file->data = &bar_data;
+	bar_file->read_proc = proc_read_foobar;
+	bar_file->write_proc = proc_write_foobar;
+	bar_file->owner = THIS_MODULE;
+		
+	/* create symlink */
+	symlink = proc_symlink("jiffies_too", example_dir, 
+			       "jiffies");
+	if(symlink == NULL) {
+		rv = -ENOMEM;
+		goto no_symlink;
+	}
+
+	symlink->owner = THIS_MODULE;
+
+	/* everything OK */
+	printk(KERN_INFO "%s %s initialised\n",
+	       MODULE_NAME, MODULE_VERS);
+	return 0;
+
+no_symlink:
+	remove_proc_entry("bar", example_dir);
+no_bar:
+	remove_proc_entry("foo", example_dir);
+no_foo:
+	remove_proc_entry("jiffies", example_dir);
+no_jiffies:			      
+	remove_proc_entry(MODULE_NAME, NULL);
+out:
+	return rv;
+}
+
+
+static void __exit cleanup_procfs_example(void)
+{
+	remove_proc_entry("jiffies_too", example_dir);
+	remove_proc_entry("bar", example_dir);
+	remove_proc_entry("foo", example_dir);
+	remove_proc_entry("jiffies", example_dir);
+	remove_proc_entry(MODULE_NAME, NULL);
+
+	printk(KERN_INFO "%s %s removed\n",
+	       MODULE_NAME, MODULE_VERS);
+}
+
+
+module_init(init_procfs_example);
+module_exit(cleanup_procfs_example);
+
+MODULE_AUTHOR("Erik Mouw");
+MODULE_DESCRIPTION("procfs examples");
+MODULE_LICENSE("GPL");
diff --git a/Documentation/DocBook/rapidio.tmpl b/Documentation/DocBook/rapidio.tmpl
new file mode 100644
index 0000000..54eb26b
--- /dev/null
+++ b/Documentation/DocBook/rapidio.tmpl
@@ -0,0 +1,159 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+        "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" [
+	<!ENTITY rapidio SYSTEM "rapidio.xml">
+	]>
+
+<book id="RapidIO-Guide">
+ <bookinfo>
+  <title>RapidIO Subsystem Guide</title>
+
+  <authorgroup>
+   <author>
+    <firstname>Matt</firstname>
+    <surname>Porter</surname>
+    <affiliation>
+     <address>
+      <email>mporter@kernel.crashing.org</email>
+      <email>mporter@mvista.com</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <copyright>
+   <year>2005</year>
+   <holder>MontaVista Software, Inc.</holder>
+  </copyright>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License version 2 as published by the Free Software Foundation.
+   </para>
+
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+  <chapter id="intro">
+      <title>Introduction</title>
+  <para>
+	RapidIO is a high speed switched fabric interconnect with
+	features aimed at the embedded market.  RapidIO provides
+	support for memory-mapped I/O as well as message-based
+	transactions over the switched fabric network. RapidIO has
+	a standardized discovery mechanism not unlike the PCI bus
+	standard that allows simple detection of devices in a
+	network.
+  </para>
+  <para>
+  	This documentation is provided for developers intending
+	to support RapidIO on new architectures, write new drivers,
+	or to understand the subsystem internals.
+  </para>
+  </chapter>
+
+  <chapter id="bugs">
+     <title>Known Bugs and Limitations</title>
+
+     <sect1 id="known_bugs">
+     	<title>Bugs</title>
+	  <para>None. ;)</para>
+     </sect1>
+     <sect1 id="Limitations">
+     	<title>Limitations</title>
+	  <para>
+	    <orderedlist>
+	      <listitem><para>Access/management of RapidIO memory regions is not supported</para></listitem>
+	      <listitem><para>Multiple host enumeration is not supported</para></listitem>
+	    </orderedlist>
+	 </para>
+     </sect1>
+  </chapter>
+
+  <chapter id="drivers">
+     	<title>RapidIO driver interface</title>
+	<para>
+		Drivers are provided a set of calls in order
+		to interface with the subsystem to gather info
+		on devices, request/map memory region resources,
+		and manage mailboxes/doorbells.
+	</para>
+	<sect1 id="Functions">
+		<title>Functions</title>
+!Iinclude/linux/rio_drv.h
+!Edrivers/rapidio/rio-driver.c
+!Edrivers/rapidio/rio.c
+	</sect1>
+  </chapter>
+
+  <chapter id="internals">
+     <title>Internals</title>
+
+     <para>
+     This chapter contains the autogenerated documentation of the RapidIO
+     subsystem.
+     </para>
+
+     <sect1 id="Structures"><title>Structures</title>
+!Iinclude/linux/rio.h
+     </sect1>
+     <sect1 id="Enumeration_and_Discovery"><title>Enumeration and Discovery</title>
+!Idrivers/rapidio/rio-scan.c
+     </sect1>
+     <sect1 id="Driver_functionality"><title>Driver functionality</title>
+!Idrivers/rapidio/rio.c
+!Idrivers/rapidio/rio-access.c
+     </sect1>
+     <sect1 id="Device_model_support"><title>Device model support</title>
+!Idrivers/rapidio/rio-driver.c
+     </sect1>
+     <sect1 id="Sysfs_support"><title>Sysfs support</title>
+!Idrivers/rapidio/rio-sysfs.c
+     </sect1>
+     <sect1 id="PPC32_support"><title>PPC32 support</title>
+!Earch/powerpc/sysdev/fsl_rio.c
+!Iarch/powerpc/sysdev/fsl_rio.c
+     </sect1>
+  </chapter>
+
+  <chapter id="credits">
+     <title>Credits</title>
+	<para>
+		The following people have contributed to the RapidIO
+		subsystem directly or indirectly:
+		<orderedlist>
+			<listitem><para>Matt Porter<email>mporter@kernel.crashing.org</email></para></listitem>
+			<listitem><para>Randy Vinson<email>rvinson@mvista.com</email></para></listitem>
+			<listitem><para>Dan Malek<email>dan@embeddedalley.com</email></para></listitem>
+		</orderedlist>
+	</para>
+	<para>
+		The following people have contributed to this document:
+		<orderedlist>
+			<listitem><para>Matt Porter<email>mporter@kernel.crashing.org</email></para></listitem>
+		</orderedlist>
+	</para>
+  </chapter>
+</book>
diff --git a/Documentation/DocBook/s390-drivers.tmpl b/Documentation/DocBook/s390-drivers.tmpl
new file mode 100644
index 0000000..95bfc12
--- /dev/null
+++ b/Documentation/DocBook/s390-drivers.tmpl
@@ -0,0 +1,161 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="s390drivers">
+ <bookinfo>
+  <title>Writing s390 channel device drivers</title>
+
+  <authorgroup>
+   <author>
+    <firstname>Cornelia</firstname>
+    <surname>Huck</surname>
+    <affiliation>
+     <address>
+       <email>cornelia.huck@de.ibm.com</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <copyright>
+   <year>2007</year>
+   <holder>IBM Corp.</holder>
+  </copyright>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License as published by the Free Software Foundation; either
+     version 2 of the License, or (at your option) any later
+     version.
+   </para>
+
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+  <chapter id="intro">
+   <title>Introduction</title>
+  <para>
+    This document describes the interfaces available for device drivers that
+    drive s390 based channel attached I/O devices. This includes interfaces for
+    interaction with the hardware and interfaces for interacting with the
+    common driver core. Those interfaces are provided by the s390 common I/O
+    layer.
+  </para>
+  <para>
+    The document assumes a familarity with the technical terms associated
+    with the s390 channel I/O architecture. For a description of this
+    architecture, please refer to the "z/Architecture: Principles of
+    Operation", IBM publication no. SA22-7832.
+  </para>
+  <para>
+    While most I/O devices on a s390 system are typically driven through the
+    channel I/O mechanism described here, there are various other methods
+    (like the diag interface). These are out of the scope of this document.
+  </para>
+  <para>
+    Some additional information can also be found in the kernel source
+    under Documentation/s390/driver-model.txt.
+  </para>
+  </chapter>
+  <chapter id="ccw">
+   <title>The ccw bus</title>
+  <para>
+	The ccw bus typically contains the majority of devices available to
+	a s390 system. Named after the channel command word (ccw), the basic
+	command structure used to address its devices, the ccw bus contains
+	so-called channel attached devices. They are addressed via I/O
+	subchannels, visible on the css bus. A device driver for
+	channel-attached devices, however, will never interact	with the
+	subchannel directly, but only via the I/O device on the ccw bus,
+	the ccw device.
+  </para>
+    <sect1 id="channelIO">
+     <title>I/O functions for channel-attached devices</title>
+    <para>
+      Some hardware structures have been translated into C structures for use
+      by the common I/O layer and device drivers. For more information on
+      the hardware structures represented here, please consult the Principles
+      of Operation.
+    </para>
+!Iarch/s390/include/asm/cio.h
+    </sect1>
+    <sect1 id="ccwdev">
+     <title>ccw devices</title>
+    <para>
+      Devices that want to initiate channel I/O need to attach to the ccw bus.
+      Interaction with the driver core is done via the common I/O layer, which
+      provides the abstractions of ccw devices and ccw device drivers.
+    </para>
+    <para>
+      The functions that initiate or terminate channel I/O all act upon a
+      ccw device structure. Device drivers must not bypass those functions
+      or strange side effects may happen.
+    </para>
+!Iarch/s390/include/asm/ccwdev.h
+!Edrivers/s390/cio/device.c
+!Edrivers/s390/cio/device_ops.c
+    </sect1>
+    <sect1 id="cmf">
+     <title>The channel-measurement facility</title>
+  <para>
+	The channel-measurement facility provides a means to collect
+	measurement data which is made available by the channel subsystem
+	for each channel attached device.
+  </para>
+!Iarch/s390/include/asm/cmb.h
+!Edrivers/s390/cio/cmf.c
+    </sect1>
+  </chapter>
+
+  <chapter id="ccwgroup">
+   <title>The ccwgroup bus</title>
+  <para>
+	The ccwgroup bus only contains artificial devices, created by the user.
+	Many networking devices (e.g. qeth) are in fact composed of several
+	ccw devices (like read, write and data channel for qeth). The
+	ccwgroup bus provides a mechanism to create a meta-device which
+	contains those ccw devices as slave devices and can be associated
+	with the netdevice.
+  </para>
+   <sect1 id="ccwgroupdevices">
+    <title>ccw group devices</title>
+!Iarch/s390/include/asm/ccwgroup.h
+!Edrivers/s390/cio/ccwgroup.c
+   </sect1>
+  </chapter>
+
+  <chapter id="genericinterfaces">
+   <title>Generic interfaces</title>
+  <para>
+	Some interfaces are available to other drivers that do not necessarily
+	have anything to do with the busses described above, but still are
+	indirectly using basic infrastructure in the common I/O layer.
+	One example is the support for adapter interrupts.
+  </para>
+!Edrivers/s390/cio/airq.c
+  </chapter>
+
+</book>
diff --git a/Documentation/DocBook/scsi.tmpl b/Documentation/DocBook/scsi.tmpl
new file mode 100644
index 0000000..10a150a
--- /dev/null
+++ b/Documentation/DocBook/scsi.tmpl
@@ -0,0 +1,409 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="scsimid">
+  <bookinfo>
+    <title>SCSI Interfaces Guide</title>
+
+    <authorgroup>
+      <author>
+        <firstname>James</firstname>
+        <surname>Bottomley</surname>
+        <affiliation>
+          <address>
+            <email>James.Bottomley@hansenpartnership.com</email>
+          </address>
+        </affiliation>
+      </author>
+
+      <author>
+        <firstname>Rob</firstname>
+        <surname>Landley</surname>
+        <affiliation>
+          <address>
+            <email>rob@landley.net</email>
+          </address>
+        </affiliation>
+      </author>
+
+    </authorgroup>
+
+    <copyright>
+      <year>2007</year>
+      <holder>Linux Foundation</holder>
+    </copyright>
+
+    <legalnotice>
+      <para>
+        This documentation is free software; you can redistribute
+        it and/or modify it under the terms of the GNU General Public
+        License version 2.
+      </para>
+
+      <para>
+        This program is distributed in the hope that it will be
+        useful, but WITHOUT ANY WARRANTY; without even the implied
+        warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+        For more details see the file COPYING in the source
+        distribution of Linux.
+      </para>
+    </legalnotice>
+  </bookinfo>
+
+  <toc></toc>
+
+  <chapter id="intro">
+    <title>Introduction</title>
+    <sect1 id="protocol_vs_bus">
+      <title>Protocol vs bus</title>
+      <para>
+        Once upon a time, the Small Computer Systems Interface defined both
+        a parallel I/O bus and a data protocol to connect a wide variety of
+        peripherals (disk drives, tape drives, modems, printers, scanners,
+        optical drives, test equipment, and medical devices) to a host
+        computer.
+      </para>
+      <para>
+        Although the old parallel (fast/wide/ultra) SCSI bus has largely
+        fallen out of use, the SCSI command set is more widely used than ever
+        to communicate with devices over a number of different busses.
+      </para>
+      <para>
+        The <ulink url='http://www.t10.org/scsi-3.htm'>SCSI protocol</ulink>
+        is a big-endian peer-to-peer packet based protocol.  SCSI commands
+        are 6, 10, 12, or 16 bytes long, often followed by an associated data
+        payload.
+      </para>
+      <para>
+        SCSI commands can be transported over just about any kind of bus, and
+        are the default protocol for storage devices attached to USB, SATA,
+        SAS, Fibre Channel, FireWire, and ATAPI devices.  SCSI packets are
+        also commonly exchanged over Infiniband,
+        <ulink url='http://i2o.shadowconnect.com/faq.php'>I20</ulink>, TCP/IP
+        (<ulink url='http://en.wikipedia.org/wiki/ISCSI'>iSCSI</ulink>), even
+        <ulink url='http://cyberelk.net/tim/parport/parscsi.html'>Parallel
+        ports</ulink>.
+      </para>
+    </sect1>
+    <sect1 id="subsystem_design">
+      <title>Design of the Linux SCSI subsystem</title>
+      <para>
+        The SCSI subsystem uses a three layer design, with upper, mid, and low
+        layers.  Every operation involving the SCSI subsystem (such as reading
+        a sector from a disk) uses one driver at each of the 3 levels: one
+        upper layer driver, one lower layer driver, and the SCSI midlayer.
+      </para>
+      <para>
+        The SCSI upper layer provides the interface between userspace and the
+        kernel, in the form of block and char device nodes for I/O and
+        ioctl().  The SCSI lower layer contains drivers for specific hardware
+        devices.
+      </para>
+      <para>
+        In between is the SCSI mid-layer, analogous to a network routing
+        layer such as the IPv4 stack.  The SCSI mid-layer routes a packet
+        based data protocol between the upper layer's /dev nodes and the
+        corresponding devices in the lower layer.  It manages command queues,
+        provides error handling and power management functions, and responds
+        to ioctl() requests.
+      </para>
+    </sect1>
+  </chapter>
+
+  <chapter id="upper_layer">
+    <title>SCSI upper layer</title>
+    <para>
+      The upper layer supports the user-kernel interface by providing
+      device nodes.
+    </para>
+    <sect1 id="sd">
+      <title>sd (SCSI Disk)</title>
+      <para>sd (sd_mod.o)</para>
+<!-- !Idrivers/scsi/sd.c -->
+    </sect1>
+    <sect1 id="sr">
+      <title>sr (SCSI CD-ROM)</title>
+      <para>sr (sr_mod.o)</para>
+    </sect1>
+    <sect1 id="st">
+      <title>st (SCSI Tape)</title>
+      <para>st (st.o)</para>
+    </sect1>
+    <sect1 id="sg">
+      <title>sg (SCSI Generic)</title>
+      <para>sg (sg.o)</para>
+    </sect1>
+    <sect1 id="ch">
+      <title>ch (SCSI Media Changer)</title>
+      <para>ch (ch.c)</para>
+    </sect1>
+  </chapter>
+
+  <chapter id="mid_layer">
+    <title>SCSI mid layer</title>
+
+    <sect1 id="midlayer_implementation">
+      <title>SCSI midlayer implementation</title>
+      <sect2 id="scsi_device.h">
+        <title>include/scsi/scsi_device.h</title>
+        <para>
+        </para>
+!Iinclude/scsi/scsi_device.h
+      </sect2>
+
+      <sect2 id="scsi.c">
+        <title>drivers/scsi/scsi.c</title>
+        <para>Main file for the SCSI midlayer.</para>
+!Edrivers/scsi/scsi.c
+      </sect2>
+      <sect2 id="scsicam.c">
+        <title>drivers/scsi/scsicam.c</title>
+        <para>
+          <ulink url='http://www.t10.org/ftp/t10/drafts/cam/cam-r12b.pdf'>SCSI
+          Common Access Method</ulink> support functions, for use with
+          HDIO_GETGEO, etc.
+        </para>
+!Edrivers/scsi/scsicam.c
+      </sect2>
+      <sect2 id="scsi_error.c">
+        <title>drivers/scsi/scsi_error.c</title>
+        <para>Common SCSI error/timeout handling routines.</para>
+!Edrivers/scsi/scsi_error.c
+      </sect2>
+      <sect2 id="scsi_devinfo.c">
+        <title>drivers/scsi/scsi_devinfo.c</title>
+        <para>
+          Manage scsi_dev_info_list, which tracks blacklisted and whitelisted
+          devices.
+        </para>
+!Idrivers/scsi/scsi_devinfo.c
+      </sect2>
+      <sect2 id="scsi_ioctl.c">
+        <title>drivers/scsi/scsi_ioctl.c</title>
+        <para>
+          Handle ioctl() calls for SCSI devices.
+        </para>
+!Edrivers/scsi/scsi_ioctl.c
+      </sect2>
+      <sect2 id="scsi_lib.c">
+        <title>drivers/scsi/scsi_lib.c</title>
+        <para>
+          SCSI queuing library.
+        </para>
+!Edrivers/scsi/scsi_lib.c
+      </sect2>
+      <sect2 id="scsi_lib_dma.c">
+        <title>drivers/scsi/scsi_lib_dma.c</title>
+        <para>
+          SCSI library functions depending on DMA
+          (map and unmap scatter-gather lists).
+        </para>
+!Edrivers/scsi/scsi_lib_dma.c
+      </sect2>
+      <sect2 id="scsi_module.c">
+        <title>drivers/scsi/scsi_module.c</title>
+        <para>
+          The file drivers/scsi/scsi_module.c contains legacy support for
+          old-style host templates.  It should never be used by any new driver.
+        </para>
+      </sect2>
+      <sect2 id="scsi_proc.c">
+        <title>drivers/scsi/scsi_proc.c</title>
+        <para>
+          The functions in this file provide an interface between
+          the PROC file system and the SCSI device drivers
+          It is mainly used for debugging, statistics and to pass
+          information directly to the lowlevel driver.
+
+          I.E. plumbing to manage /proc/scsi/*
+        </para>
+!Idrivers/scsi/scsi_proc.c
+      </sect2>
+      <sect2 id="scsi_netlink.c">
+        <title>drivers/scsi/scsi_netlink.c</title>
+        <para>
+          Infrastructure to provide async events from transports to userspace
+          via netlink, using a single NETLINK_SCSITRANSPORT protocol for all
+          transports.
+
+          See <ulink url='http://marc.info/?l=linux-scsi&amp;m=115507374832500&amp;w=2'>the
+          original patch submission</ulink> for more details.
+        </para>
+!Idrivers/scsi/scsi_netlink.c
+      </sect2>
+      <sect2 id="scsi_scan.c">
+        <title>drivers/scsi/scsi_scan.c</title>
+        <para>
+          Scan a host to determine which (if any) devices are attached.
+
+          The general scanning/probing algorithm is as follows, exceptions are
+          made to it depending on device specific flags, compilation options,
+          and global variable (boot or module load time) settings.
+
+          A specific LUN is scanned via an INQUIRY command; if the LUN has a
+          device attached, a scsi_device is allocated and setup for it.
+
+          For every id of every channel on the given host, start by scanning
+          LUN 0.  Skip hosts that don't respond at all to a scan of LUN 0.
+          Otherwise, if LUN 0 has a device attached, allocate and setup a
+          scsi_device for it.  If target is SCSI-3 or up, issue a REPORT LUN,
+          and scan all of the LUNs returned by the REPORT LUN; else,
+          sequentially scan LUNs up until some maximum is reached, or a LUN is
+          seen that cannot have a device attached to it.
+        </para>
+!Idrivers/scsi/scsi_scan.c
+      </sect2>
+      <sect2 id="scsi_sysctl.c">
+        <title>drivers/scsi/scsi_sysctl.c</title>
+        <para>
+          Set up the sysctl entry: "/dev/scsi/logging_level"
+          (DEV_SCSI_LOGGING_LEVEL) which sets/returns scsi_logging_level.
+        </para>
+      </sect2>
+      <sect2 id="scsi_sysfs.c">
+        <title>drivers/scsi/scsi_sysfs.c</title>
+        <para>
+          SCSI sysfs interface routines.
+        </para>
+!Edrivers/scsi/scsi_sysfs.c
+      </sect2>
+      <sect2 id="hosts.c">
+        <title>drivers/scsi/hosts.c</title>
+        <para>
+          mid to lowlevel SCSI driver interface
+        </para>
+!Edrivers/scsi/hosts.c
+      </sect2>
+      <sect2 id="constants.c">
+        <title>drivers/scsi/constants.c</title>
+        <para>
+          mid to lowlevel SCSI driver interface
+        </para>
+!Edrivers/scsi/constants.c
+      </sect2>
+    </sect1>
+
+    <sect1 id="Transport_classes">
+      <title>Transport classes</title>
+      <para>
+        Transport classes are service libraries for drivers in the SCSI
+        lower layer, which expose transport attributes in sysfs.
+      </para>
+      <sect2 id="Fibre_Channel_transport">
+        <title>Fibre Channel transport</title>
+        <para>
+          The file drivers/scsi/scsi_transport_fc.c defines transport attributes
+          for Fibre Channel.
+        </para>
+!Edrivers/scsi/scsi_transport_fc.c
+      </sect2>
+      <sect2 id="iSCSI_transport">
+        <title>iSCSI transport class</title>
+        <para>
+          The file drivers/scsi/scsi_transport_iscsi.c defines transport
+          attributes for the iSCSI class, which sends SCSI packets over TCP/IP
+          connections.
+        </para>
+!Edrivers/scsi/scsi_transport_iscsi.c
+      </sect2>
+      <sect2 id="SAS_transport">
+        <title>Serial Attached SCSI (SAS) transport class</title>
+        <para>
+          The file drivers/scsi/scsi_transport_sas.c defines transport
+          attributes for Serial Attached SCSI, a variant of SATA aimed at
+          large high-end systems.
+        </para>
+        <para>
+          The SAS transport class contains common code to deal with SAS HBAs,
+          an aproximated representation of SAS topologies in the driver model,
+          and various sysfs attributes to expose these topologies and managment
+          interfaces to userspace.
+        </para>
+        <para>
+          In addition to the basic SCSI core objects this transport class
+          introduces two additional intermediate objects:  The SAS PHY
+          as represented by struct sas_phy defines an "outgoing" PHY on
+          a SAS HBA or Expander, and the SAS remote PHY represented by
+          struct sas_rphy defines an "incoming" PHY on a SAS Expander or
+          end device.  Note that this is purely a software concept, the
+          underlying hardware for a PHY and a remote PHY is the exactly
+          the same.
+        </para>
+        <para>
+          There is no concept of a SAS port in this code, users can see
+          what PHYs form a wide port based on the port_identifier attribute,
+          which is the same for all PHYs in a port.
+        </para>
+!Edrivers/scsi/scsi_transport_sas.c
+      </sect2>
+      <sect2 id="SATA_transport">
+        <title>SATA transport class</title>
+        <para>
+          The SATA transport is handled by libata, which has its own book of
+          documentation in this directory.
+        </para>
+      </sect2>
+      <sect2 id="SPI_transport">
+        <title>Parallel SCSI (SPI) transport class</title>
+        <para>
+          The file drivers/scsi/scsi_transport_spi.c defines transport
+          attributes for traditional (fast/wide/ultra) SCSI busses.
+        </para>
+!Edrivers/scsi/scsi_transport_spi.c
+      </sect2>
+      <sect2 id="SRP_transport">
+        <title>SCSI RDMA (SRP) transport class</title>
+        <para>
+          The file drivers/scsi/scsi_transport_srp.c defines transport
+          attributes for SCSI over Remote Direct Memory Access.
+        </para>
+!Edrivers/scsi/scsi_transport_srp.c
+      </sect2>
+    </sect1>
+
+  </chapter>
+
+  <chapter id="lower_layer">
+    <title>SCSI lower layer</title>
+    <sect1 id="hba_drivers">
+      <title>Host Bus Adapter transport types</title>
+      <para>
+        Many modern device controllers use the SCSI command set as a protocol to
+        communicate with their devices through many different types of physical
+        connections.
+      </para>
+      <para>
+        In SCSI language a bus capable of carrying SCSI commands is
+        called a "transport", and a controller connecting to such a bus is
+        called a "host bus adapter" (HBA).
+      </para>
+      <sect2 id="scsi_debug.c">
+        <title>Debug transport</title>
+        <para>
+          The file drivers/scsi/scsi_debug.c simulates a host adapter with a
+          variable number of disks (or disk like devices) attached, sharing a
+          common amount of RAM.  Does a lot of checking to make sure that we are
+          not getting blocks mixed up, and panics the kernel if anything out of
+          the ordinary is seen.
+        </para>
+        <para>
+          To be more realistic, the simulated devices have the transport
+          attributes of SAS disks.
+        </para>
+        <para>
+          For documentation see
+          <ulink url='http://www.torque.net/sg/sdebug26.html'>http://www.torque.net/sg/sdebug26.html</ulink>
+        </para>
+<!-- !Edrivers/scsi/scsi_debug.c -->
+      </sect2>
+      <sect2 id="todo">
+        <title>todo</title>
+        <para>Parallel (fast/wide/ultra) SCSI, USB, SATA,
+        SAS, Fibre Channel, FireWire, ATAPI devices, Infiniband,
+        I20, iSCSI, Parallel ports, netlink...
+        </para>
+      </sect2>
+    </sect1>
+  </chapter>
+</book>
diff --git a/Documentation/DocBook/sh.tmpl b/Documentation/DocBook/sh.tmpl
new file mode 100644
index 0000000..0c3dc4c
--- /dev/null
+++ b/Documentation/DocBook/sh.tmpl
@@ -0,0 +1,105 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="sh-drivers">
+ <bookinfo>
+  <title>SuperH Interfaces Guide</title>
+  
+  <authorgroup>
+   <author>
+    <firstname>Paul</firstname>
+    <surname>Mundt</surname>
+    <affiliation>
+     <address>
+      <email>lethal@linux-sh.org</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <copyright>
+   <year>2008</year>
+   <holder>Paul Mundt</holder>
+  </copyright>
+  <copyright>
+   <year>2008</year>
+   <holder>Renesas Technology Corp.</holder>
+  </copyright>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License version 2 as published by the Free Software Foundation.
+   </para>
+      
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+      
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+      
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+  <chapter id="mm">
+    <title>Memory Management</title>
+    <sect1 id="sh4">
+    <title>SH-4</title>
+      <sect2 id="sq">
+        <title>Store Queue API</title>
+!Earch/sh/kernel/cpu/sh4/sq.c
+      </sect2>
+    </sect1>
+    <sect1 id="sh5">
+      <title>SH-5</title>
+      <sect2 id="tlb">
+	<title>TLB Interfaces</title>
+!Iarch/sh/mm/tlb-sh5.c
+!Iarch/sh/include/asm/tlb_64.h
+      </sect2>
+    </sect1>
+  </chapter>
+  <chapter id="clk">
+    <title>Clock Framework Extensions</title>
+!Iarch/sh/include/asm/clock.h
+  </chapter>
+  <chapter id="mach">
+    <title>Machine Specific Interfaces</title>
+    <sect1 id="dreamcast">
+      <title>mach-dreamcast</title>
+!Iarch/sh/boards/mach-dreamcast/rtc.c
+    </sect1>
+    <sect1 id="x3proto">
+      <title>mach-x3proto</title>
+!Earch/sh/boards/mach-x3proto/ilsel.c
+    </sect1>
+  </chapter>
+  <chapter id="busses">
+    <title>Busses</title>
+    <sect1 id="superhyway">
+      <title>SuperHyway</title>
+!Edrivers/sh/superhyway/superhyway.c
+    </sect1>
+
+    <sect1 id="maple">
+      <title>Maple</title>
+!Edrivers/sh/maple/maple.c
+    </sect1>
+  </chapter>
+</book>
diff --git a/Documentation/DocBook/stylesheet.xsl b/Documentation/DocBook/stylesheet.xsl
new file mode 100644
index 0000000..974e17c
--- /dev/null
+++ b/Documentation/DocBook/stylesheet.xsl
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<stylesheet xmlns="http://www.w3.org/1999/XSL/Transform" version="1.0">
+<param name="chunk.quietly">1</param>
+<param name="funcsynopsis.style">ansi</param>
+<param name="funcsynopsis.tabular.threshold">80</param>
+<!-- <param name="paper.type">A4</param> -->
+<param name="generate.section.toc.level">2</param>
+</stylesheet>
diff --git a/Documentation/DocBook/uio-howto.tmpl b/Documentation/DocBook/uio-howto.tmpl
new file mode 100644
index 0000000..df87d1b
--- /dev/null
+++ b/Documentation/DocBook/uio-howto.tmpl
@@ -0,0 +1,620 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" []>
+
+<book id="index">
+<bookinfo>
+<title>The Userspace I/O HOWTO</title>
+
+<author>
+      <firstname>Hans-Jürgen</firstname>
+      <surname>Koch</surname>
+      <authorblurb><para>Linux developer, Linutronix</para></authorblurb>
+	<affiliation>
+	<orgname>
+		<ulink url="http://www.linutronix.de">Linutronix</ulink>
+	</orgname>
+
+	<address>
+	   <email>hjk@linutronix.de</email>
+	</address>
+    </affiliation>
+</author>
+
+<copyright>
+	<year>2006-2008</year>
+	<holder>Hans-Jürgen Koch.</holder>
+</copyright>
+
+<legalnotice>
+<para>
+This documentation is Free Software licensed under the terms of the
+GPL version 2.
+</para>
+</legalnotice>
+
+<pubdate>2006-12-11</pubdate>
+
+<abstract>
+	<para>This HOWTO describes concept and usage of Linux kernel's
+		Userspace I/O system.</para>
+</abstract>
+
+<revhistory>
+	<revision>
+	<revnumber>0.5</revnumber>
+	<date>2008-05-22</date>
+	<authorinitials>hjk</authorinitials>
+	<revremark>Added description of write() function.</revremark>
+	</revision>
+	<revision>
+	<revnumber>0.4</revnumber>
+	<date>2007-11-26</date>
+	<authorinitials>hjk</authorinitials>
+	<revremark>Removed section about uio_dummy.</revremark>
+	</revision>
+	<revision>
+	<revnumber>0.3</revnumber>
+	<date>2007-04-29</date>
+	<authorinitials>hjk</authorinitials>
+	<revremark>Added section about userspace drivers.</revremark>
+	</revision>
+	<revision>
+	<revnumber>0.2</revnumber>
+	<date>2007-02-13</date>
+	<authorinitials>hjk</authorinitials>
+	<revremark>Update after multiple mappings were added.</revremark>
+	</revision>
+	<revision>
+	<revnumber>0.1</revnumber>
+	<date>2006-12-11</date>
+	<authorinitials>hjk</authorinitials>
+	<revremark>First draft.</revremark>
+	</revision>
+</revhistory>
+</bookinfo>
+
+<chapter id="aboutthisdoc">
+<?dbhtml filename="aboutthis.html"?>
+<title>About this document</title>
+
+<sect1 id="translations">
+<?dbhtml filename="translations.html"?>
+<title>Translations</title>
+
+<para>If you know of any translations for this document, or you are
+interested in translating it, please email me
+<email>hjk@linutronix.de</email>.
+</para>
+</sect1>
+
+<sect1 id="preface">
+<title>Preface</title>
+	<para>
+	For many types of devices, creating a Linux kernel driver is
+	overkill.  All that is really needed is some way to handle an
+	interrupt and provide access to the memory space of the
+	device.  The logic of controlling the device does not
+	necessarily have to be within the kernel, as the device does
+	not need to take advantage of any of other resources that the
+	kernel provides.  One such common class of devices that are
+	like this are for industrial I/O cards.
+	</para>
+	<para>
+	To address this situation, the userspace I/O system (UIO) was
+	designed.  For typical industrial I/O cards, only a very small
+	kernel module is needed. The main part of the driver will run in
+	user space. This simplifies development and reduces the risk of
+	serious bugs within a kernel module.
+	</para>
+	<para>
+	Please note that UIO is not an universal driver interface. Devices
+	that are already handled well by other kernel subsystems (like
+	networking or serial or USB) are no candidates for an UIO driver.
+	Hardware that is ideally suited for an UIO driver fulfills all of
+	the following:
+	</para>
+<itemizedlist>
+<listitem>
+	<para>The device has memory that can be mapped. The device can be
+	controlled completely by writing to this memory.</para>
+</listitem>
+<listitem>
+	<para>The device usually generates interrupts.</para>
+</listitem>
+<listitem>
+	<para>The device does not fit into one of the standard kernel
+	subsystems.</para>
+</listitem>
+</itemizedlist>
+</sect1>
+
+<sect1 id="thanks">
+<title>Acknowledgments</title>
+	<para>I'd like to thank Thomas Gleixner and Benedikt Spranger of
+	Linutronix, who have not only written most of the UIO code, but also
+	helped greatly writing this HOWTO by giving me all kinds of background
+	information.</para>
+</sect1>
+
+<sect1 id="feedback">
+<title>Feedback</title>
+	<para>Find something wrong with this document? (Or perhaps something
+	right?) I would love to hear from you. Please email me at
+	<email>hjk@linutronix.de</email>.</para>
+</sect1>
+</chapter>
+
+<chapter id="about">
+<?dbhtml filename="about.html"?>
+<title>About UIO</title>
+
+<para>If you use UIO for your card's driver, here's what you get:</para>
+
+<itemizedlist>
+<listitem>
+	<para>only one small kernel module to write and maintain.</para>
+</listitem>
+<listitem>
+	<para>develop the main part of your driver in user space,
+	with all the tools and libraries you're used to.</para>
+</listitem>
+<listitem>
+	<para>bugs in your driver won't crash the kernel.</para>
+</listitem>
+<listitem>
+	<para>updates of your driver can take place without recompiling
+	the kernel.</para>
+</listitem>
+</itemizedlist>
+
+<sect1 id="how_uio_works">
+<title>How UIO works</title>
+	<para>
+	Each UIO device is accessed through a device file and several
+	sysfs attribute files. The device file will be called
+	<filename>/dev/uio0</filename> for the first device, and
+	<filename>/dev/uio1</filename>, <filename>/dev/uio2</filename>
+	and so on for subsequent devices.
+	</para>
+
+	<para><filename>/dev/uioX</filename> is used to access the
+	address space of the card. Just use
+	<function>mmap()</function> to access registers or RAM
+	locations of your card.
+	</para>
+
+	<para>
+	Interrupts are handled by reading from
+	<filename>/dev/uioX</filename>. A blocking
+	<function>read()</function> from
+	<filename>/dev/uioX</filename> will return as soon as an
+	interrupt occurs. You can also use
+	<function>select()</function> on
+	<filename>/dev/uioX</filename> to wait for an interrupt. The
+	integer value read from <filename>/dev/uioX</filename>
+	represents the total interrupt count. You can use this number
+	to figure out if you missed some interrupts.
+	</para>
+	<para>
+	For some hardware that has more than one interrupt source internally,
+	but not separate IRQ mask and status registers, there might be
+	situations where userspace cannot determine what the interrupt source
+	was if the kernel handler disables them by writing to the chip's IRQ
+	register. In such a case, the kernel has to disable the IRQ completely
+	to leave the chip's register untouched. Now the userspace part can
+	determine the cause of the interrupt, but it cannot re-enable
+	interrupts. Another cornercase is chips where re-enabling interrupts
+	is a read-modify-write operation to a combined IRQ status/acknowledge
+	register. This would be racy if a new interrupt occurred
+	simultaneously.
+	</para>
+	<para>
+	To address these problems, UIO also implements a write() function. It
+	is normally not used and can be ignored for hardware that has only a
+	single interrupt source or has separate IRQ mask and status registers.
+	If you need it, however, a write to <filename>/dev/uioX</filename>
+	will call the <function>irqcontrol()</function> function implemented
+	by the driver. You have to write a 32-bit value that is usually either
+	0 or 1 to disable or enable interrupts. If a driver does not implement
+	<function>irqcontrol()</function>, <function>write()</function> will
+	return with <varname>-ENOSYS</varname>.
+	</para>
+
+	<para>
+	To handle interrupts properly, your custom kernel module can
+	provide its own interrupt handler. It will automatically be
+	called by the built-in handler.
+	</para>
+
+	<para>
+	For cards that don't generate interrupts but need to be
+	polled, there is the possibility to set up a timer that
+	triggers the interrupt handler at configurable time intervals.
+	This interrupt simulation is done by calling
+	<function>uio_event_notify()</function>
+	from the timer's event handler.
+	</para>
+
+	<para>
+	Each driver provides attributes that are used to read or write
+	variables. These attributes are accessible through sysfs
+	files.  A custom kernel driver module can add its own
+	attributes to the device owned by the uio driver, but not added
+	to the UIO device itself at this time.  This might change in the
+	future if it would be found to be useful.
+	</para>
+
+	<para>
+	The following standard attributes are provided by the UIO
+	framework:
+	</para>
+<itemizedlist>
+<listitem>
+	<para>
+	<filename>name</filename>: The name of your device. It is
+	recommended to use the name of your kernel module for this.
+	</para>
+</listitem>
+<listitem>
+	<para>
+	<filename>version</filename>: A version string defined by your
+	driver. This allows the user space part of your driver to deal
+	with different versions of the kernel module.
+	</para>
+</listitem>
+<listitem>
+	<para>
+	<filename>event</filename>: The total number of interrupts
+	handled by the driver since the last time the device node was
+	read.
+	</para>
+</listitem>
+</itemizedlist>
+<para>
+	These attributes appear under the
+	<filename>/sys/class/uio/uioX</filename> directory.  Please
+	note that this directory might be a symlink, and not a real
+	directory.  Any userspace code that accesses it must be able
+	to handle this.
+</para>
+<para>
+	Each UIO device can make one or more memory regions available for
+	memory mapping. This is necessary because some industrial I/O cards
+	require access to more than one PCI memory region in a driver.
+</para>
+<para>
+	Each mapping has its own directory in sysfs, the first mapping
+	appears as <filename>/sys/class/uio/uioX/maps/map0/</filename>.
+	Subsequent mappings create directories <filename>map1/</filename>,
+	<filename>map2/</filename>, and so on. These directories will only
+	appear if the size of the mapping is not 0.
+</para>
+<para>
+	Each <filename>mapX/</filename> directory contains two read-only files
+	that show start address and size of the memory:
+</para>
+<itemizedlist>
+<listitem>
+	<para>
+	<filename>addr</filename>: The address of memory that can be mapped.
+	</para>
+</listitem>
+<listitem>
+	<para>
+	<filename>size</filename>: The size, in bytes, of the memory
+	pointed to by addr.
+	</para>
+</listitem>
+</itemizedlist>
+
+<para>
+	From userspace, the different mappings are distinguished by adjusting
+	the <varname>offset</varname> parameter of the
+	<function>mmap()</function> call. To map the memory of mapping N, you
+	have to use N times the page size as your offset:
+</para>
+<programlisting format="linespecific">
+offset = N * getpagesize();
+</programlisting>
+
+</sect1>
+</chapter>
+
+<chapter id="custom_kernel_module" xreflabel="Writing your own kernel module">
+<?dbhtml filename="custom_kernel_module.html"?>
+<title>Writing your own kernel module</title>
+	<para>
+	Please have a look at <filename>uio_cif.c</filename> as an
+	example. The following paragraphs explain the different
+	sections of this file.
+	</para>
+
+<sect1 id="uio_info">
+<title>struct uio_info</title>
+	<para>
+	This structure tells the framework the details of your driver,
+	Some of the members are required, others are optional.
+	</para>
+
+<itemizedlist>
+<listitem><para>
+<varname>char *name</varname>: Required. The name of your driver as
+it will appear in sysfs. I recommend using the name of your module for this.
+</para></listitem>
+
+<listitem><para>
+<varname>char *version</varname>: Required. This string appears in
+<filename>/sys/class/uio/uioX/version</filename>.
+</para></listitem>
+
+<listitem><para>
+<varname>struct uio_mem mem[ MAX_UIO_MAPS ]</varname>: Required if you
+have memory that can be mapped with <function>mmap()</function>. For each
+mapping you need to fill one of the <varname>uio_mem</varname> structures.
+See the description below for details.
+</para></listitem>
+
+<listitem><para>
+<varname>long irq</varname>: Required. If your hardware generates an
+interrupt, it's your modules task to determine the irq number during
+initialization. If you don't have a hardware generated interrupt but
+want to trigger the interrupt handler in some other way, set
+<varname>irq</varname> to <varname>UIO_IRQ_CUSTOM</varname>.
+If you had no interrupt at all, you could set
+<varname>irq</varname> to <varname>UIO_IRQ_NONE</varname>, though this
+rarely makes sense.
+</para></listitem>
+
+<listitem><para>
+<varname>unsigned long irq_flags</varname>: Required if you've set
+<varname>irq</varname> to a hardware interrupt number. The flags given
+here will be used in the call to <function>request_irq()</function>.
+</para></listitem>
+
+<listitem><para>
+<varname>int (*mmap)(struct uio_info *info, struct vm_area_struct
+*vma)</varname>: Optional. If you need a special
+<function>mmap()</function> function, you can set it here. If this
+pointer is not NULL, your <function>mmap()</function> will be called
+instead of the built-in one.
+</para></listitem>
+
+<listitem><para>
+<varname>int (*open)(struct uio_info *info, struct inode *inode)
+</varname>: Optional. You might want to have your own
+<function>open()</function>, e.g. to enable interrupts only when your
+device is actually used.
+</para></listitem>
+
+<listitem><para>
+<varname>int (*release)(struct uio_info *info, struct inode *inode)
+</varname>: Optional. If you define your own
+<function>open()</function>, you will probably also want a custom
+<function>release()</function> function.
+</para></listitem>
+
+<listitem><para>
+<varname>int (*irqcontrol)(struct uio_info *info, s32 irq_on)
+</varname>: Optional. If you need to be able to enable or disable
+interrupts from userspace by writing to <filename>/dev/uioX</filename>,
+you can implement this function. The parameter <varname>irq_on</varname>
+will be 0 to disable interrupts and 1 to enable them.
+</para></listitem>
+</itemizedlist>
+
+<para>
+Usually, your device will have one or more memory regions that can be mapped
+to user space. For each region, you have to set up a
+<varname>struct uio_mem</varname> in the <varname>mem[]</varname> array.
+Here's a description of the fields of <varname>struct uio_mem</varname>:
+</para>
+
+<itemizedlist>
+<listitem><para>
+<varname>int memtype</varname>: Required if the mapping is used. Set this to
+<varname>UIO_MEM_PHYS</varname> if you you have physical memory on your
+card to be mapped. Use <varname>UIO_MEM_LOGICAL</varname> for logical
+memory (e.g. allocated with <function>kmalloc()</function>). There's also
+<varname>UIO_MEM_VIRTUAL</varname> for virtual memory.
+</para></listitem>
+
+<listitem><para>
+<varname>unsigned long addr</varname>: Required if the mapping is used.
+Fill in the address of your memory block. This address is the one that
+appears in sysfs.
+</para></listitem>
+
+<listitem><para>
+<varname>unsigned long size</varname>: Fill in the size of the
+memory block that <varname>addr</varname> points to. If <varname>size</varname>
+is zero, the mapping is considered unused. Note that you
+<emphasis>must</emphasis> initialize <varname>size</varname> with zero for
+all unused mappings.
+</para></listitem>
+
+<listitem><para>
+<varname>void *internal_addr</varname>: If you have to access this memory
+region from within your kernel module, you will want to map it internally by
+using something like <function>ioremap()</function>. Addresses
+returned by this function cannot be mapped to user space, so you must not
+store it in <varname>addr</varname>. Use <varname>internal_addr</varname>
+instead to remember such an address.
+</para></listitem>
+</itemizedlist>
+
+<para>
+Please do not touch the <varname>kobj</varname> element of
+<varname>struct uio_mem</varname>! It is used by the UIO framework
+to set up sysfs files for this mapping. Simply leave it alone.
+</para>
+</sect1>
+
+<sect1 id="adding_irq_handler">
+<title>Adding an interrupt handler</title>
+	<para>
+	What you need to do in your interrupt handler depends on your
+	hardware and on how you want to	handle it. You should try to
+	keep the amount of code in your kernel interrupt handler low.
+	If your hardware requires no action that you
+	<emphasis>have</emphasis> to perform after each interrupt,
+	then your handler can be empty.</para> <para>If, on the other
+	hand, your hardware <emphasis>needs</emphasis> some action to
+	be performed after each interrupt, then you
+	<emphasis>must</emphasis> do it in your kernel module. Note
+	that you cannot rely on the userspace part of your driver. Your
+	userspace program can terminate at any time, possibly leaving
+	your hardware in a state where proper interrupt handling is
+	still required.
+	</para>
+
+	<para>
+	There might also be applications where you want to read data
+	from your hardware at each interrupt and buffer it in a piece
+	of kernel memory you've allocated for that purpose.  With this
+	technique you could avoid loss of data if your userspace
+	program misses an interrupt.
+	</para>
+
+	<para>
+	A note on shared interrupts: Your driver should support
+	interrupt sharing whenever this is possible. It is possible if
+	and only if your driver can detect whether your hardware has
+	triggered the interrupt or not. This is usually done by looking
+	at an interrupt status register. If your driver sees that the
+	IRQ bit is actually set, it will perform its actions, and the
+	handler returns IRQ_HANDLED. If the driver detects that it was
+	not your hardware that caused the interrupt, it will do nothing
+	and return IRQ_NONE, allowing the kernel to call the next
+	possible interrupt handler.
+	</para>
+
+	<para>
+	If you decide not to support shared interrupts, your card
+	won't work in computers with no free interrupts. As this
+	frequently happens on the PC platform, you can save yourself a
+	lot of trouble by supporting interrupt sharing.
+	</para>
+</sect1>
+
+</chapter>
+
+<chapter id="userspace_driver" xreflabel="Writing a driver in user space">
+<?dbhtml filename="userspace_driver.html"?>
+<title>Writing a driver in userspace</title>
+	<para>
+	Once you have a working kernel module for your hardware, you can
+	write the userspace part of your driver. You don't need any special
+	libraries, your driver can be written in any reasonable language,
+	you can use floating point numbers and so on. In short, you can
+	use all the tools and libraries you'd normally use for writing a
+	userspace application.
+	</para>
+
+<sect1 id="getting_uio_information">
+<title>Getting information about your UIO device</title>
+	<para>
+	Information about all UIO devices is available in sysfs. The
+	first thing you should do in your driver is check
+	<varname>name</varname> and <varname>version</varname> to
+	make sure your talking to the right device and that its kernel
+	driver has the version you expect.
+	</para>
+	<para>
+	You should also make sure that the memory mapping you need
+	exists and has the size you expect.
+	</para>
+	<para>
+	There is a tool called <varname>lsuio</varname> that lists
+	UIO devices and their attributes. It is available here:
+	</para>
+	<para>
+	<ulink url="http://www.osadl.org/projects/downloads/UIO/user/">
+		http://www.osadl.org/projects/downloads/UIO/user/</ulink>
+	</para>
+	<para>
+	With <varname>lsuio</varname> you can quickly check if your
+	kernel module is loaded and which attributes it exports.
+	Have a look at the manpage for details.
+	</para>
+	<para>
+	The source code of <varname>lsuio</varname> can serve as an
+	example for getting information about an UIO device.
+	The file <filename>uio_helper.c</filename> contains a lot of
+	functions you could use in your userspace driver code.
+	</para>
+</sect1>
+
+<sect1 id="mmap_device_memory">
+<title>mmap() device memory</title>
+	<para>
+	After you made sure you've got the right device with the
+	memory mappings you need, all you have to do is to call
+	<function>mmap()</function> to map the device's memory
+	to userspace.
+	</para>
+	<para>
+	The parameter <varname>offset</varname> of the
+	<function>mmap()</function> call has a special meaning
+	for UIO devices: It is used to select which mapping of
+	your device you want to map. To map the memory of
+	mapping N, you have to use N times the page size as
+	your offset:
+	</para>
+<programlisting format="linespecific">
+	offset = N * getpagesize();
+</programlisting>
+	<para>
+	N starts from zero, so if you've got only one memory
+	range to map, set <varname>offset = 0</varname>.
+	A drawback of this technique is that memory is always
+	mapped beginning with its start address.
+	</para>
+</sect1>
+
+<sect1 id="wait_for_interrupts">
+<title>Waiting for interrupts</title>
+	<para>
+	After you successfully mapped your devices memory, you
+	can access it like an ordinary array. Usually, you will
+	perform some initialization. After that, your hardware
+	starts working and will generate an interrupt as soon
+	as it's finished, has some data available, or needs your
+	attention because an error occured.
+	</para>
+	<para>
+	<filename>/dev/uioX</filename> is a read-only file. A
+	<function>read()</function> will always block until an
+	interrupt occurs. There is only one legal value for the
+	<varname>count</varname> parameter of
+	<function>read()</function>, and that is the size of a
+	signed 32 bit integer (4). Any other value for
+	<varname>count</varname> causes <function>read()</function>
+	to fail. The signed 32 bit integer read is the interrupt
+	count of your device. If the value is one more than the value
+	you read the last time, everything is OK. If the difference
+	is greater than one, you missed interrupts.
+	</para>
+	<para>
+	You can also use <function>select()</function> on
+	<filename>/dev/uioX</filename>.
+	</para>
+</sect1>
+
+</chapter>
+
+<appendix id="app1">
+<title>Further information</title>
+<itemizedlist>
+	<listitem><para>
+			<ulink url="http://www.osadl.org">
+				OSADL homepage.</ulink>
+		</para></listitem>
+	<listitem><para>
+		<ulink url="http://www.linutronix.de">
+		 Linutronix homepage.</ulink>
+		</para></listitem>
+</itemizedlist>
+</appendix>
+
+</book>
diff --git a/Documentation/DocBook/usb.tmpl b/Documentation/DocBook/usb.tmpl
new file mode 100644
index 0000000..af29360
--- /dev/null
+++ b/Documentation/DocBook/usb.tmpl
@@ -0,0 +1,980 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="Linux-USB-API">
+ <bookinfo>
+  <title>The Linux-USB Host Side API</title>
+  
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License as published by the Free Software Foundation; either
+     version 2 of the License, or (at your option) any later
+     version.
+   </para>
+      
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+      
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+      
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+<chapter id="intro">
+    <title>Introduction to USB on Linux</title>
+
+    <para>A Universal Serial Bus (USB) is used to connect a host,
+    such as a PC or workstation, to a number of peripheral
+    devices.  USB uses a tree structure, with the host as the
+    root (the system's master), hubs as interior nodes, and
+    peripherals as leaves (and slaves).
+    Modern PCs support several such trees of USB devices, usually
+    one USB 2.0 tree (480 Mbit/sec each) with
+    a few USB 1.1 trees (12 Mbit/sec each) that are used when you
+    connect a USB 1.1 device directly to the machine's "root hub".
+    </para>
+
+    <para>That master/slave asymmetry was designed-in for a number of
+    reasons, one being ease of use.  It is not physically possible to
+    assemble (legal) USB cables incorrectly:  all upstream "to the host"
+    connectors are the rectangular type (matching the sockets on
+    root hubs), and all downstream connectors are the squarish type
+    (or they are built into the peripheral).
+    Also, the host software doesn't need to deal with distributed
+    auto-configuration since the pre-designated master node manages all that.
+    And finally, at the electrical level, bus protocol overhead is reduced by
+    eliminating arbitration and moving scheduling into the host software.
+    </para>
+
+    <para>USB 1.0 was announced in January 1996 and was revised
+    as USB 1.1 (with improvements in hub specification and
+    support for interrupt-out transfers) in September 1998.
+    USB 2.0 was released in April 2000, adding high-speed
+    transfers and transaction-translating hubs (used for USB 1.1
+    and 1.0 backward compatibility).
+    </para>
+
+    <para>Kernel developers added USB support to Linux early in the 2.2 kernel
+    series, shortly before 2.3 development forked.  Updates from 2.3 were
+    regularly folded back into 2.2 releases, which improved reliability and
+    brought <filename>/sbin/hotplug</filename> support as well more drivers.
+    Such improvements were continued in the 2.5 kernel series, where they added
+    USB 2.0 support, improved performance, and made the host controller drivers
+    (HCDs) more consistent.  They also simplified the API (to make bugs less
+    likely) and added internal "kerneldoc" documentation.
+    </para>
+
+    <para>Linux can run inside USB devices as well as on
+    the hosts that control the devices.
+    But USB device drivers running inside those peripherals
+    don't do the same things as the ones running inside hosts,
+    so they've been given a different name:
+    <emphasis>gadget drivers</emphasis>.
+    This document does not cover gadget drivers.
+    </para>
+
+    </chapter>
+
+<chapter id="host">
+    <title>USB Host-Side API Model</title>
+
+    <para>Host-side drivers for USB devices talk to the "usbcore" APIs.
+    There are two.  One is intended for
+    <emphasis>general-purpose</emphasis> drivers (exposed through
+    driver frameworks), and the other is for drivers that are
+    <emphasis>part of the core</emphasis>.
+    Such core drivers include the <emphasis>hub</emphasis> driver
+    (which manages trees of USB devices) and several different kinds
+    of <emphasis>host controller drivers</emphasis>,
+    which control individual busses.
+    </para>
+
+    <para>The device model seen by USB drivers is relatively complex.
+    </para>
+     
+    <itemizedlist>
+
+	<listitem><para>USB supports four kinds of data transfers
+	(control, bulk, interrupt, and isochronous).  Two of them (control
+	and bulk) use bandwidth as it's available,
+	while the other two (interrupt and isochronous)
+	are scheduled to provide guaranteed bandwidth.
+	</para></listitem>
+
+	<listitem><para>The device description model includes one or more
+	"configurations" per device, only one of which is active at a time.
+	Devices that are capable of high-speed operation must also support
+	full-speed configurations, along with a way to ask about the
+	"other speed" configurations which might be used.
+	</para></listitem>
+
+	<listitem><para>Configurations have one or more "interfaces", each
+	of which may have "alternate settings".  Interfaces may be
+	standardized by USB "Class" specifications, or may be specific to
+	a vendor or device.</para>
+
+	<para>USB device drivers actually bind to interfaces, not devices.
+	Think of them as "interface drivers", though you
+	may not see many devices where the distinction is important.
+	<emphasis>Most USB devices are simple, with only one configuration,
+	one interface, and one alternate setting.</emphasis>
+	</para></listitem>
+
+	<listitem><para>Interfaces have one or more "endpoints", each of
+	which supports one type and direction of data transfer such as
+	"bulk out" or "interrupt in".  The entire configuration may have
+	up to sixteen endpoints in each direction, allocated as needed
+	among all the interfaces.
+	</para></listitem>
+
+	<listitem><para>Data transfer on USB is packetized; each endpoint
+	has a maximum packet size.
+	Drivers must often be aware of conventions such as flagging the end
+	of bulk transfers using "short" (including zero length) packets.
+	</para></listitem>
+
+	<listitem><para>The Linux USB API supports synchronous calls for
+	control and bulk messages.
+	It also supports asynchnous calls for all kinds of data transfer,
+	using request structures called "URBs" (USB Request Blocks).
+	</para></listitem>
+
+    </itemizedlist>
+
+    <para>Accordingly, the USB Core API exposed to device drivers
+    covers quite a lot of territory.  You'll probably need to consult
+    the USB 2.0 specification, available online from www.usb.org at
+    no cost, as well as class or device specifications.
+    </para>
+
+    <para>The only host-side drivers that actually touch hardware
+    (reading/writing registers, handling IRQs, and so on) are the HCDs.
+    In theory, all HCDs provide the same functionality through the same
+    API.  In practice, that's becoming more true on the 2.5 kernels,
+    but there are still differences that crop up especially with
+    fault handling.  Different controllers don't necessarily report
+    the same aspects of failures, and recovery from faults (including
+    software-induced ones like unlinking an URB) isn't yet fully
+    consistent.
+    Device driver authors should make a point of doing disconnect
+    testing (while the device is active) with each different host
+    controller driver, to make sure drivers don't have bugs of
+    their own as well as to make sure they aren't relying on some
+    HCD-specific behavior.
+    (You will need external USB 1.1 and/or
+    USB 2.0 hubs to perform all those tests.)
+    </para>
+
+    </chapter>
+
+<chapter id="types"><title>USB-Standard Types</title>
+
+    <para>In <filename>&lt;linux/usb/ch9.h&gt;</filename> you will find
+    the USB data types defined in chapter 9 of the USB specification.
+    These data types are used throughout USB, and in APIs including
+    this host side API, gadget APIs, and usbfs.
+    </para>
+
+!Iinclude/linux/usb/ch9.h
+
+    </chapter>
+
+<chapter id="hostside"><title>Host-Side Data Types and Macros</title>
+
+    <para>The host side API exposes several layers to drivers, some of
+    which are more necessary than others.
+    These support lifecycle models for host side drivers
+    and devices, and support passing buffers through usbcore to
+    some HCD that performs the I/O for the device driver.
+    </para>
+
+
+!Iinclude/linux/usb.h
+
+    </chapter>
+
+    <chapter id="usbcore"><title>USB Core APIs</title>
+
+    <para>There are two basic I/O models in the USB API.
+    The most elemental one is asynchronous:  drivers submit requests
+    in the form of an URB, and the URB's completion callback
+    handle the next step.
+    All USB transfer types support that model, although there
+    are special cases for control URBs (which always have setup
+    and status stages, but may not have a data stage) and
+    isochronous URBs (which allow large packets and include
+    per-packet fault reports).
+    Built on top of that is synchronous API support, where a
+    driver calls a routine that allocates one or more URBs,
+    submits them, and waits until they complete.
+    There are synchronous wrappers for single-buffer control
+    and bulk transfers (which are awkward to use in some
+    driver disconnect scenarios), and for scatterlist based
+    streaming i/o (bulk or interrupt).
+    </para>
+
+    <para>USB drivers need to provide buffers that can be
+    used for DMA, although they don't necessarily need to
+    provide the DMA mapping themselves.
+    There are APIs to use used when allocating DMA buffers,
+    which can prevent use of bounce buffers on some systems.
+    In some cases, drivers may be able to rely on 64bit DMA
+    to eliminate another kind of bounce buffer.
+    </para>
+
+!Edrivers/usb/core/urb.c
+!Edrivers/usb/core/message.c
+!Edrivers/usb/core/file.c
+!Edrivers/usb/core/driver.c
+!Edrivers/usb/core/usb.c
+!Edrivers/usb/core/hub.c
+    </chapter>
+
+    <chapter id="hcd"><title>Host Controller APIs</title>
+
+    <para>These APIs are only for use by host controller drivers,
+    most of which implement standard register interfaces such as
+    EHCI, OHCI, or UHCI.
+    UHCI was one of the first interfaces, designed by Intel and
+    also used by VIA; it doesn't do much in hardware.
+    OHCI was designed later, to have the hardware do more work
+    (bigger transfers, tracking protocol state, and so on).
+    EHCI was designed with USB 2.0; its design has features that
+    resemble OHCI (hardware does much more work) as well as
+    UHCI (some parts of ISO support, TD list processing).
+    </para>
+
+    <para>There are host controllers other than the "big three",
+    although most PCI based controllers (and a few non-PCI based
+    ones) use one of those interfaces.
+    Not all host controllers use DMA; some use PIO, and there
+    is also a simulator.
+    </para>
+
+    <para>The same basic APIs are available to drivers for all
+    those controllers.  
+    For historical reasons they are in two layers:
+    <structname>struct usb_bus</structname> is a rather thin
+    layer that became available in the 2.2 kernels, while
+    <structname>struct usb_hcd</structname> is a more featureful
+    layer (available in later 2.4 kernels and in 2.5) that
+    lets HCDs share common code, to shrink driver size
+    and significantly reduce hcd-specific behaviors.
+    </para>
+
+!Edrivers/usb/core/hcd.c
+!Edrivers/usb/core/hcd-pci.c
+!Idrivers/usb/core/buffer.c
+    </chapter>
+
+    <chapter id="usbfs">
+	<title>The USB Filesystem (usbfs)</title>
+
+	<para>This chapter presents the Linux <emphasis>usbfs</emphasis>.
+	You may prefer to avoid writing new kernel code for your
+	USB driver; that's the problem that usbfs set out to solve.
+	User mode device drivers are usually packaged as applications
+	or libraries, and may use usbfs through some programming library
+	that wraps it.  Such libraries include
+	<ulink url="http://libusb.sourceforge.net">libusb</ulink>
+	for C/C++, and
+	<ulink url="http://jUSB.sourceforge.net">jUSB</ulink> for Java.
+	</para>
+
+	<note><title>Unfinished</title>
+	    <para>This particular documentation is incomplete,
+	    especially with respect to the asynchronous mode.
+	    As of kernel 2.5.66 the code and this (new) documentation
+	    need to be cross-reviewed.
+	    </para>
+	    </note>
+
+	<para>Configure usbfs into Linux kernels by enabling the
+	<emphasis>USB filesystem</emphasis> option (CONFIG_USB_DEVICEFS),
+	and you get basic support for user mode USB device drivers.
+	Until relatively recently it was often (confusingly) called
+	<emphasis>usbdevfs</emphasis> although it wasn't solving what
+	<emphasis>devfs</emphasis> was.
+	Every USB device will appear in usbfs, regardless of whether or
+	not it has a kernel driver.
+	</para>
+
+	<sect1 id="usbfs-files">
+	    <title>What files are in "usbfs"?</title>
+
+	    <para>Conventionally mounted at
+	    <filename>/proc/bus/usb</filename>, usbfs 
+	    features include:
+	    <itemizedlist>
+		<listitem><para><filename>/proc/bus/usb/devices</filename>
+		    ... a text file
+		    showing each of the USB devices on known to the kernel,
+		    and their configuration descriptors.
+		    You can also poll() this to learn about new devices.
+		    </para></listitem>
+		<listitem><para><filename>/proc/bus/usb/BBB/DDD</filename>
+		    ... magic files
+		    exposing the each device's configuration descriptors, and
+		    supporting a series of ioctls for making device requests,
+		    including I/O to devices.  (Purely for access by programs.)
+		    </para></listitem>
+	    </itemizedlist>
+	    </para>
+
+	    <para> Each bus is given a number (BBB) based on when it was
+	    enumerated; within each bus, each device is given a similar
+	    number (DDD).
+	    Those BBB/DDD paths are not "stable" identifiers;
+	    expect them to change even if you always leave the devices
+	    plugged in to the same hub port.
+	    <emphasis>Don't even think of saving these in application
+	    configuration files.</emphasis>
+	    Stable identifiers are available, for user mode applications
+	    that want to use them.  HID and networking devices expose
+	    these stable IDs, so that for example you can be sure that
+	    you told the right UPS to power down its second server.
+	    "usbfs" doesn't (yet) expose those IDs.
+	    </para>
+
+	</sect1>
+
+	<sect1 id="usbfs-fstab">
+	    <title>Mounting and Access Control</title>
+
+	    <para>There are a number of mount options for usbfs, which will
+	    be of most interest to you if you need to override the default
+	    access control policy.
+	    That policy is that only root may read or write device files
+	    (<filename>/proc/bus/BBB/DDD</filename>) although anyone may read
+	    the <filename>devices</filename>
+	    or <filename>drivers</filename> files.
+	    I/O requests to the device also need the CAP_SYS_RAWIO capability,
+	    </para>
+
+	    <para>The significance of that is that by default, all user mode
+	    device drivers need super-user privileges.
+	    You can change modes or ownership in a driver setup
+	    when the device hotplugs, or maye just start the
+	    driver right then, as a privileged server (or some activity
+	    within one).
+	    That's the most secure approach for multi-user systems,
+	    but for single user systems ("trusted" by that user)
+	    it's more convenient just to grant everyone all access
+	    (using the <emphasis>devmode=0666</emphasis> option)
+	    so the driver can start whenever it's needed.
+	    </para>
+
+	    <para>The mount options for usbfs, usable in /etc/fstab or
+	    in command line invocations of <emphasis>mount</emphasis>, are:
+
+	    <variablelist>
+		<varlistentry>
+		    <term><emphasis>busgid</emphasis>=NNNNN</term>
+		    <listitem><para>Controls the GID used for the
+		    /proc/bus/usb/BBB
+		    directories.  (Default: 0)</para></listitem></varlistentry>
+		<varlistentry><term><emphasis>busmode</emphasis>=MMM</term>
+		    <listitem><para>Controls the file mode used for the
+		    /proc/bus/usb/BBB
+		    directories.  (Default: 0555)
+		    </para></listitem></varlistentry>
+		<varlistentry><term><emphasis>busuid</emphasis>=NNNNN</term>
+		    <listitem><para>Controls the UID used for the
+		    /proc/bus/usb/BBB
+		    directories.  (Default: 0)</para></listitem></varlistentry>
+
+		<varlistentry><term><emphasis>devgid</emphasis>=NNNNN</term>
+		    <listitem><para>Controls the GID used for the
+		    /proc/bus/usb/BBB/DDD
+		    files.  (Default: 0)</para></listitem></varlistentry>
+		<varlistentry><term><emphasis>devmode</emphasis>=MMM</term>
+		    <listitem><para>Controls the file mode used for the
+		    /proc/bus/usb/BBB/DDD
+		    files.  (Default: 0644)</para></listitem></varlistentry>
+		<varlistentry><term><emphasis>devuid</emphasis>=NNNNN</term>
+		    <listitem><para>Controls the UID used for the
+		    /proc/bus/usb/BBB/DDD
+		    files.  (Default: 0)</para></listitem></varlistentry>
+
+		<varlistentry><term><emphasis>listgid</emphasis>=NNNNN</term>
+		    <listitem><para>Controls the GID used for the
+		    /proc/bus/usb/devices and drivers files.
+		    (Default: 0)</para></listitem></varlistentry>
+		<varlistentry><term><emphasis>listmode</emphasis>=MMM</term>
+		    <listitem><para>Controls the file mode used for the
+		    /proc/bus/usb/devices and drivers files.
+		    (Default: 0444)</para></listitem></varlistentry>
+		<varlistentry><term><emphasis>listuid</emphasis>=NNNNN</term>
+		    <listitem><para>Controls the UID used for the
+		    /proc/bus/usb/devices and drivers files.
+		    (Default: 0)</para></listitem></varlistentry>
+	    </variablelist>
+
+	    </para>
+
+	    <para>Note that many Linux distributions hard-wire the mount options
+	    for usbfs in their init scripts, such as
+	    <filename>/etc/rc.d/rc.sysinit</filename>,
+	    rather than making it easy to set this per-system
+	    policy in <filename>/etc/fstab</filename>.
+	    </para>
+
+	</sect1>
+
+	<sect1 id="usbfs-devices">
+	    <title>/proc/bus/usb/devices</title>
+
+	    <para>This file is handy for status viewing tools in user
+	    mode, which can scan the text format and ignore most of it.
+	    More detailed device status (including class and vendor
+	    status) is available from device-specific files.
+	    For information about the current format of this file,
+	    see the
+	    <filename>Documentation/usb/proc_usb_info.txt</filename>
+	    file in your Linux kernel sources.
+	    </para>
+
+	    <para>This file, in combination with the poll() system call, can
+	    also be used to detect when devices are added or removed:
+<programlisting>int fd;
+struct pollfd pfd;
+
+fd = open("/proc/bus/usb/devices", O_RDONLY);
+pfd = { fd, POLLIN, 0 };
+for (;;) {
+	/* The first time through, this call will return immediately. */
+	poll(&amp;pfd, 1, -1);
+
+	/* To see what's changed, compare the file's previous and current
+	   contents or scan the filesystem.  (Scanning is more precise.) */
+}</programlisting>
+	    Note that this behavior is intended to be used for informational
+	    and debug purposes.  It would be more appropriate to use programs
+	    such as udev or HAL to initialize a device or start a user-mode
+	    helper program, for instance.
+	    </para>
+	</sect1>
+
+	<sect1 id="usbfs-bbbddd">
+	    <title>/proc/bus/usb/BBB/DDD</title>
+
+	    <para>Use these files in one of these basic ways:
+	    </para>
+
+	    <para><emphasis>They can be read,</emphasis>
+	    producing first the device descriptor
+	    (18 bytes) and then the descriptors for the current configuration.
+	    See the USB 2.0 spec for details about those binary data formats.
+	    You'll need to convert most multibyte values from little endian
+	    format to your native host byte order, although a few of the
+	    fields in the device descriptor (both of the BCD-encoded fields,
+	    and the vendor and product IDs) will be byteswapped for you.
+	    Note that configuration descriptors include descriptors for
+	    interfaces, altsettings, endpoints, and maybe additional
+	    class descriptors.
+	    </para>
+
+	    <para><emphasis>Perform USB operations</emphasis> using 
+	    <emphasis>ioctl()</emphasis> requests to make endpoint I/O
+	    requests (synchronously or asynchronously) or manage
+	    the device.
+	    These requests need the CAP_SYS_RAWIO capability,
+	    as well as filesystem access permissions.
+	    Only one ioctl request can be made on one of these
+	    device files at a time.
+	    This means that if you are synchronously reading an endpoint
+	    from one thread, you won't be able to write to a different
+	    endpoint from another thread until the read completes.
+	    This works for <emphasis>half duplex</emphasis> protocols,
+	    but otherwise you'd use asynchronous i/o requests. 
+	    </para>
+
+	    </sect1>
+
+
+	<sect1 id="usbfs-lifecycle">
+	    <title>Life Cycle of User Mode Drivers</title>
+
+	    <para>Such a driver first needs to find a device file
+	    for a device it knows how to handle.
+	    Maybe it was told about it because a
+	    <filename>/sbin/hotplug</filename> event handling agent
+	    chose that driver to handle the new device.
+	    Or maybe it's an application that scans all the
+	    /proc/bus/usb device files, and ignores most devices.
+	    In either case, it should <function>read()</function> all
+	    the descriptors from the device file,
+	    and check them against what it knows how to handle.
+	    It might just reject everything except a particular
+	    vendor and product ID, or need a more complex policy.
+	    </para>
+
+	    <para>Never assume there will only be one such device
+	    on the system at a time!
+	    If your code can't handle more than one device at
+	    a time, at least detect when there's more than one, and
+	    have your users choose which device to use.
+	    </para>
+
+	    <para>Once your user mode driver knows what device to use,
+	    it interacts with it in either of two styles.
+	    The simple style is to make only control requests; some
+	    devices don't need more complex interactions than those.
+	    (An example might be software using vendor-specific control
+	    requests for some initialization or configuration tasks,
+	    with a kernel driver for the rest.)
+	    </para>
+
+	    <para>More likely, you need a more complex style driver:
+	    one using non-control endpoints, reading or writing data
+	    and claiming exclusive use of an interface.
+	    <emphasis>Bulk</emphasis> transfers are easiest to use,
+	    but only their sibling <emphasis>interrupt</emphasis> transfers 
+	    work with low speed devices.
+	    Both interrupt and <emphasis>isochronous</emphasis> transfers
+	    offer service guarantees because their bandwidth is reserved.
+	    Such "periodic" transfers are awkward to use through usbfs,
+	    unless you're using the asynchronous calls.  However, interrupt
+	    transfers can also be used in a synchronous "one shot" style.
+	    </para>
+
+	    <para>Your user-mode driver should never need to worry
+	    about cleaning up request state when the device is
+	    disconnected, although it should close its open file
+	    descriptors as soon as it starts seeing the ENODEV
+	    errors.
+	    </para>
+
+	    </sect1>
+
+	<sect1 id="usbfs-ioctl"><title>The ioctl() Requests</title>
+
+	    <para>To use these ioctls, you need to include the following
+	    headers in your userspace program:
+<programlisting>#include &lt;linux/usb.h&gt;
+#include &lt;linux/usbdevice_fs.h&gt;
+#include &lt;asm/byteorder.h&gt;</programlisting>
+	    The standard USB device model requests, from "Chapter 9" of
+	    the USB 2.0 specification, are automatically included from
+	    the <filename>&lt;linux/usb/ch9.h&gt;</filename> header.
+	    </para>
+
+	    <para>Unless noted otherwise, the ioctl requests
+	    described here will
+	    update the modification time on the usbfs file to which
+	    they are applied (unless they fail).
+	    A return of zero indicates success; otherwise, a
+	    standard USB error code is returned.  (These are
+	    documented in
+	    <filename>Documentation/usb/error-codes.txt</filename>
+	    in your kernel sources.)
+	    </para>
+
+	    <para>Each of these files multiplexes access to several
+	    I/O streams, one per endpoint.
+	    Each device has one control endpoint (endpoint zero)
+	    which supports a limited RPC style RPC access.
+	    Devices are configured
+	    by khubd (in the kernel) setting a device-wide
+	    <emphasis>configuration</emphasis> that affects things
+	    like power consumption and basic functionality.
+	    The endpoints are part of USB <emphasis>interfaces</emphasis>,
+	    which may have <emphasis>altsettings</emphasis>
+	    affecting things like which endpoints are available.
+	    Many devices only have a single configuration and interface,
+	    so drivers for them will ignore configurations and altsettings.
+	    </para>
+
+
+	    <sect2 id="usbfs-mgmt">
+		<title>Management/Status Requests</title>
+
+		<para>A number of usbfs requests don't deal very directly
+		with device I/O.
+		They mostly relate to device management and status.
+		These are all synchronous requests.
+		</para>
+
+		<variablelist>
+
+		<varlistentry><term>USBDEVFS_CLAIMINTERFACE</term>
+		    <listitem><para>This is used to force usbfs to
+		    claim a specific interface,
+		    which has not previously been claimed by usbfs or any other
+		    kernel driver.
+		    The ioctl parameter is an integer holding the number of
+		    the interface (bInterfaceNumber from descriptor).
+		    </para><para>
+		    Note that if your driver doesn't claim an interface
+		    before trying to use one of its endpoints, and no
+		    other driver has bound to it, then the interface is
+		    automatically claimed by usbfs.
+		    </para><para>
+		    This claim will be released by a RELEASEINTERFACE ioctl,
+		    or by closing the file descriptor.
+		    File modification time is not updated by this request.
+		    </para></listitem></varlistentry>
+
+		<varlistentry><term>USBDEVFS_CONNECTINFO</term>
+		    <listitem><para>Says whether the device is lowspeed.
+		    The ioctl parameter points to a structure like this:
+<programlisting>struct usbdevfs_connectinfo {
+        unsigned int   devnum;
+        unsigned char  slow;
+}; </programlisting>
+		    File modification time is not updated by this request.
+		    </para><para>
+		    <emphasis>You can't tell whether a "not slow"
+		    device is connected at high speed (480 MBit/sec)
+		    or just full speed (12 MBit/sec).</emphasis>
+		    You should know the devnum value already,
+		    it's the DDD value of the device file name.
+		    </para></listitem></varlistentry>
+
+		<varlistentry><term>USBDEVFS_GETDRIVER</term>
+		    <listitem><para>Returns the name of the kernel driver
+		    bound to a given interface (a string).  Parameter
+		    is a pointer to this structure, which is modified:
+<programlisting>struct usbdevfs_getdriver {
+        unsigned int  interface;
+        char          driver[USBDEVFS_MAXDRIVERNAME + 1];
+};</programlisting>
+		    File modification time is not updated by this request.
+		    </para></listitem></varlistentry>
+
+		<varlistentry><term>USBDEVFS_IOCTL</term>
+		    <listitem><para>Passes a request from userspace through
+		    to a kernel driver that has an ioctl entry in the
+		    <emphasis>struct usb_driver</emphasis> it registered.
+<programlisting>struct usbdevfs_ioctl {
+        int     ifno;
+        int     ioctl_code;
+        void    *data;
+};
+
+/* user mode call looks like this.
+ * 'request' becomes the driver->ioctl() 'code' parameter.
+ * the size of 'param' is encoded in 'request', and that data
+ * is copied to or from the driver->ioctl() 'buf' parameter.
+ */
+static int
+usbdev_ioctl (int fd, int ifno, unsigned request, void *param)
+{
+        struct usbdevfs_ioctl	wrapper;
+
+        wrapper.ifno = ifno;
+        wrapper.ioctl_code = request;
+        wrapper.data = param;
+
+        return ioctl (fd, USBDEVFS_IOCTL, &amp;wrapper);
+} </programlisting>
+		    File modification time is not updated by this request.
+		    </para><para>
+		    This request lets kernel drivers talk to user mode code
+		    through filesystem operations even when they don't create
+		    a charactor or block special device.
+		    It's also been used to do things like ask devices what
+		    device special file should be used.
+		    Two pre-defined ioctls are used
+		    to disconnect and reconnect kernel drivers, so
+		    that user mode code can completely manage binding
+		    and configuration of devices.
+		    </para></listitem></varlistentry>
+
+		<varlistentry><term>USBDEVFS_RELEASEINTERFACE</term>
+		    <listitem><para>This is used to release the claim usbfs
+		    made on interface, either implicitly or because of a
+		    USBDEVFS_CLAIMINTERFACE call, before the file
+		    descriptor is closed.
+		    The ioctl parameter is an integer holding the number of
+		    the interface (bInterfaceNumber from descriptor);
+		    File modification time is not updated by this request.
+		    </para><warning><para>
+		    <emphasis>No security check is made to ensure
+		    that the task which made the claim is the one
+		    which is releasing it.
+		    This means that user mode driver may interfere
+		    other ones.  </emphasis>
+		    </para></warning></listitem></varlistentry>
+
+		<varlistentry><term>USBDEVFS_RESETEP</term>
+		    <listitem><para>Resets the data toggle value for an endpoint
+		    (bulk or interrupt) to DATA0.
+		    The ioctl parameter is an integer endpoint number
+		    (1 to 15, as identified in the endpoint descriptor),
+		    with USB_DIR_IN added if the device's endpoint sends
+		    data to the host.
+		    </para><warning><para>
+		    <emphasis>Avoid using this request.
+		    It should probably be removed.</emphasis>
+		    Using it typically means the device and driver will lose
+		    toggle synchronization.  If you really lost synchronization,
+		    you likely need to completely handshake with the device,
+		    using a request like CLEAR_HALT
+		    or SET_INTERFACE.
+		    </para></warning></listitem></varlistentry>
+
+		</variablelist>
+
+		</sect2>
+
+	    <sect2 id="usbfs-sync">
+		<title>Synchronous I/O Support</title>
+
+		<para>Synchronous requests involve the kernel blocking
+		until the user mode request completes, either by
+		finishing successfully or by reporting an error.
+		In most cases this is the simplest way to use usbfs,
+		although as noted above it does prevent performing I/O
+		to more than one endpoint at a time.
+		</para>
+
+		<variablelist>
+
+		<varlistentry><term>USBDEVFS_BULK</term>
+		    <listitem><para>Issues a bulk read or write request to the
+		    device.
+		    The ioctl parameter is a pointer to this structure:
+<programlisting>struct usbdevfs_bulktransfer {
+        unsigned int  ep;
+        unsigned int  len;
+        unsigned int  timeout; /* in milliseconds */
+        void          *data;
+};</programlisting>
+		    </para><para>The "ep" value identifies a
+		    bulk endpoint number (1 to 15, as identified in an endpoint
+		    descriptor),
+		    masked with USB_DIR_IN when referring to an endpoint which
+		    sends data to the host from the device.
+		    The length of the data buffer is identified by "len";
+		    Recent kernels support requests up to about 128KBytes.
+		    <emphasis>FIXME say how read length is returned,
+		    and how short reads are handled.</emphasis>.
+		    </para></listitem></varlistentry>
+
+		<varlistentry><term>USBDEVFS_CLEAR_HALT</term>
+		    <listitem><para>Clears endpoint halt (stall) and
+		    resets the endpoint toggle.  This is only
+		    meaningful for bulk or interrupt endpoints.
+		    The ioctl parameter is an integer endpoint number
+		    (1 to 15, as identified in an endpoint descriptor),
+		    masked with USB_DIR_IN when referring to an endpoint which
+		    sends data to the host from the device.
+		    </para><para>
+		    Use this on bulk or interrupt endpoints which have
+		    stalled, returning <emphasis>-EPIPE</emphasis> status
+		    to a data transfer request.
+		    Do not issue the control request directly, since
+		    that could invalidate the host's record of the
+		    data toggle.
+		    </para></listitem></varlistentry>
+
+		<varlistentry><term>USBDEVFS_CONTROL</term>
+		    <listitem><para>Issues a control request to the device.
+		    The ioctl parameter points to a structure like this:
+<programlisting>struct usbdevfs_ctrltransfer {
+        __u8   bRequestType;
+        __u8   bRequest;
+        __u16  wValue;
+        __u16  wIndex;
+        __u16  wLength;
+        __u32  timeout;  /* in milliseconds */
+        void   *data;
+};</programlisting>
+		    </para><para>
+		    The first eight bytes of this structure are the contents
+		    of the SETUP packet to be sent to the device; see the
+		    USB 2.0 specification for details.
+		    The bRequestType value is composed by combining a
+		    USB_TYPE_* value, a USB_DIR_* value, and a
+		    USB_RECIP_* value (from
+		    <emphasis>&lt;linux/usb.h&gt;</emphasis>).
+		    If wLength is nonzero, it describes the length of the data
+		    buffer, which is either written to the device
+		    (USB_DIR_OUT) or read from the device (USB_DIR_IN).
+		    </para><para>
+		    At this writing, you can't transfer more than 4 KBytes
+		    of data to or from a device; usbfs has a limit, and
+		    some host controller drivers have a limit.
+		    (That's not usually a problem.)
+		    <emphasis>Also</emphasis> there's no way to say it's
+		    not OK to get a short read back from the device.
+		    </para></listitem></varlistentry>
+
+		<varlistentry><term>USBDEVFS_RESET</term>
+		    <listitem><para>Does a USB level device reset.
+		    The ioctl parameter is ignored.
+		    After the reset, this rebinds all device interfaces.
+		    File modification time is not updated by this request.
+		    </para><warning><para>
+		    <emphasis>Avoid using this call</emphasis>
+		    until some usbcore bugs get fixed,
+		    since it does not fully synchronize device, interface,
+		    and driver (not just usbfs) state.
+		    </para></warning></listitem></varlistentry>
+	    
+		<varlistentry><term>USBDEVFS_SETINTERFACE</term>
+		    <listitem><para>Sets the alternate setting for an
+		    interface.  The ioctl parameter is a pointer to a
+		    structure like this:
+<programlisting>struct usbdevfs_setinterface {
+        unsigned int  interface;
+        unsigned int  altsetting;
+}; </programlisting>
+		    File modification time is not updated by this request.
+		    </para><para>
+		    Those struct members are from some interface descriptor
+		    applying to the current configuration.
+		    The interface number is the bInterfaceNumber value, and
+		    the altsetting number is the bAlternateSetting value.
+		    (This resets each endpoint in the interface.)
+		    </para></listitem></varlistentry>
+
+		<varlistentry><term>USBDEVFS_SETCONFIGURATION</term>
+		    <listitem><para>Issues the
+		    <function>usb_set_configuration</function> call
+		    for the device.
+		    The parameter is an integer holding the number of
+		    a configuration (bConfigurationValue from descriptor).
+		    File modification time is not updated by this request.
+		    </para><warning><para>
+		    <emphasis>Avoid using this call</emphasis>
+		    until some usbcore bugs get fixed,
+		    since it does not fully synchronize device, interface,
+		    and driver (not just usbfs) state.
+		    </para></warning></listitem></varlistentry>
+
+		</variablelist>
+	    </sect2>
+
+	    <sect2 id="usbfs-async">
+		<title>Asynchronous I/O Support</title>
+
+		<para>As mentioned above, there are situations where it may be
+		important to initiate concurrent operations from user mode code.
+		This is particularly important for periodic transfers
+		(interrupt and isochronous), but it can be used for other
+		kinds of USB requests too.
+		In such cases, the asynchronous requests described here
+		are essential.  Rather than submitting one request and having
+		the kernel block until it completes, the blocking is separate.
+		</para>
+
+		<para>These requests are packaged into a structure that
+		resembles the URB used by kernel device drivers.
+		(No POSIX Async I/O support here, sorry.)
+		It identifies the endpoint type (USBDEVFS_URB_TYPE_*),
+		endpoint (number, masked with USB_DIR_IN as appropriate),
+		buffer and length, and a user "context" value serving to
+		uniquely identify each request.
+		(It's usually a pointer to per-request data.)
+		Flags can modify requests (not as many as supported for
+		kernel drivers).
+		</para>
+
+		<para>Each request can specify a realtime signal number
+		(between SIGRTMIN and SIGRTMAX, inclusive) to request a
+		signal be sent when the request completes.
+		</para>
+
+		<para>When usbfs returns these urbs, the status value
+		is updated, and the buffer may have been modified.
+		Except for isochronous transfers, the actual_length is
+		updated to say how many bytes were transferred; if the
+		USBDEVFS_URB_DISABLE_SPD flag is set
+		("short packets are not OK"), if fewer bytes were read
+		than were requested then you get an error report.
+		</para>
+
+<programlisting>struct usbdevfs_iso_packet_desc {
+        unsigned int                     length;
+        unsigned int                     actual_length;
+        unsigned int                     status;
+};
+
+struct usbdevfs_urb {
+        unsigned char                    type;
+        unsigned char                    endpoint;
+        int                              status;
+        unsigned int                     flags;
+        void                             *buffer;
+        int                              buffer_length;
+        int                              actual_length;
+        int                              start_frame;
+        int                              number_of_packets;
+        int                              error_count;
+        unsigned int                     signr;
+        void                             *usercontext;
+        struct usbdevfs_iso_packet_desc  iso_frame_desc[];
+};</programlisting>
+
+		<para> For these asynchronous requests, the file modification
+		time reflects when the request was initiated.
+		This contrasts with their use with the synchronous requests,
+		where it reflects when requests complete.
+		</para>
+
+		<variablelist>
+
+		<varlistentry><term>USBDEVFS_DISCARDURB</term>
+		    <listitem><para>
+		    <emphasis>TBS</emphasis>
+		    File modification time is not updated by this request.
+		    </para><para>
+		    </para></listitem></varlistentry>
+
+		<varlistentry><term>USBDEVFS_DISCSIGNAL</term>
+		    <listitem><para>
+		    <emphasis>TBS</emphasis>
+		    File modification time is not updated by this request.
+		    </para><para>
+		    </para></listitem></varlistentry>
+
+		<varlistentry><term>USBDEVFS_REAPURB</term>
+		    <listitem><para>
+		    <emphasis>TBS</emphasis>
+		    File modification time is not updated by this request.
+		    </para><para>
+		    </para></listitem></varlistentry>
+
+		<varlistentry><term>USBDEVFS_REAPURBNDELAY</term>
+		    <listitem><para>
+		    <emphasis>TBS</emphasis>
+		    File modification time is not updated by this request.
+		    </para><para>
+		    </para></listitem></varlistentry>
+
+		<varlistentry><term>USBDEVFS_SUBMITURB</term>
+		    <listitem><para>
+		    <emphasis>TBS</emphasis>
+		    </para><para>
+		    </para></listitem></varlistentry>
+
+		</variablelist>
+	    </sect2>
+
+	</sect1>
+
+    </chapter>
+
+</book>
+<!-- vim:syntax=sgml:sw=4
+-->
diff --git a/Documentation/DocBook/wanbook.tmpl b/Documentation/DocBook/wanbook.tmpl
new file mode 100644
index 0000000..8c93db1
--- /dev/null
+++ b/Documentation/DocBook/wanbook.tmpl
@@ -0,0 +1,99 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="WANGuide">
+ <bookinfo>
+  <title>Synchronous PPP and Cisco HDLC Programming Guide</title>
+  
+  <authorgroup>
+   <author>
+    <firstname>Alan</firstname>
+    <surname>Cox</surname>
+    <affiliation>
+     <address>
+      <email>alan@lxorguk.ukuu.org.uk</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <copyright>
+   <year>2000</year>
+   <holder>Alan Cox</holder>
+  </copyright>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License as published by the Free Software Foundation; either
+     version 2 of the License, or (at your option) any later
+     version.
+   </para>
+      
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+      
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+      
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+  <chapter id="intro">
+      <title>Introduction</title>
+  <para>
+	The syncppp drivers in Linux provide a fairly complete 
+	implementation of Cisco HDLC and a minimal implementation of
+	PPP. The longer term goal is to switch the PPP layer to the
+	generic PPP interface that is new in Linux 2.3.x. The API should
+	remain unchanged when this is done, but support will then be
+	available for IPX, compression and other PPP features
+  </para>
+  </chapter>
+  <chapter id="bugs">
+     <title>Known Bugs And Assumptions</title>
+  <para>
+  <variablelist>
+    <varlistentry><term>PPP is minimal</term>
+    <listitem>
+    <para>
+	The current PPP implementation is very basic, although sufficient
+	for most wan usages.
+    </para>
+    </listitem></varlistentry>
+
+    <varlistentry><term>Cisco HDLC Quirks</term>
+    <listitem>
+    <para>
+	Currently we do not end all packets with the correct Cisco multicast
+	or unicast flags. Nothing appears to mind too much but this should
+	be corrected.
+    </para>
+    </listitem></varlistentry>
+  </variablelist>
+	
+  </para>
+  </chapter>
+
+  <chapter id="pubfunctions">
+     <title>Public Functions Provided</title>
+!Edrivers/net/wan/syncppp.c
+  </chapter>
+
+</book>
diff --git a/Documentation/DocBook/writing_usb_driver.tmpl b/Documentation/DocBook/writing_usb_driver.tmpl
new file mode 100644
index 0000000..eeff19c
--- /dev/null
+++ b/Documentation/DocBook/writing_usb_driver.tmpl
@@ -0,0 +1,412 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="USBDeviceDriver">
+ <bookinfo>
+  <title>Writing USB Device Drivers</title>
+  
+  <authorgroup>
+   <author>
+    <firstname>Greg</firstname>
+    <surname>Kroah-Hartman</surname>
+    <affiliation>
+     <address>
+      <email>greg@kroah.com</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <copyright>
+   <year>2001-2002</year>
+   <holder>Greg Kroah-Hartman</holder>
+  </copyright>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License as published by the Free Software Foundation; either
+     version 2 of the License, or (at your option) any later
+     version.
+   </para>
+      
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+      
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+      
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+
+   <para>
+     This documentation is based on an article published in 
+     Linux Journal Magazine, October 2001, Issue 90.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+  <chapter id="intro">
+      <title>Introduction</title>
+  <para>
+      The Linux USB subsystem has grown from supporting only two different
+      types of devices in the 2.2.7 kernel (mice and keyboards), to over 20
+      different types of devices in the 2.4 kernel. Linux currently supports
+      almost all USB class devices (standard types of devices like keyboards,
+      mice, modems, printers and speakers) and an ever-growing number of
+      vendor-specific devices (such as USB to serial converters, digital
+      cameras, Ethernet devices and MP3 players). For a full list of the
+      different USB devices currently supported, see Resources.
+  </para>
+  <para>
+      The remaining kinds of USB devices that do not have support on Linux are
+      almost all vendor-specific devices. Each vendor decides to implement a
+      custom protocol to talk to their device, so a custom driver usually needs
+      to be created. Some vendors are open with their USB protocols and help
+      with the creation of Linux drivers, while others do not publish them, and
+      developers are forced to reverse-engineer. See Resources for some links
+      to handy reverse-engineering tools.
+  </para>
+  <para>
+      Because each different protocol causes a new driver to be created, I have
+      written a generic USB driver skeleton, modeled after the pci-skeleton.c
+      file in the kernel source tree upon which many PCI network drivers have
+      been based. This USB skeleton can be found at drivers/usb/usb-skeleton.c
+      in the kernel source tree. In this article I will walk through the basics
+      of the skeleton driver, explaining the different pieces and what needs to
+      be done to customize it to your specific device.
+  </para>
+  </chapter>
+
+  <chapter id="basics">
+      <title>Linux USB Basics</title>
+  <para>
+      If you are going to write a Linux USB driver, please become familiar with
+      the USB protocol specification. It can be found, along with many other
+      useful documents, at the USB home page (see Resources). An excellent
+      introduction to the Linux USB subsystem can be found at the USB Working
+      Devices List (see Resources). It explains how the Linux USB subsystem is
+      structured and introduces the reader to the concept of USB urbs
+      (USB Request Blocks), which are essential to USB drivers.
+  </para>
+  <para>
+      The first thing a Linux USB driver needs to do is register itself with
+      the Linux USB subsystem, giving it some information about which devices
+      the driver supports and which functions to call when a device supported
+      by the driver is inserted or removed from the system. All of this
+      information is passed to the USB subsystem in the usb_driver structure.
+      The skeleton driver declares a usb_driver as:
+  </para>
+  <programlisting>
+static struct usb_driver skel_driver = {
+        .name        = "skeleton",
+        .probe       = skel_probe,
+        .disconnect  = skel_disconnect,
+        .fops        = &amp;skel_fops,
+        .minor       = USB_SKEL_MINOR_BASE,
+        .id_table    = skel_table,
+};
+  </programlisting>
+  <para>
+      The variable name is a string that describes the driver. It is used in
+      informational messages printed to the system log. The probe and
+      disconnect function pointers are called when a device that matches the
+      information provided in the id_table variable is either seen or removed.
+  </para>
+  <para>
+      The fops and minor variables are optional. Most USB drivers hook into
+      another kernel subsystem, such as the SCSI, network or TTY subsystem.
+      These types of drivers register themselves with the other kernel
+      subsystem, and any user-space interactions are provided through that
+      interface. But for drivers that do not have a matching kernel subsystem,
+      such as MP3 players or scanners, a method of interacting with user space
+      is needed. The USB subsystem provides a way to register a minor device
+      number and a set of file_operations function pointers that enable this
+      user-space interaction. The skeleton driver needs this kind of interface,
+      so it provides a minor starting number and a pointer to its
+      file_operations functions.
+  </para>
+  <para>
+      The USB driver is then registered with a call to usb_register, usually in
+      the driver's init function, as shown here:
+  </para>
+  <programlisting>
+static int __init usb_skel_init(void)
+{
+        int result;
+
+        /* register this driver with the USB subsystem */
+        result = usb_register(&amp;skel_driver);
+        if (result &lt; 0) {
+                err(&quot;usb_register failed for the &quot;__FILE__ &quot;driver.&quot;
+                    &quot;Error number %d&quot;, result);
+                return -1;
+        }
+
+        return 0;
+}
+module_init(usb_skel_init);
+  </programlisting>
+  <para>
+      When the driver is unloaded from the system, it needs to deregister
+      itself with the USB subsystem. This is done with the usb_deregister
+      function:
+  </para>
+  <programlisting>
+static void __exit usb_skel_exit(void)
+{
+        /* deregister this driver with the USB subsystem */
+        usb_deregister(&amp;skel_driver);
+}
+module_exit(usb_skel_exit);
+  </programlisting>
+  <para>
+     To enable the linux-hotplug system to load the driver automatically when
+     the device is plugged in, you need to create a MODULE_DEVICE_TABLE. The
+     following code tells the hotplug scripts that this module supports a
+     single device with a specific vendor and product ID:
+  </para>
+  <programlisting>
+/* table of devices that work with this driver */
+static struct usb_device_id skel_table [] = {
+        { USB_DEVICE(USB_SKEL_VENDOR_ID, USB_SKEL_PRODUCT_ID) },
+        { }                      /* Terminating entry */
+};
+MODULE_DEVICE_TABLE (usb, skel_table);
+  </programlisting>
+  <para>
+     There are other macros that can be used in describing a usb_device_id for
+     drivers that support a whole class of USB drivers. See usb.h for more
+     information on this.
+  </para>
+  </chapter>
+
+  <chapter id="device">
+      <title>Device operation</title>
+  <para>
+     When a device is plugged into the USB bus that matches the device ID
+     pattern that your driver registered with the USB core, the probe function
+     is called. The usb_device structure, interface number and the interface ID
+     are passed to the function:
+  </para>
+  <programlisting>
+static int skel_probe(struct usb_interface *interface,
+    const struct usb_device_id *id)
+  </programlisting>
+  <para>
+     The driver now needs to verify that this device is actually one that it
+     can accept. If so, it returns 0.
+     If not, or if any error occurs during initialization, an errorcode
+     (such as <literal>-ENOMEM</literal> or <literal>-ENODEV</literal>)
+     is returned from the probe function.
+  </para>
+  <para>
+     In the skeleton driver, we determine what end points are marked as bulk-in
+     and bulk-out. We create buffers to hold the data that will be sent and
+     received from the device, and a USB urb to write data to the device is
+     initialized.
+  </para>
+  <para>
+     Conversely, when the device is removed from the USB bus, the disconnect
+     function is called with the device pointer. The driver needs to clean any
+     private data that has been allocated at this time and to shut down any
+     pending urbs that are in the USB system.
+  </para>
+  <para>
+     Now that the device is plugged into the system and the driver is bound to
+     the device, any of the functions in the file_operations structure that
+     were passed to the USB subsystem will be called from a user program trying
+     to talk to the device. The first function called will be open, as the
+     program tries to open the device for I/O. We increment our private usage
+     count and save a pointer to our internal structure in the file
+     structure. This is done so that future calls to file operations will
+     enable the driver to determine which device the user is addressing.  All
+     of this is done with the following code:
+  </para>
+  <programlisting>
+/* increment our usage count for the module */
+++skel->open_count;
+
+/* save our object in the file's private structure */
+file->private_data = dev;
+  </programlisting>
+  <para>
+     After the open function is called, the read and write functions are called
+     to receive and send data to the device. In the skel_write function, we
+     receive a pointer to some data that the user wants to send to the device
+     and the size of the data. The function determines how much data it can
+     send to the device based on the size of the write urb it has created (this
+     size depends on the size of the bulk out end point that the device has).
+     Then it copies the data from user space to kernel space, points the urb to
+     the data and submits the urb to the USB subsystem.  This can be seen in
+     the following code:
+  </para>
+  <programlisting>
+/* we can only write as much as 1 urb will hold */
+bytes_written = (count > skel->bulk_out_size) ? skel->bulk_out_size : count;
+
+/* copy the data from user space into our urb */
+copy_from_user(skel->write_urb->transfer_buffer, buffer, bytes_written);
+
+/* set up our urb */
+usb_fill_bulk_urb(skel->write_urb,
+                  skel->dev,
+                  usb_sndbulkpipe(skel->dev, skel->bulk_out_endpointAddr),
+                  skel->write_urb->transfer_buffer,
+                  bytes_written,
+                  skel_write_bulk_callback,
+                  skel);
+
+/* send the data out the bulk port */
+result = usb_submit_urb(skel->write_urb);
+if (result) {
+        err(&quot;Failed submitting write urb, error %d&quot;, result);
+}
+  </programlisting>
+  <para>
+     When the write urb is filled up with the proper information using the
+     usb_fill_bulk_urb function, we point the urb's completion callback to call our
+     own skel_write_bulk_callback function. This function is called when the
+     urb is finished by the USB subsystem. The callback function is called in
+     interrupt context, so caution must be taken not to do very much processing
+     at that time. Our implementation of skel_write_bulk_callback merely
+     reports if the urb was completed successfully or not and then returns.
+  </para>
+  <para>
+     The read function works a bit differently from the write function in that
+     we do not use an urb to transfer data from the device to the driver.
+     Instead we call the usb_bulk_msg function, which can be used to send or
+     receive data from a device without having to create urbs and handle
+     urb completion callback functions. We call the usb_bulk_msg function,
+     giving it a buffer into which to place any data received from the device
+     and a timeout value. If the timeout period expires without receiving any
+     data from the device, the function will fail and return an error message.
+     This can be shown with the following code:
+  </para>
+  <programlisting>
+/* do an immediate bulk read to get data from the device */
+retval = usb_bulk_msg (skel->dev,
+                       usb_rcvbulkpipe (skel->dev,
+                       skel->bulk_in_endpointAddr),
+                       skel->bulk_in_buffer,
+                       skel->bulk_in_size,
+                       &amp;count, HZ*10);
+/* if the read was successful, copy the data to user space */
+if (!retval) {
+        if (copy_to_user (buffer, skel->bulk_in_buffer, count))
+                retval = -EFAULT;
+        else
+                retval = count;
+}
+  </programlisting>
+  <para>
+     The usb_bulk_msg function can be very useful for doing single reads or
+     writes to a device; however, if you need to read or write constantly to a
+     device, it is recommended to set up your own urbs and submit them to the
+     USB subsystem.
+  </para>
+  <para>
+     When the user program releases the file handle that it has been using to
+     talk to the device, the release function in the driver is called. In this
+     function we decrement our private usage count and wait for possible
+     pending writes:
+  </para>
+  <programlisting>
+/* decrement our usage count for the device */
+--skel->open_count;
+  </programlisting>
+  <para>
+     One of the more difficult problems that USB drivers must be able to handle
+     smoothly is the fact that the USB device may be removed from the system at
+     any point in time, even if a program is currently talking to it. It needs
+     to be able to shut down any current reads and writes and notify the
+     user-space programs that the device is no longer there. The following
+     code (function <function>skel_delete</function>)
+     is an example of how to do this: </para>
+  <programlisting>
+static inline void skel_delete (struct usb_skel *dev)
+{
+    kfree (dev->bulk_in_buffer);
+    if (dev->bulk_out_buffer != NULL)
+        usb_buffer_free (dev->udev, dev->bulk_out_size,
+            dev->bulk_out_buffer,
+            dev->write_urb->transfer_dma);
+    usb_free_urb (dev->write_urb);
+    kfree (dev);
+}
+  </programlisting>
+  <para>
+     If a program currently has an open handle to the device, we reset the flag
+     <literal>device_present</literal>. For
+     every read, write, release and other functions that expect a device to be
+     present, the driver first checks this flag to see if the device is
+     still present. If not, it releases that the device has disappeared, and a
+     -ENODEV error is returned to the user-space program. When the release
+     function is eventually called, it determines if there is no device
+     and if not, it does the cleanup that the skel_disconnect
+     function normally does if there are no open files on the device (see
+     Listing 5).
+  </para>
+  </chapter>
+
+  <chapter id="iso">
+      <title>Isochronous Data</title>
+  <para>
+     This usb-skeleton driver does not have any examples of interrupt or
+     isochronous data being sent to or from the device. Interrupt data is sent
+     almost exactly as bulk data is, with a few minor exceptions.  Isochronous
+     data works differently with continuous streams of data being sent to or
+     from the device. The audio and video camera drivers are very good examples
+     of drivers that handle isochronous data and will be useful if you also
+     need to do this.
+  </para>
+  </chapter>
+  
+  <chapter id="Conclusion">
+      <title>Conclusion</title>
+  <para>
+     Writing Linux USB device drivers is not a difficult task as the
+     usb-skeleton driver shows. This driver, combined with the other current
+     USB drivers, should provide enough examples to help a beginning author
+     create a working driver in a minimal amount of time. The linux-usb-devel
+     mailing list archives also contain a lot of helpful information.
+  </para>
+  </chapter>
+
+  <chapter id="resources">
+      <title>Resources</title>
+  <para>
+     The Linux USB Project: <ulink url="http://www.linux-usb.org">http://www.linux-usb.org/</ulink>
+  </para>
+  <para>
+     Linux Hotplug Project: <ulink url="http://linux-hotplug.sourceforge.net">http://linux-hotplug.sourceforge.net/</ulink>
+  </para>
+  <para>
+     Linux USB Working Devices List: <ulink url="http://www.qbik.ch/usb/devices">http://www.qbik.ch/usb/devices/</ulink>
+  </para>
+  <para>
+     linux-usb-devel Mailing List Archives: <ulink url="http://marc.theaimsgroup.com/?l=linux-usb-devel">http://marc.theaimsgroup.com/?l=linux-usb-devel</ulink>
+  </para>
+  <para>
+     Programming Guide for Linux USB Device Drivers: <ulink url="http://usb.cs.tum.edu/usbdoc">http://usb.cs.tum.edu/usbdoc</ulink>
+  </para>
+  <para>
+     USB Home Page: <ulink url="http://www.usb.org">http://www.usb.org</ulink>
+  </para>
+  </chapter>
+
+</book>
diff --git a/Documentation/DocBook/z8530book.tmpl b/Documentation/DocBook/z8530book.tmpl
new file mode 100644
index 0000000..6f3883b
--- /dev/null
+++ b/Documentation/DocBook/z8530book.tmpl
@@ -0,0 +1,371 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="Z85230Guide">
+ <bookinfo>
+  <title>Z8530 Programming Guide</title>
+  
+  <authorgroup>
+   <author>
+    <firstname>Alan</firstname>
+    <surname>Cox</surname>
+    <affiliation>
+     <address>
+      <email>alan@lxorguk.ukuu.org.uk</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <copyright>
+   <year>2000</year>
+   <holder>Alan Cox</holder>
+  </copyright>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License as published by the Free Software Foundation; either
+     version 2 of the License, or (at your option) any later
+     version.
+   </para>
+      
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+      
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+      
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+  <chapter id="intro">
+      <title>Introduction</title>
+  <para>
+	The Z85x30 family synchronous/asynchronous controller chips are
+	used on a large number of cheap network interface cards. The
+	kernel provides a core interface layer that is designed to make
+	it easy to provide WAN services using this chip.
+  </para>
+  <para>
+	The current driver only support synchronous operation. Merging the
+	asynchronous driver support into this code to allow any Z85x30
+	device to be used as both a tty interface and as a synchronous 
+	controller is a project for Linux post the 2.4 release
+  </para>
+  </chapter>
+  
+  <chapter id="Driver_Modes">
+ 	<title>Driver Modes</title>
+  <para>
+	The Z85230 driver layer can drive Z8530, Z85C30 and Z85230 devices
+	in three different modes. Each mode can be applied to an individual
+	channel on the chip (each chip has two channels).
+  </para>
+  <para>
+	The PIO synchronous mode supports the most common Z8530 wiring. Here
+	the chip is interface to the I/O and interrupt facilities of the
+	host machine but not to the DMA subsystem. When running PIO the
+	Z8530 has extremely tight timing requirements. Doing high speeds,
+	even with a Z85230 will be tricky. Typically you should expect to
+	achieve at best 9600 baud with a Z8C530 and 64Kbits with a Z85230.
+  </para>
+  <para>
+	The DMA mode supports the chip when it is configured to use dual DMA
+	channels on an ISA bus. The better cards tend to support this mode
+	of operation for a single channel. With DMA running the Z85230 tops
+	out when it starts to hit ISA DMA constraints at about 512Kbits. It
+	is worth noting here that many PC machines hang or crash when the
+	chip is driven fast enough to hold the ISA bus solid.
+  </para>
+  <para>
+	Transmit DMA mode uses a single DMA channel. The DMA channel is used
+	for transmission as the transmit FIFO is smaller than the receive
+	FIFO. it gives better performance than pure PIO mode but is nowhere
+	near as ideal as pure DMA mode. 
+  </para>
+  </chapter>
+
+  <chapter id="Using_the_Z85230_driver">
+ 	<title>Using the Z85230 driver</title>
+  <para>
+	The Z85230 driver provides the back end interface to your board. To
+	configure a Z8530 interface you need to detect the board and to 
+	identify its ports and interrupt resources. It is also your problem
+	to verify the resources are available.
+  </para>
+  <para>
+	Having identified the chip you need to fill in a struct z8530_dev,
+	which describes each chip. This object must exist until you finally
+	shutdown the board. Firstly zero the active field. This ensures 
+	nothing goes off without you intending it. The irq field should
+	be set to the interrupt number of the chip. (Each chip has a single
+	interrupt source rather than each channel). You are responsible
+	for allocating the interrupt line. The interrupt handler should be
+	set to <function>z8530_interrupt</function>. The device id should
+	be set to the z8530_dev structure pointer. Whether the interrupt can
+	be shared or not is board dependent, and up to you to initialise.
+  </para>
+  <para>
+	The structure holds two channel structures. 
+	Initialise chanA.ctrlio and chanA.dataio with the address of the
+	control and data ports. You can or this with Z8530_PORT_SLEEP to
+	indicate your interface needs the 5uS delay for chip settling done
+	in software. The PORT_SLEEP option is architecture specific. Other
+	flags may become available on future platforms, eg for MMIO.
+	Initialise the chanA.irqs to &amp;z8530_nop to start the chip up
+	as disabled and discarding interrupt events. This ensures that
+	stray interrupts will be mopped up and not hang the bus. Set
+	chanA.dev to point to the device structure itself. The
+	private and name field you may use as you wish. The private field
+	is unused by the Z85230 layer. The name is used for error reporting
+	and it may thus make sense to make it match the network name.
+  </para>
+  <para>
+	Repeat the same operation with the B channel if your chip has
+	both channels wired to something useful. This isn't always the
+	case. If it is not wired then the I/O values do not matter, but
+	you must initialise chanB.dev.
+  </para>
+  <para>
+	If your board has DMA facilities then initialise the txdma and
+	rxdma fields for the relevant channels. You must also allocate the
+	ISA DMA channels and do any necessary board level initialisation
+	to configure them. The low level driver will do the Z8530 and
+	DMA controller programming but not board specific magic.
+  </para>
+  <para>
+	Having initialised the device you can then call
+	<function>z8530_init</function>. This will probe the chip and 
+	reset it into a known state. An identification sequence is then
+	run to identify the chip type. If the checks fail to pass the
+	function returns a non zero error code. Typically this indicates
+	that the port given is not valid. After this call the
+	type field of the z8530_dev structure is initialised to either
+	Z8530, Z85C30 or Z85230 according to the chip found.
+  </para>
+  <para>
+	Once you have called z8530_init you can also make use of the utility
+	function <function>z8530_describe</function>. This provides a 
+	consistent reporting format for the Z8530 devices, and allows all
+	the drivers to provide consistent reporting.
+  </para>
+  </chapter>
+
+  <chapter id="Attaching_Network_Interfaces">
+ 	<title>Attaching Network Interfaces</title>
+  <para>
+	If you wish to use the network interface facilities of the driver,
+	then you need to attach a network device to each channel that is
+	present and in use. In addition to use the generic HDLC
+	you need to follow some additional plumbing rules. They may seem 
+	complex but a look at the example hostess_sv11 driver should
+	reassure you.
+  </para>
+  <para>
+	The network device used for each channel should be pointed to by
+	the netdevice field of each channel. The hdlc-&gt; priv field of the
+	network device points to your private data - you will need to be
+	able to find your private data from this.
+  </para>
+  <para>
+	The way most drivers approach this particular problem is to
+	create a structure holding the Z8530 device definition and
+	put that into the private field of the network device. The
+	network device fields of the channels then point back to the
+	network devices.
+  </para>
+  <para>
+	If you wish to use the generic HDLC then you need to register
+	the HDLC device.
+  </para>
+  <para>
+	Before you register your network device you will also need to
+	provide suitable handlers for most of the network device callbacks. 
+	See the network device documentation for more details on this.
+  </para>
+  </chapter>
+
+  <chapter id="Configuring_And_Activating_The_Port">
+ 	<title>Configuring And Activating The Port</title>
+  <para>
+	The Z85230 driver provides helper functions and tables to load the
+	port registers on the Z8530 chips. When programming the register
+	settings for a channel be aware that the documentation recommends
+	initialisation orders. Strange things happen when these are not
+	followed. 
+  </para>
+  <para>
+	<function>z8530_channel_load</function> takes an array of
+	pairs of initialisation values in an array of u8 type. The first
+	value is the Z8530 register number. Add 16 to indicate the alternate
+	register bank on the later chips. The array is terminated by a 255.
+  </para>
+  <para>
+	The driver provides a pair of public tables. The
+	z8530_hdlc_kilostream table is for the UK 'Kilostream' service and
+	also happens to cover most other end host configurations. The
+	z8530_hdlc_kilostream_85230 table is the same configuration using
+	the enhancements of the 85230 chip. The configuration loaded is
+	standard NRZ encoded synchronous data with HDLC bitstuffing. All
+	of the timing is taken from the other end of the link.
+  </para>
+  <para>
+	When writing your own tables be aware that the driver internally
+	tracks register values. It may need to reload values. You should
+	therefore be sure to set registers 1-7, 9-11, 14 and 15 in all
+	configurations. Where the register settings depend on DMA selection
+	the driver will update the bits itself when you open or close.
+	Loading a new table with the interface open is not recommended.
+  </para>
+  <para>
+	There are three standard configurations supported by the core
+	code. In PIO mode the interface is programmed up to use
+	interrupt driven PIO. This places high demands on the host processor
+	to avoid latency. The driver is written to take account of latency
+	issues but it cannot avoid latencies caused by other drivers,
+	notably IDE in PIO mode. Because the drivers allocate buffers you
+	must also prevent MTU changes while the port is open.
+  </para>
+  <para>
+	Once the port is open it will call the rx_function of each channel
+	whenever a completed packet arrived. This is invoked from
+	interrupt context and passes you the channel and a network	
+	buffer (struct sk_buff) holding the data. The data includes
+	the CRC bytes so most users will want to trim the last two
+	bytes before processing the data. This function is very timing
+	critical. When you wish to simply discard data the support
+	code provides the function <function>z8530_null_rx</function>
+	to discard the data.
+  </para>
+  <para>
+	To active PIO mode sending and receiving the <function>
+	z8530_sync_open</function> is called. This expects to be passed
+	the network device and the channel. Typically this is called from
+	your network device open callback. On a failure a non zero error
+	status is returned. The <function>z8530_sync_close</function> 
+	function shuts down a PIO channel. This must be done before the 
+	channel is opened again	and before the driver shuts down 
+	and unloads.
+  </para>
+  <para>
+	The ideal mode of operation is dual channel DMA mode. Here the
+	kernel driver will configure the board for DMA in both directions.
+	The driver also handles ISA DMA issues such as controller
+	programming and the memory range limit for you. This mode is
+	activated by calling the <function>z8530_sync_dma_open</function>
+	function. On failure a non zero error value is returned.
+	Once this mode is activated it can be shut down by calling the
+	<function>z8530_sync_dma_close</function>. You must call the close
+	function matching the open mode you used.
+  </para>
+  <para>
+	The final supported mode uses a single DMA channel to drive the
+	transmit side. As the Z85C30 has a larger FIFO on the receive
+	channel	this tends to increase the maximum speed a little. 
+	This is activated by calling the <function>z8530_sync_txdma_open
+	</function>. This returns a non zero error code on failure. The
+	<function>z8530_sync_txdma_close</function> function closes down
+	the Z8530 interface from this mode.
+  </para>
+  </chapter>
+
+  <chapter id="Network_Layer_Functions">
+ 	<title>Network Layer Functions</title>
+  <para>
+	The Z8530 layer provides functions to queue packets for
+	transmission. The driver internally buffers the frame currently
+	being transmitted and one further frame (in order to keep back
+	to back transmission running). Any further buffering is up to
+	the caller.
+  </para>
+  <para>
+	The function <function>z8530_queue_xmit</function> takes a network
+	buffer in sk_buff format and queues it for transmission. The
+	caller must provide the entire packet with the exception of the
+	bitstuffing and CRC. This is normally done by the caller via
+	the generic HDLC interface layer. It returns 0 if the buffer has been
+	queued and non zero values for queue full. If the function accepts
+	the buffer it becomes property of the Z8530 layer and the caller
+	should not free it.
+  </para>
+  <para>
+	The function <function>z8530_get_stats</function> returns a pointer
+	to an internally maintained per interface statistics block. This
+	provides most of the interface code needed to implement the network
+	layer get_stats callback.
+  </para>
+  </chapter>
+
+  <chapter id="Porting_The_Z8530_Driver">
+     <title>Porting The Z8530 Driver</title>
+  <para>
+	The Z8530 driver is written to be portable. In DMA mode it makes
+	assumptions about the use of ISA DMA. These are probably warranted
+	in most cases as the Z85230 in particular was designed to glue to PC
+	type machines. The PIO mode makes no real assumptions.
+  </para>
+  <para>
+	Should you need to retarget the Z8530 driver to another architecture
+	the only code that should need changing are the port I/O functions.
+	At the moment these assume PC I/O port accesses. This may not be
+	appropriate for all platforms. Replacing 
+	<function>z8530_read_port</function> and <function>z8530_write_port
+	</function> is intended to be all that is required to port this
+	driver layer.
+  </para>
+  </chapter>
+
+  <chapter id="bugs">
+     <title>Known Bugs And Assumptions</title>
+  <para>
+  <variablelist>
+    <varlistentry><term>Interrupt Locking</term>
+    <listitem>
+    <para>
+	The locking in the driver is done via the global cli/sti lock. This
+	makes for relatively poor SMP performance. Switching this to use a
+	per device spin lock would probably materially improve performance.
+    </para>
+    </listitem></varlistentry>
+
+    <varlistentry><term>Occasional Failures</term>
+    <listitem>
+    <para>
+	We have reports of occasional failures when run for very long
+	periods of time and the driver starts to receive junk frames. At
+	the moment the cause of this is not clear.
+    </para>
+    </listitem></varlistentry>
+  </variablelist>
+	
+  </para>
+  </chapter>
+
+  <chapter id="pubfunctions">
+     <title>Public Functions Provided</title>
+!Edrivers/net/wan/z85230.c
+  </chapter>
+
+  <chapter id="intfunctions">
+     <title>Internal Functions</title>
+!Idrivers/net/wan/z85230.c
+  </chapter>
+
+</book>